path: root/src/mongo/db/repl
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r--  src/mongo/db/repl/applier.cpp | 318
-rw-r--r--  src/mongo/db/repl/applier.h | 243
-rw-r--r--  src/mongo/db/repl/applier_test.cpp | 1177
-rw-r--r--  src/mongo/db/repl/base_cloner.h | 76
-rw-r--r--  src/mongo/db/repl/base_cloner_test_fixture.cpp | 433
-rw-r--r--  src/mongo/db/repl/base_cloner_test_fixture.h | 201
-rw-r--r--  src/mongo/db/repl/bgsync.cpp | 861
-rw-r--r--  src/mongo/db/repl/bgsync.h | 220
-rw-r--r--  src/mongo/db/repl/check_quorum_for_config_change.cpp | 409
-rw-r--r--  src/mongo/db/repl/check_quorum_for_config_change.h | 200
-rw-r--r--  src/mongo/db/repl/check_quorum_for_config_change_test.cpp | 1440
-rw-r--r--  src/mongo/db/repl/collection_cloner.cpp | 438
-rw-r--r--  src/mongo/db/repl/collection_cloner.h | 403
-rw-r--r--  src/mongo/db/repl/collection_cloner_test.cpp | 745
-rw-r--r--  src/mongo/db/repl/data_replicator.cpp | 2163
-rw-r--r--  src/mongo/db/repl/data_replicator.h | 71
-rw-r--r--  src/mongo/db/repl/data_replicator_test.cpp | 1067
-rw-r--r--  src/mongo/db/repl/database_cloner.cpp | 483
-rw-r--r--  src/mongo/db/repl/database_cloner.h | 303
-rw-r--r--  src/mongo/db/repl/database_cloner_test.cpp | 913
-rw-r--r--  src/mongo/db/repl/database_task.cpp | 117
-rw-r--r--  src/mongo/db/repl/database_task.h | 54
-rw-r--r--  src/mongo/db/repl/database_task_test.cpp | 282
-rw-r--r--  src/mongo/db/repl/elect_cmd_runner.cpp | 203
-rw-r--r--  src/mongo/db/repl/elect_cmd_runner.h | 151
-rw-r--r--  src/mongo/db/repl/elect_cmd_runner_test.cpp | 672
-rw-r--r--  src/mongo/db/repl/election_winner_declarer.cpp | 148
-rw-r--r--  src/mongo/db/repl/election_winner_declarer.h | 143
-rw-r--r--  src/mongo/db/repl/election_winner_declarer_test.cpp | 324
-rw-r--r--  src/mongo/db/repl/freshness_checker.cpp | 330
-rw-r--r--  src/mongo/db/repl/freshness_checker.h | 225
-rw-r--r--  src/mongo/db/repl/freshness_checker_test.cpp | 1825
-rw-r--r--  src/mongo/db/repl/handshake_args.cpp | 118
-rw-r--r--  src/mongo/db/repl/handshake_args.h | 116
-rw-r--r--  src/mongo/db/repl/heartbeat_response_action.cpp | 58
-rw-r--r--  src/mongo/db/repl/heartbeat_response_action.h | 158
-rw-r--r--  src/mongo/db/repl/initial_sync.cpp | 37
-rw-r--r--  src/mongo/db/repl/initial_sync.h | 46
-rw-r--r--  src/mongo/db/repl/is_master_response.cpp | 714
-rw-r--r--  src/mongo/db/repl/is_master_response.h | 293
-rw-r--r--  src/mongo/db/repl/isself.cpp | 423
-rw-r--r--  src/mongo/db/repl/isself.h | 48
-rw-r--r--  src/mongo/db/repl/isself_test.cpp | 58
-rw-r--r--  src/mongo/db/repl/last_vote.cpp | 77
-rw-r--r--  src/mongo/db/repl/last_vote.h | 34
-rw-r--r--  src/mongo/db/repl/master_slave.cpp | 2280
-rw-r--r--  src/mongo/db/repl/master_slave.h | 281
-rw-r--r--  src/mongo/db/repl/member_config.cpp | 482
-rw-r--r--  src/mongo/db/repl/member_config.h | 291
-rw-r--r--  src/mongo/db/repl/member_config_test.cpp | 733
-rw-r--r--  src/mongo/db/repl/member_heartbeat_data.cpp | 120
-rw-r--r--  src/mongo/db/repl/member_heartbeat_data.h | 172
-rw-r--r--  src/mongo/db/repl/member_state.h | 140
-rw-r--r--  src/mongo/db/repl/minvalid.cpp | 122
-rw-r--r--  src/mongo/db/repl/minvalid.h | 58
-rw-r--r--  src/mongo/db/repl/operation_context_repl_mock.cpp | 68
-rw-r--r--  src/mongo/db/repl/operation_context_repl_mock.h | 48
-rw-r--r--  src/mongo/db/repl/oplog.cpp | 1329
-rw-r--r--  src/mongo/db/repl/oplog.h | 189
-rw-r--r--  src/mongo/db/repl/oplog_interface.h | 56
-rw-r--r--  src/mongo/db/repl/oplog_interface_local.cpp | 106
-rw-r--r--  src/mongo/db/repl/oplog_interface_local.h | 33
-rw-r--r--  src/mongo/db/repl/oplog_interface_mock.cpp | 72
-rw-r--r--  src/mongo/db/repl/oplog_interface_mock.h | 36
-rw-r--r--  src/mongo/db/repl/oplog_interface_remote.cpp | 81
-rw-r--r--  src/mongo/db/repl/oplog_interface_remote.h | 33
-rw-r--r--  src/mongo/db/repl/oplogreader.cpp | 264
-rw-r--r--  src/mongo/db/repl/oplogreader.h | 223
-rw-r--r--  src/mongo/db/repl/optime.cpp | 46
-rw-r--r--  src/mongo/db/repl/optime.h | 92
-rw-r--r--  src/mongo/db/repl/read_after_optime_args.cpp | 94
-rw-r--r--  src/mongo/db/repl/read_after_optime_args.h | 56
-rw-r--r--  src/mongo/db/repl/read_after_optime_args_test.cpp | 145
-rw-r--r--  src/mongo/db/repl/read_after_optime_response.cpp | 62
-rw-r--r--  src/mongo/db/repl/read_after_optime_response.h | 100
-rw-r--r--  src/mongo/db/repl/read_after_optime_response_test.cpp | 66
-rw-r--r--  src/mongo/db/repl/repl_client_info.cpp | 14
-rw-r--r--  src/mongo/db/repl/repl_client_info.h | 58
-rw-r--r--  src/mongo/db/repl/repl_set_command.cpp | 20
-rw-r--r--  src/mongo/db/repl/repl_set_command.h | 42
-rw-r--r--  src/mongo/db/repl/repl_set_declare_election_winner.cpp | 55
-rw-r--r--  src/mongo/db/repl/repl_set_declare_election_winner_args.cpp | 237
-rw-r--r--  src/mongo/db/repl/repl_set_declare_election_winner_args.h | 64
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_args.cpp | 245
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_args.h | 210
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_args_v1.cpp | 178
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_args_v1.h | 168
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_response.cpp | 618
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_response.h | 399
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_response_test.cpp | 1450
-rw-r--r--  src/mongo/db/repl/repl_set_html_summary.cpp | 306
-rw-r--r--  src/mongo/db/repl/repl_set_html_summary.h | 121
-rw-r--r--  src/mongo/db/repl/repl_set_request_votes.cpp | 55
-rw-r--r--  src/mongo/db/repl/repl_set_request_votes_args.cpp | 326
-rw-r--r--  src/mongo/db/repl/repl_set_request_votes_args.h | 106
-rw-r--r--  src/mongo/db/repl/repl_settings.cpp | 13
-rw-r--r--  src/mongo/db/repl/repl_settings.h | 171
-rw-r--r--  src/mongo/db/repl/replica_set_config.cpp | 919
-rw-r--r--  src/mongo/db/repl/replica_set_config.h | 446
-rw-r--r--  src/mongo/db/repl/replica_set_config_checks.cpp | 409
-rw-r--r--  src/mongo/db/repl/replica_set_config_checks.h | 100
-rw-r--r--  src/mongo/db/repl/replica_set_config_checks_test.cpp | 1327
-rw-r--r--  src/mongo/db/repl/replica_set_config_test.cpp | 2103
-rw-r--r--  src/mongo/db/repl/replica_set_tag.cpp | 374
-rw-r--r--  src/mongo/db/repl/replica_set_tag.h | 481
-rw-r--r--  src/mongo/db/repl/replica_set_tag_test.cpp | 238
-rw-r--r--  src/mongo/db/repl/replication_coordinator.cpp | 38
-rw-r--r--  src/mongo/db/repl/replication_coordinator.h | 1192
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.cpp | 8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.h | 288
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 464
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.h | 96
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.cpp | 247
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.h | 201
-rw-r--r--  src/mongo/db/repl/replication_coordinator_global.cpp | 23
-rw-r--r--  src/mongo/db/repl/replication_coordinator_global.h | 8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp | 4668
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h | 1772
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect.cpp | 397
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp | 688
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp | 407
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 1085
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 715
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp | 394
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp | 394
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp | 999
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_test.cpp | 4106
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.cpp | 615
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.h | 215
-rw-r--r--  src/mongo/db/repl/replication_coordinator_test_fixture.cpp | 503
-rw-r--r--  src/mongo/db/repl/replication_coordinator_test_fixture.h | 310
-rw-r--r--  src/mongo/db/repl/replication_executor.cpp | 1016
-rw-r--r--  src/mongo/db/repl/replication_executor.h | 638
-rw-r--r--  src/mongo/db/repl/replication_executor_test.cpp | 943
-rw-r--r--  src/mongo/db/repl/replication_executor_test_fixture.cpp | 65
-rw-r--r--  src/mongo/db/repl/replication_executor_test_fixture.h | 123
-rw-r--r--  src/mongo/db/repl/replication_info.cpp | 332
-rw-r--r--  src/mongo/db/repl/replset_commands.cpp | 1365
-rw-r--r--  src/mongo/db/repl/replset_web_handler.cpp | 85
-rw-r--r--  src/mongo/db/repl/reporter.cpp | 204
-rw-r--r--  src/mongo/db/repl/reporter.h | 174
-rw-r--r--  src/mongo/db/repl/reporter_test.cpp | 476
-rw-r--r--  src/mongo/db/repl/resync.cpp | 164
-rw-r--r--  src/mongo/db/repl/roll_back_local_operations.cpp | 266
-rw-r--r--  src/mongo/db/repl/roll_back_local_operations.h | 90
-rw-r--r--  src/mongo/db/repl/roll_back_local_operations_test.cpp | 733
-rw-r--r--  src/mongo/db/repl/rollback_source.h | 95
-rw-r--r--  src/mongo/db/repl/rollback_source_impl.cpp | 88
-rw-r--r--  src/mongo/db/repl/rollback_source_impl.h | 44
-rw-r--r--  src/mongo/db/repl/rs_initialsync.cpp | 805
-rw-r--r--  src/mongo/db/repl/rs_initialsync.h | 10
-rw-r--r--  src/mongo/db/repl/rs_rollback.cpp | 1251
-rw-r--r--  src/mongo/db/repl/rs_rollback.h | 90
-rw-r--r--  src/mongo/db/repl/rs_rollback_test.cpp | 985
-rw-r--r--  src/mongo/db/repl/rs_sync.cpp | 140
-rw-r--r--  src/mongo/db/repl/rs_sync.h | 8
-rw-r--r--  src/mongo/db/repl/rslog.cpp | 14
-rw-r--r--  src/mongo/db/repl/rslog.h | 14
-rw-r--r--  src/mongo/db/repl/scatter_gather_algorithm.cpp | 2
-rw-r--r--  src/mongo/db/repl/scatter_gather_algorithm.h | 65
-rw-r--r--  src/mongo/db/repl/scatter_gather_runner.cpp | 200
-rw-r--r--  src/mongo/db/repl/scatter_gather_runner.h | 154
-rw-r--r--  src/mongo/db/repl/scatter_gather_test.cpp | 602
-rw-r--r--  src/mongo/db/repl/storage_interface.cpp | 8
-rw-r--r--  src/mongo/db/repl/storage_interface.h | 32
-rw-r--r--  src/mongo/db/repl/storage_interface_impl.cpp | 20
-rw-r--r--  src/mongo/db/repl/storage_interface_impl.h | 17
-rw-r--r--  src/mongo/db/repl/storage_interface_mock.cpp | 12
-rw-r--r--  src/mongo/db/repl/storage_interface_mock.h | 16
-rw-r--r--  src/mongo/db/repl/sync_source_feedback.cpp | 241
-rw-r--r--  src/mongo/db/repl/sync_source_feedback.h | 88
-rw-r--r--  src/mongo/db/repl/sync_tail.cpp | 1290
-rw-r--r--  src/mongo/db/repl/sync_tail.h | 286
-rw-r--r--  src/mongo/db/repl/sync_tail_test.cpp | 486
-rw-r--r--  src/mongo/db/repl/task_runner.cpp | 255
-rw-r--r--  src/mongo/db/repl/task_runner.h | 240
-rw-r--r--  src/mongo/db/repl/task_runner_test.cpp | 567
-rw-r--r--  src/mongo/db/repl/task_runner_test_fixture.cpp | 113
-rw-r--r--  src/mongo/db/repl/task_runner_test_fixture.h | 65
-rw-r--r--  src/mongo/db/repl/topology_coordinator.cpp | 24
-rw-r--r--  src/mongo/db/repl/topology_coordinator.h | 850
-rw-r--r--  src/mongo/db/repl/topology_coordinator_impl.cpp | 3770
-rw-r--r--  src/mongo/db/repl/topology_coordinator_impl.h | 744
-rw-r--r--  src/mongo/db/repl/topology_coordinator_impl_test.cpp | 9615
-rw-r--r--  src/mongo/db/repl/update_position_args.cpp | 171
-rw-r--r--  src/mongo/db/repl/update_position_args.h | 77
-rw-r--r--  src/mongo/db/repl/vote_requester.cpp | 219
-rw-r--r--  src/mongo/db/repl/vote_requester.h | 143
-rw-r--r--  src/mongo/db/repl/vote_requester_test.cpp | 623
189 files changed, 44348 insertions, 44647 deletions
diff --git a/src/mongo/db/repl/applier.cpp b/src/mongo/db/repl/applier.cpp
index f1d6942e87a..9000e0ebf65 100644
--- a/src/mongo/db/repl/applier.cpp
+++ b/src/mongo/db/repl/applier.cpp
@@ -40,199 +40,187 @@
namespace mongo {
namespace repl {
- Applier::Applier(ReplicationExecutor* executor,
- const Operations& operations,
- const ApplyOperationFn& applyOperation,
- const CallbackFn& onCompletion)
- : _executor(executor),
- _operations(operations),
- _applyOperation(applyOperation),
- _onCompletion(onCompletion),
- _active(false) {
-
- uassert(ErrorCodes::BadValue, "null replication executor", executor);
- uassert(ErrorCodes::BadValue, "empty list of operations", !operations.empty());
- uassert(ErrorCodes::FailedToParse,
- str::stream() << "last operation missing 'ts' field: " << operations.back(),
- operations.back().hasField("ts"));
- uassert(ErrorCodes::TypeMismatch,
- str::stream() << "'ts' in last operation not a timestamp: " << operations.back(),
- BSONType::bsonTimestamp == operations.back().getField("ts").type());
- uassert(ErrorCodes::BadValue, "apply operation function cannot be null", applyOperation);
- uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
+Applier::Applier(ReplicationExecutor* executor,
+ const Operations& operations,
+ const ApplyOperationFn& applyOperation,
+ const CallbackFn& onCompletion)
+ : _executor(executor),
+ _operations(operations),
+ _applyOperation(applyOperation),
+ _onCompletion(onCompletion),
+ _active(false) {
+ uassert(ErrorCodes::BadValue, "null replication executor", executor);
+ uassert(ErrorCodes::BadValue, "empty list of operations", !operations.empty());
+ uassert(ErrorCodes::FailedToParse,
+ str::stream() << "last operation missing 'ts' field: " << operations.back(),
+ operations.back().hasField("ts"));
+ uassert(ErrorCodes::TypeMismatch,
+ str::stream() << "'ts' in last operation not a timestamp: " << operations.back(),
+ BSONType::bsonTimestamp == operations.back().getField("ts").type());
+ uassert(ErrorCodes::BadValue, "apply operation function cannot be null", applyOperation);
+ uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
+}
+
+Applier::~Applier() {
+ DESTRUCTOR_GUARD(cancel(); wait(););
+}
+
+std::string Applier::getDiagnosticString() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ str::stream output;
+ output << "Applier";
+ output << " executor: " << _executor->getDiagnosticString();
+ output << " active: " << _active;
+ return output;
+}
+
+bool Applier::isActive() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _active;
+}
+
+Status Applier::start() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+ if (_active) {
+ return Status(ErrorCodes::IllegalOperation, "applier already started");
}
- Applier::~Applier() {
- DESTRUCTOR_GUARD(
- cancel();
- wait();
- );
+ auto scheduleResult =
+ _executor->scheduleDBWork(stdx::bind(&Applier::_callback, this, stdx::placeholders::_1));
+ if (!scheduleResult.isOK()) {
+ return scheduleResult.getStatus();
}
- std::string Applier::getDiagnosticString() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- str::stream output;
- output << "Applier";
- output << " executor: " << _executor->getDiagnosticString();
- output << " active: " << _active;
- return output;
- }
+ _active = true;
+ _dbWorkCallbackHandle = scheduleResult.getValue();
- bool Applier::isActive() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _active;
- }
+ return Status::OK();
+}
- Status Applier::start() {
+void Applier::cancel() {
+ ReplicationExecutor::CallbackHandle dbWorkCallbackHandle;
+ {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_active) {
- return Status(ErrorCodes::IllegalOperation, "applier already started");
+ if (!_active) {
+ return;
}
- auto scheduleResult = _executor->scheduleDBWork(
- stdx::bind(&Applier::_callback, this, stdx::placeholders::_1));
- if (!scheduleResult.isOK()) {
- return scheduleResult.getStatus();
- }
+ dbWorkCallbackHandle = _dbWorkCallbackHandle;
+ }
- _active = true;
- _dbWorkCallbackHandle = scheduleResult.getValue();
+ if (dbWorkCallbackHandle.isValid()) {
+ _executor->cancel(dbWorkCallbackHandle);
+ }
+}
+
+void Applier::wait() {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
- return Status::OK();
+ while (_active) {
+ _condition.wait(lk);
}
+}
- void Applier::cancel() {
- ReplicationExecutor::CallbackHandle dbWorkCallbackHandle;
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+void Applier::_callback(const ReplicationExecutor::CallbackArgs& cbd) {
+ if (!cbd.status.isOK()) {
+ _finishCallback(cbd.status, _operations);
+ return;
+ }
- if (!_active) {
- return;
- }
+ invariant(cbd.txn);
- dbWorkCallbackHandle = _dbWorkCallbackHandle;
- }
+ // Refer to multiSyncApply() and multiInitialSyncApply() in sync_tail.cpp.
+ cbd.txn->setReplicatedWrites(false);
- if (dbWorkCallbackHandle.isValid()) {
- _executor->cancel(dbWorkCallbackHandle);
- }
- }
+ // allow us to get through the magic barrier
+ cbd.txn->lockState()->setIsBatchWriter(true);
- void Applier::wait() {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
+ Status applyStatus(ErrorCodes::InternalError, "not mutated");
- while (_active) {
- _condition.wait(lk);
+ invariant(!_operations.empty());
+ for (auto i = _operations.cbegin(); i != _operations.cend(); ++i) {
+ try {
+ applyStatus = _applyOperation(cbd.txn, *i);
+ } catch (...) {
+ applyStatus = exceptionToStatus();
}
- }
-
- void Applier::_callback(const ReplicationExecutor::CallbackArgs& cbd) {
- if (!cbd.status.isOK()) {
- _finishCallback(cbd.status, _operations);
+ if (!applyStatus.isOK()) {
+ // 'i' points to last operation that was not applied.
+ _finishCallback(applyStatus, Operations(i, _operations.cend()));
return;
}
-
- invariant(cbd.txn);
-
- // Refer to multiSyncApply() and multiInitialSyncApply() in sync_tail.cpp.
- cbd.txn->setReplicatedWrites(false);
-
- // allow us to get through the magic barrier
- cbd.txn->lockState()->setIsBatchWriter(true);
-
- Status applyStatus(ErrorCodes::InternalError, "not mutated");
-
- invariant(!_operations.empty());
- for (auto i = _operations.cbegin(); i != _operations.cend(); ++i) {
- try {
- applyStatus = _applyOperation(cbd.txn, *i);
- }
- catch (...) {
- applyStatus = exceptionToStatus();
- }
- if (!applyStatus.isOK()) {
- // 'i' points to last operation that was not applied.
- _finishCallback(applyStatus, Operations(i, _operations.cend()));
- return;
- }
- }
- _finishCallback(_operations.back().getField("ts").timestamp(), Operations());
}
+ _finishCallback(_operations.back().getField("ts").timestamp(), Operations());
+}
- void Applier::_finishCallback(const StatusWith<Timestamp>& result,
- const Operations& operations) {
- _onCompletion(result, operations);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _active = false;
- _condition.notify_all();
- }
+void Applier::_finishCallback(const StatusWith<Timestamp>& result, const Operations& operations) {
+ _onCompletion(result, operations);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _active = false;
+ _condition.notify_all();
+}
namespace {
- void pauseBeforeCompletion(
- const StatusWith<Timestamp>& result,
- const Applier::Operations& operationsOnCompletion,
- const PauseDataReplicatorFn& pauseDataReplicator,
- const Applier::CallbackFn& onCompletion) {
-
- if (result.isOK()) {
- pauseDataReplicator();
+void pauseBeforeCompletion(const StatusWith<Timestamp>& result,
+ const Applier::Operations& operationsOnCompletion,
+ const PauseDataReplicatorFn& pauseDataReplicator,
+ const Applier::CallbackFn& onCompletion) {
+ if (result.isOK()) {
+ pauseDataReplicator();
+ }
+ onCompletion(result, operationsOnCompletion);
+};
+
+} // namespace
+
+StatusWith<std::pair<std::unique_ptr<Applier>, Applier::Operations>> applyUntilAndPause(
+ ReplicationExecutor* executor,
+ const Applier::Operations& operations,
+ const Applier::ApplyOperationFn& applyOperation,
+ const Timestamp& lastTimestampToApply,
+ const PauseDataReplicatorFn& pauseDataReplicator,
+ const Applier::CallbackFn& onCompletion) {
+ try {
+ auto comp = [](const BSONObj& left, const BSONObj& right) {
+ uassert(ErrorCodes::FailedToParse,
+ str::stream() << "Operation missing 'ts' field': " << left,
+ left.hasField("ts"));
+ uassert(ErrorCodes::FailedToParse,
+ str::stream() << "Operation missing 'ts' field': " << right,
+ right.hasField("ts"));
+ return left["ts"].timestamp() < right["ts"].timestamp();
+ };
+ auto wrapped = BSON("ts" << lastTimestampToApply);
+ auto i = std::lower_bound(operations.cbegin(), operations.cend(), wrapped, comp);
+ bool found = i != operations.cend() && !comp(wrapped, *i);
+ auto j = found ? i + 1 : i;
+ Applier::Operations operationsInRange(operations.cbegin(), j);
+ Applier::Operations operationsNotInRange(j, operations.cend());
+ if (!found) {
+ return std::make_pair(std::unique_ptr<Applier>(new Applier(
+ executor, operationsInRange, applyOperation, onCompletion)),
+ operationsNotInRange);
}
- onCompletion(result, operationsOnCompletion);
- };
-
-} // namespace
-
- StatusWith<std::pair<std::unique_ptr<Applier>, Applier::Operations> > applyUntilAndPause(
- ReplicationExecutor* executor,
- const Applier::Operations& operations,
- const Applier::ApplyOperationFn& applyOperation,
- const Timestamp& lastTimestampToApply,
- const PauseDataReplicatorFn& pauseDataReplicator,
- const Applier::CallbackFn& onCompletion) {
- try {
- auto comp = [](const BSONObj& left, const BSONObj& right) {
- uassert(ErrorCodes::FailedToParse,
- str::stream() << "Operation missing 'ts' field': " << left,
- left.hasField("ts"));
- uassert(ErrorCodes::FailedToParse,
- str::stream() << "Operation missing 'ts' field': " << right,
- right.hasField("ts"));
- return left["ts"].timestamp() < right["ts"].timestamp();
- };
- auto wrapped = BSON("ts" << lastTimestampToApply);
- auto i = std::lower_bound(operations.cbegin(), operations.cend(), wrapped, comp);
- bool found = i != operations.cend() && !comp(wrapped, *i);
- auto j = found ? i+1 : i;
- Applier::Operations operationsInRange(operations.cbegin(), j);
- Applier::Operations operationsNotInRange(j, operations.cend());
- if (!found) {
- return std::make_pair(
- std::unique_ptr<Applier>(
- new Applier(executor, operationsInRange, applyOperation, onCompletion)),
- operationsNotInRange);
- }
-
- return std::make_pair(
- std::unique_ptr<Applier>(new Applier(
- executor,
- operationsInRange,
- applyOperation,
- stdx::bind(pauseBeforeCompletion,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- pauseDataReplicator,
- onCompletion))),
- operationsNotInRange);
- }
- catch (...) {
- return exceptionToStatus();
- }
- MONGO_UNREACHABLE;
- return Status(ErrorCodes::InternalError, "unreachable");
+ return std::make_pair(
+ std::unique_ptr<Applier>(new Applier(executor,
+ operationsInRange,
+ applyOperation,
+ stdx::bind(pauseBeforeCompletion,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ pauseDataReplicator,
+ onCompletion))),
+ operationsNotInRange);
+ } catch (...) {
+ return exceptionToStatus();
}
+ MONGO_UNREACHABLE;
+ return Status(ErrorCodes::InternalError, "unreachable");
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
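The applier.cpp hunks above are a pure clang-format reflow; the Applier lifecycle is unchanged: start() schedules a single DB work item, cancel() cancels that work item through the executor, and wait() blocks on _condition until _finishCallback() clears _active. A minimal usage sketch against the interfaces shown in this diff; the started ReplicationExecutor 'executor', the no-op apply function, and the operation contents are illustrative assumptions, not part of this change:

    // Sketch: drive an Applier through the lifecycle declared in applier.h.
    Applier::Operations ops{BSON("op" << "n" << "ts" << Timestamp(Seconds(1), 0))};
    Applier::ApplyOperationFn applyOp = [](OperationContext* txn, const BSONObj& op) {
        // A production apply function would behave like SyncTail::syncApply().
        return Status::OK();
    };
    Applier::CallbackFn onCompletion =
        [](const StatusWith<Timestamp>& result, const Applier::Operations& remaining) {
            // On success, 'result' holds the 'ts' of the last applied operation and
            // 'remaining' is empty; on failure, 'remaining' begins at the operation
            // that was not applied.
        };
    Applier applier(&executor, ops, applyOp, onCompletion);
    uassertStatusOK(applier.start());  // schedules Applier::_callback on the executor
    applier.wait();                    // blocks until _finishCallback has run
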
diff --git a/src/mongo/db/repl/applier.h b/src/mongo/db/repl/applier.h
index 7be023683da..a8e48c0edb9 100644
--- a/src/mongo/db/repl/applier.h
+++ b/src/mongo/db/repl/applier.h
@@ -45,129 +45,128 @@
namespace mongo {
namespace repl {
- class Applier {
- MONGO_DISALLOW_COPYING(Applier);
- public:
-
- /**
- * Operations sorted by timestamp in ascending order.
- */
- using Operations = std::vector<BSONObj>;
-
- /**
- * Callback function to report final status of applying operations along with
- * list of operations (if applicable) that were not successfully applied.
- * On success, returns the timestamp of the last operation applied together with an empty
- * list of operations.
- */
- using CallbackFn = stdx::function<void (const StatusWith<Timestamp>&, const Operations&)>;
-
- /**
- * Type of function to apply a single operation. In production, this function
- * would have the same outcome as calling SyncTail::syncApply() ('convertUpdatesToUpserts'
- * value will be embedded in the function implementation).
- */
- using ApplyOperationFn = stdx::function<Status (OperationContext*, const BSONObj&)>;
-
- /**
- * Creates Applier in inactive state.
- *
- * Accepts list of oplog entries to apply in 'operations'.
- *
- * The callback function will be invoked (after start()) when the applier has
- * successfully applied all the operations or encountered a failure. Failures may occur if
- * we failed to apply an operation; or if the underlying scheduled work item
- * on the replication executor was canceled.
- *
- * It is an error for 'operations' to be empty but individual oplog entries
- * contained in 'operations' are not validated.
- */
- Applier(ReplicationExecutor* executor,
- const Operations& operations,
- const ApplyOperationFn& applyOperation,
- const CallbackFn& onCompletion);
-
- /**
- * Blocks while applier is active.
- */
- virtual ~Applier();
-
- /**
- * Returns diagnostic information.
- */
- std::string getDiagnosticString() const;
-
- /**
- * Returns true if the applier has been started (but has not completed).
- */
- bool isActive() const;
-
- /**
- * Starts applier by scheduling initial db work to be run by the executor.
- */
- Status start();
-
- /**
- * Cancels current db work request.
- * Returns immediately if applier is not active.
- *
- * Callback function may be invoked with an ErrorCodes::CallbackCanceled status.
- */
- void cancel();
-
- /**
- * Waits for active database worker to complete.
- * Returns immediately if applier is not active.
- */
- void wait();
-
- private:
-
- /**
- * DB worker callback function - applies all operations.
- */
- void _callback(const ReplicationExecutor::CallbackArgs& cbd);
- void _finishCallback(const StatusWith<Timestamp>& result, const Operations& operations);
-
- // Not owned by us.
- ReplicationExecutor* _executor;
-
- Operations _operations;
- ApplyOperationFn _applyOperation;
- CallbackFn _onCompletion;
-
- // Protects member data of this Applier.
- mutable stdx::mutex _mutex;
-
- stdx::condition_variable _condition;
-
- // _active is true when Applier is scheduled to be run by the executor.
- bool _active;
-
- ReplicationExecutor::CallbackHandle _dbWorkCallbackHandle;
- };
+class Applier {
+ MONGO_DISALLOW_COPYING(Applier);
+public:
+ /**
+ * Operations sorted by timestamp in ascending order.
+ */
+ using Operations = std::vector<BSONObj>;
+
+ /**
+ * Callback function to report final status of applying operations along with
+ * list of operations (if applicable) that were not successfully applied.
+ * On success, returns the timestamp of the last operation applied together with an empty
+ * list of operations.
+ */
+ using CallbackFn = stdx::function<void(const StatusWith<Timestamp>&, const Operations&)>;
+
+ /**
+ * Type of function to apply a single operation. In production, this function
+ * would have the same outcome as calling SyncTail::syncApply() ('convertUpdatesToUpserts'
+ * value will be embedded in the function implementation).
+ */
+ using ApplyOperationFn = stdx::function<Status(OperationContext*, const BSONObj&)>;
+
+ /**
+ * Creates Applier in inactive state.
+ *
+ * Accepts list of oplog entries to apply in 'operations'.
+ *
+ * The callback function will be invoked (after start()) when the applier has
+ * successfully applied all the operations or encountered a failure. Failures may occur if
+ * we failed to apply an operation; or if the underlying scheduled work item
+ * on the replication executor was canceled.
+ *
+ * It is an error for 'operations' to be empty but individual oplog entries
+ * contained in 'operations' are not validated.
+ */
+ Applier(ReplicationExecutor* executor,
+ const Operations& operations,
+ const ApplyOperationFn& applyOperation,
+ const CallbackFn& onCompletion);
+
+ /**
+ * Blocks while applier is active.
+ */
+ virtual ~Applier();
+
+ /**
+ * Returns diagnostic information.
+ */
+ std::string getDiagnosticString() const;
+
+ /**
+ * Returns true if the applier has been started (but has not completed).
+ */
+ bool isActive() const;
/**
- * Applies operations (sorted by timestamp) up to and including 'lastTimestampToApply'.
- * If 'lastTimestampToApply' is found in 'operations':
- * - The applier will be given a subset of 'operations' (includes 'lastTimestampToApply').
- * - On success, the applier will invoke the 'pause' function just before reporting
- * completion status.
- * Otherwise, all entries in 'operations' before 'lastTimestampToApply' will be forwarded to
- * the applier and the 'pause' function will be ignored.
- * If the applier is successfully created, returns the applier and a list of operations that
- * are skipped (operations with 'ts' field value after 'lastTimestampToApply').
+ * Starts applier by scheduling initial db work to be run by the executor.
*/
- using PauseDataReplicatorFn = stdx::function<void ()>;
-
- StatusWith<std::pair<std::unique_ptr<Applier>, Applier::Operations> > applyUntilAndPause(
- ReplicationExecutor* executor,
- const Applier::Operations& operations,
- const Applier::ApplyOperationFn& applyOperation,
- const Timestamp& lastTimestampToApply,
- const PauseDataReplicatorFn& pauseDataReplicator,
- const Applier::CallbackFn& onCompletion);
-
-} // namespace repl
-} // namespace mongo
+ Status start();
+
+ /**
+ * Cancels current db work request.
+ * Returns immediately if applier is not active.
+ *
+ * Callback function may be invoked with an ErrorCodes::CallbackCanceled status.
+ */
+ void cancel();
+
+ /**
+ * Waits for active database worker to complete.
+ * Returns immediately if applier is not active.
+ */
+ void wait();
+
+private:
+ /**
+ * DB worker callback function - applies all operations.
+ */
+ void _callback(const ReplicationExecutor::CallbackArgs& cbd);
+ void _finishCallback(const StatusWith<Timestamp>& result, const Operations& operations);
+
+ // Not owned by us.
+ ReplicationExecutor* _executor;
+
+ Operations _operations;
+ ApplyOperationFn _applyOperation;
+ CallbackFn _onCompletion;
+
+ // Protects member data of this Applier.
+ mutable stdx::mutex _mutex;
+
+ stdx::condition_variable _condition;
+
+ // _active is true when Applier is scheduled to be run by the executor.
+ bool _active;
+
+ ReplicationExecutor::CallbackHandle _dbWorkCallbackHandle;
+};
+
+
+/**
+ * Applies operations (sorted by timestamp) up to and including 'lastTimestampToApply'.
+ * If 'lastTimestampToApply' is found in 'operations':
+ * - The applier will be given a subset of 'operations' (includes 'lastTimestampToApply').
+ * - On success, the applier will invoke the 'pause' function just before reporting
+ * completion status.
+ * Otherwise, all entries in 'operations' before 'lastTimestampToApply' will be forwarded to
+ * the applier and the 'pause' function will be ignored.
+ * If the applier is successfully created, returns the applier and a list of operations that
+ * are skipped (operations with 'ts' field value after 'lastTimestampToApply').
+ */
+using PauseDataReplicatorFn = stdx::function<void()>;
+
+StatusWith<std::pair<std::unique_ptr<Applier>, Applier::Operations>> applyUntilAndPause(
+ ReplicationExecutor* executor,
+ const Applier::Operations& operations,
+ const Applier::ApplyOperationFn& applyOperation,
+ const Timestamp& lastTimestampToApply,
+ const PauseDataReplicatorFn& pauseDataReplicator,
+ const Applier::CallbackFn& onCompletion);
+
+} // namespace repl
+} // namespace mongo
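applyUntilAndPause(), declared at the bottom of applier.h, partitions the sorted operations at lastTimestampToApply with std::lower_bound and arms the pause hook only when that timestamp is actually present. A sketch of just that partition step, under the same sorted-by-'ts' precondition the header documents ('ops' and 'lastTimestampToApply' are illustrative names):

    // Sketch of the partition performed inside applyUntilAndPause().
    auto comp = [](const BSONObj& left, const BSONObj& right) {
        return left["ts"].timestamp() < right["ts"].timestamp();
    };
    auto needle = BSON("ts" << lastTimestampToApply);
    auto i = std::lower_bound(ops.cbegin(), ops.cend(), needle, comp);
    bool found = i != ops.cend() && !comp(needle, *i);
    auto split = found ? i + 1 : i;  // include the matching entry itself
    Applier::Operations inRange(ops.cbegin(), split);  // handed to the new Applier
    Applier::Operations skipped(split, ops.cend());    // returned to the caller
    // Only when 'found' is true is 'onCompletion' wrapped by pauseBeforeCompletion(),
    // so that pauseDataReplicator() runs just before completion is reported;
    // otherwise the pause function is ignored.
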
diff --git a/src/mongo/db/repl/applier_test.cpp b/src/mongo/db/repl/applier_test.cpp
index f876a884605..5138d2561c6 100644
--- a/src/mongo/db/repl/applier_test.cpp
+++ b/src/mongo/db/repl/applier_test.cpp
@@ -42,624 +42,609 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
-
- using Operations = Applier::Operations;
-
- class ApplierTest : public ReplicationExecutorTest {
- public:
-
- Applier* getApplier() const;
-
- protected:
-
- void setUp() override;
- void tearDown() override;
-
- /**
- * Test function to check behavior when we fail to apply one of the operations.
- */
- void _testApplyOperationFailed(size_t opIndex, stdx::function<Status ()> fail);
-
- std::unique_ptr<Applier> _applier;
- std::unique_ptr<unittest::Barrier> _barrier;
- };
-
- void ApplierTest::setUp() {
- ReplicationExecutorTest::setUp();
- launchExecutorThread();
- auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
- _applier.reset(new Applier(&getExecutor(),
- {BSON("ts" << Timestamp(Seconds(123), 0))},
- apply,
- [this](const StatusWith<Timestamp>&, const Operations&) {
- if (_barrier.get()) {
- _barrier->countDownAndWait();
- }
- }));
- }
-
- void ApplierTest::tearDown() {
- ReplicationExecutorTest::tearDown();
- _applier.reset();
- _barrier.reset();
- }
-
- Applier* ApplierTest::getApplier() const {
- return _applier.get();
- }
-
- TEST_F(ApplierTest, InvalidConstruction) {
- const Operations operations{BSON("ts" << Timestamp(Seconds(123), 0))};
- auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
- auto callback = [](const StatusWith<Timestamp>& status, const Operations& operations) { };
-
- // Null executor.
- ASSERT_THROWS_CODE(
- Applier(nullptr, operations, apply, callback),
- UserException,
- ErrorCodes::BadValue);
-
- // Empty list of operations.
- ASSERT_THROWS_CODE(
- Applier(&getExecutor(), {}, apply, callback),
- UserException,
- ErrorCodes::BadValue);
-
- // Last operation missing timestamp field.
- ASSERT_THROWS_CODE(
- Applier(&getExecutor(), {BSONObj()}, apply, callback),
- UserException,
- ErrorCodes::FailedToParse);
-
- // "ts" field in last operation not a timestamp.
- ASSERT_THROWS_CODE(
- Applier(&getExecutor(), {BSON("ts" << 99)}, apply, callback),
- UserException,
- ErrorCodes::TypeMismatch);
-
- // Invalid apply operation function.
- ASSERT_THROWS_CODE(
- Applier(&getExecutor(), operations, Applier::ApplyOperationFn(), callback),
- UserException,
- ErrorCodes::BadValue);
-
- // Invalid callback function.
- ASSERT_THROWS_CODE(
- Applier(&getExecutor(), operations, apply, Applier::CallbackFn()),
- UserException,
- ErrorCodes::BadValue);
- }
-
- TEST_F(ApplierTest, GetDiagnosticString) {
- ASSERT_FALSE(getApplier()->getDiagnosticString().empty());
- }
-
- TEST_F(ApplierTest, IsActiveAfterStart) {
- // Use a barrier to ensure that the callback blocks while
- // we check isActive().
- _barrier.reset(new unittest::Barrier(2U));
- ASSERT_FALSE(getApplier()->isActive());
- ASSERT_OK(getApplier()->start());
- ASSERT_TRUE(getApplier()->isActive());
- _barrier->countDownAndWait();
- }
-
- TEST_F(ApplierTest, StartWhenActive) {
- // Use a barrier to ensure that the callback blocks while
- // we check isActive().
- _barrier.reset(new unittest::Barrier(2U));
- ASSERT_OK(getApplier()->start());
- ASSERT_TRUE(getApplier()->isActive());
- ASSERT_NOT_OK(getApplier()->start());
- ASSERT_TRUE(getApplier()->isActive());
- _barrier->countDownAndWait();
- }
-
- TEST_F(ApplierTest, CancelWithoutStart) {
- ASSERT_FALSE(getApplier()->isActive());
- getApplier()->cancel();
- ASSERT_FALSE(getApplier()->isActive());
- }
-
- TEST_F(ApplierTest, WaitWithoutStart) {
- ASSERT_FALSE(getApplier()->isActive());
- getApplier()->wait();
- ASSERT_FALSE(getApplier()->isActive());
- }
-
- TEST_F(ApplierTest, ShutdownBeforeStart) {
- getExecutor().shutdown();
- ASSERT_NOT_OK(getApplier()->start());
- ASSERT_FALSE(getApplier()->isActive());
- }
-
- TEST_F(ApplierTest, CancelBeforeStartingDBWork) {
- // Schedule a blocking DB work item before the applier to allow us to cancel the applier
- // work item before the executor runs it.
- unittest::Barrier barrier(2U);
- using CallbackData = ReplicationExecutor::CallbackArgs;
- getExecutor().scheduleDBWork([&](const CallbackData& cbd) {
- barrier.countDownAndWait(); // generation 0
- });
- const BSONObj operation = BSON("ts" << Timestamp(Seconds(123), 0));
- stdx::mutex mutex;
- StatusWith<Timestamp> result = getDetectableErrorStatus();
- Applier::Operations operations;
- _applier.reset(new Applier(
- &getExecutor(),
- {operation},
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- result = theResult;
- operations = theOperations;
- }));
-
- getApplier()->start();
- getApplier()->cancel();
- ASSERT_TRUE(getApplier()->isActive());
-
- barrier.countDownAndWait(); // generation 0
-
- getApplier()->wait();
- ASSERT_FALSE(getApplier()->isActive());
-
- stdx::lock_guard<stdx::mutex> lock(mutex);
+using namespace mongo;
+using namespace mongo::repl;
+
+using Operations = Applier::Operations;
+
+class ApplierTest : public ReplicationExecutorTest {
+public:
+ Applier* getApplier() const;
+
+protected:
+ void setUp() override;
+ void tearDown() override;
+
+ /**
+ * Test function to check behavior when we fail to apply one of the operations.
+ */
+ void _testApplyOperationFailed(size_t opIndex, stdx::function<Status()> fail);
+
+ std::unique_ptr<Applier> _applier;
+ std::unique_ptr<unittest::Barrier> _barrier;
+};
+
+void ApplierTest::setUp() {
+ ReplicationExecutorTest::setUp();
+ launchExecutorThread();
+ auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
+ _applier.reset(new Applier(&getExecutor(),
+ {BSON("ts" << Timestamp(Seconds(123), 0))},
+ apply,
+ [this](const StatusWith<Timestamp>&, const Operations&) {
+ if (_barrier.get()) {
+ _barrier->countDownAndWait();
+ }
+ }));
+}
+
+void ApplierTest::tearDown() {
+ ReplicationExecutorTest::tearDown();
+ _applier.reset();
+ _barrier.reset();
+}
+
+Applier* ApplierTest::getApplier() const {
+ return _applier.get();
+}
+
+TEST_F(ApplierTest, InvalidConstruction) {
+ const Operations operations{BSON("ts" << Timestamp(Seconds(123), 0))};
+ auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
+ auto callback = [](const StatusWith<Timestamp>& status, const Operations& operations) {};
+
+ // Null executor.
+ ASSERT_THROWS_CODE(
+ Applier(nullptr, operations, apply, callback), UserException, ErrorCodes::BadValue);
+
+ // Empty list of operations.
+ ASSERT_THROWS_CODE(
+ Applier(&getExecutor(), {}, apply, callback), UserException, ErrorCodes::BadValue);
+
+ // Last operation missing timestamp field.
+ ASSERT_THROWS_CODE(Applier(&getExecutor(), {BSONObj()}, apply, callback),
+ UserException,
+ ErrorCodes::FailedToParse);
+
+ // "ts" field in last operation not a timestamp.
+ ASSERT_THROWS_CODE(Applier(&getExecutor(), {BSON("ts" << 99)}, apply, callback),
+ UserException,
+ ErrorCodes::TypeMismatch);
+
+ // Invalid apply operation function.
+ ASSERT_THROWS_CODE(Applier(&getExecutor(), operations, Applier::ApplyOperationFn(), callback),
+ UserException,
+ ErrorCodes::BadValue);
+
+ // Invalid callback function.
+ ASSERT_THROWS_CODE(Applier(&getExecutor(), operations, apply, Applier::CallbackFn()),
+ UserException,
+ ErrorCodes::BadValue);
+}
+
+TEST_F(ApplierTest, GetDiagnosticString) {
+ ASSERT_FALSE(getApplier()->getDiagnosticString().empty());
+}
+
+TEST_F(ApplierTest, IsActiveAfterStart) {
+ // Use a barrier to ensure that the callback blocks while
+ // we check isActive().
+ _barrier.reset(new unittest::Barrier(2U));
+ ASSERT_FALSE(getApplier()->isActive());
+ ASSERT_OK(getApplier()->start());
+ ASSERT_TRUE(getApplier()->isActive());
+ _barrier->countDownAndWait();
+}
+
+TEST_F(ApplierTest, StartWhenActive) {
+ // Use a barrier to ensure that the callback blocks while
+ // we check isActive().
+ _barrier.reset(new unittest::Barrier(2U));
+ ASSERT_OK(getApplier()->start());
+ ASSERT_TRUE(getApplier()->isActive());
+ ASSERT_NOT_OK(getApplier()->start());
+ ASSERT_TRUE(getApplier()->isActive());
+ _barrier->countDownAndWait();
+}
+
+TEST_F(ApplierTest, CancelWithoutStart) {
+ ASSERT_FALSE(getApplier()->isActive());
+ getApplier()->cancel();
+ ASSERT_FALSE(getApplier()->isActive());
+}
+
+TEST_F(ApplierTest, WaitWithoutStart) {
+ ASSERT_FALSE(getApplier()->isActive());
+ getApplier()->wait();
+ ASSERT_FALSE(getApplier()->isActive());
+}
+
+TEST_F(ApplierTest, ShutdownBeforeStart) {
+ getExecutor().shutdown();
+ ASSERT_NOT_OK(getApplier()->start());
+ ASSERT_FALSE(getApplier()->isActive());
+}
+
+TEST_F(ApplierTest, CancelBeforeStartingDBWork) {
+ // Schedule a blocking DB work item before the applier to allow us to cancel the applier
+ // work item before the executor runs it.
+ unittest::Barrier barrier(2U);
+ using CallbackData = ReplicationExecutor::CallbackArgs;
+ getExecutor().scheduleDBWork([&](const CallbackData& cbd) {
+ barrier.countDownAndWait(); // generation 0
+ });
+ const BSONObj operation = BSON("ts" << Timestamp(Seconds(123), 0));
+ stdx::mutex mutex;
+ StatusWith<Timestamp> result = getDetectableErrorStatus();
+ Applier::Operations operations;
+ _applier.reset(
+ new Applier(&getExecutor(),
+ {operation},
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ result = theResult;
+ operations = theOperations;
+ }));
+
+ getApplier()->start();
+ getApplier()->cancel();
+ ASSERT_TRUE(getApplier()->isActive());
+
+ barrier.countDownAndWait(); // generation 0
+
+ getApplier()->wait();
+ ASSERT_FALSE(getApplier()->isActive());
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, result.getStatus().code());
+ ASSERT_EQUALS(1U, operations.size());
+ ASSERT_EQUALS(operation, operations.front());
+}
+
+TEST_F(ApplierTest, DestroyBeforeStartingDBWork) {
+ // Schedule a blocking DB work item before the applier to allow us to destroy the applier
+ // before the executor runs the work item.
+ unittest::Barrier barrier(2U);
+ using CallbackData = ReplicationExecutor::CallbackArgs;
+ getExecutor().scheduleDBWork([&](const CallbackData& cbd) {
+ barrier.countDownAndWait(); // generation 0
+ // Give the main thread a head start in invoking the applier destructor.
+ sleepmillis(1);
+ });
+ const BSONObj operation = BSON("ts" << Timestamp(Seconds(123), 0));
+ stdx::mutex mutex;
+ StatusWith<Timestamp> result = getDetectableErrorStatus();
+ Applier::Operations operations;
+ _applier.reset(
+ new Applier(&getExecutor(),
+ {operation},
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ result = theResult;
+ operations = theOperations;
+ }));
+
+ getApplier()->start();
+ ASSERT_TRUE(getApplier()->isActive());
+
+ barrier.countDownAndWait(); // generation 0
+
+ // It is possible the executor may have invoked the callback before we
+ // destroy the applier. Therefore both OK and CallbackCanceled are acceptable
+ // statuses.
+ _applier.reset();
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ if (result.isOK()) {
+ ASSERT_TRUE(operations.empty());
+ } else {
ASSERT_EQUALS(ErrorCodes::CallbackCanceled, result.getStatus().code());
ASSERT_EQUALS(1U, operations.size());
ASSERT_EQUALS(operation, operations.front());
}
-
- TEST_F(ApplierTest, DestroyBeforeStartingDBWork) {
- // Schedule a blocking DB work item before the applier to allow us to destroy the applier
- // before the executor runs the work item.
- unittest::Barrier barrier(2U);
- using CallbackData = ReplicationExecutor::CallbackArgs;
- getExecutor().scheduleDBWork([&](const CallbackData& cbd) {
- barrier.countDownAndWait(); // generation 0
- // Give the main thread a head start in invoking the applier destructor.
- sleepmillis(1);
- });
- const BSONObj operation = BSON("ts" << Timestamp(Seconds(123), 0));
- stdx::mutex mutex;
- StatusWith<Timestamp> result = getDetectableErrorStatus();
- Applier::Operations operations;
- _applier.reset(new Applier(
- &getExecutor(),
- {operation},
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- result = theResult;
- operations = theOperations;
- }));
-
- getApplier()->start();
- ASSERT_TRUE(getApplier()->isActive());
-
- barrier.countDownAndWait(); // generation 0
-
- // It is possible the executor may have invoked the callback before we
- // destroy the applier. Therefore both OK and CallbackCanceled are acceptable
- // statuses.
- _applier.reset();
-
- stdx::lock_guard<stdx::mutex> lock(mutex);
- if (result.isOK()) {
- ASSERT_TRUE(operations.empty());
- }
- else {
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, result.getStatus().code());
- ASSERT_EQUALS(1U, operations.size());
- ASSERT_EQUALS(operation, operations.front());
- }
- }
-
- TEST_F(ApplierTest, WaitForCompletion) {
- const Timestamp timestamp(Seconds(123), 0);
- stdx::mutex mutex;
- StatusWith<Timestamp> result = getDetectableErrorStatus();
- Applier::Operations operations;
- _applier.reset(new Applier(
- &getExecutor(),
- {BSON("ts" << timestamp)},
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- result = theResult;
- operations = theOperations;
- }));
-
- getApplier()->start();
- getApplier()->wait();
- ASSERT_FALSE(getApplier()->isActive());
-
- stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(timestamp, result.getValue());
- ASSERT_TRUE(operations.empty());
- }
-
- TEST_F(ApplierTest, DestroyShouldBlockUntilInactive) {
- const Timestamp timestamp(Seconds(123), 0);
- unittest::Barrier barrier(2U);
- stdx::mutex mutex;
- StatusWith<Timestamp> result = getDetectableErrorStatus();
- Applier::Operations operations;
- _applier.reset(new Applier(
- &getExecutor(),
- {BSON("ts" << timestamp)},
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- result = theResult;
- operations = theOperations;
- barrier.countDownAndWait();
- }));
-
- getApplier()->start();
- barrier.countDownAndWait();
- _applier.reset();
-
+}
+
+TEST_F(ApplierTest, WaitForCompletion) {
+ const Timestamp timestamp(Seconds(123), 0);
+ stdx::mutex mutex;
+ StatusWith<Timestamp> result = getDetectableErrorStatus();
+ Applier::Operations operations;
+ _applier.reset(
+ new Applier(&getExecutor(),
+ {BSON("ts" << timestamp)},
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ result = theResult;
+ operations = theOperations;
+ }));
+
+ getApplier()->start();
+ getApplier()->wait();
+ ASSERT_FALSE(getApplier()->isActive());
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(timestamp, result.getValue());
+ ASSERT_TRUE(operations.empty());
+}
+
+TEST_F(ApplierTest, DestroyShouldBlockUntilInactive) {
+ const Timestamp timestamp(Seconds(123), 0);
+ unittest::Barrier barrier(2U);
+ stdx::mutex mutex;
+ StatusWith<Timestamp> result = getDetectableErrorStatus();
+ Applier::Operations operations;
+ _applier.reset(
+ new Applier(&getExecutor(),
+ {BSON("ts" << timestamp)},
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ result = theResult;
+ operations = theOperations;
+ barrier.countDownAndWait();
+ }));
+
+ getApplier()->start();
+ barrier.countDownAndWait();
+ _applier.reset();
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(timestamp, result.getValue());
+ ASSERT_TRUE(operations.empty());
+}
+
+TEST_F(ApplierTest, ApplyOperationSuccessful) {
+ // Bogus operations codes.
+ Applier::Operations operationsToApply{
+ BSON("op"
+ << "a"
+ << "ts" << Timestamp(Seconds(123), 0)),
+ BSON("op"
+ << "b"
+ << "ts" << Timestamp(Seconds(456), 0)),
+ BSON("op"
+ << "c"
+ << "ts" << Timestamp(Seconds(789), 0)),
+ };
+ stdx::mutex mutex;
+ StatusWith<Timestamp> result = getDetectableErrorStatus();
+ bool areWritesReplicationOnOperationContext = true;
+ bool isLockBatchWriter = false;
+ Applier::Operations operationsApplied;
+ Applier::Operations operationsOnCompletion;
+ auto apply = [&](OperationContext* txn, const BSONObj& operation) {
stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(timestamp, result.getValue());
- ASSERT_TRUE(operations.empty());
- }
-
- TEST_F(ApplierTest, ApplyOperationSuccessful) {
- // Bogus operations codes.
- Applier::Operations operationsToApply{
- BSON("op" << "a" << "ts" << Timestamp(Seconds(123), 0)),
- BSON("op" << "b" << "ts" << Timestamp(Seconds(456), 0)),
- BSON("op" << "c" << "ts" << Timestamp(Seconds(789), 0)),
- };
- stdx::mutex mutex;
- StatusWith<Timestamp> result = getDetectableErrorStatus();
- bool areWritesReplicationOnOperationContext = true;
- bool isLockBatchWriter = false;
- Applier::Operations operationsApplied;
- Applier::Operations operationsOnCompletion;
- auto apply = [&](OperationContext* txn, const BSONObj& operation) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- areWritesReplicationOnOperationContext = txn->writesAreReplicated();
- isLockBatchWriter = txn->lockState()->isBatchWriter();
- operationsApplied.push_back(operation);
- return Status::OK();
- };
- auto callback = [&](const StatusWith<Timestamp>& theResult,
- const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- result = theResult;
- operationsOnCompletion = theOperations;
- };
-
- _applier.reset(new Applier(&getExecutor(), operationsToApply, apply, callback));
- _applier->start();
- _applier->wait();
-
+ areWritesReplicationOnOperationContext = txn->writesAreReplicated();
+ isLockBatchWriter = txn->lockState()->isBatchWriter();
+ operationsApplied.push_back(operation);
+ return Status::OK();
+ };
+ auto callback = [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_FALSE(areWritesReplicationOnOperationContext);
- ASSERT_TRUE(isLockBatchWriter);
- ASSERT_EQUALS(operationsToApply.size(), operationsApplied.size());
- ASSERT_EQUALS(operationsToApply[0], operationsApplied[0]);
- ASSERT_EQUALS(operationsToApply[1], operationsApplied[1]);
- ASSERT_EQUALS(operationsToApply[2], operationsApplied[2]);
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(operationsToApply[2]["ts"].timestamp(), result.getValue());
- ASSERT_TRUE(operationsOnCompletion.empty());
- }
-
- void ApplierTest::_testApplyOperationFailed(size_t opIndex, stdx::function<Status ()> fail) {
- // Bogus operations codes.
- Applier::Operations operationsToApply{
- BSON("op" << "a" << "ts" << Timestamp(Seconds(123), 0)),
- BSON("op" << "b" << "ts" << Timestamp(Seconds(456), 0)),
- BSON("op" << "c" << "ts" << Timestamp(Seconds(789), 0)),
- };
- stdx::mutex mutex;
- StatusWith<Timestamp> result = getDetectableErrorStatus();
- Applier::Operations operationsApplied;
- Applier::Operations operationsOnCompletion;
- auto apply = [&](OperationContext* txn, const BSONObj& operation) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- if (operationsApplied.size() == opIndex) {
- return fail();
- }
- operationsApplied.push_back(operation);
- return Status::OK();
- };
- auto callback = [&](const StatusWith<Timestamp>& theResult,
- const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- result = theResult;
- operationsOnCompletion = theOperations;
- };
-
- _applier.reset(new Applier(&getExecutor(), operationsToApply, apply, callback));
- _applier->start();
- _applier->wait();
+ result = theResult;
+ operationsOnCompletion = theOperations;
+ };
+ _applier.reset(new Applier(&getExecutor(), operationsToApply, apply, callback));
+ _applier->start();
+ _applier->wait();
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_FALSE(areWritesReplicationOnOperationContext);
+ ASSERT_TRUE(isLockBatchWriter);
+ ASSERT_EQUALS(operationsToApply.size(), operationsApplied.size());
+ ASSERT_EQUALS(operationsToApply[0], operationsApplied[0]);
+ ASSERT_EQUALS(operationsToApply[1], operationsApplied[1]);
+ ASSERT_EQUALS(operationsToApply[2], operationsApplied[2]);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(operationsToApply[2]["ts"].timestamp(), result.getValue());
+ ASSERT_TRUE(operationsOnCompletion.empty());
+}
+
+void ApplierTest::_testApplyOperationFailed(size_t opIndex, stdx::function<Status()> fail) {
+ // Bogus operations codes.
+ Applier::Operations operationsToApply{
+ BSON("op"
+ << "a"
+ << "ts" << Timestamp(Seconds(123), 0)),
+ BSON("op"
+ << "b"
+ << "ts" << Timestamp(Seconds(456), 0)),
+ BSON("op"
+ << "c"
+ << "ts" << Timestamp(Seconds(789), 0)),
+ };
+ stdx::mutex mutex;
+ StatusWith<Timestamp> result = getDetectableErrorStatus();
+ Applier::Operations operationsApplied;
+ Applier::Operations operationsOnCompletion;
+ auto apply = [&](OperationContext* txn, const BSONObj& operation) {
stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_EQUALS(opIndex, operationsApplied.size());
- size_t i = 0;
- for (const auto& operation : operationsApplied) {
- ASSERT_EQUALS(operationsToApply[i], operation);
- i++;
+ if (operationsApplied.size() == opIndex) {
+ return fail();
}
- ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
- ASSERT_EQUALS(operationsToApply.size() - opIndex, operationsOnCompletion.size());
- ASSERT_EQUALS(opIndex, i);
- for (const auto& operation : operationsOnCompletion) {
- ASSERT_EQUALS(operationsToApply[i], operation);
- i++;
- }
- }
-
- TEST_F(ApplierTest, ApplyOperationFailedOnFirstOperation) {
- _testApplyOperationFailed(0U, []() {
- return Status(ErrorCodes::OperationFailed, "");
- });
- }
-
- TEST_F(ApplierTest, ApplyOperationThrowsExceptionOnFirstOperation) {
- _testApplyOperationFailed(0U, []() {
- uasserted(ErrorCodes::OperationFailed, "");
- MONGO_UNREACHABLE;
- return Status(ErrorCodes::InternalError, "unreachable");
- });
- }
-
- TEST_F(ApplierTest, ApplyOperationFailedOnSecondOperation) {
- _testApplyOperationFailed(1U, []() {
- return Status(ErrorCodes::OperationFailed, "");
- });
- }
-
- TEST_F(ApplierTest, ApplyOperationThrowsExceptionOnSecondOperation) {
- _testApplyOperationFailed(1U, []() {
- uasserted(ErrorCodes::OperationFailed, "");
- MONGO_UNREACHABLE;
- return Status(ErrorCodes::InternalError, "unreachable");
- });
- }
-
- TEST_F(ApplierTest, ApplyOperationFailedOnLastOperation) {
- _testApplyOperationFailed(2U, []() {
- return Status(ErrorCodes::OperationFailed, "");
- });
- }
-
- TEST_F(ApplierTest, ApplyOperationThrowsExceptionOnLastOperation) {
- _testApplyOperationFailed(2U, []() {
- uasserted(ErrorCodes::OperationFailed, "");
- MONGO_UNREACHABLE;
- return Status(ErrorCodes::InternalError, "unreachable");
- });
- }
+ operationsApplied.push_back(operation);
+ return Status::OK();
+ };
+ auto callback = [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ result = theResult;
+ operationsOnCompletion = theOperations;
+ };
- class ApplyUntilAndPauseTest : public ApplierTest {};
-
- TEST_F(ApplyUntilAndPauseTest, EmptyOperations) {
- auto result =
- applyUntilAndPause(
- &getExecutor(),
- {},
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- Timestamp(Seconds(123), 0),
- [] {},
- [](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {});
- ASSERT_EQUALS(ErrorCodes::BadValue, result.getStatus().code());
- }
+ _applier.reset(new Applier(&getExecutor(), operationsToApply, apply, callback));
+ _applier->start();
+ _applier->wait();
- TEST_F(ApplyUntilAndPauseTest, NoOperationsInRange) {
- auto result =
- applyUntilAndPause(
- &getExecutor(),
- {
- BSON("ts" << Timestamp(Seconds(456), 0)),
- BSON("ts" << Timestamp(Seconds(789), 0)),
- },
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- Timestamp(Seconds(123), 0),
- [] {},
- [](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {});
- ASSERT_EQUALS(ErrorCodes::BadValue, result.getStatus().code());
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_EQUALS(opIndex, operationsApplied.size());
+ size_t i = 0;
+ for (const auto& operation : operationsApplied) {
+ ASSERT_EQUALS(operationsToApply[i], operation);
+ i++;
}
-
- TEST_F(ApplyUntilAndPauseTest, OperationMissingTimestampField) {
- auto result =
- applyUntilAndPause(
- &getExecutor(),
- {BSONObj()},
- [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
- Timestamp(Seconds(123), 0),
- [] {},
- [](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {});
- ASSERT_EQUALS(ErrorCodes::FailedToParse, result.getStatus().code());
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
+ ASSERT_EQUALS(operationsToApply.size() - opIndex, operationsOnCompletion.size());
+ ASSERT_EQUALS(opIndex, i);
+ for (const auto& operation : operationsOnCompletion) {
+ ASSERT_EQUALS(operationsToApply[i], operation);
+ i++;
}
+}
+
+TEST_F(ApplierTest, ApplyOperationFailedOnFirstOperation) {
+ _testApplyOperationFailed(0U, []() { return Status(ErrorCodes::OperationFailed, ""); });
+}
+
+TEST_F(ApplierTest, ApplyOperationThrowsExceptionOnFirstOperation) {
+ _testApplyOperationFailed(0U,
+ []() {
+ uasserted(ErrorCodes::OperationFailed, "");
+ MONGO_UNREACHABLE;
+ return Status(ErrorCodes::InternalError, "unreachable");
+ });
+}
+
+TEST_F(ApplierTest, ApplyOperationFailedOnSecondOperation) {
+ _testApplyOperationFailed(1U, []() { return Status(ErrorCodes::OperationFailed, ""); });
+}
+
+TEST_F(ApplierTest, ApplyOperationThrowsExceptionOnSecondOperation) {
+ _testApplyOperationFailed(1U,
+ []() {
+ uasserted(ErrorCodes::OperationFailed, "");
+ MONGO_UNREACHABLE;
+ return Status(ErrorCodes::InternalError, "unreachable");
+ });
+}
+
+TEST_F(ApplierTest, ApplyOperationFailedOnLastOperation) {
+ _testApplyOperationFailed(2U, []() { return Status(ErrorCodes::OperationFailed, ""); });
+}
+
+TEST_F(ApplierTest, ApplyOperationThrowsExceptionOnLastOperation) {
+ _testApplyOperationFailed(2U,
+ []() {
+ uasserted(ErrorCodes::OperationFailed, "");
+ MONGO_UNREACHABLE;
+ return Status(ErrorCodes::InternalError, "unreachable");
+ });
+}
+
+class ApplyUntilAndPauseTest : public ApplierTest {};
+
+TEST_F(ApplyUntilAndPauseTest, EmptyOperations) {
+ auto result = applyUntilAndPause(
+ &getExecutor(),
+ {},
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ Timestamp(Seconds(123), 0),
+ [] {},
+ [](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {});
+ ASSERT_EQUALS(ErrorCodes::BadValue, result.getStatus().code());
+}
+
+TEST_F(ApplyUntilAndPauseTest, NoOperationsInRange) {
+ auto result = applyUntilAndPause(
+ &getExecutor(),
+ {
+ BSON("ts" << Timestamp(Seconds(456), 0)), BSON("ts" << Timestamp(Seconds(789), 0)),
+ },
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ Timestamp(Seconds(123), 0),
+ [] {},
+ [](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {});
+ ASSERT_EQUALS(ErrorCodes::BadValue, result.getStatus().code());
+}
+
+TEST_F(ApplyUntilAndPauseTest, OperationMissingTimestampField) {
+ auto result = applyUntilAndPause(
+ &getExecutor(),
+ {BSONObj()},
+ [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); },
+ Timestamp(Seconds(123), 0),
+ [] {},
+ [](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {});
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, result.getStatus().code());
+}
+
+TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseSingleOperation) {
+ Timestamp ts(Seconds(123), 0);
+ const Operations operationsToApply{BSON("ts" << ts)};
+ stdx::mutex mutex;
+ StatusWith<Timestamp> completionResult = getDetectableErrorStatus();
+ bool pauseCalled = false;
+ Applier::Operations operationsOnCompletion;
+ auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
+ auto pause = [&] {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ pauseCalled = true;
+ };
+ auto callback = [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ completionResult = theResult;
+ operationsOnCompletion = theOperations;
+ };
- TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseSingleOperation) {
- Timestamp ts(Seconds(123), 0);
- const Operations operationsToApply{BSON("ts" << ts)};
- stdx::mutex mutex;
- StatusWith<Timestamp> completionResult = getDetectableErrorStatus();
- bool pauseCalled = false;
- Applier::Operations operationsOnCompletion;
- auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
- auto pause = [&] {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- pauseCalled = true;
- };
- auto callback = [&](const StatusWith<Timestamp>& theResult,
- const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- completionResult = theResult;
- operationsOnCompletion = theOperations;
- };
-
- auto result =
- applyUntilAndPause(&getExecutor(), operationsToApply, apply, ts, pause, callback);
- ASSERT_OK(result.getStatus());
- _applier = std::move(result.getValue().first);
- ASSERT_TRUE(_applier);
-
- const Applier::Operations& operationsDiscarded = result.getValue().second;
- ASSERT_TRUE(operationsDiscarded.empty());
-
- _applier->start();
- _applier->wait();
-
+ auto result = applyUntilAndPause(&getExecutor(), operationsToApply, apply, ts, pause, callback);
+ ASSERT_OK(result.getStatus());
+ _applier = std::move(result.getValue().first);
+ ASSERT_TRUE(_applier);
+
+ const Applier::Operations& operationsDiscarded = result.getValue().second;
+ ASSERT_TRUE(operationsDiscarded.empty());
+
+ _applier->start();
+ _applier->wait();
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_TRUE(pauseCalled);
+ ASSERT_OK(completionResult.getStatus());
+ ASSERT_EQUALS(ts, completionResult.getValue());
+ ASSERT_TRUE(operationsOnCompletion.empty());
+}
+
+TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseSingleOperationTimestampNotInOperations) {
+ Timestamp ts(Seconds(123), 0);
+ const Operations operationsToApply{BSON("ts" << ts)};
+ stdx::mutex mutex;
+ StatusWith<Timestamp> completionResult = getDetectableErrorStatus();
+ bool pauseCalled = false;
+ Applier::Operations operationsOnCompletion;
+ auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
+ auto pause = [&] {
stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_TRUE(pauseCalled);
- ASSERT_OK(completionResult.getStatus());
- ASSERT_EQUALS(ts, completionResult.getValue());
- ASSERT_TRUE(operationsOnCompletion.empty());
- }
+ pauseCalled = true;
+ };
+ auto callback = [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ completionResult = theResult;
+ operationsOnCompletion = theOperations;
+ };
- TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseSingleOperationTimestampNotInOperations) {
- Timestamp ts(Seconds(123), 0);
- const Operations operationsToApply{BSON("ts" << ts)};
- stdx::mutex mutex;
- StatusWith<Timestamp> completionResult = getDetectableErrorStatus();
- bool pauseCalled = false;
- Applier::Operations operationsOnCompletion;
- auto apply = [](OperationContext* txn, const BSONObj& operation) { return Status::OK(); };
- auto pause = [&] {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- pauseCalled = true;
- };
- auto callback = [&](const StatusWith<Timestamp>& theResult,
- const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- completionResult = theResult;
- operationsOnCompletion = theOperations;
- };
-
- Timestamp ts2(Seconds(456), 0);
- auto result =
- applyUntilAndPause(&getExecutor(), operationsToApply, apply, ts2, pause, callback);
- ASSERT_OK(result.getStatus());
- _applier = std::move(result.getValue().first);
- ASSERT_TRUE(_applier);
-
- const Applier::Operations& operationsDiscarded = result.getValue().second;
- ASSERT_TRUE(operationsDiscarded.empty());
-
- _applier->start();
- _applier->wait();
+ Timestamp ts2(Seconds(456), 0);
+ auto result =
+ applyUntilAndPause(&getExecutor(), operationsToApply, apply, ts2, pause, callback);
+ ASSERT_OK(result.getStatus());
+ _applier = std::move(result.getValue().first);
+ ASSERT_TRUE(_applier);
+
+ const Applier::Operations& operationsDiscarded = result.getValue().second;
+ ASSERT_TRUE(operationsDiscarded.empty());
+
+ _applier->start();
+ _applier->wait();
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_FALSE(pauseCalled);
+ ASSERT_OK(completionResult.getStatus());
+ ASSERT_EQUALS(ts, completionResult.getValue());
+ ASSERT_TRUE(operationsOnCompletion.empty());
+}
+
+TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseSingleOperationAppliedFailed) {
+ Timestamp ts(Seconds(123), 0);
+ const Operations operationsToApply{BSON("ts" << ts)};
+ stdx::mutex mutex;
+ StatusWith<Timestamp> completionResult = getDetectableErrorStatus();
+ bool pauseCalled = false;
+ Applier::Operations operationsOnCompletion;
+ auto apply = [](OperationContext* txn, const BSONObj& operation) {
+ return Status(ErrorCodes::OperationFailed, "");
+ };
+ auto pause = [&] {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ pauseCalled = true;
+ };
+ auto callback = [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ completionResult = theResult;
+ operationsOnCompletion = theOperations;
+ };
+ auto result = applyUntilAndPause(&getExecutor(), operationsToApply, apply, ts, pause, callback);
+ ASSERT_OK(result.getStatus());
+ _applier = std::move(result.getValue().first);
+ ASSERT_TRUE(_applier);
+
+ const Applier::Operations& operationsDiscarded = result.getValue().second;
+ ASSERT_TRUE(operationsDiscarded.empty());
+
+ _applier->start();
+ _applier->wait();
+
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_FALSE(pauseCalled);
+ ASSERT_NOT_OK(completionResult.getStatus());
+ ASSERT_FALSE(operationsOnCompletion.empty());
+}
+
+void _testApplyUntilAndPauseDiscardOperations(ReplicationExecutor* executor,
+ const Timestamp& ts,
+ bool expectedPauseCalled) {
+ Applier::Operations operationsToApply{
+ BSON("op"
+ << "a"
+ << "ts" << Timestamp(Seconds(123), 0)),
+ BSON("op"
+ << "b"
+ << "ts" << Timestamp(Seconds(456), 0)),
+ BSON("op"
+ << "c"
+ << "ts" << Timestamp(Seconds(789), 0)),
+ };
+ stdx::mutex mutex;
+ StatusWith<Timestamp> completionResult = ApplyUntilAndPauseTest::getDetectableErrorStatus();
+ bool pauseCalled = false;
+ Applier::Operations operationsApplied;
+ Applier::Operations operationsOnCompletion;
+ auto apply = [&](OperationContext* txn, const BSONObj& operation) {
stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_FALSE(pauseCalled);
- ASSERT_OK(completionResult.getStatus());
- ASSERT_EQUALS(ts, completionResult.getValue());
- ASSERT_TRUE(operationsOnCompletion.empty());
- }
+ operationsApplied.push_back(operation);
+ return Status::OK();
+ };
+ auto pause = [&] {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ pauseCalled = true;
+ };
+ auto callback = [&](const StatusWith<Timestamp>& theResult, const Operations& theOperations) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ completionResult = theResult;
+ operationsOnCompletion = theOperations;
+ };
- TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseSingleOperationAppliedFailed) {
- Timestamp ts(Seconds(123), 0);
- const Operations operationsToApply{BSON("ts" << ts)};
- stdx::mutex mutex;
- StatusWith<Timestamp> completionResult = getDetectableErrorStatus();
- bool pauseCalled = false;
- Applier::Operations operationsOnCompletion;
- auto apply = [](OperationContext* txn, const BSONObj& operation) {
- return Status(ErrorCodes::OperationFailed, "");
- };
- auto pause = [&] {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- pauseCalled = true;
- };
- auto callback = [&](const StatusWith<Timestamp>& theResult,
- const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- completionResult = theResult;
- operationsOnCompletion = theOperations;
- };
-
- auto result =
- applyUntilAndPause(&getExecutor(), operationsToApply, apply, ts, pause, callback);
- ASSERT_OK(result.getStatus());
- _applier = std::move(result.getValue().first);
- ASSERT_TRUE(_applier);
-
- const Applier::Operations& operationsDiscarded = result.getValue().second;
- ASSERT_TRUE(operationsDiscarded.empty());
-
- _applier->start();
- _applier->wait();
+ auto result = applyUntilAndPause(executor, operationsToApply, apply, ts, pause, callback);
+ ASSERT_OK(result.getStatus());
+ ASSERT_TRUE(result.getValue().first);
+ Applier& applier = *result.getValue().first;
- stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_FALSE(pauseCalled);
- ASSERT_NOT_OK(completionResult.getStatus());
- ASSERT_FALSE(operationsOnCompletion.empty());
- }
+ const Applier::Operations& operationsDiscarded = result.getValue().second;
+ ASSERT_EQUALS(1U, operationsDiscarded.size());
+ ASSERT_EQUALS(operationsToApply[2], operationsDiscarded[0]);
- void _testApplyUntilAndPauseDiscardOperations(ReplicationExecutor* executor,
- const Timestamp& ts,
- bool expectedPauseCalled) {
-
- Applier::Operations operationsToApply{
- BSON("op" << "a" << "ts" << Timestamp(Seconds(123), 0)),
- BSON("op" << "b" << "ts" << Timestamp(Seconds(456), 0)),
- BSON("op" << "c" << "ts" << Timestamp(Seconds(789), 0)),
- };
- stdx::mutex mutex;
- StatusWith<Timestamp> completionResult =
- ApplyUntilAndPauseTest::getDetectableErrorStatus();
- bool pauseCalled = false;
- Applier::Operations operationsApplied;
- Applier::Operations operationsOnCompletion;
- auto apply = [&](OperationContext* txn, const BSONObj& operation) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- operationsApplied.push_back(operation);
- return Status::OK();
- };
- auto pause = [&] {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- pauseCalled = true;
- };
- auto callback = [&](const StatusWith<Timestamp>& theResult,
- const Operations& theOperations) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- completionResult = theResult;
- operationsOnCompletion = theOperations;
- };
-
- auto result =
- applyUntilAndPause(executor, operationsToApply, apply, ts, pause, callback);
- ASSERT_OK(result.getStatus());
- ASSERT_TRUE(result.getValue().first);
- Applier& applier = *result.getValue().first;
-
- const Applier::Operations& operationsDiscarded = result.getValue().second;
- ASSERT_EQUALS(1U, operationsDiscarded.size());
- ASSERT_EQUALS(operationsToApply[2], operationsDiscarded[0]);
-
- applier.start();
- applier.wait();
+ applier.start();
+ applier.wait();
- stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_EQUALS(2U, operationsApplied.size());
- ASSERT_EQUALS(operationsToApply[0], operationsApplied[0]);
- ASSERT_EQUALS(operationsToApply[1], operationsApplied[1]);
- ASSERT_EQUALS(expectedPauseCalled, pauseCalled);
- ASSERT_OK(completionResult.getStatus());
- ASSERT_TRUE(operationsOnCompletion.empty());
- }
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_EQUALS(2U, operationsApplied.size());
+ ASSERT_EQUALS(operationsToApply[0], operationsApplied[0]);
+ ASSERT_EQUALS(operationsToApply[1], operationsApplied[1]);
+ ASSERT_EQUALS(expectedPauseCalled, pauseCalled);
+ ASSERT_OK(completionResult.getStatus());
+ ASSERT_TRUE(operationsOnCompletion.empty());
+}
- TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseDiscardOperationsTimestampInOperations) {
- _testApplyUntilAndPauseDiscardOperations(&getExecutor(),
- Timestamp(Seconds(456), 0),
- true);
- }
+TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseDiscardOperationsTimestampInOperations) {
+ _testApplyUntilAndPauseDiscardOperations(&getExecutor(), Timestamp(Seconds(456), 0), true);
+}
- TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseDiscardOperationsTimestampNotInOperations) {
- _testApplyUntilAndPauseDiscardOperations(&getExecutor(),
- Timestamp(Seconds(500), 0),
- false);
- }
+TEST_F(ApplyUntilAndPauseTest, ApplyUntilAndPauseDiscardOperationsTimestampNotInOperations) {
+ _testApplyUntilAndPauseDiscardOperations(&getExecutor(), Timestamp(Seconds(500), 0), false);
+}
-} // namespace
+} // namespace
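
The discard tests above pin down the contract of applyUntilAndPause: operations at or before the pause timestamp are scheduled for application, while anything later is returned to the caller in the second member of the result pair. A minimal standalone sketch of that split, using std types in place of the Applier machinery (Op and splitAtPauseTimestamp are illustrative names, not symbols from this tree):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Illustrative stand-in for a BSONObj oplog entry carrying a "ts" field.
    struct Op {
        std::uint64_t ts;
    };

    // Splits ops into (toApply, discarded): entries with ts <= pauseTs are
    // applied, later entries are handed back, mirroring the pair returned
    // by applyUntilAndPause in the tests above.
    std::pair<std::vector<Op>, std::vector<Op>> splitAtPauseTimestamp(
        const std::vector<Op>& ops, std::uint64_t pauseTs) {
        std::vector<Op> toApply;
        std::vector<Op> discarded;
        for (const Op& op : ops) {
            (op.ts <= pauseTs ? toApply : discarded).push_back(op);
        }
        return {std::move(toApply), std::move(discarded)};
    }

With the three operations at seconds 123/456/789 and a pause timestamp of 456 (or 500), this yields two operations to apply and one discarded, matching the assertions in _testApplyUntilAndPauseDiscardOperations; whether pause() then fires depends on the pause timestamp actually appearing among the applied operations.
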
diff --git a/src/mongo/db/repl/base_cloner.h b/src/mongo/db/repl/base_cloner.h
index 8d6b8be8928..c6de09f8d31 100644
--- a/src/mongo/db/repl/base_cloner.h
+++ b/src/mongo/db/repl/base_cloner.h
@@ -34,51 +34,49 @@
namespace mongo {
namespace repl {
+/**
+ * Used by cloner test fixture to centralize life cycle testing.
+ *
+ * Life cycle interface for collection and database cloners.
+ */
+class BaseCloner {
+public:
/**
- * Used by cloner test fixture to centralize life cycle testing.
- *
- * Life cycle interface for collection and database cloners.
+ * Callback function to report final status of cloning.
*/
- class BaseCloner {
- public:
-
- /**
- * Callback function to report final status of cloning.
- */
- using CallbackFn = stdx::function<void (const Status&)>;
+ using CallbackFn = stdx::function<void(const Status&)>;
- virtual ~BaseCloner() { }
+ virtual ~BaseCloner() {}
- /**
- * Returns diagnostic information.
- */
- virtual std::string getDiagnosticString() const = 0;
-
- /**
- * Returns true if the cloner has been started (but has not completed).
- */
- virtual bool isActive() const = 0;
+ /**
+ * Returns diagnostic information.
+ */
+ virtual std::string getDiagnosticString() const = 0;
- /**
- * Starts cloning by scheduling initial command to be run by the executor.
- */
- virtual Status start() = 0;
+ /**
+ * Returns true if the cloner has been started (but has not completed).
+ */
+ virtual bool isActive() const = 0;
- /**
- * Cancels current remote command request.
- * Returns immediately if cloner is not active.
- *
- * Callback function may be invoked with an ErrorCodes::CallbackCanceled status.
- */
- virtual void cancel() = 0;
+ /**
+ * Starts cloning by scheduling initial command to be run by the executor.
+ */
+ virtual Status start() = 0;
- /**
- * Waits for active remote commands and database worker to complete.
- * Returns immediately if cloner is not active.
- */
- virtual void wait() = 0;
+ /**
+ * Cancels current remote command request.
+ * Returns immediately if cloner is not active.
+ *
+ * Callback function may be invoked with an ErrorCodes::CallbackCanceled status.
+ */
+ virtual void cancel() = 0;
- };
+ /**
+ * Waits for active remote commands and database worker to complete.
+ * Returns immediately if cloner is not active.
+ */
+ virtual void wait() = 0;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
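
The interface above encodes a strict life cycle: start() arms the cloner and fails if it is already active, cancel() and wait() return immediately unless something is in flight, and the completion callback reports the final status (possibly CallbackCanceled). A minimal in-memory sketch of that contract, with std::function standing in for stdx::function and an int standing in for Status (NoopCloner is an illustrative name, not a class in this tree):

    #include <functional>
    #include <utility>

    using StatusCode = int;  // stand-in: 0 == OK
    using CallbackFn = std::function<void(StatusCode)>;

    class NoopCloner {
    public:
        explicit NoopCloner(CallbackFn onCompletion)
            : _onCompletion(std::move(onCompletion)) {}

        bool isActive() const {
            return _active;
        }

        // Fails when already started, as the StartWhenActive case in
        // testLifeCycle expects of real cloners.
        StatusCode start() {
            if (_active)
                return 1;  // stand-in for an illegal-operation error
            _active = true;
            return 0;
        }

        // Returns immediately if not active; otherwise completes with a
        // stand-in for ErrorCodes::CallbackCanceled.
        void cancel() {
            if (!_active)
                return;
            _active = false;
            _onCompletion(2);
        }

        void wait() {
            // Nothing outstanding in this sketch; a real cloner joins
            // in-flight remote commands and database work here.
        }

    private:
        bool _active = false;
        CallbackFn _onCompletion;
    };
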
diff --git a/src/mongo/db/repl/base_cloner_test_fixture.cpp b/src/mongo/db/repl/base_cloner_test_fixture.cpp
index a8df9deb6ba..2f6ca23ab63 100644
--- a/src/mongo/db/repl/base_cloner_test_fixture.cpp
+++ b/src/mongo/db/repl/base_cloner_test_fixture.cpp
@@ -38,224 +38,215 @@
namespace mongo {
namespace repl {
- const HostAndPort BaseClonerTest::target("localhost", -1);
- const NamespaceString BaseClonerTest::nss("db.coll");
- const BSONObj BaseClonerTest::idIndexSpec =
- BSON("v" << 1 << "key" << BSON("_id" << 1) << "name" << "_id_" << "ns" << nss.ns());
-
- // static
- BSONObj BaseClonerTest::createCursorResponse(CursorId cursorId,
- const std::string& ns,
- const BSONArray& docs,
- const char* batchFieldName) {
- return BSON("cursor" << BSON("id" << cursorId <<
- "ns" << ns <<
- batchFieldName << docs) <<
- "ok" << 1);
- }
-
- // static
- BSONObj BaseClonerTest::createCursorResponse(CursorId cursorId,
- const BSONArray& docs,
- const char* batchFieldName) {
- return createCursorResponse(cursorId, nss.toString(), docs, batchFieldName);
- }
-
- // static
- BSONObj BaseClonerTest::createCursorResponse(CursorId cursorId,
- const BSONArray& docs) {
- return createCursorResponse(cursorId, docs, "firstBatch");
- }
-
- // static
- BSONObj BaseClonerTest::createListCollectionsResponse(CursorId cursorId,
- const BSONArray& colls,
- const char* fieldName) {
- return createCursorResponse(cursorId, "test.$cmd.listCollections.coll", colls, fieldName);
- }
-
- // static
- BSONObj BaseClonerTest::createListCollectionsResponse(CursorId cursorId,
- const BSONArray& colls) {
- return createListCollectionsResponse(cursorId, colls, "firstBatch");
- }
-
- // static
- BSONObj BaseClonerTest::createListIndexesResponse(CursorId cursorId,
- const BSONArray& specs,
- const char* batchFieldName) {
- return createCursorResponse(cursorId, "test.$cmd.listIndexes.coll", specs, batchFieldName);
- }
-
- // static
- BSONObj BaseClonerTest::createListIndexesResponse(CursorId cursorId,
- const BSONArray& specs) {
- return createListIndexesResponse(cursorId, specs, "firstBatch");
- }
-
- BaseClonerTest::BaseClonerTest()
- : _mutex(),
- _setStatusCondition(),
- _status(getDetectableErrorStatus()) { }
-
- void BaseClonerTest::setUp() {
- ReplicationExecutorTest::setUp();
- clear();
- launchExecutorThread();
- storageInterface.reset(new ClonerStorageInterfaceMock());
- }
-
- void BaseClonerTest::tearDown() {
- ReplicationExecutorTest::tearDown();
- storageInterface.reset();
- }
-
- void BaseClonerTest::clear() {
- _status = getDetectableErrorStatus();
- }
-
- void BaseClonerTest::setStatus(const Status& status) {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _status = status;
- _setStatusCondition.notify_all();
- }
-
- const Status& BaseClonerTest::getStatus() const {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- return _status;
- }
-
- void BaseClonerTest::scheduleNetworkResponse(NetworkOperationIterator noi,
- const BSONObj& obj) {
- auto net = getNet();
- Milliseconds millis(0);
- RemoteCommandResponse response(obj, millis);
- ReplicationExecutor::ResponseStatus responseStatus(response);
- net->scheduleResponse(noi, net->now(), responseStatus);
- }
-
- void BaseClonerTest::scheduleNetworkResponse(NetworkOperationIterator noi,
- ErrorCodes::Error code,
- const std::string& reason) {
- auto net = getNet();
- ReplicationExecutor::ResponseStatus responseStatus(code, reason);
- net->scheduleResponse(noi, net->now(), responseStatus);
- }
-
- void BaseClonerTest::scheduleNetworkResponse(const BSONObj& obj) {
- ASSERT_TRUE(getNet()->hasReadyRequests());
- scheduleNetworkResponse(getNet()->getNextReadyRequest(), obj);
- }
-
- void BaseClonerTest::scheduleNetworkResponse(ErrorCodes::Error code,
- const std::string& reason) {
- ASSERT_TRUE(getNet()->hasReadyRequests());
- scheduleNetworkResponse(getNet()->getNextReadyRequest(), code, reason);
- }
-
- void BaseClonerTest::processNetworkResponse(const BSONObj& obj) {
- scheduleNetworkResponse(obj);
- finishProcessingNetworkResponse();
- }
-
- void BaseClonerTest::processNetworkResponse(ErrorCodes::Error code,
- const std::string& reason) {
- scheduleNetworkResponse(code, reason);
- finishProcessingNetworkResponse();
- }
-
- void BaseClonerTest::finishProcessingNetworkResponse() {
- clear();
- getNet()->runReadyNetworkOperations();
- }
-
- void BaseClonerTest::testLifeCycle() {
- // GetDiagnosticString
- ASSERT_FALSE(getCloner()->getDiagnosticString().empty());
-
- // IsActiveAfterStart
- ASSERT_FALSE(getCloner()->isActive());
- ASSERT_OK(getCloner()->start());
- ASSERT_TRUE(getCloner()->isActive());
- tearDown();
-
- // StartWhenActive
- setUp();
- ASSERT_OK(getCloner()->start());
- ASSERT_TRUE(getCloner()->isActive());
- ASSERT_NOT_OK(getCloner()->start());
- ASSERT_TRUE(getCloner()->isActive());
- tearDown();
-
- // CancelWithoutStart
- setUp();
- ASSERT_FALSE(getCloner()->isActive());
- getCloner()->cancel();
- ASSERT_FALSE(getCloner()->isActive());
- tearDown();
-
- // WaitWithoutStart
- setUp();
- ASSERT_FALSE(getCloner()->isActive());
- getCloner()->wait();
- ASSERT_FALSE(getCloner()->isActive());
- tearDown();
-
- // ShutdownBeforeStart
- setUp();
- getExecutor().shutdown();
- ASSERT_NOT_OK(getCloner()->start());
- ASSERT_FALSE(getCloner()->isActive());
- tearDown();
-
- // StartAndCancel
- setUp();
- ASSERT_OK(getCloner()->start());
- scheduleNetworkResponse(BSON("ok" << 1));
- getCloner()->cancel();
- finishProcessingNetworkResponse();
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
- ASSERT_FALSE(getCloner()->isActive());
- tearDown();
-
- // StartButShutdown
- setUp();
- ASSERT_OK(getCloner()->start());
- scheduleNetworkResponse(BSON("ok" << 1));
- getExecutor().shutdown();
- // Network interface should not deliver mock response to callback.
- finishProcessingNetworkResponse();
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
- ASSERT_FALSE(getCloner()->isActive());
- }
-
- Status ClonerStorageInterfaceMock::beginCollection(OperationContext* txn,
- const NamespaceString& nss,
- const CollectionOptions& options,
- const std::vector<BSONObj>& specs) {
- return beginCollectionFn ? beginCollectionFn(txn, nss, options, specs) : Status::OK();
- }
-
- Status ClonerStorageInterfaceMock::insertDocuments(OperationContext* txn,
- const NamespaceString& nss,
- const std::vector<BSONObj>& docs) {
- return insertDocumentsFn ? insertDocumentsFn(txn, nss, docs) : Status::OK();
- }
-
- Status ClonerStorageInterfaceMock::commitCollection(OperationContext* txn,
- const NamespaceString& nss) {
- return Status::OK();
- }
-
- Status ClonerStorageInterfaceMock::insertMissingDoc(OperationContext* txn,
- const NamespaceString& nss,
- const BSONObj& doc) {
- return Status::OK();
- }
-
- Status ClonerStorageInterfaceMock::dropUserDatabases(OperationContext* txn) {
- return dropUserDatabasesFn ? dropUserDatabasesFn(txn) : Status::OK();
- }
-
-} // namespace repl
-} // namespace mongo
+const HostAndPort BaseClonerTest::target("localhost", -1);
+const NamespaceString BaseClonerTest::nss("db.coll");
+const BSONObj BaseClonerTest::idIndexSpec = BSON("v" << 1 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns" << nss.ns());
+
+// static
+BSONObj BaseClonerTest::createCursorResponse(CursorId cursorId,
+ const std::string& ns,
+ const BSONArray& docs,
+ const char* batchFieldName) {
+ return BSON("cursor" << BSON("id" << cursorId << "ns" << ns << batchFieldName << docs) << "ok"
+ << 1);
+}
+
+// static
+BSONObj BaseClonerTest::createCursorResponse(CursorId cursorId,
+ const BSONArray& docs,
+ const char* batchFieldName) {
+ return createCursorResponse(cursorId, nss.toString(), docs, batchFieldName);
+}
+
+// static
+BSONObj BaseClonerTest::createCursorResponse(CursorId cursorId, const BSONArray& docs) {
+ return createCursorResponse(cursorId, docs, "firstBatch");
+}
+
+// static
+BSONObj BaseClonerTest::createListCollectionsResponse(CursorId cursorId,
+ const BSONArray& colls,
+ const char* fieldName) {
+ return createCursorResponse(cursorId, "test.$cmd.listCollections.coll", colls, fieldName);
+}
+
+// static
+BSONObj BaseClonerTest::createListCollectionsResponse(CursorId cursorId, const BSONArray& colls) {
+ return createListCollectionsResponse(cursorId, colls, "firstBatch");
+}
+
+// static
+BSONObj BaseClonerTest::createListIndexesResponse(CursorId cursorId,
+ const BSONArray& specs,
+ const char* batchFieldName) {
+ return createCursorResponse(cursorId, "test.$cmd.listIndexes.coll", specs, batchFieldName);
+}
+
+// static
+BSONObj BaseClonerTest::createListIndexesResponse(CursorId cursorId, const BSONArray& specs) {
+ return createListIndexesResponse(cursorId, specs, "firstBatch");
+}
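
All of the static helpers above produce the same wire shape: a "cursor" subdocument carrying the cursor id, a namespace, and the batch array under the requested field name, plus "ok: 1". For example, createListIndexesResponse(1LL, specs) builds a document equivalent to the following (JSON rendering for illustration; specs stands for the supplied index spec array):

    {
        "cursor": {
            "id": 1,
            "ns": "test.$cmd.listIndexes.coll",
            "firstBatch": [ /* specs */ ]
        },
        "ok": 1
    }
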
+
+BaseClonerTest::BaseClonerTest()
+ : _mutex(), _setStatusCondition(), _status(getDetectableErrorStatus()) {}
+
+void BaseClonerTest::setUp() {
+ ReplicationExecutorTest::setUp();
+ clear();
+ launchExecutorThread();
+ storageInterface.reset(new ClonerStorageInterfaceMock());
+}
+
+void BaseClonerTest::tearDown() {
+ ReplicationExecutorTest::tearDown();
+ storageInterface.reset();
+}
+
+void BaseClonerTest::clear() {
+ _status = getDetectableErrorStatus();
+}
+
+void BaseClonerTest::setStatus(const Status& status) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _status = status;
+ _setStatusCondition.notify_all();
+}
+
+const Status& BaseClonerTest::getStatus() const {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ return _status;
+}
+
+void BaseClonerTest::scheduleNetworkResponse(NetworkOperationIterator noi, const BSONObj& obj) {
+ auto net = getNet();
+ Milliseconds millis(0);
+ RemoteCommandResponse response(obj, millis);
+ ReplicationExecutor::ResponseStatus responseStatus(response);
+ net->scheduleResponse(noi, net->now(), responseStatus);
+}
+
+void BaseClonerTest::scheduleNetworkResponse(NetworkOperationIterator noi,
+ ErrorCodes::Error code,
+ const std::string& reason) {
+ auto net = getNet();
+ ReplicationExecutor::ResponseStatus responseStatus(code, reason);
+ net->scheduleResponse(noi, net->now(), responseStatus);
+}
+
+void BaseClonerTest::scheduleNetworkResponse(const BSONObj& obj) {
+ ASSERT_TRUE(getNet()->hasReadyRequests());
+ scheduleNetworkResponse(getNet()->getNextReadyRequest(), obj);
+}
+
+void BaseClonerTest::scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
+ ASSERT_TRUE(getNet()->hasReadyRequests());
+ scheduleNetworkResponse(getNet()->getNextReadyRequest(), code, reason);
+}
+
+void BaseClonerTest::processNetworkResponse(const BSONObj& obj) {
+ scheduleNetworkResponse(obj);
+ finishProcessingNetworkResponse();
+}
+
+void BaseClonerTest::processNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
+ scheduleNetworkResponse(code, reason);
+ finishProcessingNetworkResponse();
+}
+
+void BaseClonerTest::finishProcessingNetworkResponse() {
+ clear();
+ getNet()->runReadyNetworkOperations();
+}
+
+void BaseClonerTest::testLifeCycle() {
+ // GetDiagnosticString
+ ASSERT_FALSE(getCloner()->getDiagnosticString().empty());
+
+ // IsActiveAfterStart
+ ASSERT_FALSE(getCloner()->isActive());
+ ASSERT_OK(getCloner()->start());
+ ASSERT_TRUE(getCloner()->isActive());
+ tearDown();
+
+ // StartWhenActive
+ setUp();
+ ASSERT_OK(getCloner()->start());
+ ASSERT_TRUE(getCloner()->isActive());
+ ASSERT_NOT_OK(getCloner()->start());
+ ASSERT_TRUE(getCloner()->isActive());
+ tearDown();
+
+ // CancelWithoutStart
+ setUp();
+ ASSERT_FALSE(getCloner()->isActive());
+ getCloner()->cancel();
+ ASSERT_FALSE(getCloner()->isActive());
+ tearDown();
+
+ // WaitWithoutStart
+ setUp();
+ ASSERT_FALSE(getCloner()->isActive());
+ getCloner()->wait();
+ ASSERT_FALSE(getCloner()->isActive());
+ tearDown();
+
+ // ShutdownBeforeStart
+ setUp();
+ getExecutor().shutdown();
+ ASSERT_NOT_OK(getCloner()->start());
+ ASSERT_FALSE(getCloner()->isActive());
+ tearDown();
+
+ // StartAndCancel
+ setUp();
+ ASSERT_OK(getCloner()->start());
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getCloner()->cancel();
+ finishProcessingNetworkResponse();
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
+ ASSERT_FALSE(getCloner()->isActive());
+ tearDown();
+
+ // StartButShutdown
+ setUp();
+ ASSERT_OK(getCloner()->start());
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getExecutor().shutdown();
+ // Network interface should not deliver mock response to callback.
+ finishProcessingNetworkResponse();
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
+ ASSERT_FALSE(getCloner()->isActive());
+}
+
+Status ClonerStorageInterfaceMock::beginCollection(OperationContext* txn,
+ const NamespaceString& nss,
+ const CollectionOptions& options,
+ const std::vector<BSONObj>& specs) {
+ return beginCollectionFn ? beginCollectionFn(txn, nss, options, specs) : Status::OK();
+}
+
+Status ClonerStorageInterfaceMock::insertDocuments(OperationContext* txn,
+ const NamespaceString& nss,
+ const std::vector<BSONObj>& docs) {
+ return insertDocumentsFn ? insertDocumentsFn(txn, nss, docs) : Status::OK();
+}
+
+Status ClonerStorageInterfaceMock::commitCollection(OperationContext* txn,
+ const NamespaceString& nss) {
+ return Status::OK();
+}
+
+Status ClonerStorageInterfaceMock::insertMissingDoc(OperationContext* txn,
+ const NamespaceString& nss,
+ const BSONObj& doc) {
+ return Status::OK();
+}
+
+Status ClonerStorageInterfaceMock::dropUserDatabases(OperationContext* txn) {
+ return dropUserDatabasesFn ? dropUserDatabasesFn(txn) : Status::OK();
+}
+
+} // namespace repl
+} // namespace mongo
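
Each ClonerStorageInterfaceMock method above follows the same optional-hook pattern: delegate to the stored function if a test installed one, otherwise return Status::OK(). A test can therefore make exactly one storage call fail while everything else keeps succeeding; a sketch of typical fixture usage inside a test body (the lambda body is illustrative, not code from this tree):

    // Make document inserts fail while beginCollection and the other
    // storage calls keep taking the default Status::OK() path.
    storageInterface->insertDocumentsFn = [](OperationContext* txn,
                                             const NamespaceString& nss,
                                             const std::vector<BSONObj>& docs) {
        return Status(ErrorCodes::OperationFailed, "simulated storage failure");
    };
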
diff --git a/src/mongo/db/repl/base_cloner_test_fixture.h b/src/mongo/db/repl/base_cloner_test_fixture.h
index a7e5f68e448..cab5c517916 100644
--- a/src/mongo/db/repl/base_cloner_test_fixture.h
+++ b/src/mongo/db/repl/base_cloner_test_fixture.h
@@ -44,133 +44,126 @@
namespace mongo {
- struct CollectionOptions;
- class OperationContext;
+struct CollectionOptions;
+class OperationContext;
namespace repl {
- class BaseCloner;
- class ClonerStorageInterfaceMock;
-
- class BaseClonerTest : public ReplicationExecutorTest {
- public:
- typedef executor::NetworkInterfaceMock::NetworkOperationIterator NetworkOperationIterator;
-
- /**
- * Creates a cursor response with given array of documents.
- */
- static BSONObj createCursorResponse(CursorId cursorId,
- const std::string& ns,
- const BSONArray& docs,
- const char* batchFieldName);
-
- static BSONObj createCursorResponse(CursorId cursorId,
- const BSONArray& docs,
- const char* batchFieldName);
-
- static BSONObj createCursorResponse(CursorId cursorId,
- const BSONArray& docs);
-
- /**
- * Creates a listCollections response with given array of collection infos.
- */
- static BSONObj createListCollectionsResponse(CursorId cursorId,
- const BSONArray& colls,
- const char* batchFieldName);
-
- static BSONObj createListCollectionsResponse(CursorId cursorId, const BSONArray& colls);
-
- /**
- * Creates a listIndexes response with given array of index specs.
- */
- static BSONObj createListIndexesResponse(CursorId cursorId,
- const BSONArray& specs,
- const char* batchFieldName);
+class BaseCloner;
+class ClonerStorageInterfaceMock;
+
+class BaseClonerTest : public ReplicationExecutorTest {
+public:
+ typedef executor::NetworkInterfaceMock::NetworkOperationIterator NetworkOperationIterator;
+
+ /**
+ * Creates a cursor response with given array of documents.
+ */
+ static BSONObj createCursorResponse(CursorId cursorId,
+ const std::string& ns,
+ const BSONArray& docs,
+ const char* batchFieldName);
- static BSONObj createListIndexesResponse(CursorId cursorId, const BSONArray& specs);
+ static BSONObj createCursorResponse(CursorId cursorId,
+ const BSONArray& docs,
+ const char* batchFieldName);
+
+ static BSONObj createCursorResponse(CursorId cursorId, const BSONArray& docs);
+
+ /**
+ * Creates a listCollections response with given array of collection infos.
+ */
+ static BSONObj createListCollectionsResponse(CursorId cursorId,
+ const BSONArray& colls,
+ const char* batchFieldName);
- static const HostAndPort target;
- static const NamespaceString nss;
- static const BSONObj idIndexSpec;
+ static BSONObj createListCollectionsResponse(CursorId cursorId, const BSONArray& colls);
- BaseClonerTest();
+ /**
+ * Creates a listIndexes response with given array of index specs.
+ */
+ static BSONObj createListIndexesResponse(CursorId cursorId,
+ const BSONArray& specs,
+ const char* batchFieldName);
- virtual void clear();
+ static BSONObj createListIndexesResponse(CursorId cursorId, const BSONArray& specs);
- void setStatus(const Status& status);
- const Status& getStatus() const;
+ static const HostAndPort target;
+ static const NamespaceString nss;
+ static const BSONObj idIndexSpec;
- void scheduleNetworkResponse(NetworkOperationIterator noi,
- const BSONObj& obj);
- void scheduleNetworkResponse(NetworkOperationIterator noi,
- ErrorCodes::Error code, const std::string& reason);
- void scheduleNetworkResponse(const BSONObj& obj);
- void scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason);
- void processNetworkResponse(const BSONObj& obj);
- void processNetworkResponse(ErrorCodes::Error code, const std::string& reason);
- void finishProcessingNetworkResponse();
+ BaseClonerTest();
- /**
- * Tests life cycle functionality.
- */
- virtual BaseCloner* getCloner() const = 0;
- void testLifeCycle();
+ virtual void clear();
- protected:
+ void setStatus(const Status& status);
+ const Status& getStatus() const;
- void setUp() override;
- void tearDown() override;
+ void scheduleNetworkResponse(NetworkOperationIterator noi, const BSONObj& obj);
+ void scheduleNetworkResponse(NetworkOperationIterator noi,
+ ErrorCodes::Error code,
+ const std::string& reason);
+ void scheduleNetworkResponse(const BSONObj& obj);
+ void scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason);
+ void processNetworkResponse(const BSONObj& obj);
+ void processNetworkResponse(ErrorCodes::Error code, const std::string& reason);
+ void finishProcessingNetworkResponse();
- std::unique_ptr<ClonerStorageInterfaceMock> storageInterface;
+ /**
+ * Tests life cycle functionality.
+ */
+ virtual BaseCloner* getCloner() const = 0;
+ void testLifeCycle();
- private:
+protected:
+ void setUp() override;
+ void tearDown() override;
- // Protects member data of this base cloner fixture.
- mutable stdx::mutex _mutex;
+ std::unique_ptr<ClonerStorageInterfaceMock> storageInterface;
- stdx::condition_variable _setStatusCondition;
+private:
+ // Protects member data of this base cloner fixture.
+ mutable stdx::mutex _mutex;
- Status _status;
+ stdx::condition_variable _setStatusCondition;
- };
+ Status _status;
+};
- class ClonerStorageInterfaceMock : public CollectionCloner::StorageInterface {
- public:
- using InsertCollectionFn = stdx::function<Status (OperationContext*,
- const NamespaceString&,
- const std::vector<BSONObj>&)>;
- using BeginCollectionFn = stdx::function<Status (OperationContext*,
- const NamespaceString&,
- const CollectionOptions&,
- const std::vector<BSONObj>&)>;
- using InsertMissingDocFn = stdx::function<Status (OperationContext*,
- const NamespaceString&,
- const BSONObj&)>;
- using DropUserDatabases = stdx::function<Status (OperationContext*)>;
+class ClonerStorageInterfaceMock : public CollectionCloner::StorageInterface {
+public:
+ using InsertCollectionFn = stdx::function<Status(
+ OperationContext*, const NamespaceString&, const std::vector<BSONObj>&)>;
+ using BeginCollectionFn = stdx::function<Status(OperationContext*,
+ const NamespaceString&,
+ const CollectionOptions&,
+ const std::vector<BSONObj>&)>;
+ using InsertMissingDocFn =
+ stdx::function<Status(OperationContext*, const NamespaceString&, const BSONObj&)>;
+ using DropUserDatabases = stdx::function<Status(OperationContext*)>;
- Status beginCollection(OperationContext* txn,
- const NamespaceString& nss,
- const CollectionOptions& options,
- const std::vector<BSONObj>& specs) override;
+ Status beginCollection(OperationContext* txn,
+ const NamespaceString& nss,
+ const CollectionOptions& options,
+ const std::vector<BSONObj>& specs) override;
- Status insertDocuments(OperationContext* txn,
- const NamespaceString& nss,
- const std::vector<BSONObj>& docs) override;
+ Status insertDocuments(OperationContext* txn,
+ const NamespaceString& nss,
+ const std::vector<BSONObj>& docs) override;
- Status commitCollection(OperationContext* txn,
- const NamespaceString& nss) override;
+ Status commitCollection(OperationContext* txn, const NamespaceString& nss) override;
- Status insertMissingDoc(OperationContext* txn,
- const NamespaceString& nss,
- const BSONObj& doc) override;
+ Status insertMissingDoc(OperationContext* txn,
+ const NamespaceString& nss,
+ const BSONObj& doc) override;
- Status dropUserDatabases(OperationContext* txn);
+ Status dropUserDatabases(OperationContext* txn);
- BeginCollectionFn beginCollectionFn;
- InsertCollectionFn insertDocumentsFn;
- InsertMissingDocFn insertMissingDocFn;
- DropUserDatabases dropUserDatabasesFn;
- };
+ BeginCollectionFn beginCollectionFn;
+ InsertCollectionFn insertDocumentsFn;
+ InsertMissingDocFn insertMissingDocFn;
+ DropUserDatabases dropUserDatabasesFn;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 5317ab36305..5c84a724b94 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -55,523 +55,510 @@
namespace mongo {
- using std::string;
+using std::string;
namespace repl {
namespace {
- const char hashFieldName[] = "h";
- int SleepToAllowBatchingMillis = 2;
- const int BatchIsSmallish = 40000; // bytes
-} // namespace
-
- MONGO_FP_DECLARE(rsBgSyncProduce);
-
- BackgroundSync* BackgroundSync::s_instance = 0;
- stdx::mutex BackgroundSync::s_mutex;
-
- //The number of batches read off the network and the time spent reading them
- static TimerStats getmoreReplStats;
- static ServerStatusMetricField<TimerStats> displayBatchesRecieved(
- "repl.network.getmores",
- &getmoreReplStats );
- //The oplog entries read via the oplog reader
- static Counter64 opsReadStats;
- static ServerStatusMetricField<Counter64> displayOpsRead( "repl.network.ops",
- &opsReadStats );
- //The bytes read via the oplog reader
- static Counter64 networkByteStats;
- static ServerStatusMetricField<Counter64> displayBytesRead( "repl.network.bytes",
- &networkByteStats );
-
- //The count of items in the buffer
- static Counter64 bufferCountGauge;
- static ServerStatusMetricField<Counter64> displayBufferCount( "repl.buffer.count",
- &bufferCountGauge );
- //The size (bytes) of items in the buffer
- static Counter64 bufferSizeGauge;
- static ServerStatusMetricField<Counter64> displayBufferSize( "repl.buffer.sizeBytes",
- &bufferSizeGauge );
- //The max size (bytes) of the buffer
- static int bufferMaxSizeGauge = 256*1024*1024;
- static ServerStatusMetricField<int> displayBufferMaxSize( "repl.buffer.maxSizeBytes",
- &bufferMaxSizeGauge );
-
-
- BackgroundSyncInterface::~BackgroundSyncInterface() {}
-
- size_t getSize(const BSONObj& o) {
- // SERVER-9808 Avoid Fortify complaint about implicit signed->unsigned conversion
- return static_cast<size_t>(o.objsize());
+const char hashFieldName[] = "h";
+int SleepToAllowBatchingMillis = 2;
+const int BatchIsSmallish = 40000; // bytes
+} // namespace
+
+MONGO_FP_DECLARE(rsBgSyncProduce);
+
+BackgroundSync* BackgroundSync::s_instance = 0;
+stdx::mutex BackgroundSync::s_mutex;
+
+// The number of batches read off the network and the time spent reading them
+static TimerStats getmoreReplStats;
+static ServerStatusMetricField<TimerStats> displayBatchesRecieved("repl.network.getmores",
+ &getmoreReplStats);
+// The oplog entries read via the oplog reader
+static Counter64 opsReadStats;
+static ServerStatusMetricField<Counter64> displayOpsRead("repl.network.ops", &opsReadStats);
+// The bytes read via the oplog reader
+static Counter64 networkByteStats;
+static ServerStatusMetricField<Counter64> displayBytesRead("repl.network.bytes", &networkByteStats);
+
+// The count of items in the buffer
+static Counter64 bufferCountGauge;
+static ServerStatusMetricField<Counter64> displayBufferCount("repl.buffer.count",
+ &bufferCountGauge);
+// The size (bytes) of items in the buffer
+static Counter64 bufferSizeGauge;
+static ServerStatusMetricField<Counter64> displayBufferSize("repl.buffer.sizeBytes",
+ &bufferSizeGauge);
+// The max size (bytes) of the buffer
+static int bufferMaxSizeGauge = 256 * 1024 * 1024;
+static ServerStatusMetricField<int> displayBufferMaxSize("repl.buffer.maxSizeBytes",
+ &bufferMaxSizeGauge);
+
+
+BackgroundSyncInterface::~BackgroundSyncInterface() {}
+
+size_t getSize(const BSONObj& o) {
+ // SERVER-9808 Avoid Fortify complaint about implicit signed->unsigned conversion
+ return static_cast<size_t>(o.objsize());
+}
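
The buffer gauges declared above stay consistent because every push and pop adjusts both the element count and the byte total, using getSize() for the byte delta, exactly as produce() and consume() do further down. A self-contained, std-only sketch of that accounting around a size-bounded queue (GaugedQueue is an illustrative name; the real BlockingQueue blocks on a full queue rather than rejecting):

    #include <cstddef>
    #include <deque>

    class GaugedQueue {
    public:
        explicit GaugedQueue(std::size_t maxBytes) : _maxBytes(maxBytes) {}

        bool push(std::size_t docBytes) {
            if (_bytes + docBytes > _maxBytes)
                return false;          // the real queue would block here
            _docs.push_back(docBytes);
            ++_count;                  // bufferCountGauge.increment()
            _bytes += docBytes;        // bufferSizeGauge.increment(getSize(o))
            return true;
        }

        void pop() {
            if (_docs.empty())
                return;
            std::size_t docBytes = _docs.front();
            _docs.pop_front();
            --_count;                  // bufferCountGauge.decrement(1)
            _bytes -= docBytes;        // bufferSizeGauge.decrement(getSize(op))
        }

    private:
        std::deque<std::size_t> _docs;
        std::size_t _count = 0;
        std::size_t _bytes = 0;
        std::size_t _maxBytes;
    };
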
+
+BackgroundSync::BackgroundSync()
+ : _buffer(bufferMaxSizeGauge, &getSize),
+ _lastOpTimeFetched(Timestamp(std::numeric_limits<int>::max(), 0),
+ std::numeric_limits<long long>::max()),
+ _lastAppliedHash(0),
+ _lastFetchedHash(0),
+ _pause(true),
+ _appliedBuffer(true),
+ _replCoord(getGlobalReplicationCoordinator()),
+ _initialSyncRequestedFlag(false),
+ _indexPrefetchConfig(PREFETCH_ALL) {}
+
+BackgroundSync* BackgroundSync::get() {
+ stdx::unique_lock<stdx::mutex> lock(s_mutex);
+ if (s_instance == NULL && !inShutdown()) {
+ s_instance = new BackgroundSync();
}
+ return s_instance;
+}
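
get() above is the classic mutex-guarded lazy singleton, with the twist that it refuses to create a new instance once shutdown has begun and may therefore return NULL. The core shape in a self-contained sketch (Service is an illustrative name; the instance is deliberately never deleted, like s_instance):

    #include <mutex>

    class Service {
    public:
        // Returns the process-wide instance, creating it on first use.
        static Service* get() {
            std::lock_guard<std::mutex> lock(s_mutex);
            if (s_instance == nullptr) {
                s_instance = new Service();  // intentionally leaked
            }
            return s_instance;
        }

    private:
        Service() = default;
        static Service* s_instance;
        static std::mutex s_mutex;
    };

    Service* Service::s_instance = nullptr;
    std::mutex Service::s_mutex;
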
- BackgroundSync::BackgroundSync() : _buffer(bufferMaxSizeGauge, &getSize),
- _lastOpTimeFetched(
- Timestamp(std::numeric_limits<int>::max(), 0),
- std::numeric_limits<long long>::max()),
- _lastAppliedHash(0),
- _lastFetchedHash(0),
- _pause(true),
- _appliedBuffer(true),
- _replCoord(getGlobalReplicationCoordinator()),
- _initialSyncRequestedFlag(false),
- _indexPrefetchConfig(PREFETCH_ALL) {
- }
+void BackgroundSync::shutdown() {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
- BackgroundSync* BackgroundSync::get() {
- stdx::unique_lock<stdx::mutex> lock(s_mutex);
- if (s_instance == NULL && !inShutdown()) {
- s_instance = new BackgroundSync();
- }
- return s_instance;
- }
+ // Clear the buffer in case the producerThread is waiting in push() due to a full queue.
+ invariant(inShutdown());
+ _buffer.clear();
+ _pause = true;
- void BackgroundSync::shutdown() {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
+ // Wake up producerThread so it notices that we're in shutdown
+ _appliedBufferCondition.notify_all();
+ _pausedCondition.notify_all();
+}
- // Clear the buffer in case the producerThread is waiting in push() due to a full queue.
- invariant(inShutdown());
- _buffer.clear();
- _pause = true;
+void BackgroundSync::notify(OperationContext* txn) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
- // Wake up producerThread so it notices that we're in shutdown
+ // If all ops in the buffer have been applied, unblock waitForRepl (if it's waiting)
+ if (_buffer.empty()) {
+ _appliedBuffer = true;
_appliedBufferCondition.notify_all();
- _pausedCondition.notify_all();
}
+}
- void BackgroundSync::notify(OperationContext* txn) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
+void BackgroundSync::producerThread() {
+ Client::initThread("rsBackgroundSync");
+ AuthorizationSession::get(cc())->grantInternalAuthorization();
- // If all ops in the buffer have been applied, unblock waitForRepl (if it's waiting)
- if (_buffer.empty()) {
- _appliedBuffer = true;
- _appliedBufferCondition.notify_all();
+ while (!inShutdown()) {
+ try {
+ _producerThread();
+ } catch (const DBException& e) {
+ std::string msg(str::stream() << "sync producer problem: " << e.toString());
+ error() << msg;
+ _replCoord->setMyHeartbeatMessage(msg);
+ } catch (const std::exception& e2) {
+ severe() << "sync producer exception: " << e2.what();
+ fassertFailed(28546);
}
}
+}
- void BackgroundSync::producerThread() {
- Client::initThread("rsBackgroundSync");
- AuthorizationSession::get(cc())->grantInternalAuthorization();
-
- while (!inShutdown()) {
- try {
- _producerThread();
- }
- catch (const DBException& e) {
- std::string msg(str::stream() << "sync producer problem: " << e.toString());
- error() << msg;
- _replCoord->setMyHeartbeatMessage(msg);
- }
- catch (const std::exception& e2) {
- severe() << "sync producer exception: " << e2.what();
- fassertFailed(28546);
- }
+void BackgroundSync::_producerThread() {
+ const MemberState state = _replCoord->getMemberState();
+ // we want to pause when the state changes to primary
+ if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
+ if (!_pause) {
+ stop();
}
+ sleepsecs(1);
+ return;
}
- void BackgroundSync::_producerThread() {
- const MemberState state = _replCoord->getMemberState();
- // we want to pause when the state changes to primary
- if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
- if (!_pause) {
- stop();
- }
- sleepsecs(1);
- return;
- }
-
- // TODO(spencer): Use a condition variable to await loading a config.
- if (state.startup()) {
- // Wait for a config to be loaded
- sleepsecs(1);
- return;
- }
-
- // We need to wait until initial sync has started.
- if (_replCoord->getMyLastOptime().isNull()) {
- sleepsecs(1);
- return;
- }
- // we want to unpause when we're no longer primary
- // start() also loads _lastOpTimeFetched, which we know is set from the "if"
- OperationContextImpl txn;
- if (_pause) {
- start(&txn);
- }
+ // TODO(spencer): Use a condition variable to await loading a config.
+ if (state.startup()) {
+ // Wait for a config to be loaded
+ sleepsecs(1);
+ return;
+ }
- produce(&txn);
+ // We need to wait until initial sync has started.
+ if (_replCoord->getMyLastOptime().isNull()) {
+ sleepsecs(1);
+ return;
+ }
+ // we want to unpause when we're no longer primary
+ // start() also loads _lastOpTimeFetched, which we know is set from the "if"
+ OperationContextImpl txn;
+ if (_pause) {
+ start(&txn);
}
- void BackgroundSync::produce(OperationContext* txn) {
- // this oplog reader does not do a handshake because we don't want the server it's syncing
- // from to track how far it has synced
- {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- if (_lastOpTimeFetched.isNull()) {
- // then we're initial syncing and we're still waiting for this to be set
- lock.unlock();
- sleepsecs(1);
- // if there is no one to sync from
- return;
- }
+ produce(&txn);
+}
- // Wait until we've applied the ops we have before we choose a sync target
- while (!_appliedBuffer && !inShutdownStrict()) {
- _appliedBufferCondition.wait(lock);
- }
- if (inShutdownStrict()) {
- return;
- }
+void BackgroundSync::produce(OperationContext* txn) {
+ // this oplog reader does not do a handshake because we don't want the server it's syncing
+ // from to track how far it has synced
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ if (_lastOpTimeFetched.isNull()) {
+ // then we're initial syncing and we're still waiting for this to be set
+ lock.unlock();
+ sleepsecs(1);
+ // if there is no one to sync from
+ return;
}
- while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
- sleepmillis(0);
+ // Wait until we've applied the ops we have before we choose a sync target
+ while (!_appliedBuffer && !inShutdownStrict()) {
+ _appliedBufferCondition.wait(lock);
}
-
-
- // find a target to sync from the last optime fetched
- OpTime lastOpTimeFetched;
- {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- lastOpTimeFetched = _lastOpTimeFetched;
- _syncSourceHost = HostAndPort();
+ if (inShutdownStrict()) {
+ return;
}
- _syncSourceReader.resetConnection();
- _syncSourceReader.connectToSyncSource(txn, lastOpTimeFetched, _replCoord);
+ }
- {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- // no server found
- if (_syncSourceReader.getHost().empty()) {
- lock.unlock();
- sleepsecs(1);
- // if there is no one to sync from
- return;
- }
- lastOpTimeFetched = _lastOpTimeFetched;
- _syncSourceHost = _syncSourceReader.getHost();
- _replCoord->signalUpstreamUpdater();
- }
+ while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
+ sleepmillis(0);
+ }
- _syncSourceReader.tailingQueryGTE(rsOplogName.c_str(), lastOpTimeFetched.getTimestamp());
- // if target cut connections between connecting and querying (for
- // example, because it stepped down) we might not have a cursor
- if (!_syncSourceReader.haveCursor()) {
- return;
- }
+ // find a target to sync from the last optime fetched
+ OpTime lastOpTimeFetched;
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ lastOpTimeFetched = _lastOpTimeFetched;
+ _syncSourceHost = HostAndPort();
+ }
+ _syncSourceReader.resetConnection();
+ _syncSourceReader.connectToSyncSource(txn, lastOpTimeFetched, _replCoord);
- if (_rollbackIfNeeded(txn, _syncSourceReader)) {
- stop();
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ // no server found
+ if (_syncSourceReader.getHost().empty()) {
+ lock.unlock();
+ sleepsecs(1);
+ // if there is no one to sync from
return;
}
+ lastOpTimeFetched = _lastOpTimeFetched;
+ _syncSourceHost = _syncSourceReader.getHost();
+ _replCoord->signalUpstreamUpdater();
+ }
- while (!inShutdown()) {
- if (!_syncSourceReader.moreInCurrentBatch()) {
- // Check some things periodically
- // (whenever we run out of items in the
- // current cursor batch)
-
- int bs = _syncSourceReader.currentBatchMessageSize();
- if( bs > 0 && bs < BatchIsSmallish ) {
- // on a very low latency network, if we don't wait a little, we'll be
- // getting ops to write almost one at a time. this is both expensive
- // for the upstream server and potentially defeats our parallel
- // application of batches on the secondary.
- //
- // the inference here is basically if the batch is really small, we are
- // "caught up".
- //
- sleepmillis(SleepToAllowBatchingMillis);
- }
-
- // If we are transitioning to primary state, we need to leave
- // this loop in order to go into bgsync-pause mode.
- if (_replCoord->isWaitingForApplierToDrain() ||
- _replCoord->getMemberState().primary()) {
- return;
- }
-
- // re-evaluate quality of sync target
- if (shouldChangeSyncSource()) {
- return;
- }
+ _syncSourceReader.tailingQueryGTE(rsOplogName.c_str(), lastOpTimeFetched.getTimestamp());
- {
- //record time for each getmore
- TimerHolder batchTimer(&getmoreReplStats);
-
- // This calls receiveMore() on the oplogreader cursor.
- // It can wait up to five seconds for more data.
- _syncSourceReader.more();
- }
- networkByteStats.increment(_syncSourceReader.currentBatchMessageSize());
-
- if (!_syncSourceReader.moreInCurrentBatch()) {
- // If there is still no data from upstream, check a few more things
- // and then loop back for another pass at getting more data
- {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- if (_pause) {
- return;
- }
- }
+ // if target cut connections between connecting and querying (for
+ // example, because it stepped down) we might not have a cursor
+ if (!_syncSourceReader.haveCursor()) {
+ return;
+ }
- _syncSourceReader.tailCheck();
- if( !_syncSourceReader.haveCursor() ) {
- LOG(1) << "replSet end syncTail pass";
- return;
- }
+ if (_rollbackIfNeeded(txn, _syncSourceReader)) {
+ stop();
+ return;
+ }
- continue;
- }
+ while (!inShutdown()) {
+ if (!_syncSourceReader.moreInCurrentBatch()) {
+ // Check some things periodically
+ // (whenever we run out of items in the
+ // current cursor batch)
+
+ int bs = _syncSourceReader.currentBatchMessageSize();
+ if (bs > 0 && bs < BatchIsSmallish) {
+ // on a very low latency network, if we don't wait a little, we'll be
+ // getting ops to write almost one at a time. this is both expensive
+ // for the upstream server and potentially defeats our parallel
+ // application of batches on the secondary.
+ //
+ // the inference here is basically if the batch is really small, we are
+ // "caught up".
+ //
+ sleepmillis(SleepToAllowBatchingMillis);
}
// If we are transitioning to primary state, we need to leave
// this loop in order to go into bgsync-pause mode.
if (_replCoord->isWaitingForApplierToDrain() ||
_replCoord->getMemberState().primary()) {
- LOG(1) << "waiting for draining or we are primary, not adding more ops to buffer";
return;
}
- // At this point, we are guaranteed to have at least one thing to read out
- // of the oplogreader cursor.
- BSONObj o = _syncSourceReader.nextSafe().getOwned();
- opsReadStats.increment();
+ // re-evaluate quality of sync target
+ if (shouldChangeSyncSource()) {
+ return;
+ }
{
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- _appliedBuffer = false;
- }
+ // record time for each getmore
+ TimerHolder batchTimer(&getmoreReplStats);
- OCCASIONALLY {
- LOG(2) << "bgsync buffer has " << _buffer.size() << " bytes";
+ // This calls receiveMore() on the oplogreader cursor.
+ // It can wait up to five seconds for more data.
+ _syncSourceReader.more();
}
+ networkByteStats.increment(_syncSourceReader.currentBatchMessageSize());
- bufferCountGauge.increment();
- bufferSizeGauge.increment(getSize(o));
- _buffer.push(o);
+ if (!_syncSourceReader.moreInCurrentBatch()) {
+ // If there is still no data from upstream, check a few more things
+ // and then loop back for another pass at getting more data
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ if (_pause) {
+ return;
+ }
+ }
- {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- _lastFetchedHash = o["h"].numberLong();
- _lastOpTimeFetched = extractOpTime(o);
- LOG(3) << "lastOpTimeFetched: " << _lastOpTimeFetched;
+ _syncSourceReader.tailCheck();
+ if (!_syncSourceReader.haveCursor()) {
+ LOG(1) << "replSet end syncTail pass";
+ return;
+ }
+
+ continue;
}
}
- }
- bool BackgroundSync::shouldChangeSyncSource() {
- // is it even still around?
- if (getSyncTarget().empty() || _syncSourceReader.getHost().empty()) {
- return true;
+ // If we are transitioning to primary state, we need to leave
+ // this loop in order to go into bgsync-pause mode.
+ if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) {
+ LOG(1) << "waiting for draining or we are primary, not adding more ops to buffer";
+ return;
}
- // check other members: is any member's optime more than MaxSyncSourceLag seconds
- // ahead of the current sync source?
- return _replCoord->shouldChangeSyncSource(_syncSourceReader.getHost());
- }
-
-
- bool BackgroundSync::peek(BSONObj* op) {
- return _buffer.peek(*op);
- }
-
- void BackgroundSync::waitForMore() {
- BSONObj op;
- // Block for one second before timing out.
- // Ignore the value of the op we peeked at.
- _buffer.blockingPeek(op, 1);
- }
-
- void BackgroundSync::consume() {
- // this is just to get the op off the queue, it's been peeked at
- // and queued for application already
- BSONObj op = _buffer.blockingPop();
- bufferCountGauge.decrement(1);
- bufferSizeGauge.decrement(getSize(op));
- }
+ // At this point, we are guaranteed to have at least one thing to read out
+ // of the oplogreader cursor.
+ BSONObj o = _syncSourceReader.nextSafe().getOwned();
+ opsReadStats.increment();
- bool BackgroundSync::_rollbackIfNeeded(OperationContext* txn, OplogReader& r) {
- string hn = r.conn()->getServerAddress();
-
- // Abort only when syncRollback detects we are in an unrecoverable state.
- // In other cases, we log the message contained in the error status and retry later.
- auto fassertRollbackStatusNoTrace = [](int msgid, const Status& status) {
- if (status.isOK()) {
- return;
- }
- if (ErrorCodes::UnrecoverableRollbackError == status.code()) {
- fassertNoTrace(msgid, status);
- }
- warning() << "rollback cannot proceed at this time (retrying later): "
- << status;
- };
-
- if (!r.more()) {
- try {
- BSONObj theirLastOp = r.getLastOp(rsOplogName.c_str());
- if (theirLastOp.isEmpty()) {
- error() << "empty query result from " << hn << " oplog";
- sleepsecs(2);
- return true;
- }
- OpTime theirOpTime = extractOpTime(theirLastOp);
- if (theirOpTime < _lastOpTimeFetched) {
- log() << "we are ahead of the sync source, will try to roll back";
- fassertRollbackStatusNoTrace(
- 28656,
- syncRollback(txn,
- _replCoord->getMyLastOptime(),
- OplogInterfaceLocal(txn, rsOplogName),
- RollbackSourceImpl(r.conn(), rsOplogName),
- _replCoord));
-
- return true;
- }
- /* we're not ahead? maybe our new query got fresher data. best to come back and try again */
- log() << "syncTail condition 1";
- sleepsecs(1);
- }
- catch(DBException& e) {
- error() << "querying " << hn << ' ' << e.toString();
- sleepsecs(2);
- }
- return true;
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ _appliedBuffer = false;
}
- BSONObj o = r.nextSafe();
- OpTime opTime = extractOpTime(o);
- long long hash = o["h"].numberLong();
- if ( opTime != _lastOpTimeFetched || hash != _lastFetchedHash ) {
- log() << "our last op time fetched: " << _lastOpTimeFetched;
- log() << "source's GTE: " << opTime;
- fassertRollbackStatusNoTrace(
- 28657,
- syncRollback(txn,
- _replCoord->getMyLastOptime(),
- OplogInterfaceLocal(txn, rsOplogName),
- RollbackSourceImpl(r.conn(), rsOplogName),
- _replCoord));
- return true;
+ OCCASIONALLY {
+ LOG(2) << "bgsync buffer has " << _buffer.size() << " bytes";
}
- return false;
- }
-
- HostAndPort BackgroundSync::getSyncTarget() {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- return _syncSourceHost;
- }
+ bufferCountGauge.increment();
+ bufferSizeGauge.increment(getSize(o));
+ _buffer.push(o);
- void BackgroundSync::clearSyncTarget() {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- _syncSourceHost = HostAndPort();
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ _lastFetchedHash = o["h"].numberLong();
+ _lastOpTimeFetched = extractOpTime(o);
+ LOG(3) << "lastOpTimeFetched: " << _lastOpTimeFetched;
+ }
}
+}
- void BackgroundSync::stop() {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
-
- _pause = true;
- _syncSourceHost = HostAndPort();
- _lastOpTimeFetched = OpTime();
- _lastFetchedHash = 0;
- _appliedBufferCondition.notify_all();
- _pausedCondition.notify_all();
+bool BackgroundSync::shouldChangeSyncSource() {
+ // Do we even still have a sync source?
+ if (getSyncTarget().empty() || _syncSourceReader.getHost().empty()) {
+ return true;
}
- void BackgroundSync::start(OperationContext* txn) {
- massert(16235, "going to start syncing, but buffer is not empty", _buffer.empty());
-
- long long updatedLastAppliedHash = _readLastAppliedHash(txn);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _pause = false;
-
- // reset _last fields with current oplog data
- _lastAppliedHash = updatedLastAppliedHash;
- _lastOpTimeFetched = _replCoord->getMyLastOptime();
- _lastFetchedHash = _lastAppliedHash;
-
- LOG(1) << "bgsync fetch queue set to: " << _lastOpTimeFetched <<
- " " << _lastFetchedHash;
- }
+ // check other members: is any member's optime more than MaxSyncSourceLag seconds
+ // ahead of the current sync source?
+ return _replCoord->shouldChangeSyncSource(_syncSourceReader.getHost());
+}
+
+
+bool BackgroundSync::peek(BSONObj* op) {
+ return _buffer.peek(*op);
+}
+
+void BackgroundSync::waitForMore() {
+ BSONObj op;
+ // Block for one second before timing out.
+ // Ignore the value of the op we peeked at.
+ _buffer.blockingPeek(op, 1);
+}
+
+void BackgroundSync::consume() {
+ // This just removes the op from the queue; it has already been peeked at
+ // and queued for application.
+ BSONObj op = _buffer.blockingPop();
+ bufferCountGauge.decrement(1);
+ bufferSizeGauge.decrement(getSize(op));
+}
+
+bool BackgroundSync::_rollbackIfNeeded(OperationContext* txn, OplogReader& r) {
+ string hn = r.conn()->getServerAddress();
+
+ // Abort only when syncRollback detects we are in an unrecoverable state.
+ // In other cases, we log the message contained in the error status and retry later.
+ auto fassertRollbackStatusNoTrace = [](int msgid, const Status& status) {
+ if (status.isOK()) {
+ return;
+ }
+ if (ErrorCodes::UnrecoverableRollbackError == status.code()) {
+ fassertNoTrace(msgid, status);
+ }
+ warning() << "rollback cannot proceed at this time (retrying later): " << status;
+ };
- void BackgroundSync::waitUntilPaused() {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- while (!_pause) {
- _pausedCondition.wait(lock);
+ if (!r.more()) {
+ try {
+ BSONObj theirLastOp = r.getLastOp(rsOplogName.c_str());
+ if (theirLastOp.isEmpty()) {
+ error() << "empty query result from " << hn << " oplog";
+ sleepsecs(2);
+ return true;
+ }
+ OpTime theirOpTime = extractOpTime(theirLastOp);
+ if (theirOpTime < _lastOpTimeFetched) {
+ log() << "we are ahead of the sync source, will try to roll back";
+ fassertRollbackStatusNoTrace(28656,
+ syncRollback(txn,
+ _replCoord->getMyLastOptime(),
+ OplogInterfaceLocal(txn, rsOplogName),
+ RollbackSourceImpl(r.conn(), rsOplogName),
+ _replCoord));
+
+ return true;
+ }
+ /* We're not ahead? Maybe our new query got fresher data; best to come back and try again. */
+ log() << "syncTail condition 1";
+ sleepsecs(1);
+ } catch (DBException& e) {
+ error() << "querying " << hn << ' ' << e.toString();
+ sleepsecs(2);
}
+ return true;
}
- long long BackgroundSync::getLastAppliedHash() const {
- stdx::lock_guard<stdx::mutex> lck(_mutex);
- return _lastAppliedHash;
+ BSONObj o = r.nextSafe();
+ OpTime opTime = extractOpTime(o);
+ long long hash = o["h"].numberLong();
+ if (opTime != _lastOpTimeFetched || hash != _lastFetchedHash) {
+ log() << "our last op time fetched: " << _lastOpTimeFetched;
+ log() << "source's GTE: " << opTime;
+ fassertRollbackStatusNoTrace(28657,
+ syncRollback(txn,
+ _replCoord->getMyLastOptime(),
+ OplogInterfaceLocal(txn, rsOplogName),
+ RollbackSourceImpl(r.conn(), rsOplogName),
+ _replCoord));
+ return true;
}
- void BackgroundSync::clearBuffer() {
- _buffer.clear();
+ return false;
+}
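The two rollback triggers above can be condensed into a single predicate. The following is a minimal sketch (not part of this change; parameter names are chosen for illustration, and it assumes the declarations above are in scope):

    // Sketch: decides whether _rollbackIfNeeded() would attempt a rollback.
    // sourceHasOps: whether the tailing cursor returned any ops (r.more()).
    // sourceOpTime/sourceHash: taken from the source's last op when the cursor
    // is empty, or from its first returned op otherwise.
    bool needsRollbackSketch(bool sourceHasOps,
                             const OpTime& sourceOpTime,
                             long long sourceHash,
                             const OpTime& lastOpTimeFetched,
                             long long lastFetchedHash) {
        if (!sourceHasOps) {
            // The sync source's newest op is older than our last fetched op:
            // we have ops the source never had, so we must roll back.
            return sourceOpTime < lastOpTimeFetched;
        }
        // The cursor starts at our last fetched point; if its first op does not
        // match our (optime, hash) pair, the histories have diverged.
        return sourceOpTime != lastOpTimeFetched || sourceHash != lastFetchedHash;
    }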
+
+HostAndPort BackgroundSync::getSyncTarget() {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ return _syncSourceHost;
+}
+
+void BackgroundSync::clearSyncTarget() {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ _syncSourceHost = HostAndPort();
+}
+
+void BackgroundSync::stop() {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+
+ _pause = true;
+ _syncSourceHost = HostAndPort();
+ _lastOpTimeFetched = OpTime();
+ _lastFetchedHash = 0;
+ _appliedBufferCondition.notify_all();
+ _pausedCondition.notify_all();
+}
+
+void BackgroundSync::start(OperationContext* txn) {
+ massert(16235, "going to start syncing, but buffer is not empty", _buffer.empty());
+
+ long long updatedLastAppliedHash = _readLastAppliedHash(txn);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _pause = false;
+
+ // reset _last fields with current oplog data
+ _lastAppliedHash = updatedLastAppliedHash;
+ _lastOpTimeFetched = _replCoord->getMyLastOptime();
+ _lastFetchedHash = _lastAppliedHash;
+
+ LOG(1) << "bgsync fetch queue set to: " << _lastOpTimeFetched << " " << _lastFetchedHash;
+}
+
+void BackgroundSync::waitUntilPaused() {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ while (!_pause) {
+ _pausedCondition.wait(lock);
}
-
- void BackgroundSync::setLastAppliedHash(long long newHash) {
- stdx::lock_guard<stdx::mutex> lck(_mutex);
- _lastAppliedHash = newHash;
+}
+
+long long BackgroundSync::getLastAppliedHash() const {
+ stdx::lock_guard<stdx::mutex> lck(_mutex);
+ return _lastAppliedHash;
+}
+
+void BackgroundSync::clearBuffer() {
+ _buffer.clear();
+}
+
+void BackgroundSync::setLastAppliedHash(long long newHash) {
+ stdx::lock_guard<stdx::mutex> lck(_mutex);
+ _lastAppliedHash = newHash;
+}
+
+void BackgroundSync::loadLastAppliedHash(OperationContext* txn) {
+ long long result = _readLastAppliedHash(txn);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _lastAppliedHash = result;
+}
+
+long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
+ BSONObj oplogEntry;
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_X);
+ bool success = Helpers::getLast(txn, rsOplogName.c_str(), oplogEntry);
+ if (!success) {
+ // This can happen when we are about to do an initial sync. lastHash will be set
+ // after the initial sync is complete.
+ return 0;
+ }
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "readLastAppliedHash", rsOplogName);
+ } catch (const DBException& ex) {
+ severe() << "Problem reading " << rsOplogName << ": " << ex.toStatus();
+ fassertFailed(18904);
}
-
- void BackgroundSync::loadLastAppliedHash(OperationContext* txn) {
- long long result = _readLastAppliedHash(txn);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _lastAppliedHash = result;
+ BSONElement hashElement = oplogEntry[hashFieldName];
+ if (hashElement.eoo()) {
+ severe() << "Most recent entry in " << rsOplogName << " missing \"" << hashFieldName
+ << "\" field";
+ fassertFailed(18902);
}
-
- long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
- BSONObj oplogEntry;
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lk(txn->lockState(), "local", MODE_X);
- bool success = Helpers::getLast(txn, rsOplogName.c_str(), oplogEntry);
- if (!success) {
- // This can happen when we are to do an initial sync. lastHash will be set
- // after the initial sync is complete.
- return 0;
- }
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "readLastAppliedHash", rsOplogName);
- }
- catch (const DBException& ex) {
- severe() << "Problem reading " << rsOplogName << ": " << ex.toStatus();
- fassertFailed(18904);
- }
- BSONElement hashElement = oplogEntry[hashFieldName];
- if (hashElement.eoo()) {
- severe() << "Most recent entry in " << rsOplogName << " missing \"" << hashFieldName <<
- "\" field";
- fassertFailed(18902);
- }
- if (hashElement.type() != NumberLong) {
- severe() << "Expected type of \"" << hashFieldName << "\" in most recent " <<
- rsOplogName << " entry to have type NumberLong, but found " <<
- typeName(hashElement.type());
- fassertFailed(18903);
- }
- return hashElement.safeNumberLong();
+ if (hashElement.type() != NumberLong) {
+ severe() << "Expected type of \"" << hashFieldName << "\" in most recent " << rsOplogName
+ << " entry to have type NumberLong, but found " << typeName(hashElement.type());
+ fassertFailed(18903);
}
+ return hashElement.safeNumberLong();
+}
- bool BackgroundSync::getInitialSyncRequestedFlag() {
- stdx::lock_guard<stdx::mutex> lock(_initialSyncMutex);
- return _initialSyncRequestedFlag;
- }
+bool BackgroundSync::getInitialSyncRequestedFlag() {
+ stdx::lock_guard<stdx::mutex> lock(_initialSyncMutex);
+ return _initialSyncRequestedFlag;
+}
- void BackgroundSync::setInitialSyncRequestedFlag(bool value) {
- stdx::lock_guard<stdx::mutex> lock(_initialSyncMutex);
- _initialSyncRequestedFlag = value;
- }
+void BackgroundSync::setInitialSyncRequestedFlag(bool value) {
+ stdx::lock_guard<stdx::mutex> lock(_initialSyncMutex);
+ _initialSyncRequestedFlag = value;
+}
- void BackgroundSync::pushTestOpToBuffer(const BSONObj& op) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- _buffer.push(op);
- }
+void BackgroundSync::pushTestOpToBuffer(const BSONObj& op) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ _buffer.push(op);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h
index d1e7e7ea692..8fe0c9e2b02 100644
--- a/src/mongo/db/repl/bgsync.h
+++ b/src/mongo/db/repl/bgsync.h
@@ -37,158 +37,156 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class Member;
- class ReplicationCoordinator;
+class Member;
+class ReplicationCoordinator;
- // This interface exists to facilitate easier testing;
- // the test infrastructure implements these functions with stubs.
- class BackgroundSyncInterface {
- public:
- virtual ~BackgroundSyncInterface();
+// This interface exists to make testing easier;
+// the test infrastructure implements these functions with stubs.
+class BackgroundSyncInterface {
+public:
+ virtual ~BackgroundSyncInterface();
- // Gets the head of the buffer, but does not remove it.
- // Returns true if an element was present at the head;
- // false if the queue was empty.
- virtual bool peek(BSONObj* op) = 0;
+ // Gets the head of the buffer, but does not remove it.
+ // Returns true if an element was present at the head;
+ // false if the queue was empty.
+ virtual bool peek(BSONObj* op) = 0;
- // Deletes objects in the queue;
- // called by sync thread after it has applied an op
- virtual void consume() = 0;
+ // Deletes objects in the queue;
+ // called by sync thread after it has applied an op
+ virtual void consume() = 0;
- // wait up to 1 second for more ops to appear
- virtual void waitForMore() = 0;
- };
+ // wait up to 1 second for more ops to appear
+ virtual void waitForMore() = 0;
+};
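Taken together, these contracts imply a peek-apply-consume loop on the applier side. A minimal sketch (assuming a hypothetical applyOperation() step that is not part of this interface):

    // One pass of a hypothetical consumer built on BackgroundSyncInterface.
    void applyOperation(const BSONObj& op);  // hypothetical application step

    void consumerPassSketch(BackgroundSyncInterface* bgsync) {
        BSONObj op;
        if (!bgsync->peek(&op)) {
            // Buffer was empty; block up to one second for more ops to appear.
            bgsync->waitForMore();
            return;
        }
        applyOperation(op);  // apply the op that is still at the head
        bgsync->consume();   // only now remove it from the queue
    }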
- /**
- * Lock order:
- * 1. rslock
- * 2. rwlock
- * 3. BackgroundSync::_mutex
- */
- class BackgroundSync : public BackgroundSyncInterface {
- public:
- // Allow index prefetching to be turned on/off
- enum IndexPrefetchConfig {
- PREFETCH_NONE=0, PREFETCH_ID_ONLY=1, PREFETCH_ALL=2
- };
+/**
+ * Lock order:
+ * 1. rslock
+ * 2. rwlock
+ * 3. BackgroundSync::_mutex
+ */
+class BackgroundSync : public BackgroundSyncInterface {
+public:
+ // Allow index prefetching to be turned on/off
+ enum IndexPrefetchConfig { PREFETCH_NONE = 0, PREFETCH_ID_ONLY = 1, PREFETCH_ALL = 2 };
- static BackgroundSync* get();
+ static BackgroundSync* get();
- // stop syncing (when this node becomes a primary, e.g.)
- void stop();
+ // stop syncing (e.g., when this node becomes a primary)
+ void stop();
- void shutdown();
- void notify(OperationContext* txn);
+ void shutdown();
+ void notify(OperationContext* txn);
- // Blocks until _pause becomes true from a call to stop() or shutdown()
- void waitUntilPaused();
+ // Blocks until _pause becomes true from a call to stop() or shutdown()
+ void waitUntilPaused();
- virtual ~BackgroundSync() {}
+ virtual ~BackgroundSync() {}
- // starts the producer thread
- void producerThread();
- // starts the sync target notifying thread
- void notifierThread();
+ // starts the producer thread
+ void producerThread();
+ // starts the sync target notifying thread
+ void notifierThread();
- HostAndPort getSyncTarget();
+ HostAndPort getSyncTarget();
- // Interface implementation
+ // Interface implementation
- virtual bool peek(BSONObj* op);
- virtual void consume();
- virtual void clearSyncTarget();
- virtual void waitForMore();
+ virtual bool peek(BSONObj* op);
+ virtual void consume();
+ virtual void clearSyncTarget();
+ virtual void waitForMore();
- // For monitoring
- BSONObj getCounters();
+ // For monitoring
+ BSONObj getCounters();
- long long getLastAppliedHash() const;
- void setLastAppliedHash(long long oldH);
- void loadLastAppliedHash(OperationContext* txn);
+ long long getLastAppliedHash() const;
+ void setLastAppliedHash(long long oldH);
+ void loadLastAppliedHash(OperationContext* txn);
- // Clears any fetched and buffered oplog entries.
- void clearBuffer();
+ // Clears any fetched and buffered oplog entries.
+ void clearBuffer();
- bool getInitialSyncRequestedFlag();
- void setInitialSyncRequestedFlag(bool value);
+ bool getInitialSyncRequestedFlag();
+ void setInitialSyncRequestedFlag(bool value);
- void setIndexPrefetchConfig(const IndexPrefetchConfig cfg) {
- _indexPrefetchConfig = cfg;
- }
+ void setIndexPrefetchConfig(const IndexPrefetchConfig cfg) {
+ _indexPrefetchConfig = cfg;
+ }
- IndexPrefetchConfig getIndexPrefetchConfig() {
- return _indexPrefetchConfig;
- }
+ IndexPrefetchConfig getIndexPrefetchConfig() {
+ return _indexPrefetchConfig;
+ }
- // Testing related stuff
- void pushTestOpToBuffer(const BSONObj& op);
- private:
- static BackgroundSync *s_instance;
- // protects creation of s_instance
- static stdx::mutex s_mutex;
+ // Testing related stuff
+ void pushTestOpToBuffer(const BSONObj& op);
- // Production thread
- BlockingQueue<BSONObj> _buffer;
- OplogReader _syncSourceReader;
+private:
+ static BackgroundSync* s_instance;
+ // protects creation of s_instance
+ static stdx::mutex s_mutex;
- // _mutex protects all of the class variables except _syncSourceReader and _buffer
- mutable stdx::mutex _mutex;
+ // Production thread
+ BlockingQueue<BSONObj> _buffer;
+ OplogReader _syncSourceReader;
- OpTime _lastOpTimeFetched;
+ // _mutex protects all of the class variables except _syncSourceReader and _buffer
+ mutable stdx::mutex _mutex;
- // lastAppliedHash is used to generate a new hash for the following op, when primary.
- long long _lastAppliedHash;
- // lastFetchedHash is used to match ops to determine if we need to rollback, when
- // a secondary.
- long long _lastFetchedHash;
+ OpTime _lastOpTimeFetched;
- // if produce thread should be running
- bool _pause;
- stdx::condition_variable _pausedCondition;
- bool _appliedBuffer;
- stdx::condition_variable _appliedBufferCondition;
+ // lastAppliedHash is used to generate a new hash for the following op, when primary.
+ long long _lastAppliedHash;
+ // lastFetchedHash is used to match ops to determine if we need to roll back
+ // when running as a secondary.
+ long long _lastFetchedHash;
- HostAndPort _syncSourceHost;
+ // whether the producer thread should be running
+ bool _pause;
+ stdx::condition_variable _pausedCondition;
+ bool _appliedBuffer;
+ stdx::condition_variable _appliedBufferCondition;
- BackgroundSync();
- BackgroundSync(const BackgroundSync& s);
- BackgroundSync operator=(const BackgroundSync& s);
+ HostAndPort _syncSourceHost;
- // Production thread
- void _producerThread();
- // Adds elements to the list, up to maxSize.
- void produce(OperationContext* txn);
- // Checks the criteria for rolling back and executes a rollback if warranted.
- bool _rollbackIfNeeded(OperationContext* txn, OplogReader& r);
+ BackgroundSync();
+ BackgroundSync(const BackgroundSync& s);
+ BackgroundSync operator=(const BackgroundSync& s);
- // Evaluate if the current sync target is still good
- bool shouldChangeSyncSource();
+ // Production thread
+ void _producerThread();
+ // Adds elements to the list, up to maxSize.
+ void produce(OperationContext* txn);
+ // Checks the criteria for rolling back and executes a rollback if warranted.
+ bool _rollbackIfNeeded(OperationContext* txn, OplogReader& r);
- // restart syncing
- void start(OperationContext* txn);
+ // Evaluate if the current sync target is still good
+ bool shouldChangeSyncSource();
- long long _readLastAppliedHash(OperationContext* txn);
+ // restart syncing
+ void start(OperationContext* txn);
- // A pointer to the replication coordinator running the show.
- ReplicationCoordinator* _replCoord;
+ long long _readLastAppliedHash(OperationContext* txn);
- // bool for indicating resync need on this node and the mutex that protects it
- // The resync command sets this flag; the Applier thread observes and clears it.
- bool _initialSyncRequestedFlag;
- stdx::mutex _initialSyncMutex;
+ // A pointer to the replication coordinator running the show.
+ ReplicationCoordinator* _replCoord;
- // This setting affects the Applier prefetcher behavior.
- IndexPrefetchConfig _indexPrefetchConfig;
+ // Flag indicating that this node needs a resync, and the mutex that protects it.
+ // The resync command sets this flag; the Applier thread observes and clears it.
+ bool _initialSyncRequestedFlag;
+ stdx::mutex _initialSyncMutex;
- };
+ // This setting affects the Applier prefetcher behavior.
+ IndexPrefetchConfig _indexPrefetchConfig;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
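For tests, the declarations above suggest a simple buffer round trip. A minimal sketch (the op shape is illustrative only, and it assumes the singleton has been initialized):

    // Seed the buffer through the test hook, then observe the op through the
    // BackgroundSyncInterface methods.
    void bufferRoundTripSketch() {
        BackgroundSync* bgsync = BackgroundSync::get();
        BSONObj testOp = BSON("op" << "n" << "h" << 1LL);  // illustrative no-op entry
        bgsync->pushTestOpToBuffer(testOp);

        BSONObj head;
        invariant(bgsync->peek(&head));  // head of the buffer, not removed;
                                         // a consumer would then call consume()
    }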
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.cpp b/src/mongo/db/repl/check_quorum_for_config_change.cpp
index 5e9899897ed..437dca34701 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change.cpp
@@ -45,256 +45,247 @@
namespace mongo {
namespace repl {
- QuorumChecker::QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex)
- : _rsConfig(rsConfig),
- _myIndex(myIndex),
- _numResponses(1), // We "responded" to ourself already.
- _numElectable(0),
- _vetoStatus(Status::OK()),
- _finalStatus(ErrorCodes::CallbackCanceled, "Quorum check canceled") {
-
- invariant(myIndex < _rsConfig->getNumMembers());
- const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
-
- if (myConfig.isVoter()) {
- _voters.push_back(myConfig.getHostAndPort());
- }
- if (myConfig.isElectable()) {
- _numElectable = 1;
- }
-
- if (hasReceivedSufficientResponses()) {
- _onQuorumCheckComplete();
- }
+QuorumChecker::QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex)
+ : _rsConfig(rsConfig),
+ _myIndex(myIndex),
+ _numResponses(1), // We "responded" to ourself already.
+ _numElectable(0),
+ _vetoStatus(Status::OK()),
+ _finalStatus(ErrorCodes::CallbackCanceled, "Quorum check canceled") {
+ invariant(myIndex < _rsConfig->getNumMembers());
+ const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
+
+ if (myConfig.isVoter()) {
+ _voters.push_back(myConfig.getHostAndPort());
+ }
+ if (myConfig.isElectable()) {
+ _numElectable = 1;
}
- QuorumChecker::~QuorumChecker() {}
+ if (hasReceivedSufficientResponses()) {
+ _onQuorumCheckComplete();
+ }
+}
- std::vector<RemoteCommandRequest> QuorumChecker::getRequests() const {
- const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
- const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
+QuorumChecker::~QuorumChecker() {}
- std::vector<RemoteCommandRequest> requests;
- if (hasReceivedSufficientResponses()) {
- return requests;
- }
+std::vector<RemoteCommandRequest> QuorumChecker::getRequests() const {
+ const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
+ const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName(_rsConfig->getReplSetName());
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(_rsConfig->getConfigVersion());
- hbArgs.setCheckEmpty(isInitialConfig);
- hbArgs.setSenderHost(myConfig.getHostAndPort());
- hbArgs.setSenderId(myConfig.getId());
- const BSONObj hbRequest = hbArgs.toBSON();
+ std::vector<RemoteCommandRequest> requests;
+ if (hasReceivedSufficientResponses()) {
+ return requests;
+ }
- // Send a bunch of heartbeat requests.
- // Schedule an operation when a "sufficient" number of them have completed, and use that
- // to compute the quorum check results.
- // Wait for the "completion" callback to finish, and then it's OK to return the results.
- for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
- if (_myIndex == i) {
- // No need to check self for liveness or unreadiness.
- continue;
- }
- requests.push_back(RemoteCommandRequest(
- _rsConfig->getMemberAt(i).getHostAndPort(),
- "admin",
- hbRequest,
- _rsConfig->getHeartbeatTimeoutPeriodMillis()));
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName(_rsConfig->getReplSetName());
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(_rsConfig->getConfigVersion());
+ hbArgs.setCheckEmpty(isInitialConfig);
+ hbArgs.setSenderHost(myConfig.getHostAndPort());
+ hbArgs.setSenderId(myConfig.getId());
+ const BSONObj hbRequest = hbArgs.toBSON();
+
+ // Send a bunch of heartbeat requests.
+ // Schedule an operation when a "sufficient" number of them have completed, and use that
+ // to compute the quorum check results.
+ // Wait for the "completion" callback to finish, and then it's OK to return the results.
+ for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
+ if (_myIndex == i) {
+ // No need to check self for liveness or unreadiness.
+ continue;
}
-
- return requests;
+ requests.push_back(RemoteCommandRequest(_rsConfig->getMemberAt(i).getHostAndPort(),
+ "admin",
+ hbRequest,
+ _rsConfig->getHeartbeatTimeoutPeriodMillis()));
}
- void QuorumChecker::processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
+ return requests;
+}
- _tabulateHeartbeatResponse(request, response);
- if (hasReceivedSufficientResponses()) {
- _onQuorumCheckComplete();
- }
+void QuorumChecker::processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ _tabulateHeartbeatResponse(request, response);
+ if (hasReceivedSufficientResponses()) {
+ _onQuorumCheckComplete();
}
+}
- void QuorumChecker::_onQuorumCheckComplete() {
- if (!_vetoStatus.isOK()) {
- _finalStatus = _vetoStatus;
- return;
+void QuorumChecker::_onQuorumCheckComplete() {
+ if (!_vetoStatus.isOK()) {
+ _finalStatus = _vetoStatus;
+ return;
+ }
+ if (_rsConfig->getConfigVersion() == 1 && !_badResponses.empty()) {
+ str::stream message;
+ message << "replSetInitiate quorum check failed because not all proposed set members "
+ "responded affirmatively: ";
+ for (std::vector<std::pair<HostAndPort, Status>>::const_iterator it = _badResponses.begin();
+ it != _badResponses.end();
+ ++it) {
+ if (it != _badResponses.begin()) {
+ message << ", ";
+ }
+ message << it->first.toString() << " failed with " << it->second.reason();
}
- if (_rsConfig->getConfigVersion() == 1 && !_badResponses.empty()) {
- str::stream message;
- message << "replSetInitiate quorum check failed because not all proposed set members "
- "responded affirmatively: ";
- for (std::vector<std::pair<HostAndPort, Status> >::const_iterator it =
- _badResponses.begin();
- it != _badResponses.end();
- ++it) {
+ _finalStatus = Status(ErrorCodes::NodeNotFound, message);
+ return;
+ }
+ if (_numElectable == 0) {
+ _finalStatus = Status(ErrorCodes::NodeNotFound,
+ "Quorum check failed because no "
+ "electable nodes responded; at least one required for config");
+ return;
+ }
+ if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
+ str::stream message;
+ message << "Quorum check failed because not enough voting nodes responded; required "
+ << _rsConfig->getMajorityVoteCount() << " but ";
+
+ if (_voters.size() == 0) {
+ message << "none responded";
+ } else {
+ message << "only the following " << _voters.size()
+ << " voting nodes responded: " << _voters.front().toString();
+ for (size_t i = 1; i < _voters.size(); ++i) {
+ message << ", " << _voters[i].toString();
+ }
+ }
+ if (!_badResponses.empty()) {
+ message << "; the following nodes did not respond affirmatively: ";
+ for (std::vector<std::pair<HostAndPort, Status>>::const_iterator it =
+ _badResponses.begin();
+ it != _badResponses.end();
+ ++it) {
if (it != _badResponses.begin()) {
message << ", ";
}
message << it->first.toString() << " failed with " << it->second.reason();
}
- _finalStatus = Status(ErrorCodes::NodeNotFound, message);
- return;
}
- if (_numElectable == 0) {
- _finalStatus = Status(
- ErrorCodes::NodeNotFound, "Quorum check failed because no "
- "electable nodes responded; at least one required for config");
- return;
- }
- if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
- str::stream message;
- message << "Quorum check failed because not enough voting nodes responded; required " <<
- _rsConfig->getMajorityVoteCount() << " but ";
-
- if (_voters.size() == 0) {
- message << "none responded";
- }
- else {
- message << "only the following " << _voters.size() <<
- " voting nodes responded: " << _voters.front().toString();
- for (size_t i = 1; i < _voters.size(); ++i) {
- message << ", " << _voters[i].toString();
- }
- }
- if (!_badResponses.empty()) {
- message << "; the following nodes did not respond affirmatively: ";
- for (std::vector<std::pair<HostAndPort, Status> >::const_iterator it =
- _badResponses.begin();
- it != _badResponses.end();
- ++it) {
- if (it != _badResponses.begin()) {
- message << ", ";
- }
- message << it->first.toString() << " failed with " << it->second.reason();
- }
- }
- _finalStatus = Status(ErrorCodes::NodeNotFound, message);
- return;
- }
- _finalStatus = Status::OK();
+ _finalStatus = Status(ErrorCodes::NodeNotFound, message);
+ return;
+ }
+ _finalStatus = Status::OK();
+}
+
+void QuorumChecker::_tabulateHeartbeatResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ ++_numResponses;
+ if (!response.isOK()) {
+ warning() << "Failed to complete heartbeat request to " << request.target << "; "
+ << response.getStatus();
+ _badResponses.push_back(std::make_pair(request.target, response.getStatus()));
+ return;
}
- void QuorumChecker::_tabulateHeartbeatResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
+ BSONObj resBSON = response.getValue().data;
+ ReplSetHeartbeatResponse hbResp;
+ Status hbStatus = hbResp.initialize(resBSON, 0);
- ++_numResponses;
- if (!response.isOK()) {
- warning() << "Failed to complete heartbeat request to " << request.target <<
- "; " << response.getStatus();
- _badResponses.push_back(std::make_pair(request.target, response.getStatus()));
- return;
- }
+ if (hbStatus.code() == ErrorCodes::InconsistentReplicaSetNames) {
+ std::string message = str::stream() << "Our set name did not match that of "
+ << request.target.toString();
+ _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
+ warning() << message;
+ return;
+ }
- BSONObj resBSON = response.getValue().data;
- ReplSetHeartbeatResponse hbResp;
- Status hbStatus = hbResp.initialize(resBSON, 0);
+ if (!hbStatus.isOK() && hbStatus != ErrorCodes::InvalidReplicaSetConfig) {
+ warning() << "Got error (" << hbStatus << ") response on heartbeat request to "
+ << request.target << "; " << hbResp;
+ _badResponses.push_back(std::make_pair(request.target, hbStatus));
+ return;
+ }
- if (hbStatus.code() == ErrorCodes::InconsistentReplicaSetNames) {
- std::string message = str::stream() << "Our set name did not match that of " <<
- request.target.toString();
+ if (!hbResp.getReplicaSetName().empty()) {
+ if (hbResp.getConfigVersion() >= _rsConfig->getConfigVersion()) {
+ std::string message = str::stream()
+ << "Our config version of " << _rsConfig->getConfigVersion()
+ << " is no larger than the version on " << request.target.toString()
+ << ", which is " << hbResp.getConfigVersion();
_vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
warning() << message;
return;
}
+ }
- if (!hbStatus.isOK() && hbStatus != ErrorCodes::InvalidReplicaSetConfig) {
- warning() << "Got error (" << hbStatus
- << ") response on heartbeat request to " << request.target
- << "; " << hbResp;
- _badResponses.push_back(std::make_pair(request.target, hbStatus));
- return;
- }
-
- if (!hbResp.getReplicaSetName().empty()) {
- if (hbResp.getConfigVersion() >= _rsConfig->getConfigVersion()) {
- std::string message = str::stream() << "Our config version of " <<
- _rsConfig->getConfigVersion() <<
- " is no larger than the version on " << request.target.toString() <<
- ", which is " << hbResp.getConfigVersion();
- _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
- warning() << message;
- return;
- }
- }
-
- const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
- if (isInitialConfig && hbResp.hasData()) {
- std::string message = str::stream() << "'" << request.target.toString()
- << "' has data already, cannot initiate set.";
- _vetoStatus = Status(ErrorCodes::CannotInitializeNodeWithData, message);
- warning() << message;
- return;
- }
-
- for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
- const MemberConfig& memberConfig = _rsConfig->getMemberAt(i);
- if (memberConfig.getHostAndPort() != request.target) {
- continue;
- }
- if (memberConfig.isElectable()) {
- ++_numElectable;
- }
- if (memberConfig.isVoter()) {
- _voters.push_back(request.target);
- }
- return;
- }
- invariant(false);
+ const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
+ if (isInitialConfig && hbResp.hasData()) {
+ std::string message = str::stream() << "'" << request.target.toString()
+ << "' has data already, cannot initiate set.";
+ _vetoStatus = Status(ErrorCodes::CannotInitializeNodeWithData, message);
+ warning() << message;
+ return;
}
- bool QuorumChecker::hasReceivedSufficientResponses() const {
- if (!_vetoStatus.isOK() || _numResponses == _rsConfig->getNumMembers()) {
- // Vetoed or everybody has responded. All done.
- return true;
- }
- if (_rsConfig->getConfigVersion() == 1) {
- // Have not received responses from every member, and the proposed config
- // version is 1 (initial configuration). Keep waiting.
- return false;
+ for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
+ const MemberConfig& memberConfig = _rsConfig->getMemberAt(i);
+ if (memberConfig.getHostAndPort() != request.target) {
+ continue;
}
- if (_numElectable == 0) {
- // Have not heard from at least one electable node. Keep waiting.
- return false;
+ if (memberConfig.isElectable()) {
+ ++_numElectable;
}
- if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
- // Have not heard from a majority of voters. Keep waiting.
- return false;
+ if (memberConfig.isVoter()) {
+ _voters.push_back(request.target);
}
+ return;
+ }
+ invariant(false);
+}
- // Have heard from a majority of voters and one electable node. All done.
+bool QuorumChecker::hasReceivedSufficientResponses() const {
+ if (!_vetoStatus.isOK() || _numResponses == _rsConfig->getNumMembers()) {
+ // Vetoed or everybody has responded. All done.
return true;
}
-
- Status checkQuorumGeneral(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex) {
- QuorumChecker checker(&rsConfig, myIndex);
- ScatterGatherRunner runner(&checker);
- Status status = runner.run(executor);
- if (!status.isOK()) {
- return status;
- }
-
- return checker.getFinalStatus();
+ if (_rsConfig->getConfigVersion() == 1) {
+ // Have not received responses from every member, and the proposed config
+ // version is 1 (initial configuration). Keep waiting.
+ return false;
}
-
- Status checkQuorumForInitiate(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex) {
- invariant(rsConfig.getConfigVersion() == 1);
- return checkQuorumGeneral(executor, rsConfig, myIndex);
+ if (_numElectable == 0) {
+ // Have not heard from at least one electable node. Keep waiting.
+ return false;
+ }
+ if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
+ // Have not heard from a majority of voters. Keep waiting.
+ return false;
}
- Status checkQuorumForReconfig(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex) {
- invariant(rsConfig.getConfigVersion() > 1);
- return checkQuorumGeneral(executor, rsConfig, myIndex);
+ // Have heard from a majority of voters and one electable node. All done.
+ return true;
+}
+
+Status checkQuorumGeneral(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex) {
+ QuorumChecker checker(&rsConfig, myIndex);
+ ScatterGatherRunner runner(&checker);
+ Status status = runner.run(executor);
+ if (!status.isOK()) {
+ return status;
}
+ return checker.getFinalStatus();
+}
+
+Status checkQuorumForInitiate(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex) {
+ invariant(rsConfig.getConfigVersion() == 1);
+ return checkQuorumGeneral(executor, rsConfig, myIndex);
+}
+
+Status checkQuorumForReconfig(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex) {
+ invariant(rsConfig.getConfigVersion() > 1);
+ return checkQuorumGeneral(executor, rsConfig, myIndex);
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.h b/src/mongo/db/repl/check_quorum_for_config_change.h
index c38a49106be..5d33c01f4c1 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.h
+++ b/src/mongo/db/repl/check_quorum_for_config_change.h
@@ -35,116 +35,118 @@
namespace mongo {
namespace repl {
- class ReplicaSetConfig;
+class ReplicaSetConfig;
+/**
+ * Quorum checking state machine.
+ *
+ * Usage: Construct a QuorumChecker, pass in a pointer to the configuration for which you're
+ * checking quorum, and the integer index of the member config representing the "executing"
+ * node. Use ScatterGatherRunner or otherwise execute a scatter-gather procedure as described
+ * in the class comment for the ScatterGatherAlgorithm class. After
+ * hasReceivedSufficientResponses() returns true, you may call getFinalStatus() to get the
+ * result of the quorum check.
+ */
+class QuorumChecker : public ScatterGatherAlgorithm {
+ MONGO_DISALLOW_COPYING(QuorumChecker);
+
+public:
/**
- * Quorum checking state machine.
+ * Constructs a QuorumChecker that is used to confirm that sufficient nodes are up to accept
+ * "rsConfig". "myIndex" is the index of the local node, which is assumed to be up.
*
- * Usage: Construct a QuorumChecker, pass in a pointer to the configuration for which you're
- * checking quorum, and the integer index of the member config representing the "executing"
- * node. Use ScatterGatherRunner or otherwise execute a scatter-gather procedure as described
- * in the class comment for the ScatterGatherAlgorithm class. After
- * hasReceivedSufficientResponses() returns true, you may call getFinalStatus() to get the
- * result of the quorum check.
+ * "rsConfig" must stay in scope until QuorumChecker's destructor completes.
*/
- class QuorumChecker : public ScatterGatherAlgorithm {
- MONGO_DISALLOW_COPYING(QuorumChecker);
- public:
- /**
- * Constructs a QuorumChecker that is used to confirm that sufficient nodes are up to accept
- * "rsConfig". "myIndex" is the index of the local node, which is assumed to be up.
- *
- * "rsConfig" must stay in scope until QuorumChecker's destructor completes.
- */
- QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex);
- virtual ~QuorumChecker();
-
- virtual std::vector<RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response);
-
- virtual bool hasReceivedSufficientResponses() const;
-
- Status getFinalStatus() const { return _finalStatus; }
-
- private:
- /**
- * Callback that executes after _haveReceivedSufficientReplies() becomes true.
- *
- * Computes the quorum result based on responses received so far, stores it into
- * _finalStatus, and enables QuorumChecker::run() to return.
- */
- void _onQuorumCheckComplete();
-
- /**
- * Updates the QuorumChecker state based on the data from a single heartbeat response.
- */
- void _tabulateHeartbeatResponse(const RemoteCommandRequest& request,
- const ResponseStatus& response);
-
- // Pointer to the replica set configuration for which we're checking quorum.
- const ReplicaSetConfig* const _rsConfig;
-
- // Index of the local node's member configuration in _rsConfig.
- const int _myIndex;
-
- // List of voting nodes that have responded affirmatively.
- std::vector<HostAndPort> _voters;
-
- // List of nodes with bad responses and the bad response status they returned.
- std::vector<std::pair<HostAndPort, Status> > _badResponses;
-
- // Total number of responses and timeouts processed.
- int _numResponses;
-
- // Number of electable nodes that have responded affirmatively.
- int _numElectable;
-
- // Set to a non-OK status if a response from a remote node indicates
- // that the quorum check should definitely fail, such as because of
- // a replica set name mismatch.
- Status _vetoStatus;
-
- // Final status of the quorum check, returned by run().
- Status _finalStatus;
- };
+ QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex);
+ virtual ~QuorumChecker();
+
+ virtual std::vector<RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response);
+
+ virtual bool hasReceivedSufficientResponses() const;
+
+ Status getFinalStatus() const {
+ return _finalStatus;
+ }
+private:
/**
- * Performs a quorum call to determine if a sufficient number of nodes are up
- * to initiate a replica set with configuration "rsConfig".
 * Callback that executes after hasReceivedSufficientResponses() becomes true.
*
- * "myIndex" is the index of this node's member configuration in "rsConfig".
- * "executor" is the event loop in which to schedule network/aysnchronous processing.
- *
- * For purposes of initiate, a quorum is only met if all of the following conditions
- * are met:
- * - All nodes respond.
- * - No nodes other than the node running the quorum check have data.
- * - No nodes are already joined to a replica set.
- * - No node reports a replica set name other than the one in "rsConfig".
+ * Computes the quorum result based on responses received so far, stores it into
+ * _finalStatus, and enables QuorumChecker::run() to return.
*/
- Status checkQuorumForInitiate(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex);
+ void _onQuorumCheckComplete();
/**
- * Performs a quorum call to determine if a sufficient number of nodes are up
- * to replace the current replica set configuration with "rsConfig".
- *
- * "myIndex" is the index of this node's member configuration in "rsConfig".
- * "executor" is the event loop in which to schedule network/aysnchronous processing.
- *
- * For purposes of reconfig, a quorum is only met if all of the following conditions
- * are met:
- * - A majority of voting nodes respond.
- * - At least one electable node responds.
- * - No responding node reports a replica set name other than the one in "rsConfig".
- * - All responding nodes report a config version less than the one in "rsConfig".
+ * Updates the QuorumChecker state based on the data from a single heartbeat response.
*/
- Status checkQuorumForReconfig(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex);
+ void _tabulateHeartbeatResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response);
+
+ // Pointer to the replica set configuration for which we're checking quorum.
+ const ReplicaSetConfig* const _rsConfig;
+
+ // Index of the local node's member configuration in _rsConfig.
+ const int _myIndex;
+
+ // List of voting nodes that have responded affirmatively.
+ std::vector<HostAndPort> _voters;
+
+ // List of nodes with bad responses and the bad response status they returned.
+ std::vector<std::pair<HostAndPort, Status>> _badResponses;
+
+ // Total number of responses and timeouts processed.
+ int _numResponses;
+
+ // Number of electable nodes that have responded affirmatively.
+ int _numElectable;
+
+ // Set to a non-OK status if a response from a remote node indicates
+ // that the quorum check should definitely fail, such as because of
+ // a replica set name mismatch.
+ Status _vetoStatus;
+
+ // Final status of the quorum check, returned by run().
+ Status _finalStatus;
+};
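The usage described in the class comment above is exactly the pattern implemented by checkQuorumGeneral() in check_quorum_for_config_change.cpp; condensed as a sketch:

    // Drive the QuorumChecker with a ScatterGatherRunner and read the result.
    Status runQuorumCheckSketch(ReplicationExecutor* executor,
                                const ReplicaSetConfig& rsConfig,
                                int myIndex) {
        QuorumChecker checker(&rsConfig, myIndex);  // rsConfig must outlive checker
        ScatterGatherRunner runner(&checker);
        Status status = runner.run(executor);  // executes the scatter-gather procedure
        if (!status.isOK()) {
            return status;  // executor-level failure (e.g., shutdown)
        }
        return checker.getFinalStatus();  // outcome of the quorum check itself
    }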
+
+/**
+ * Performs a quorum call to determine if a sufficient number of nodes are up
+ * to initiate a replica set with configuration "rsConfig".
+ *
+ * "myIndex" is the index of this node's member configuration in "rsConfig".
+ * "executor" is the event loop in which to schedule network/aysnchronous processing.
+ *
+ * For purposes of initiate, a quorum is only met if all of the following conditions
+ * are met:
+ * - All nodes respond.
+ * - No nodes other than the node running the quorum check have data.
+ * - No nodes are already joined to a replica set.
+ * - No node reports a replica set name other than the one in "rsConfig".
+ */
+Status checkQuorumForInitiate(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex);
+
+/**
+ * Performs a quorum call to determine if a sufficient number of nodes are up
+ * to replace the current replica set configuration with "rsConfig".
+ *
+ * "myIndex" is the index of this node's member configuration in "rsConfig".
+ * "executor" is the event loop in which to schedule network/aysnchronous processing.
+ *
+ * For purposes of reconfig, a quorum is only met if all of the following conditions
+ * are met:
+ * - A majority of voting nodes respond.
+ * - At least one electable node responds.
+ * - No responding node reports a replica set name other than the one in "rsConfig".
+ * - All responding nodes report a config version less than the one in "rsConfig".
+ */
+Status checkQuorumForReconfig(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex);
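A call site picks between the two entry points by config version; each function enforces the corresponding precondition with an invariant. A minimal sketch:

    // Dispatch on config version: version 1 means initiate, anything greater
    // means reconfig.
    Status checkQuorumSketch(ReplicationExecutor* executor,
                             const ReplicaSetConfig& rsConfig,
                             int myIndex) {
        if (rsConfig.getConfigVersion() == 1) {
            return checkQuorumForInitiate(executor, rsConfig, myIndex);
        }
        return checkQuorumForReconfig(executor, rsConfig, myIndex);
    }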
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
index 8f0e01ec00b..7b3d869b47b 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
@@ -45,756 +45,770 @@
#include "mongo/unittest/unittest.h"
#include "mongo/util/net/hostandport.h"
-#define ASSERT_REASON_CONTAINS(STATUS, PATTERN) do { \
- const mongo::Status s_ = (STATUS); \
- ASSERT_FALSE(s_.reason().find(PATTERN) == std::string::npos) << \
- #STATUS ".reason() == " << s_.reason(); \
+#define ASSERT_REASON_CONTAINS(STATUS, PATTERN) \
+ do { \
+ const mongo::Status s_ = (STATUS); \
+ ASSERT_FALSE(s_.reason().find(PATTERN) == std::string::npos) \
+ << #STATUS ".reason() == " << s_.reason(); \
} while (false)
-#define ASSERT_NOT_REASON_CONTAINS(STATUS, PATTERN) do { \
- const mongo::Status s_ = (STATUS); \
- ASSERT_TRUE(s_.reason().find(PATTERN) == std::string::npos) << \
- #STATUS ".reason() == " << s_.reason(); \
+#define ASSERT_NOT_REASON_CONTAINS(STATUS, PATTERN) \
+ do { \
+ const mongo::Status s_ = (STATUS); \
+ ASSERT_TRUE(s_.reason().find(PATTERN) == std::string::npos) \
+ << #STATUS ".reason() == " << s_.reason(); \
} while (false)
namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- class CheckQuorumTest : public mongo::unittest::Test {
- protected:
- CheckQuorumTest();
-
- void startQuorumCheck(const ReplicaSetConfig& config, int myIndex);
- Status waitForQuorumCheck();
- bool isQuorumCheckDone();
-
- NetworkInterfaceMock* _net;
- StorageInterfaceMock* _storage;
- std::unique_ptr<ReplicationExecutor> _executor;
-
- private:
- void setUp();
- void tearDown();
-
- void _runQuorumCheck(const ReplicaSetConfig& config, int myIndex);
- virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) = 0;
-
- std::unique_ptr<stdx::thread> _executorThread;
- std::unique_ptr<stdx::thread> _quorumCheckThread;
- Status _quorumCheckStatus;
- stdx::mutex _mutex;
- bool _isQuorumCheckDone;
- };
-
- CheckQuorumTest::CheckQuorumTest() :
- _quorumCheckStatus(ErrorCodes::InternalError, "Not executed") {
- }
-
- void CheckQuorumTest::setUp() {
- _net = new NetworkInterfaceMock;
- _storage = new StorageInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng */ ));
- _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
- }
-
- void CheckQuorumTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
- }
-
- void CheckQuorumTest::startQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
- ASSERT_FALSE(_quorumCheckThread);
- _isQuorumCheckDone = false;
- _quorumCheckThread.reset(new stdx::thread(stdx::bind(&CheckQuorumTest::_runQuorumCheck,
- this,
- config,
- myIndex)));
- }
-
- Status CheckQuorumTest::waitForQuorumCheck() {
- ASSERT_TRUE(_quorumCheckThread);
- _quorumCheckThread->join();
- return _quorumCheckStatus;
+using executor::NetworkInterfaceMock;
+
+class CheckQuorumTest : public mongo::unittest::Test {
+protected:
+ CheckQuorumTest();
+
+ void startQuorumCheck(const ReplicaSetConfig& config, int myIndex);
+ Status waitForQuorumCheck();
+ bool isQuorumCheckDone();
+
+ NetworkInterfaceMock* _net;
+ StorageInterfaceMock* _storage;
+ std::unique_ptr<ReplicationExecutor> _executor;
+
+private:
+ void setUp();
+ void tearDown();
+
+ void _runQuorumCheck(const ReplicaSetConfig& config, int myIndex);
+ virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) = 0;
+
+ std::unique_ptr<stdx::thread> _executorThread;
+ std::unique_ptr<stdx::thread> _quorumCheckThread;
+ Status _quorumCheckStatus;
+ stdx::mutex _mutex;
+ bool _isQuorumCheckDone;
+};
+
+CheckQuorumTest::CheckQuorumTest()
+ : _quorumCheckStatus(ErrorCodes::InternalError, "Not executed") {}
+
+void CheckQuorumTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _storage = new StorageInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng */));
+ _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+}
+
+void CheckQuorumTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+void CheckQuorumTest::startQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
+ ASSERT_FALSE(_quorumCheckThread);
+ _isQuorumCheckDone = false;
+ _quorumCheckThread.reset(
+ new stdx::thread(stdx::bind(&CheckQuorumTest::_runQuorumCheck, this, config, myIndex)));
+}
+
+Status CheckQuorumTest::waitForQuorumCheck() {
+ ASSERT_TRUE(_quorumCheckThread);
+ _quorumCheckThread->join();
+ return _quorumCheckStatus;
+}
+
+bool CheckQuorumTest::isQuorumCheckDone() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _isQuorumCheckDone;
+}
+
+void CheckQuorumTest::_runQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
+ _quorumCheckStatus = _runQuorumCheckImpl(config, myIndex);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _isQuorumCheckDone = true;
+}
+
+class CheckQuorumForInitiate : public CheckQuorumTest {
+private:
+ virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
+ return checkQuorumForInitiate(_executor.get(), config, myIndex);
}
+};
- bool CheckQuorumTest::isQuorumCheckDone() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _isQuorumCheckDone;
+class CheckQuorumForReconfig : public CheckQuorumTest {
+protected:
+ virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
+ return checkQuorumForReconfig(_executor.get(), config, myIndex);
}
-
- void CheckQuorumTest::_runQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
- _quorumCheckStatus = _runQuorumCheckImpl(config, myIndex);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _isQuorumCheckDone = true;
- }
-
- class CheckQuorumForInitiate : public CheckQuorumTest {
- private:
- virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
- return checkQuorumForInitiate(_executor.get(), config, myIndex);
- }
- };
-
- class CheckQuorumForReconfig : public CheckQuorumTest {
- protected:
- virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
- return checkQuorumForReconfig(_executor.get(), config, myIndex);
- }
- };
-
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
+};
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+
+TEST_F(CheckQuorumForInitiate, ValidSingleNodeSet) {
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1"))));
+ startQuorumCheck(config, 0);
+ ASSERT_OK(waitForQuorumCheck());
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckCanceledByShutdown) {
+ _executor->shutdown();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1"))));
+ startQuorumCheck(config, 0);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, waitForQuorumCheck());
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSeveralDownNodes) {
+ // In this test, "we" are host "h3:1". All other nodes time out on
+ // their heartbeat request, and so the quorum check for initiate
+ // will fail because some members were unavailable.
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ startQuorumCheck(config, 2);
+ _net->enterNetwork();
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = config.getNumMembers() - 1;
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ _net->scheduleResponse(_net->getNextReadyRequest(),
+ startDate + Milliseconds(10),
+ ResponseStatus(ErrorCodes::NoSuchKey, "No reply"));
}
-
- TEST_F(CheckQuorumForInitiate, ValidSingleNodeSet) {
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1"))));
- startQuorumCheck(config, 0);
- ASSERT_OK(waitForQuorumCheck());
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(
+ status, "replSetInitiate quorum check failed because not all proposed set members");
+ ASSERT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+const BSONObj makeHeartbeatRequest(const ReplicaSetConfig& rsConfig, int myConfigIndex) {
+ const MemberConfig& myConfig = rsConfig.getMemberAt(myConfigIndex);
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName(rsConfig.getReplSetName());
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(rsConfig.getConfigVersion());
+ hbArgs.setCheckEmpty(rsConfig.getConfigVersion() == 1);
+ hbArgs.setSenderHost(myConfig.getHostAndPort());
+ hbArgs.setSenderId(myConfig.getId());
+ return hbArgs.toBSON();
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckSuccessForFiveNodes) {
+ // In this test, "we" are host "h3:1". All nodes respond successfully to their heartbeat
+ // requests, and the quorum check succeeds.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckCanceledByShutdown) {
- _executor->shutdown();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1"))));
- startQuorumCheck(config, 0);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, waitForQuorumCheck());
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSeveralDownNodes) {
- // In this test, "we" are host "h3:1". All other nodes time out on
- // their heartbeat request, and so the quorum check for initiate
- // will fail because some members were unavailable.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- startQuorumCheck(config, 2);
- _net->enterNetwork();
- const Date_t startDate = _net->now();
- const int numCommandsExpected = config.getNumMembers() - 1;
- for (int i = 0; i < numCommandsExpected; ++i) {
- _net->scheduleResponse(_net->getNextReadyRequest(),
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_OK(waitForQuorumCheck());
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToOneDownNode) {
+ // In this test, "we" are host "h3:1". All nodes except "h2:1" respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // all nodes must be available for initiate. This is so even though "h2"
+ // is neither voting nor electable.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1"
+ << "priority" << 0 << "votes" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3:1") << BSON("_id" << 4 << "host"
+ << "h4:1")
+ << BSON("_id" << 5 << "host"
+ << "h5:1") << BSON("_id" << 6 << "host"
+ << "h6:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h2", 1)) {
+ _net->scheduleResponse(noi,
startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No reply"));
+ ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
+ } else {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(
- status, "replSetInitiate quorum check failed because not all proposed set members");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
-
- const BSONObj makeHeartbeatRequest(const ReplicaSetConfig& rsConfig, int myConfigIndex) {
- const MemberConfig& myConfig = rsConfig.getMemberAt(myConfigIndex);
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName(rsConfig.getReplSetName());
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(rsConfig.getConfigVersion());
- hbArgs.setCheckEmpty(rsConfig.getConfigVersion() == 1);
- hbArgs.setSenderHost(myConfig.getHostAndPort());
- hbArgs.setSenderId(myConfig.getId());
- return hbArgs.toBSON();
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckSuccessForFiveNodes) {
- // In this test, "we" are host "h3:1". All nodes respond successfully to their heartbeat
- // requests, and the quorum check succeeds.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(
+ status, "replSetInitiate quorum check failed because not all proposed set members");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h6:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetNameMismatch) {
+ // In this test, "we" are host "h3:1". All nodes respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // "h4" declares that the requested replica set name was not what it expected.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h4", 1)) {
_net->scheduleResponse(noi,
startDate + Milliseconds(10),
ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
+ BSON("ok" << 0 << "mismatch" << true), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_OK(waitForQuorumCheck());
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToOneDownNode) {
- // In this test, "we" are host "h3:1". All nodes except "h2:1" respond
- // successfully to their heartbeat requests, but quorum check fails because
- // all nodes must be available for initiate. This is so even though "h2"
- // is neither voting nor electable.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1" <<
- "priority" << 0 << "votes" << 0) <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1") <<
- BSON("_id" << 6 << "host" << "h6:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h2", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(
- status, "replSetInitiate quorum check failed because not all proposed set members");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h6:1");
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetNameMismatch) {
- // In this test, "we" are host "h3:1". All nodes respond
- // successfully to their heartbeat requests, but quorum check fails because
- // "h4" declares that the requested replica set name was not what it expected.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h4", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 0 << "mismatch" << true),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our set name did not match");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_REASON_CONTAINS(status, "h4:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNode) {
- // In this test, "we" are host "h3:1". All nodes respond
- // successfully to their heartbeat requests, but quorum check fails because
- // "h5" declares that it is already initialized.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 0 <<
- "set" << "rs0" <<
- "v" << 1),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our set name did not match");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNode) {
+ // In this test, "we" are host "h3:1". All nodes respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // "h5" declares that it is already initialized.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 0 << "set"
+ << "rs0"
+ << "v" << 1),
+ Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our config version of");
- ASSERT_REASON_CONTAINS(
- status, "is no larger than the version");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNodeOnlyOneRespondent) {
- // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
- // and quorum check fails because "h5" declares that it is already initialized.
- //
- // Compare to QuorumCheckFailedDueToInitializedNode, above.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 0 <<
- "set" << "rs0" <<
- "v" << 1),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our config version of");
+ ASSERT_REASON_CONTAINS(status, "is no larger than the version");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNodeOnlyOneRespondent) {
+ // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
+ // and quorum check fails because "h5" declares that it is already initialized.
+ //
+ // Compare to QuorumCheckFailedDueToInitializedNode, above.
+
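+    // blackHole() swallows a request without ever completing it, so the four
+    // non-responding nodes neither succeed nor time out here; the check must
+    // fail on "h5"'s veto alone.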
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 0 << "set"
+ << "rs0"
+ << "v" << 1),
+ Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our config version of");
- ASSERT_REASON_CONTAINS(
- status, "is no larger than the version");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToNodeWithData) {
- // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
- // and quorum check fails because "h5" declares that it has data already.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- ReplSetHeartbeatResponse hbResp;
- hbResp.setConfigVersion(0);
- hbResp.noteHasData();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- hbResp.toBSON(false),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our config version of");
+ ASSERT_REASON_CONTAINS(status, "is no larger than the version");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToNodeWithData) {
+ // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
+ // and quorum check fails because "h5" declares that it has data already.
+
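+    // The response is built from a real ReplSetHeartbeatResponse with
+    // noteHasData() set, which the quorum checker maps to
+    // ErrorCodes::CannotInitializeNodeWithData.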
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setConfigVersion(0);
+ hbResp.noteHasData();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(hbResp.toBSON(false), Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::CannotInitializeNodeWithData, status);
- ASSERT_REASON_CONTAINS(
- status, "has data already");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
- TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToHigherConfigVersion) {
- // In this test, "we" are host "h3:1". The request to "h2" does not arrive before the end
- // of the test, and the request to "h1" comes back indicating a higher config version.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h1", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 0 <<
- "set" << "rs0" <<
- "v" << 5),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::CannotInitializeNodeWithData, status);
+ ASSERT_REASON_CONTAINS(status, "has data already");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToHigherConfigVersion) {
+ // In this test, "we" are host "h3:1". The request to "h2" does not arrive before the end
+ // of the test, and the request to "h1" comes back indicating a higher config version.
+
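+    // "h1" reports config version 5 against our proposed version 2; any remote
+    // version at least as large as ours vetoes the reconfig.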
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h1", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 0 << "set"
+ << "rs0"
+ << "v" << 5),
+ Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our config version of");
- ASSERT_REASON_CONTAINS(
- status, "is no larger than the version");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToIncompatibleSetName) {
- // In this test, "we" are host "h3:1". The request to "h1" times out,
- // and the request to "h2" comes back indicating an incompatible set name.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h2", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 0 << "mismatch" << true),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our config version of");
+ ASSERT_REASON_CONTAINS(status, "is no larger than the version");
+ ASSERT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToIncompatibleSetName) {
+ // In this test, "we" are host "h3:1". The request to "h1" times out,
+ // and the request to "h2" comes back indicating an incompatible set name.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h2", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(
+ BSON("ok" << 0 << "mismatch" << true), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(status, "Our set name did not match");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
-
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToInsufficientVoters) {
- // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are voters,
- // and of the voters, only "h1" responds. As a result, quorum check fails.
- // "h5" also responds, but because it cannot vote, is irrelevant for the reconfig
- // quorum check.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "votes" << 0))));
- const int myConfigIndex = 3;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our set name did not match");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToInsufficientVoters) {
+ // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are voters,
+ // and of the voters, only "h1" responds. As a result, quorum check fails.
+ // "h5" also responds, but because it cannot vote, is irrelevant for the reconfig
+ // quorum check.
+
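+    // Three of the five members can vote, so the reconfig quorum check needs
+    // responses from a majority of them (two); only "h1" answers.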
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "votes" << 0))));
+ const int myConfigIndex = 3;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(status, "not enough voting nodes responded; required 2 but only");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1 failed with");
- ASSERT_REASON_CONTAINS(status, "h3:1 failed with");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToNoElectableNodeResponding) {
- // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are electable,
- // and none of them respond.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "priority" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "priority" << 0))));
- const int myConfigIndex = 3;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(status, "not enough voting nodes responded; required 2 but only");
+ ASSERT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1 failed with");
+ ASSERT_REASON_CONTAINS(status, "h3:1 failed with");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToNoElectableNodeResponding) {
+ // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are electable,
+ // and none of them respond.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "priority" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "priority" << 0))));
+ const int myConfigIndex = 3;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(status, "no electable nodes responded");
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckSucceedsWithAsSoonAsPossible) {
- // In this test, "we" are host "h4". Only "h1", "h2" and "h3" can vote.
- // This test should succeed as soon as h1 and h2 respond, so we block
- // h3 and h5 from responding or timing out until the test completes.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "votes" << 0))));
- const int myConfigIndex = 3;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h2", 1)) {
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(status, "no electable nodes responded");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckSucceedsWithAsSoonAsPossible) {
+ // In this test, "we" are host "h4". Only "h1", "h2" and "h3" can vote.
+ // This test should succeed as soon as h1 and h2 respond, so we block
+ // h3 and h5 from responding or timing out until the test completes.
+
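+    // Because the check completes as soon as a majority of voters (h1 and h2)
+    // has responded, the black-holed requests to h3 and h5 never need to be
+    // resolved.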
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "votes" << 0))));
+ const int myConfigIndex = 3;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h2", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_OK(waitForQuorumCheck());
}
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_OK(waitForQuorumCheck());
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index 5abe3c2ed84..1f71fe762e7 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -39,273 +39,269 @@
namespace mongo {
namespace repl {
- CollectionCloner::CollectionCloner(ReplicationExecutor* executor,
- const HostAndPort& source,
- const NamespaceString& sourceNss,
- const CollectionOptions& options,
- const CallbackFn& onCompletion,
- StorageInterface* storageInterface)
- : _executor(executor),
- _source(source),
- _sourceNss(sourceNss),
- _destNss(_sourceNss),
- _options(options),
- _onCompletion(onCompletion),
- _storageInterface(storageInterface),
- _active(false),
- _listIndexesFetcher(_executor,
- _source,
- _sourceNss.db().toString(),
- BSON("listIndexes" << _sourceNss.coll()),
- stdx::bind(&CollectionCloner::_listIndexesCallback,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- stdx::placeholders::_3)),
- _findFetcher(_executor,
- _source,
- _sourceNss.db().toString(),
- BSON("find" << _sourceNss.coll() <<
- "noCursorTimeout" << true), // SERVER-1387
- stdx::bind(&CollectionCloner::_findCallback,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- stdx::placeholders::_3)),
- _indexSpecs(),
- _documents(),
- _dbWorkCallbackHandle(),
- _scheduleDbWorkFn([this](const ReplicationExecutor::CallbackFn& work) {
- return _executor->scheduleDBWork(work);
- }) {
-
- uassert(ErrorCodes::BadValue, "null replication executor", executor);
- uassert(ErrorCodes::BadValue, "invalid collection namespace: " + sourceNss.ns(),
- sourceNss.isValid());
- uassertStatusOK(options.validate());
- uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
- uassert(ErrorCodes::BadValue, "null storage interface", storageInterface);
+CollectionCloner::CollectionCloner(ReplicationExecutor* executor,
+ const HostAndPort& source,
+ const NamespaceString& sourceNss,
+ const CollectionOptions& options,
+ const CallbackFn& onCompletion,
+ StorageInterface* storageInterface)
+ : _executor(executor),
+ _source(source),
+ _sourceNss(sourceNss),
+ _destNss(_sourceNss),
+ _options(options),
+ _onCompletion(onCompletion),
+ _storageInterface(storageInterface),
+ _active(false),
+ _listIndexesFetcher(_executor,
+ _source,
+ _sourceNss.db().toString(),
+ BSON("listIndexes" << _sourceNss.coll()),
+ stdx::bind(&CollectionCloner::_listIndexesCallback,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ stdx::placeholders::_3)),
+ _findFetcher(_executor,
+ _source,
+ _sourceNss.db().toString(),
+ BSON("find" << _sourceNss.coll() << "noCursorTimeout" << true), // SERVER-1387
+ stdx::bind(&CollectionCloner::_findCallback,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ stdx::placeholders::_3)),
+ _indexSpecs(),
+ _documents(),
+ _dbWorkCallbackHandle(),
+ _scheduleDbWorkFn([this](const ReplicationExecutor::CallbackFn& work) {
+ return _executor->scheduleDBWork(work);
+ }) {
+ uassert(ErrorCodes::BadValue, "null replication executor", executor);
+ uassert(ErrorCodes::BadValue,
+ "invalid collection namespace: " + sourceNss.ns(),
+ sourceNss.isValid());
+ uassertStatusOK(options.validate());
+ uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
+ uassert(ErrorCodes::BadValue, "null storage interface", storageInterface);
+}
+
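+// Cancel any outstanding work and block until the cloner goes inactive before
+// destruction, so scheduled callbacks cannot run against a destroyed object.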
+CollectionCloner::~CollectionCloner() {
+ DESTRUCTOR_GUARD(cancel(); wait(););
+}
+
+const NamespaceString& CollectionCloner::getSourceNamespace() const {
+ return _sourceNss;
+}
+
+std::string CollectionCloner::getDiagnosticString() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ str::stream output;
+ output << "CollectionCloner";
+ output << " executor: " << _executor->getDiagnosticString();
+ output << " source: " << _source.toString();
+ output << " source namespace: " << _sourceNss.toString();
+ output << " destination namespace: " << _destNss.toString();
+ output << " collection options: " << _options.toBSON();
+ output << " active: " << _active;
+ output << " listIndexes fetcher: " << _listIndexesFetcher.getDiagnosticString();
+ output << " find fetcher: " << _findFetcher.getDiagnosticString();
+ output << " database worked callback handle: " << (_dbWorkCallbackHandle.isValid() ? "valid"
+ : "invalid");
+ return output;
+}
+
+bool CollectionCloner::isActive() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _active;
+}
+
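+// start() kicks off the asynchronous pipeline: the listIndexes fetcher runs
+// first, its callback schedules _beginCollectionCallback() as database work,
+// which in turn schedules the find fetcher, whose batches are applied by
+// _insertDocumentsCallback() until _finishCallback() commits the collection.
+//
+// Typical use (sketch; assumes a live executor and storage interface):
+//     CollectionCloner cloner(executor, source, nss, options, onCompletion, storage);
+//     uassertStatusOK(cloner.start());
+//     cloner.wait();  // blocks until onCompletion has been invoked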
+Status CollectionCloner::start() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+ if (_active) {
+ return Status(ErrorCodes::IllegalOperation, "collection cloner already started");
}
- CollectionCloner::~CollectionCloner() {
- DESTRUCTOR_GUARD(
- cancel();
- wait();
- );
+ Status scheduleResult = _listIndexesFetcher.schedule();
+ if (!scheduleResult.isOK()) {
+ return scheduleResult;
}
- const NamespaceString& CollectionCloner::getSourceNamespace() const {
- return _sourceNss;
- }
+ _active = true;
- std::string CollectionCloner::getDiagnosticString() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- str::stream output;
- output << "CollectionCloner";
- output << " executor: " << _executor->getDiagnosticString();
- output << " source: " << _source.toString();
- output << " source namespace: " << _sourceNss.toString();
- output << " destination namespace: " << _destNss.toString();
- output << " collection options: " << _options.toBSON();
- output << " active: " << _active;
- output << " listIndexes fetcher: " << _listIndexesFetcher.getDiagnosticString();
- output << " find fetcher: " << _findFetcher.getDiagnosticString();
- output << " database worked callback handle: "
- << (_dbWorkCallbackHandle.isValid() ? "valid" : "invalid");
- return output;
- }
+ return Status::OK();
+}
- bool CollectionCloner::isActive() const {
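+// cancel() snapshots the DB-work callback handle under the mutex, then cancels
+// the fetchers and the handle without holding the lock, so the executor is
+// never invoked while the cloner's mutex is held.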
+void CollectionCloner::cancel() {
+ ReplicationExecutor::CallbackHandle dbWorkCallbackHandle;
+ {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _active;
- }
- Status CollectionCloner::start() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
-
- if (_active) {
- return Status(ErrorCodes::IllegalOperation, "collection cloner already started");
+ if (!_active) {
+ return;
}
- Status scheduleResult = _listIndexesFetcher.schedule();
- if (!scheduleResult.isOK()) {
- return scheduleResult;
- }
+ dbWorkCallbackHandle = _dbWorkCallbackHandle;
+ }
- _active = true;
+ _listIndexesFetcher.cancel();
+ _findFetcher.cancel();
- return Status::OK();
+ if (dbWorkCallbackHandle.isValid()) {
+ _executor->cancel(dbWorkCallbackHandle);
}
+}
- void CollectionCloner::cancel() {
- ReplicationExecutor::CallbackHandle dbWorkCallbackHandle;
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+void CollectionCloner::wait() {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _condition.wait(lk, [this]() { return !_active; });
+}
- if (!_active) {
- return;
- }
+void CollectionCloner::waitForDbWorker() {
+ ReplicationExecutor::CallbackHandle dbWorkCallbackHandle;
+ {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
- dbWorkCallbackHandle = _dbWorkCallbackHandle;
+ if (!_active) {
+ return;
}
- _listIndexesFetcher.cancel();
- _findFetcher.cancel();
-
- if (dbWorkCallbackHandle.isValid()) {
- _executor->cancel(dbWorkCallbackHandle);
- }
+ dbWorkCallbackHandle = _dbWorkCallbackHandle;
}
- void CollectionCloner::wait() {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _condition.wait(lk, [this]() { return !_active; });
+ if (dbWorkCallbackHandle.isValid()) {
+ _executor->wait(dbWorkCallbackHandle);
}
+}
- void CollectionCloner::waitForDbWorker() {
- ReplicationExecutor::CallbackHandle dbWorkCallbackHandle;
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
-
- if (!_active) {
- return;
- }
+void CollectionCloner::setScheduleDbWorkFn(const ScheduleDbWorkFn& scheduleDbWorkFn) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
- dbWorkCallbackHandle = _dbWorkCallbackHandle;
- }
+ _scheduleDbWorkFn = scheduleDbWorkFn;
+}
- if (dbWorkCallbackHandle.isValid()) {
- _executor->wait(dbWorkCallbackHandle);
- }
+void CollectionCloner::_listIndexesCallback(const Fetcher::QueryResponseStatus& fetchResult,
+ Fetcher::NextAction* nextAction,
+ BSONObjBuilder* getMoreBob) {
+ if (!fetchResult.isOK()) {
+ _finishCallback(nullptr, fetchResult.getStatus());
+ return;
}
- void CollectionCloner::setScheduleDbWorkFn(const ScheduleDbWorkFn& scheduleDbWorkFn) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+ auto batchData(fetchResult.getValue());
+ auto&& documents = batchData.documents;
- _scheduleDbWorkFn = scheduleDbWorkFn;
+ if (documents.empty()) {
+ warning() << "No indexes found for collection " << _sourceNss.ns() << " while cloning from "
+ << _source;
}
- void CollectionCloner::_listIndexesCallback(const Fetcher::QueryResponseStatus& fetchResult,
- Fetcher::NextAction* nextAction,
- BSONObjBuilder* getMoreBob) {
- if (!fetchResult.isOK()) {
- _finishCallback(nullptr, fetchResult.getStatus());
- return;
- }
-
- auto batchData(fetchResult.getValue());
- auto&& documents = batchData.documents;
-
- if (documents.empty()) {
- warning() << "No indexes found for collection " << _sourceNss.ns()
- << " while cloning from " << _source;
- }
-
- // We may be called with multiple batches leading to a need to grow _indexSpecs.
- _indexSpecs.reserve(_indexSpecs.size() + documents.size());
- _indexSpecs.insert(_indexSpecs.end(), documents.begin(), documents.end());
-
- // The fetcher will continue to call with kGetMore until an error or the last batch.
- if (*nextAction == Fetcher::NextAction::kGetMore) {
- invariant(getMoreBob);
- getMoreBob->append("getMore", batchData.cursorId);
- getMoreBob->append("collection", batchData.nss.coll());
- return;
- }
-
- // We have all of the indexes now, so we can start cloning the collection data.
- auto&& scheduleResult = _scheduleDbWorkFn(
- stdx::bind(&CollectionCloner::_beginCollectionCallback, this, stdx::placeholders::_1));
- if (!scheduleResult.isOK()) {
- _finishCallback(nullptr, scheduleResult.getStatus());
- return;
- }
+ // We may be called with multiple batches leading to a need to grow _indexSpecs.
+ _indexSpecs.reserve(_indexSpecs.size() + documents.size());
+ _indexSpecs.insert(_indexSpecs.end(), documents.begin(), documents.end());
- _dbWorkCallbackHandle = scheduleResult.getValue();
+ // The fetcher will continue to call with kGetMore until an error or the last batch.
+ if (*nextAction == Fetcher::NextAction::kGetMore) {
+ invariant(getMoreBob);
+ getMoreBob->append("getMore", batchData.cursorId);
+ getMoreBob->append("collection", batchData.nss.coll());
+ return;
}
- void CollectionCloner::_findCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
- Fetcher::NextAction* nextAction,
- BSONObjBuilder* getMoreBob) {
- if (!fetchResult.isOK()) {
- _finishCallback(nullptr, fetchResult.getStatus());
- return;
- }
+ // We have all of the indexes now, so we can start cloning the collection data.
+ auto&& scheduleResult = _scheduleDbWorkFn(
+ stdx::bind(&CollectionCloner::_beginCollectionCallback, this, stdx::placeholders::_1));
+ if (!scheduleResult.isOK()) {
+ _finishCallback(nullptr, scheduleResult.getStatus());
+ return;
+ }
- auto batchData(fetchResult.getValue());
- _documents = batchData.documents;
+ _dbWorkCallbackHandle = scheduleResult.getValue();
+}
- bool lastBatch = *nextAction == Fetcher::NextAction::kNoAction;
- auto&& scheduleResult = _scheduleDbWorkFn(stdx::bind(
- &CollectionCloner::_insertDocumentsCallback, this, stdx::placeholders::_1, lastBatch));
- if (!scheduleResult.isOK()) {
- _finishCallback(nullptr, scheduleResult.getStatus());
- return;
- }
+void CollectionCloner::_findCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
+ Fetcher::NextAction* nextAction,
+ BSONObjBuilder* getMoreBob) {
+ if (!fetchResult.isOK()) {
+ _finishCallback(nullptr, fetchResult.getStatus());
+ return;
+ }
- if (*nextAction == Fetcher::NextAction::kGetMore) {
- invariant(getMoreBob);
- getMoreBob->append("getMore", batchData.cursorId);
- getMoreBob->append("collection", batchData.nss.coll());
- }
+ auto batchData(fetchResult.getValue());
+ _documents = batchData.documents;
- _dbWorkCallbackHandle = scheduleResult.getValue();
+ bool lastBatch = *nextAction == Fetcher::NextAction::kNoAction;
+ auto&& scheduleResult = _scheduleDbWorkFn(stdx::bind(
+ &CollectionCloner::_insertDocumentsCallback, this, stdx::placeholders::_1, lastBatch));
+ if (!scheduleResult.isOK()) {
+ _finishCallback(nullptr, scheduleResult.getStatus());
+ return;
}
- void CollectionCloner::_beginCollectionCallback(const ReplicationExecutor::CallbackArgs& cbd) {
- OperationContext* txn = cbd.txn;
- if (!cbd.status.isOK()) {
- _finishCallback(txn, cbd.status);
- return;
- }
+ if (*nextAction == Fetcher::NextAction::kGetMore) {
+ invariant(getMoreBob);
+ getMoreBob->append("getMore", batchData.cursorId);
+ getMoreBob->append("collection", batchData.nss.coll());
+ }
- Status status = _storageInterface->beginCollection(txn, _destNss, _options, _indexSpecs);
- if (!status.isOK()) {
- _finishCallback(txn, status);
- return;
- }
+ _dbWorkCallbackHandle = scheduleResult.getValue();
+}
- Status scheduleStatus = _findFetcher.schedule();
- if (!scheduleStatus.isOK()) {
- _finishCallback(txn, scheduleStatus);
- return;
- }
+void CollectionCloner::_beginCollectionCallback(const ReplicationExecutor::CallbackArgs& cbd) {
+ OperationContext* txn = cbd.txn;
+ if (!cbd.status.isOK()) {
+ _finishCallback(txn, cbd.status);
+ return;
}
- void CollectionCloner::_insertDocumentsCallback(const ReplicationExecutor::CallbackArgs& cbd,
- bool lastBatch) {
- OperationContext* txn = cbd.txn;
- if (!cbd.status.isOK()) {
- _finishCallback(txn, cbd.status);
- return;
- }
+ Status status = _storageInterface->beginCollection(txn, _destNss, _options, _indexSpecs);
+ if (!status.isOK()) {
+ _finishCallback(txn, status);
+ return;
+ }
- Status status = _storageInterface->insertDocuments(txn, _destNss, _documents);
- if (!status.isOK()) {
- _finishCallback(txn, status);
- return;
- }
+ Status scheduleStatus = _findFetcher.schedule();
+ if (!scheduleStatus.isOK()) {
+ _finishCallback(txn, scheduleStatus);
+ return;
+ }
+}
+
+void CollectionCloner::_insertDocumentsCallback(const ReplicationExecutor::CallbackArgs& cbd,
+ bool lastBatch) {
+ OperationContext* txn = cbd.txn;
+ if (!cbd.status.isOK()) {
+ _finishCallback(txn, cbd.status);
+ return;
+ }
- if (!lastBatch) {
- return;
- }
+ Status status = _storageInterface->insertDocuments(txn, _destNss, _documents);
+ if (!status.isOK()) {
+ _finishCallback(txn, status);
+ return;
+ }
- _finishCallback(txn, Status::OK());
+ if (!lastBatch) {
+ return;
}
- void CollectionCloner::_finishCallback(OperationContext* txn, const Status& status) {
- if (status.isOK()) {
- auto commitStatus = _storageInterface->commitCollection(txn, _destNss);
- if (!commitStatus.isOK()) {
- warning() << "Failed to commit changes to collection " << _destNss.ns()
- << ": " << commitStatus;
- }
+ _finishCallback(txn, Status::OK());
+}
+
+void CollectionCloner::_finishCallback(OperationContext* txn, const Status& status) {
+ if (status.isOK()) {
+ auto commitStatus = _storageInterface->commitCollection(txn, _destNss);
+ if (!commitStatus.isOK()) {
+ warning() << "Failed to commit changes to collection " << _destNss.ns() << ": "
+ << commitStatus;
}
- _onCompletion(status);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _active = false;
- _condition.notify_all();
}
-
-} // namespace repl
-} // namespace mongo
+ _onCompletion(status);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _active = false;
+ _condition.notify_all();
+}
+
+} // namespace repl
+} // namespace mongo
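A note on the Fetcher contract that _listIndexesCallback and _findCallback above rely on: a callback requests the next batch by filling in the getMore builder, and ends the exchange by leaving it empty. A minimal sketch of that pattern in isolation; accumulate() is a hypothetical stand-in for the cloner's per-batch work:

    // Sketch of a Fetcher callback following the same continuation contract as
    // _listIndexesCallback/_findCallback above. 'accumulate' is hypothetical.
    void exampleFetchCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
                              Fetcher::NextAction* nextAction,
                              BSONObjBuilder* getMoreBob) {
        if (!fetchResult.isOK()) {
            return;  // surface the error through a completion callback instead
        }
        const auto& batchData = fetchResult.getValue();
        accumulate(batchData.documents);  // per-batch handling (hypothetical)
        if (*nextAction == Fetcher::NextAction::kGetMore) {
            // Appending these fields tells the fetcher to issue a getMore on the
            // same cursor; leaving the builder untouched ends the exchange.
            invariant(getMoreBob);
            getMoreBob->append("getMore", batchData.cursorId);
            getMoreBob->append("collection", batchData.nss.coll());
        }
    }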
diff --git a/src/mongo/db/repl/collection_cloner.h b/src/mongo/db/repl/collection_cloner.h
index 69f3caa1f18..cf69d7f44ef 100644
--- a/src/mongo/db/repl/collection_cloner.h
+++ b/src/mongo/db/repl/collection_cloner.h
@@ -48,217 +48,212 @@
namespace mongo {
namespace repl {
- class CollectionCloner : public BaseCloner {
- MONGO_DISALLOW_COPYING(CollectionCloner);
- public:
-
- /**
- * Storage interface for collection cloner.
- *
- * Supports the operations on the storage layer required by the cloner.
- */
- class StorageInterface;
-
- /**
- * Type of function to schedule database work with the executor.
- *
- * Must be consistent with ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock().
- *
- * Used for testing only.
- */
- using ScheduleDbWorkFn = stdx::function<StatusWith<ReplicationExecutor::CallbackHandle> (
- const ReplicationExecutor::CallbackFn&)>;
-
- /**
- * Creates CollectionCloner task in inactive state. Use start() to activate cloner.
- *
- * The cloner calls 'onCompletion' when the collection cloning has completed or failed.
- *
- * 'onCompletion' will be called exactly once.
- *
- * Takes ownership of the passed StorageInterface object.
- */
- CollectionCloner(ReplicationExecutor* executor,
- const HostAndPort& source,
- const NamespaceString& sourceNss,
- const CollectionOptions& options,
- const CallbackFn& onCompletion,
- StorageInterface* storageInterface);
-
- virtual ~CollectionCloner();
-
- const NamespaceString& getSourceNamespace() const;
-
- std::string getDiagnosticString() const override;
-
- bool isActive() const override;
-
- Status start() override;
-
- void cancel() override;
-
- void wait() override;
-
- //
- // Testing only functions below.
- //
-
- /**
- * Waits for database worker to complete.
- * Returns immediately if collection cloner is not active.
- *
- * For testing only.
- */
- void waitForDbWorker();
-
- /**
- * Overrides how executor schedules database work.
- *
- * For testing only.
- */
- void setScheduleDbWorkFn(const ScheduleDbWorkFn& scheduleDbWorkFn);
-
- private:
-
- /**
- * Read index specs from listIndexes result.
- */
- void _listIndexesCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
- Fetcher::NextAction* nextAction,
- BSONObjBuilder* getMoreBob);
-
- /**
- * Read collection documents from find result.
- */
- void _findCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
- Fetcher::NextAction* nextAction,
- BSONObjBuilder* getMoreBob);
-
- /**
- * Request storage interface to create collection.
- *
- * Called multiple times if there are more than one batch of responses from listIndexes
- * cursor.
- *
- * 'nextAction' is an in/out arg indicating the next action planned and to be taken
- * by the fetcher.
- */
- void _beginCollectionCallback(const ReplicationExecutor::CallbackArgs& callbackData);
-
- /**
- * Called multiple times if there are more than one batch of documents from the fetcher.
- * On the last batch, 'lastBatch' will be true.
- *
- * Each document returned will be inserted via the storage interfaceRequest storage
- * interface.
- */
- void _insertDocumentsCallback(const ReplicationExecutor::CallbackArgs& callbackData,
- bool lastBatch);
-
- /**
- * Reports completion status.
- * Commits/aborts collection building.
- * Sets cloner to inactive.
- */
- void _finishCallback(OperationContext* txn, const Status& status);
-
- // Not owned by us.
- ReplicationExecutor* _executor;
-
- HostAndPort _source;
- NamespaceString _sourceNss;
- NamespaceString _destNss;
- CollectionOptions _options;
-
- // Invoked once when cloning completes or fails.
- CallbackFn _onCompletion;
-
- // Not owned by us.
- StorageInterface* _storageInterface;
-
- // Protects member data of this collection cloner.
- mutable stdx::mutex _mutex;
+class CollectionCloner : public BaseCloner {
+ MONGO_DISALLOW_COPYING(CollectionCloner);
- mutable stdx::condition_variable _condition;
+public:
+ /**
+ * Storage interface for collection cloner.
+ *
+ * Supports the operations on the storage layer required by the cloner.
+ */
+ class StorageInterface;
+
+ /**
+ * Type of function to schedule database work with the executor.
+ *
+ * Must be consistent with ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock().
+ *
+ * Used for testing only.
+ */
+ using ScheduleDbWorkFn = stdx::function<StatusWith<ReplicationExecutor::CallbackHandle>(
+ const ReplicationExecutor::CallbackFn&)>;
+
+ /**
+ * Creates CollectionCloner task in inactive state. Use start() to activate cloner.
+ *
+ * The cloner calls 'onCompletion' when the collection cloning has completed or failed.
+ *
+ * 'onCompletion' will be called exactly once.
+ *
+ * Takes ownership of the passed StorageInterface object.
+ */
+ CollectionCloner(ReplicationExecutor* executor,
+ const HostAndPort& source,
+ const NamespaceString& sourceNss,
+ const CollectionOptions& options,
+ const CallbackFn& onCompletion,
+ StorageInterface* storageInterface);
+
+ virtual ~CollectionCloner();
+
+ const NamespaceString& getSourceNamespace() const;
+
+ std::string getDiagnosticString() const override;
+
+ bool isActive() const override;
+
+ Status start() override;
- // _active is true when Collection Cloner is started.
- bool _active;
+ void cancel() override;
- // Fetcher instances for running listIndexes and find commands.
- Fetcher _listIndexesFetcher;
- Fetcher _findFetcher;
+ void wait() override;
- std::vector<BSONObj> _indexSpecs;
-
- // Current batch of documents read from fetcher to insert into collection.
- std::vector<BSONObj> _documents;
-
- // Callback handle for database worker.
- ReplicationExecutor::CallbackHandle _dbWorkCallbackHandle;
-
- // Function for scheduling database work using the executor.
- ScheduleDbWorkFn _scheduleDbWorkFn;
-
- };
+ //
+ // Testing only functions below.
+ //
/**
- * Storage interface used by the collection cloner to build a collection.
+ * Waits for database worker to complete.
+ * Returns immediately if collection cloner is not active.
*
- * Operation context is provided by the replication executor via the cloner.
+ * For testing only.
+ */
+ void waitForDbWorker();
+
+ /**
+ * Overrides how executor schedules database work.
*
- * The storage interface is expected to acquire locks on any resources it needs
- * to perform any of its functions.
+ * For testing only.
+ */
+ void setScheduleDbWorkFn(const ScheduleDbWorkFn& scheduleDbWorkFn);
+
+private:
+ /**
+ * Read index specs from listIndexes result.
+ */
+ void _listIndexesCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
+ Fetcher::NextAction* nextAction,
+ BSONObjBuilder* getMoreBob);
+
+ /**
+ * Read collection documents from find result.
+ */
+ void _findCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
+ Fetcher::NextAction* nextAction,
+ BSONObjBuilder* getMoreBob);
+
+ /**
+ * Request storage interface to create collection.
+ *
+     * Called multiple times if there is more than one batch of responses from the
+     * listIndexes cursor.
*
- * TODO: Consider having commit/abort/cancel functions.
+ * 'nextAction' is an in/out arg indicating the next action planned and to be taken
+ * by the fetcher.
*/
- class CollectionCloner::StorageInterface {
- public:
-
- virtual ~StorageInterface() = default;
-
- /**
- * Creates a collection with the provided indexes.
- *
- * Assume that no database locks have been acquired prior to calling this
- * function.
- */
- virtual Status beginCollection(OperationContext* txn,
- const NamespaceString& nss,
- const CollectionOptions& options,
- const std::vector<BSONObj>& indexSpecs) = 0;
-
- /**
- * Inserts documents into a collection.
- *
- * Assume that no database locks have been acquired prior to calling this
- * function.
- */
- virtual Status insertDocuments(OperationContext* txn,
- const NamespaceString& nss,
- const std::vector<BSONObj>& documents) = 0;
-
- /**
- * Commits changes to collection. No effect if collection building has not begun.
- * Operation context could be null.
- */
- virtual Status commitCollection(OperationContext* txn,
- const NamespaceString& nss) = 0;
-
- /**
- * Inserts missing document into a collection (not related to insertDocuments above),
- * during initial sync retry logic
- */
- virtual Status insertMissingDoc(OperationContext* txn,
- const NamespaceString& nss,
- const BSONObj& doc) = 0;
-
- /**
- * Inserts missing document into a collection (not related to insertDocuments above),
- * during initial sync retry logic
- */
- virtual Status dropUserDatabases(OperationContext* txn) = 0;
-
- };
-
-} // namespace repl
-} // namespace mongo
+ void _beginCollectionCallback(const ReplicationExecutor::CallbackArgs& callbackData);
+
+ /**
+     * Called multiple times if there is more than one batch of documents from the fetcher.
+ * On the last batch, 'lastBatch' will be true.
+ *
+     * Each document returned will be inserted via the storage interface.
+ */
+ void _insertDocumentsCallback(const ReplicationExecutor::CallbackArgs& callbackData,
+ bool lastBatch);
+
+ /**
+ * Reports completion status.
+ * Commits/aborts collection building.
+ * Sets cloner to inactive.
+ */
+ void _finishCallback(OperationContext* txn, const Status& status);
+
+ // Not owned by us.
+ ReplicationExecutor* _executor;
+
+ HostAndPort _source;
+ NamespaceString _sourceNss;
+ NamespaceString _destNss;
+ CollectionOptions _options;
+
+ // Invoked once when cloning completes or fails.
+ CallbackFn _onCompletion;
+
+ // Not owned by us.
+ StorageInterface* _storageInterface;
+
+ // Protects member data of this collection cloner.
+ mutable stdx::mutex _mutex;
+
+ mutable stdx::condition_variable _condition;
+
+    // _active is true when the collection cloner is started.
+ bool _active;
+
+ // Fetcher instances for running listIndexes and find commands.
+ Fetcher _listIndexesFetcher;
+ Fetcher _findFetcher;
+
+ std::vector<BSONObj> _indexSpecs;
+
+ // Current batch of documents read from fetcher to insert into collection.
+ std::vector<BSONObj> _documents;
+
+ // Callback handle for database worker.
+ ReplicationExecutor::CallbackHandle _dbWorkCallbackHandle;
+
+ // Function for scheduling database work using the executor.
+ ScheduleDbWorkFn _scheduleDbWorkFn;
+};
+
+/**
+ * Storage interface used by the collection cloner to build a collection.
+ *
+ * Operation context is provided by the replication executor via the cloner.
+ *
+ * The storage interface is expected to acquire locks on any resources it needs
+ * to perform any of its functions.
+ *
+ * TODO: Consider having commit/abort/cancel functions.
+ */
+class CollectionCloner::StorageInterface {
+public:
+ virtual ~StorageInterface() = default;
+
+ /**
+ * Creates a collection with the provided indexes.
+ *
+ * Assume that no database locks have been acquired prior to calling this
+ * function.
+ */
+ virtual Status beginCollection(OperationContext* txn,
+ const NamespaceString& nss,
+ const CollectionOptions& options,
+ const std::vector<BSONObj>& indexSpecs) = 0;
+
+ /**
+ * Inserts documents into a collection.
+ *
+ * Assume that no database locks have been acquired prior to calling this
+ * function.
+ */
+ virtual Status insertDocuments(OperationContext* txn,
+ const NamespaceString& nss,
+ const std::vector<BSONObj>& documents) = 0;
+
+ /**
+ * Commits changes to collection. No effect if collection building has not begun.
+ * Operation context could be null.
+ */
+ virtual Status commitCollection(OperationContext* txn, const NamespaceString& nss) = 0;
+
+ /**
+     * Inserts a missing document into a collection (not related to insertDocuments above)
+     * during initial sync retry logic.
+ */
+ virtual Status insertMissingDoc(OperationContext* txn,
+ const NamespaceString& nss,
+ const BSONObj& doc) = 0;
+
+ /**
+     * Drops all user databases. Used during initial sync to remove existing data
+     * before cloning begins.
+ */
+ virtual Status dropUserDatabases(OperationContext* txn) = 0;
+};
+
+} // namespace repl
+} // namespace mongo
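Since CollectionCloner::StorageInterface is pure virtual, a test or prototype can satisfy it with a trivial stub. A minimal sketch assuming only the declarations above; the class name NoopStorageInterface is hypothetical:

    // Minimal stub satisfying CollectionCloner::StorageInterface; every
    // operation succeeds without touching storage. For illustration only.
    class NoopStorageInterface : public CollectionCloner::StorageInterface {
    public:
        Status beginCollection(OperationContext* txn,
                               const NamespaceString& nss,
                               const CollectionOptions& options,
                               const std::vector<BSONObj>& indexSpecs) override {
            return Status::OK();
        }
        Status insertDocuments(OperationContext* txn,
                               const NamespaceString& nss,
                               const std::vector<BSONObj>& documents) override {
            return Status::OK();
        }
        Status commitCollection(OperationContext* txn, const NamespaceString& nss) override {
            return Status::OK();
        }
        Status insertMissingDoc(OperationContext* txn,
                                const NamespaceString& nss,
                                const BSONObj& doc) override {
            return Status::OK();
        }
        Status dropUserDatabases(OperationContext* txn) override {
            return Status::OK();
        }
    };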
diff --git a/src/mongo/db/repl/collection_cloner_test.cpp b/src/mongo/db/repl/collection_cloner_test.cpp
index 4a6c4d2bc03..8e41238ff7c 100644
--- a/src/mongo/db/repl/collection_cloner_test.cpp
+++ b/src/mongo/db/repl/collection_cloner_test.cpp
@@ -39,451 +39,454 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
-
- class CollectionClonerTest : public BaseClonerTest {
- public:
-
- BaseCloner* getCloner() const override;
-
- protected:
-
- void setUp() override;
- void tearDown() override;
-
- CollectionOptions options;
- std::unique_ptr<CollectionCloner> collectionCloner;
- };
-
- void CollectionClonerTest::setUp() {
- BaseClonerTest::setUp();
- options.reset();
- options.storageEngine = BSON("storageEngine1" << BSONObj());
- collectionCloner.reset(new CollectionCloner(&getExecutor(), target, nss, options,
- stdx::bind(&CollectionClonerTest::setStatus,
- this,
- stdx::placeholders::_1),
- storageInterface.get()));
+using namespace mongo;
+using namespace mongo::repl;
+
+class CollectionClonerTest : public BaseClonerTest {
+public:
+ BaseCloner* getCloner() const override;
+
+protected:
+ void setUp() override;
+ void tearDown() override;
+
+ CollectionOptions options;
+ std::unique_ptr<CollectionCloner> collectionCloner;
+};
+
+void CollectionClonerTest::setUp() {
+ BaseClonerTest::setUp();
+ options.reset();
+ options.storageEngine = BSON("storageEngine1" << BSONObj());
+ collectionCloner.reset(new CollectionCloner(
+ &getExecutor(),
+ target,
+ nss,
+ options,
+ stdx::bind(&CollectionClonerTest::setStatus, this, stdx::placeholders::_1),
+ storageInterface.get()));
+}
+
+void CollectionClonerTest::tearDown() {
+ BaseClonerTest::tearDown();
+ // Executor may still invoke collection cloner's callback before shutting down.
+ collectionCloner.reset();
+ options.reset();
+}
+
+BaseCloner* CollectionClonerTest::getCloner() const {
+ return collectionCloner.get();
+}
+
+TEST_F(CollectionClonerTest, InvalidConstruction) {
+ ReplicationExecutor& executor = getExecutor();
+
+ const auto& cb = [](const Status&) { FAIL("should not reach here"); };
+
+ // Null executor.
+ {
+ CollectionCloner::StorageInterface* si = storageInterface.get();
+ ASSERT_THROWS(CollectionCloner(nullptr, target, nss, options, cb, si), UserException);
}
- void CollectionClonerTest::tearDown() {
- BaseClonerTest::tearDown();
- // Executor may still invoke collection cloner's callback before shutting down.
- collectionCloner.reset();
- options.reset();
- }
+    // Null storage interface.
+ ASSERT_THROWS(CollectionCloner(&executor, target, nss, options, cb, nullptr), UserException);
- BaseCloner* CollectionClonerTest::getCloner() const {
- return collectionCloner.get();
+ // Invalid namespace.
+ {
+ NamespaceString badNss("db.");
+ CollectionCloner::StorageInterface* si = storageInterface.get();
+ ASSERT_THROWS(CollectionCloner(&executor, target, badNss, options, cb, si), UserException);
}
- TEST_F(CollectionClonerTest, InvalidConstruction) {
- ReplicationExecutor& executor = getExecutor();
-
- const auto& cb = [](const Status&) { FAIL("should not reach here"); };
-
- // Null executor.
- {
- CollectionCloner::StorageInterface* si = storageInterface.get();
- ASSERT_THROWS(CollectionCloner(nullptr, target, nss, options, cb, si), UserException);
- }
-
- // Null storage interface
- ASSERT_THROWS(CollectionCloner(&executor, target, nss, options, cb, nullptr),
+ // Invalid collection options.
+ {
+ CollectionOptions invalidOptions;
+ invalidOptions.storageEngine = BSON("storageEngine1"
+ << "not a document");
+ CollectionCloner::StorageInterface* si = storageInterface.get();
+ ASSERT_THROWS(CollectionCloner(&executor, target, nss, invalidOptions, cb, si),
UserException);
-
- // Invalid namespace.
- {
- NamespaceString badNss("db.");
- CollectionCloner::StorageInterface* si = storageInterface.get();
- ASSERT_THROWS(CollectionCloner(&executor, target, badNss, options, cb, si),
- UserException);
- }
-
- // Invalid collection options.
- {
- CollectionOptions invalidOptions;
- invalidOptions.storageEngine = BSON("storageEngine1" << "not a document");
- CollectionCloner::StorageInterface* si = storageInterface.get();
- ASSERT_THROWS(CollectionCloner(&executor, target, nss, invalidOptions, cb, si),
- UserException);
- }
-
- // Callback function cannot be null.
- {
- CollectionCloner::CallbackFn nullCb;
- CollectionCloner::StorageInterface* si = storageInterface.get();
- ASSERT_THROWS(CollectionCloner(&executor, target, nss, options, nullCb, si),
- UserException);
- }
- }
-
- TEST_F(CollectionClonerTest, ClonerLifeCycle) {
- testLifeCycle();
- }
-
- TEST_F(CollectionClonerTest, FirstRemoteCommand) {
- ASSERT_OK(collectionCloner->start());
-
- auto net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- NetworkOperationIterator noi = net->getNextReadyRequest();
- auto&& noiRequest = noi->getRequest();
- ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
- ASSERT_EQUALS("listIndexes", std::string(noiRequest.cmdObj.firstElementFieldName()));
- ASSERT_EQUALS(nss.coll().toString(), noiRequest.cmdObj.firstElement().valuestrsafe());
- ASSERT_FALSE(net->hasReadyRequests());
- ASSERT_TRUE(collectionCloner->isActive());
}
- TEST_F(CollectionClonerTest, RemoteCollectionMissing) {
- ASSERT_OK(collectionCloner->start());
-
- processNetworkResponse(
- BSON("ok" << 0 << "errmsg" << "" << "code" << ErrorCodes::NamespaceNotFound));
-
- ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
+ // Callback function cannot be null.
+ {
+ CollectionCloner::CallbackFn nullCb;
+ CollectionCloner::StorageInterface* si = storageInterface.get();
+ ASSERT_THROWS(CollectionCloner(&executor, target, nss, options, nullCb, si), UserException);
}
+}
+
+TEST_F(CollectionClonerTest, ClonerLifeCycle) {
+ testLifeCycle();
+}
+
+TEST_F(CollectionClonerTest, FirstRemoteCommand) {
+ ASSERT_OK(collectionCloner->start());
+
+ auto net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ NetworkOperationIterator noi = net->getNextReadyRequest();
+ auto&& noiRequest = noi->getRequest();
+ ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
+ ASSERT_EQUALS("listIndexes", std::string(noiRequest.cmdObj.firstElementFieldName()));
+ ASSERT_EQUALS(nss.coll().toString(), noiRequest.cmdObj.firstElement().valuestrsafe());
+ ASSERT_FALSE(net->hasReadyRequests());
+ ASSERT_TRUE(collectionCloner->isActive());
+}
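
For reference, the assertions in FirstRemoteCommand imply a first remote command of roughly this shape; the collection name is illustrative:

    // listIndexes command implied by the assertions above; "coll" stands in
    // for nss.coll().
    BSONObj expectedListIndexes = BSON("listIndexes" << "coll");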
+
+TEST_F(CollectionClonerTest, RemoteCollectionMissing) {
+ ASSERT_OK(collectionCloner->start());
+
+ processNetworkResponse(BSON("ok" << 0 << "errmsg"
+ << ""
+ << "code" << ErrorCodes::NamespaceNotFound));
+
+ ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
+
+// A collection may have no indexes. The cloner will produce a warning but
+// will still proceed with cloning.
+TEST_F(CollectionClonerTest, ListIndexesReturnedNoIndexes) {
+ ASSERT_OK(collectionCloner->start());
+
+    // Using a non-zero cursor id to ensure that the cloner stops the fetcher from
+    // retrieving more results.
+ processNetworkResponse(createListIndexesResponse(1, BSONArray()));
+
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(collectionCloner->isActive());
+
+ ASSERT_TRUE(getNet()->hasReadyRequests());
+}
+
+TEST_F(CollectionClonerTest, BeginCollectionScheduleDbWorkFailed) {
+ ASSERT_OK(collectionCloner->start());
+
+ // Replace scheduleDbWork function so that cloner will fail to schedule DB work after
+ // getting index specs.
+ collectionCloner->setScheduleDbWorkFn([](const ReplicationExecutor::CallbackFn& workFn) {
+ return StatusWith<ReplicationExecutor::CallbackHandle>(ErrorCodes::UnknownError, "");
+ });
+
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+
+ ASSERT_EQUALS(ErrorCodes::UnknownError, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
+
+TEST_F(CollectionClonerTest, BeginCollectionCallbackCanceled) {
+ ASSERT_OK(collectionCloner->start());
+
+ // Replace scheduleDbWork function so that the callback for beginCollection is canceled
+ // immediately after scheduling.
+ auto&& executor = getExecutor();
+ collectionCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
+ // Schedule as non-exclusive task to allow us to cancel it before the executor is able
+ // to invoke the callback.
+ auto scheduleResult = executor.scheduleWork(workFn);
+ ASSERT_OK(scheduleResult.getStatus());
+ executor.cancel(scheduleResult.getValue());
+ return scheduleResult;
+ });
+
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+
+ collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
+
+TEST_F(CollectionClonerTest, BeginCollectionFailed) {
+ ASSERT_OK(collectionCloner->start());
+
+ storageInterface->beginCollectionFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const CollectionOptions& theOptions,
+ const std::vector<BSONObj>& theIndexSpecs) {
+ return Status(ErrorCodes::OperationFailed, "");
+ };
- // A collection may have no indexes. The cloner will produce a warning but
- // will still proceed with cloning.
- TEST_F(CollectionClonerTest, ListIndexesReturnedNoIndexes) {
- ASSERT_OK(collectionCloner->start());
-
- // Using a non-zero cursor to ensure that
- // the cloner stops the fetcher from retrieving more results.
- processNetworkResponse(createListIndexesResponse(1, BSONArray()));
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+
+ collectionCloner->waitForDbWorker();
+
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
+
+TEST_F(CollectionClonerTest, BeginCollection) {
+ ASSERT_OK(collectionCloner->start());
+
+ NamespaceString collNss;
+ CollectionOptions collOptions;
+ std::vector<BSONObj> collIndexSpecs;
+ storageInterface->beginCollectionFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const CollectionOptions& theOptions,
+ const std::vector<BSONObj>& theIndexSpecs) {
+ ASSERT(txn);
+ collNss = theNss;
+ collOptions = theOptions;
+ collIndexSpecs = theIndexSpecs;
+ return Status::OK();
+ };
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(collectionCloner->isActive());
+ // Split listIndexes response into 2 batches: first batch contains specs[0] and specs[1];
+    // second batch contains specs[2].
+ const std::vector<BSONObj> specs = {idIndexSpec,
+ BSON("v" << 1 << "key" << BSON("a" << 1) << "name"
+ << "a_1"
+ << "ns" << nss.ns()),
+ BSON("v" << 1 << "key" << BSON("b" << 1) << "name"
+ << "b_1"
+ << "ns" << nss.ns())};
- ASSERT_TRUE(getNet()->hasReadyRequests());
- }
+ processNetworkResponse(createListIndexesResponse(1, BSON_ARRAY(specs[0] << specs[1])));
- TEST_F(CollectionClonerTest, BeginCollectionScheduleDbWorkFailed) {
- ASSERT_OK(collectionCloner->start());
+ // 'status' should not be modified because cloning is not finished.
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(collectionCloner->isActive());
- // Replace scheduleDbWork function so that cloner will fail to schedule DB work after
- // getting index specs.
- collectionCloner->setScheduleDbWorkFn([](const ReplicationExecutor::CallbackFn& workFn) {
- return StatusWith<ReplicationExecutor::CallbackHandle>(ErrorCodes::UnknownError, "");
- });
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(specs[2]), "nextBatch"));
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ collectionCloner->waitForDbWorker();
- ASSERT_EQUALS(ErrorCodes::UnknownError, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ // 'status' will be set if listIndexes fails.
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- TEST_F(CollectionClonerTest, BeginCollectionCallbackCanceled) {
- ASSERT_OK(collectionCloner->start());
-
- // Replace scheduleDbWork function so that the callback for beginCollection is canceled
- // immediately after scheduling.
- auto&& executor = getExecutor();
- collectionCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
- // Schedule as non-exclusive task to allow us to cancel it before the executor is able
- // to invoke the callback.
- auto scheduleResult = executor.scheduleWork(workFn);
- ASSERT_OK(scheduleResult.getStatus());
- executor.cancel(scheduleResult.getValue());
- return scheduleResult;
- });
-
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
-
- collectionCloner->waitForDbWorker();
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
+ ASSERT_EQUALS(nss.ns(), collNss.ns());
+ ASSERT_EQUALS(options.toBSON(), collOptions.toBSON());
+ ASSERT_EQUALS(specs.size(), collIndexSpecs.size());
+ for (std::vector<BSONObj>::size_type i = 0; i < specs.size(); ++i) {
+ ASSERT_EQUALS(specs[i], collIndexSpecs[i]);
}
- TEST_F(CollectionClonerTest, BeginCollectionFailed) {
- ASSERT_OK(collectionCloner->start());
+ // Cloner is still active because it has to read the documents from the source collection.
+ ASSERT_TRUE(collectionCloner->isActive());
+}
+
+TEST_F(CollectionClonerTest, FindFetcherScheduleFailed) {
+ ASSERT_OK(collectionCloner->start());
+
+ // Shut down executor while in beginCollection callback.
+ // This will cause the fetcher to fail to schedule the find command.
+ bool collectionCreated = false;
+ storageInterface->beginCollectionFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const CollectionOptions& theOptions,
+ const std::vector<BSONObj>& theIndexSpecs) {
+ collectionCreated = true;
+ getExecutor().shutdown();
+ return Status::OK();
+ };
- storageInterface->beginCollectionFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const CollectionOptions& theOptions,
- const std::vector<BSONObj>& theIndexSpecs) {
- return Status(ErrorCodes::OperationFailed, "");
- };
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ collectionCloner->waitForDbWorker();
+ ASSERT_TRUE(collectionCreated);
- collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+TEST_F(CollectionClonerTest, FindCommandAfterBeginCollection) {
+ ASSERT_OK(collectionCloner->start());
- TEST_F(CollectionClonerTest, BeginCollection) {
- ASSERT_OK(collectionCloner->start());
-
- NamespaceString collNss;
- CollectionOptions collOptions;
- std::vector<BSONObj> collIndexSpecs;
- storageInterface->beginCollectionFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const CollectionOptions& theOptions,
- const std::vector<BSONObj>& theIndexSpecs) {
- ASSERT(txn);
- collNss = theNss;
- collOptions = theOptions;
- collIndexSpecs = theIndexSpecs;
- return Status::OK();
- };
-
- // Split listIndexes response into 2 batches: first batch contains specs[0] and specs[1];
- // second batch contains specs[2]
- const std::vector<BSONObj> specs = {
- idIndexSpec,
- BSON("v" << 1 << "key" << BSON("a" << 1) << "name" << "a_1" << "ns" << nss.ns()),
- BSON("v" << 1 << "key" << BSON("b" << 1) << "name" << "b_1" << "ns" << nss.ns())};
-
- processNetworkResponse(createListIndexesResponse(1, BSON_ARRAY(specs[0] << specs[1])));
-
- // 'status' should not be modified because cloning is not finished.
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(collectionCloner->isActive());
-
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(specs[2]), "nextBatch"));
-
- collectionCloner->waitForDbWorker();
-
- // 'status' will be set if listIndexes fails.
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
-
- ASSERT_EQUALS(nss.ns(), collNss.ns());
- ASSERT_EQUALS(options.toBSON(), collOptions.toBSON());
- ASSERT_EQUALS(specs.size(), collIndexSpecs.size());
- for (std::vector<BSONObj>::size_type i = 0; i < specs.size(); ++i) {
- ASSERT_EQUALS(specs[i], collIndexSpecs[i]);
- }
-
- // Cloner is still active because it has to read the documents from the source collection.
- ASSERT_TRUE(collectionCloner->isActive());
- }
+ bool collectionCreated = false;
+ storageInterface->beginCollectionFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const CollectionOptions& theOptions,
+ const std::vector<BSONObj>& theIndexSpecs) {
+ collectionCreated = true;
+ return Status::OK();
+ };
- TEST_F(CollectionClonerTest, FindFetcherScheduleFailed) {
- ASSERT_OK(collectionCloner->start());
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- // Shut down executor while in beginCollection callback.
- // This will cause the fetcher to fail to schedule the find command.
- bool collectionCreated = false;
- storageInterface->beginCollectionFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const CollectionOptions& theOptions,
- const std::vector<BSONObj>& theIndexSpecs) {
- collectionCreated = true;
- getExecutor().shutdown();
- return Status::OK();
- };
+ collectionCloner->waitForDbWorker();
+ ASSERT_TRUE(collectionCreated);
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ // Fetcher should be scheduled after cloner creates collection.
+ auto net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ NetworkOperationIterator noi = net->getNextReadyRequest();
+ auto&& noiRequest = noi->getRequest();
+ ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
+ ASSERT_EQUALS("find", std::string(noiRequest.cmdObj.firstElementFieldName()));
+ ASSERT_EQUALS(nss.coll().toString(), noiRequest.cmdObj.firstElement().valuestrsafe());
+ ASSERT_TRUE(noiRequest.cmdObj.getField("noCursorTimeout").trueValue());
+ ASSERT_FALSE(net->hasReadyRequests());
+}
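
The assertions above constrain the shape of the cloner's find command; a plausible equivalent showing only the fields the test checks, with an illustrative collection name:

    // find command implied by the assertions above; only asserted fields are
    // shown, and "coll" stands in for nss.coll().
    BSONObj expectedFind = BSON("find" << "coll" << "noCursorTimeout" << true);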
- collectionCloner->waitForDbWorker();
- ASSERT_TRUE(collectionCreated);
+TEST_F(CollectionClonerTest, FindCommandFailed) {
+ ASSERT_OK(collectionCloner->start());
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- TEST_F(CollectionClonerTest, FindCommandAfterBeginCollection) {
- ASSERT_OK(collectionCloner->start());
-
- bool collectionCreated = false;
- storageInterface->beginCollectionFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const CollectionOptions& theOptions,
- const std::vector<BSONObj>& theIndexSpecs) {
- collectionCreated = true;
- return Status::OK();
- };
-
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
-
- collectionCloner->waitForDbWorker();
- ASSERT_TRUE(collectionCreated);
-
- // Fetcher should be scheduled after cloner creates collection.
- auto net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- NetworkOperationIterator noi = net->getNextReadyRequest();
- auto&& noiRequest = noi->getRequest();
- ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
- ASSERT_EQUALS("find", std::string(noiRequest.cmdObj.firstElementFieldName()));
- ASSERT_EQUALS(nss.coll().toString(), noiRequest.cmdObj.firstElement().valuestrsafe());
- ASSERT_TRUE(noiRequest.cmdObj.getField("noCursorTimeout").trueValue());
- ASSERT_FALSE(net->hasReadyRequests());
- }
+ collectionCloner->waitForDbWorker();
- TEST_F(CollectionClonerTest, FindCommandFailed) {
- ASSERT_OK(collectionCloner->start());
+ processNetworkResponse(BSON("ok" << 0 << "errmsg"
+ << ""
+ << "code" << ErrorCodes::CursorNotFound));
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ ASSERT_EQUALS(ErrorCodes::CursorNotFound, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- collectionCloner->waitForDbWorker();
+TEST_F(CollectionClonerTest, FindCommandCanceled) {
+ ASSERT_OK(collectionCloner->start());
- processNetworkResponse(
- BSON("ok" << 0 << "errmsg" << "" << "code" << ErrorCodes::CursorNotFound));
+ scheduleNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- ASSERT_EQUALS(ErrorCodes::CursorNotFound, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ auto net = getNet();
+ net->runReadyNetworkOperations();
- TEST_F(CollectionClonerTest, FindCommandCanceled) {
- ASSERT_OK(collectionCloner->start());
+ collectionCloner->waitForDbWorker();
- scheduleNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ scheduleNetworkResponse(BSON("ok" << 1));
- auto net = getNet();
- net->runReadyNetworkOperations();
+ collectionCloner->cancel();
- collectionCloner->waitForDbWorker();
+ net->runReadyNetworkOperations();
- scheduleNetworkResponse(BSON("ok" << 1));
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- collectionCloner->cancel();
+TEST_F(CollectionClonerTest, InsertDocumentsScheduleDbWorkFailed) {
+ ASSERT_OK(collectionCloner->start());
- net->runReadyNetworkOperations();
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ collectionCloner->waitForDbWorker();
- TEST_F(CollectionClonerTest, InsertDocumentsScheduleDbWorkFailed) {
- ASSERT_OK(collectionCloner->start());
+ // Replace scheduleDbWork function so that cloner will fail to schedule DB work after
+ // getting documents.
+ collectionCloner->setScheduleDbWorkFn([](const ReplicationExecutor::CallbackFn& workFn) {
+ return StatusWith<ReplicationExecutor::CallbackHandle>(ErrorCodes::UnknownError, "");
+ });
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ const BSONObj doc = BSON("_id" << 1);
+ processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc)));
- collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(ErrorCodes::UnknownError, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- // Replace scheduleDbWork function so that cloner will fail to schedule DB work after
- // getting documents.
- collectionCloner->setScheduleDbWorkFn([](const ReplicationExecutor::CallbackFn& workFn) {
- return StatusWith<ReplicationExecutor::CallbackHandle>(ErrorCodes::UnknownError, "");
- });
+TEST_F(CollectionClonerTest, InsertDocumentsCallbackCanceled) {
+ ASSERT_OK(collectionCloner->start());
- const BSONObj doc = BSON("_id" << 1);
- processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc)));
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- ASSERT_EQUALS(ErrorCodes::UnknownError, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ collectionCloner->waitForDbWorker();
- TEST_F(CollectionClonerTest, InsertDocumentsCallbackCanceled) {
- ASSERT_OK(collectionCloner->start());
+ // Replace scheduleDbWork function so that the callback for insertDocuments is canceled
+ // immediately after scheduling.
+ auto&& executor = getExecutor();
+ collectionCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
+ // Schedule as non-exclusive task to allow us to cancel it before the executor is able
+ // to invoke the callback.
+ auto scheduleResult = executor.scheduleWork(workFn);
+ ASSERT_OK(scheduleResult.getStatus());
+ executor.cancel(scheduleResult.getValue());
+ return scheduleResult;
+ });
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ const BSONObj doc = BSON("_id" << 1);
+ processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc)));
- collectionCloner->waitForDbWorker();
+ collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- // Replace scheduleDbWork function so that the callback for insertDocuments is canceled
- // immediately after scheduling.
- auto&& executor = getExecutor();
- collectionCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
- // Schedule as non-exclusive task to allow us to cancel it before the executor is able
- // to invoke the callback.
- auto scheduleResult = executor.scheduleWork(workFn);
- ASSERT_OK(scheduleResult.getStatus());
- executor.cancel(scheduleResult.getValue());
- return scheduleResult;
- });
+TEST_F(CollectionClonerTest, InsertDocumentsFailed) {
+ ASSERT_OK(collectionCloner->start());
- const BSONObj doc = BSON("_id" << 1);
- processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc)));
-
- collectionCloner->waitForDbWorker();
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
-
- TEST_F(CollectionClonerTest, InsertDocumentsFailed) {
- ASSERT_OK(collectionCloner->start());
-
- bool insertDocumentsCalled = false;
- storageInterface->insertDocumentsFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const std::vector<BSONObj>& theDocuments) {
- insertDocumentsCalled = true;
- return Status(ErrorCodes::OperationFailed, "");
- };
+ bool insertDocumentsCalled = false;
+ storageInterface->insertDocumentsFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const std::vector<BSONObj>& theDocuments) {
+ insertDocumentsCalled = true;
+ return Status(ErrorCodes::OperationFailed, "");
+ };
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- collectionCloner->waitForDbWorker();
+ collectionCloner->waitForDbWorker();
- processNetworkResponse(createCursorResponse(0, BSONArray()));
+ processNetworkResponse(createCursorResponse(0, BSONArray()));
- collectionCloner->wait();
+ collectionCloner->wait();
- ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- TEST_F(CollectionClonerTest, InsertDocumentsSingleBatch) {
- ASSERT_OK(collectionCloner->start());
+TEST_F(CollectionClonerTest, InsertDocumentsSingleBatch) {
+ ASSERT_OK(collectionCloner->start());
- std::vector<BSONObj> collDocuments;
- storageInterface->insertDocumentsFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const std::vector<BSONObj>& theDocuments) {
- ASSERT(txn);
- collDocuments = theDocuments;
- return Status::OK();
- };
+ std::vector<BSONObj> collDocuments;
+ storageInterface->insertDocumentsFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const std::vector<BSONObj>& theDocuments) {
+ ASSERT(txn);
+ collDocuments = theDocuments;
+ return Status::OK();
+ };
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- collectionCloner->waitForDbWorker();
+ collectionCloner->waitForDbWorker();
- const BSONObj doc = BSON("_id" << 1);
- processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc)));
+ const BSONObj doc = BSON("_id" << 1);
+ processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc)));
- collectionCloner->waitForDbWorker();
- ASSERT_EQUALS(1U, collDocuments.size());
- ASSERT_EQUALS(doc, collDocuments[0]);
+ collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(1U, collDocuments.size());
+ ASSERT_EQUALS(doc, collDocuments[0]);
- ASSERT_OK(getStatus());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ ASSERT_OK(getStatus());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
- TEST_F(CollectionClonerTest, InsertDocumentsMultipleBatches) {
- ASSERT_OK(collectionCloner->start());
+TEST_F(CollectionClonerTest, InsertDocumentsMultipleBatches) {
+ ASSERT_OK(collectionCloner->start());
- std::vector<BSONObj> collDocuments;
- storageInterface->insertDocumentsFn = [&](OperationContext* txn,
- const NamespaceString& theNss,
- const std::vector<BSONObj>& theDocuments) {
- ASSERT(txn);
- collDocuments = theDocuments;
- return Status::OK();
- };
+ std::vector<BSONObj> collDocuments;
+ storageInterface->insertDocumentsFn = [&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const std::vector<BSONObj>& theDocuments) {
+ ASSERT(txn);
+ collDocuments = theDocuments;
+ return Status::OK();
+ };
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- collectionCloner->waitForDbWorker();
+ collectionCloner->waitForDbWorker();
- const BSONObj doc = BSON("_id" << 1);
- processNetworkResponse(createCursorResponse(1, BSON_ARRAY(doc)));
+ const BSONObj doc = BSON("_id" << 1);
+ processNetworkResponse(createCursorResponse(1, BSON_ARRAY(doc)));
- collectionCloner->waitForDbWorker();
- ASSERT_EQUALS(1U, collDocuments.size());
- ASSERT_EQUALS(doc, collDocuments[0]);
+ collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(1U, collDocuments.size());
+ ASSERT_EQUALS(doc, collDocuments[0]);
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(collectionCloner->isActive());
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(collectionCloner->isActive());
- const BSONObj doc2 = BSON("_id" << 1);
- processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc2), "nextBatch"));
+ const BSONObj doc2 = BSON("_id" << 1);
+ processNetworkResponse(createCursorResponse(0, BSON_ARRAY(doc2), "nextBatch"));
- collectionCloner->waitForDbWorker();
- ASSERT_EQUALS(1U, collDocuments.size());
- ASSERT_EQUALS(doc2, collDocuments[0]);
+ collectionCloner->waitForDbWorker();
+ ASSERT_EQUALS(1U, collDocuments.size());
+ ASSERT_EQUALS(doc2, collDocuments[0]);
- ASSERT_OK(getStatus());
- ASSERT_FALSE(collectionCloner->isActive());
- }
+ ASSERT_OK(getStatus());
+ ASSERT_FALSE(collectionCloner->isActive());
+}
-} // namespace
+} // namespace
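The createListIndexesResponse and createCursorResponse helpers come from the BaseClonerTest fixture, which is not part of this diff; the responses they mock presumably follow the standard command-cursor reply envelope. A hand-rolled sketch under that assumption:

    // Approximation of the mocked responses used above: the standard
    // command-cursor envelope. Field values are illustrative.
    BSONObj makeCursorResponse(long long cursorId,
                               const NamespaceString& nss,
                               const BSONArray& firstBatch) {
        return BSON("cursor" << BSON("id" << cursorId << "ns" << nss.ns()
                                          << "firstBatch" << firstBatch)
                    << "ok" << 1);
    }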
diff --git a/src/mongo/db/repl/data_replicator.cpp b/src/mongo/db/repl/data_replicator.cpp
index 0344608dc78..1eb7c9a0ae3 100644
--- a/src/mongo/db/repl/data_replicator.cpp
+++ b/src/mongo/db/repl/data_replicator.cpp
@@ -58,1309 +58,1266 @@
namespace mongo {
namespace repl {
- // Failpoint for initial sync
- MONGO_FP_DECLARE(failInitialSyncWithBadHost);
+// Failpoint for initial sync
+MONGO_FP_DECLARE(failInitialSyncWithBadHost);
namespace {
- // Limit buffer to 256MB
- const size_t kOplogBufferSize = 256 * 1024 * 1024;
+// Limit buffer to 256MB
+const size_t kOplogBufferSize = 256 * 1024 * 1024;
- size_t getSize(const BSONObj& o) {
- // SERVER-9808 Avoid Fortify complaint about implicit signed->unsigned conversion
- return static_cast<size_t>(o.objsize());
- }
+size_t getSize(const BSONObj& o) {
+ // SERVER-9808 Avoid Fortify complaint about implicit signed->unsigned conversion
+ return static_cast<size_t>(o.objsize());
+}
- Timestamp findCommonPoint(HostAndPort host, Timestamp start) {
- // TODO: walk back in the oplog looking for a known/shared optime.
- return Timestamp();
- }
+Timestamp findCommonPoint(HostAndPort host, Timestamp start) {
+ // TODO: walk back in the oplog looking for a known/shared optime.
+ return Timestamp();
+}
-} // namespace
+} // namespace
std::string toString(DataReplicatorState s) {
switch (s) {
- case DataReplicatorState::InitialSync:
- return "InitialSync";
- case DataReplicatorState::Rollback:
- return "Rollback";
- case DataReplicatorState::Steady:
- return "Steady Replication";
- case DataReplicatorState::Uninitialized:
- return "Uninitialized";
+ case DataReplicatorState::InitialSync:
+ return "InitialSync";
+ case DataReplicatorState::Rollback:
+ return "Rollback";
+ case DataReplicatorState::Steady:
+ return "Steady Replication";
+ case DataReplicatorState::Uninitialized:
+ return "Uninitialized";
}
MONGO_UNREACHABLE;
}
- /**
- * Follows the fetcher pattern for a find+getmore on an oplog
- * Returns additional errors if the start oplog entry cannot be found.
- */
- class OplogFetcher : public QueryFetcher {
- MONGO_DISALLOW_COPYING(OplogFetcher);
- public:
- OplogFetcher(ReplicationExecutor* exec,
- const Timestamp& startTS,
- const HostAndPort& src,
- const NamespaceString& nss,
- const QueryFetcher::CallbackFn& work);
-
- virtual ~OplogFetcher() = default;
- std::string toString() const;
-
- const Timestamp getStartTimestamp() const {
- return _startTS;
- }
-
- protected:
-
- void _delegateCallback(const Fetcher::QueryResponseStatus& fetchResult,
- NextAction* nextAction);
-
- const Timestamp _startTS;
- };
-
- // OplogFetcher
- OplogFetcher::OplogFetcher(ReplicationExecutor* exec,
- const Timestamp& startTS,
- const HostAndPort& src,
- const NamespaceString& oplogNSS,
- const QueryFetcher::CallbackFn& work)
- // TODO: add query options await_data, oplog_replay
- : QueryFetcher(exec,
- src,
- oplogNSS,
- BSON("find" << oplogNSS.coll() <<
- "filter" << BSON("ts" << BSON("$gte" << startTS))),
- work),
- _startTS(startTS) {
- }
-
- std::string OplogFetcher::toString() const {
- return str::stream() << "OplogReader -"
- << " startTS: " << _startTS.toString()
- << " fetcher: " << QueryFetcher::getDiagnosticString();
- }
-
- void OplogFetcher::_delegateCallback(const Fetcher::QueryResponseStatus& fetchResult,
- Fetcher::NextAction* nextAction) {
- const bool checkStartTS = _getResponses() == 0;
-
- if (fetchResult.isOK()) {
- Fetcher::Documents::const_iterator firstDoc = fetchResult.getValue().documents.begin();
- auto hasDoc = firstDoc != fetchResult.getValue().documents.end();
-
- if (checkStartTS) {
- if (!hasDoc) {
- // Set next action to none.
- *nextAction = Fetcher::NextAction::kNoAction;
- _onQueryResponse(
- Status(ErrorCodes::OplogStartMissing, str::stream() <<
- "No operations on sync source with op time starting at: " <<
- _startTS.toString()),
- nextAction);
- return;
- } else if ((*firstDoc)["ts"].eoo()) {
- // Set next action to none.
- *nextAction = Fetcher::NextAction::kNoAction;
- _onQueryResponse(
- Status(ErrorCodes::OplogStartMissing, str::stream() <<
- "Missing 'ts' field in first returned " << (*firstDoc)["ts"] <<
- " starting at " << _startTS.toString()),
- nextAction);
- return;
- } else if ((*firstDoc)["ts"].timestamp() != _startTS) {
- // Set next action to none.
- *nextAction = Fetcher::NextAction::kNoAction;
- _onQueryResponse(
- Status(ErrorCodes::OplogStartMissing, str::stream() <<
- "First returned " << (*firstDoc)["ts"] <<
- " is not where we wanted to start: " << _startTS.toString()),
- nextAction);
- return;
- }
+/**
+ * Follows the fetcher pattern for a find+getmore on an oplog.
+ * Returns additional errors if the start oplog entry cannot be found.
+ */
+class OplogFetcher : public QueryFetcher {
+ MONGO_DISALLOW_COPYING(OplogFetcher);
- }
+public:
+ OplogFetcher(ReplicationExecutor* exec,
+ const Timestamp& startTS,
+ const HostAndPort& src,
+ const NamespaceString& nss,
+ const QueryFetcher::CallbackFn& work);
- if (hasDoc) {
- _onQueryResponse(fetchResult, nextAction);
- }
- else {
- }
- }
- else {
- _onQueryResponse(fetchResult, nextAction);
- }
- };
+ virtual ~OplogFetcher() = default;
+ std::string toString() const;
- class DatabasesCloner {
- public:
- DatabasesCloner(ReplicationExecutor* exec,
- HostAndPort source,
- stdx::function<void (const Status&)> finishFn)
- : _status(ErrorCodes::NotYetInitialized, ""),
- _exec(exec),
- _source(source),
- _active(false),
- _clonersActive(0),
- _finishFn(finishFn) {
- if (!_finishFn) {
- _status = Status(ErrorCodes::InvalidOptions, "finishFn is not callable.");
- }
- };
-
- Status start();
-
- bool isActive() {
- return _active;
- }
+ const Timestamp getStartTimestamp() const {
+ return _startTS;
+ }
- Status getStatus() {
- return _status;
- }
+protected:
+ void _delegateCallback(const Fetcher::QueryResponseStatus& fetchResult, NextAction* nextAction);
+
+ const Timestamp _startTS;
+};
+
+// OplogFetcher
+OplogFetcher::OplogFetcher(ReplicationExecutor* exec,
+ const Timestamp& startTS,
+ const HostAndPort& src,
+ const NamespaceString& oplogNSS,
+ const QueryFetcher::CallbackFn& work)
+ // TODO: add query options await_data, oplog_replay
+ : QueryFetcher(exec,
+ src,
+ oplogNSS,
+ BSON("find" << oplogNSS.coll() << "filter"
+ << BSON("ts" << BSON("$gte" << startTS))),
+ work),
+ _startTS(startTS) {}
+
+std::string OplogFetcher::toString() const {
+ return str::stream() << "OplogReader -"
+ << " startTS: " << _startTS.toString()
+ << " fetcher: " << QueryFetcher::getDiagnosticString();
+}
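
Spelled out, the command built by the constructor above is equivalent to the following; oplogNSS and startTS are the constructor arguments:

    // The initial command OplogFetcher issues, per the constructor above:
    //   { find: <oplog collection>, filter: { ts: { $gte: <startTS> } } }
    BSONObj cmd = BSON("find" << oplogNSS.coll() << "filter"
                              << BSON("ts" << BSON("$gte" << startTS)));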
- void cancel() {
- if (!_active)
+void OplogFetcher::_delegateCallback(const Fetcher::QueryResponseStatus& fetchResult,
+ Fetcher::NextAction* nextAction) {
+ const bool checkStartTS = _getResponses() == 0;
+
+ if (fetchResult.isOK()) {
+ Fetcher::Documents::const_iterator firstDoc = fetchResult.getValue().documents.begin();
+ auto hasDoc = firstDoc != fetchResult.getValue().documents.end();
+
+ if (checkStartTS) {
+ if (!hasDoc) {
+ // Set next action to none.
+ *nextAction = Fetcher::NextAction::kNoAction;
+ _onQueryResponse(
+ Status(ErrorCodes::OplogStartMissing,
+ str::stream()
+ << "No operations on sync source with op time starting at: "
+ << _startTS.toString()),
+ nextAction);
+ return;
+ } else if ((*firstDoc)["ts"].eoo()) {
+ // Set next action to none.
+ *nextAction = Fetcher::NextAction::kNoAction;
+ _onQueryResponse(Status(ErrorCodes::OplogStartMissing,
+ str::stream() << "Missing 'ts' field in first returned "
+ << (*firstDoc)["ts"] << " starting at "
+ << _startTS.toString()),
+ nextAction);
+ return;
+ } else if ((*firstDoc)["ts"].timestamp() != _startTS) {
+ // Set next action to none.
+ *nextAction = Fetcher::NextAction::kNoAction;
+ _onQueryResponse(Status(ErrorCodes::OplogStartMissing,
+ str::stream() << "First returned " << (*firstDoc)["ts"]
+ << " is not where we wanted to start: "
+ << _startTS.toString()),
+ nextAction);
return;
- _active = false;
- // TODO: cancel all cloners
- _setStatus(Status(ErrorCodes::CallbackCanceled, "Initial Sync Cancelled."));
- }
-
- void wait() {
- // TODO: wait on all cloners
- }
-
- std::string toString() {
- return str::stream() << "initial sync --" <<
- " active:" << _active <<
- " status:" << _status.toString() <<
- " source:" << _source.toString() <<
- " db cloners active:" << _clonersActive <<
- " db count:" << _databaseCloners.size();
- }
-
-
- // For testing
- void setStorageInterface(CollectionCloner::StorageInterface* si) {
- _storage = si;
- }
-
- private:
-
- /**
- * Does the next action necessary for the initial sync process.
- *
- * NOTE: If (!_status.isOK() || !_isActive) then early return.
- */
- void _doNextActions();
-
- /**
- * Setting the status to not-OK will stop the process
- */
- void _setStatus(CBHStatus s) {
- _setStatus(s.getStatus());
- }
-
- /**
- * Setting the status to not-OK will stop the process
- */
- void _setStatus(Status s) {
- // Only set the first time called, all subsequent failures are not recorded --only first
- if (_status.code() != ErrorCodes::NotYetInitialized) {
- _status = s;
}
}
- /**
- * Setting the status to not-OK will stop the process
- */
- void _setStatus(TimestampStatus s) {
- _setStatus(s.getStatus());
+ if (hasDoc) {
+ _onQueryResponse(fetchResult, nextAction);
+ } else {
}
+ } else {
+ _onQueryResponse(fetchResult, nextAction);
+ }
+};
+
+class DatabasesCloner {
+public:
+ DatabasesCloner(ReplicationExecutor* exec,
+ HostAndPort source,
+ stdx::function<void(const Status&)> finishFn)
+ : _status(ErrorCodes::NotYetInitialized, ""),
+ _exec(exec),
+ _source(source),
+ _active(false),
+ _clonersActive(0),
+ _finishFn(finishFn) {
+ if (!_finishFn) {
+ _status = Status(ErrorCodes::InvalidOptions, "finishFn is not callable.");
+ }
+ };
- void _failed();
-
- /** Called each time a database clone is finished */
- void _onEachDBCloneFinish(const Status& status, const std::string name);
+ Status start();
- // Callbacks
+ bool isActive() {
+ return _active;
+ }
- void _onListDatabaseFinish(const CommandCallbackArgs& cbd);
+ Status getStatus() {
+ return _status;
+ }
+ void cancel() {
+ if (!_active)
+ return;
+ _active = false;
+ // TODO: cancel all cloners
+ _setStatus(Status(ErrorCodes::CallbackCanceled, "Initial Sync Cancelled."));
+ }
- // Member variables
- Status _status; // If it is not OK, we stop everything.
- ReplicationExecutor* _exec; // executor to schedule things with
- HostAndPort _source; // The source to use, until we get an error
- bool _active; // false until we start
- std::vector<std::shared_ptr<DatabaseCloner>> _databaseCloners; // database cloners by name
- int _clonersActive;
+ void wait() {
+ // TODO: wait on all cloners
+ }
- const stdx::function<void (const Status&)> _finishFn;
+ std::string toString() {
+ return str::stream() << "initial sync --"
+ << " active:" << _active << " status:" << _status.toString()
+ << " source:" << _source.toString()
+ << " db cloners active:" << _clonersActive
+ << " db count:" << _databaseCloners.size();
+ }
- CollectionCloner::StorageInterface* _storage;
- };
- /** State held during Initial Sync */
- struct InitialSyncState {
- InitialSyncState(DatabasesCloner cloner, Event event)
- : dbsCloner(cloner), finishEvent(event), status(ErrorCodes::IllegalOperation, "") {};
-
- DatabasesCloner dbsCloner; // Cloner for all databases included in initial sync.
- Timestamp beginTimestamp; // Timestamp from the latest entry in oplog when started.
- Timestamp stopTimestamp; // Referred to as minvalid, or the place we can transition states.
- Event finishEvent; // event fired on completion, either successful or not.
- Status status; // final status, only valid after the finishEvent fires.
- size_t fetchedMissingDocs;
- size_t appliedOps;
-
- // Temporary fetch for things like fetching remote optime, or tail
- std::unique_ptr<Fetcher> tmpFetcher;
- TimestampStatus getLatestOplogTimestamp(ReplicationExecutor* exec,
- HostAndPort source,
- const NamespaceString& oplogNS);
- void setStatus(const Status& s);
- void setStatus(const CBHStatus& s);
- void _setTimestampStatus(const QueryResponseStatus& fetchResult,
- Fetcher::NextAction* nextAction,
- TimestampStatus* status) ;
- };
+ // For testing
+ void setStorageInterface(CollectionCloner::StorageInterface* si) {
+ _storage = si;
+ }
- // Initial Sync state
- TimestampStatus InitialSyncState::getLatestOplogTimestamp(ReplicationExecutor* exec,
- HostAndPort source,
- const NamespaceString& oplogNS) {
-
- BSONObj query = BSON("find" << oplogNS.coll() <<
- "sort" << BSON ("$natural" << -1) <<
- "limit" << 1);
-
- TimestampStatus timestampStatus(ErrorCodes::BadValue, "");
- Fetcher f(exec,
- source,
- oplogNS.db().toString(),
- query,
- stdx::bind(&InitialSyncState::_setTimestampStatus, this, stdx::placeholders::_1,
- stdx::placeholders::_2, &timestampStatus));
- Status s = f.schedule();
- if (!s.isOK()) {
- return TimestampStatus(s);
- }
+private:
+ /**
+ * Does the next action necessary for the initial sync process.
+ *
+ * NOTE: If (!_status.isOK() || !_active) then early return.
+ */
+ void _doNextActions();
- // wait for fetcher to get the oplog position.
- f.wait();
- return timestampStatus;
+ /**
+ * Setting the status to not-OK will stop the process
+ */
+ void _setStatus(CBHStatus s) {
+ _setStatus(s.getStatus());
}
- void InitialSyncState::_setTimestampStatus(const QueryResponseStatus& fetchResult,
- Fetcher::NextAction* nextAction,
- TimestampStatus* status) {
- if (!fetchResult.isOK()) {
- *status = TimestampStatus(fetchResult.getStatus());
- } else {
- // TODO: Set _beginTimestamp from first doc "ts" field.
- const auto docs = fetchResult.getValue().documents;
- const auto hasDoc = docs.begin() != docs.end();
- if (!hasDoc || !docs.begin()->hasField("ts")) {
- *status = TimestampStatus(ErrorCodes::FailedToParse,
- "Could not find an oplog entry with 'ts' field.");
- } else {
- *status = TimestampStatus(docs.begin()->getField("ts").timestamp());
- }
+ /**
+ * Setting the status to not-OK will stop the process
+ */
+ void _setStatus(Status s) {
+ // Record only the first failure; subsequent calls do not overwrite it.
+ if (_status.isOK() || _status.code() == ErrorCodes::NotYetInitialized) {
+ _status = s;
}
}
- void InitialSyncState::setStatus(const Status& s) {
- status = s;
+ /**
+ * Setting the status to not-OK will stop the process
+ */
+ void _setStatus(TimestampStatus s) {
+ _setStatus(s.getStatus());
}
- void InitialSyncState::setStatus(const CBHStatus& s) {
- setStatus(s.getStatus());
+
+ void _failed();
+
+ /** Called each time a database clone is finished */
+ void _onEachDBCloneFinish(const Status& status, const std::string name);
+
+ // Callbacks
+
+ void _onListDatabaseFinish(const CommandCallbackArgs& cbd);
+
+
+ // Member variables
+ Status _status; // If it is not OK, we stop everything.
+ ReplicationExecutor* _exec; // executor to schedule things with
+ HostAndPort _source; // The source to use, until we get an error
+ bool _active; // false until we start
+ std::vector<std::shared_ptr<DatabaseCloner>> _databaseCloners; // database cloners by name
+ int _clonersActive;
+
+ const stdx::function<void(const Status&)> _finishFn;
+
+ CollectionCloner::StorageInterface* _storage;
+};
+
+/** State held during Initial Sync */
+struct InitialSyncState {
+ InitialSyncState(DatabasesCloner cloner, Event event)
+ : dbsCloner(cloner), finishEvent(event), status(ErrorCodes::IllegalOperation, "") {}
+
+ DatabasesCloner dbsCloner; // Cloner for all databases included in initial sync.
+ Timestamp beginTimestamp; // Timestamp from the latest entry in oplog when started.
+ Timestamp stopTimestamp; // Referred to as minvalid, or the place we can transition states.
+ Event finishEvent; // event fired on completion, either successful or not.
+ Status status; // final status, only valid after the finishEvent fires.
+ size_t fetchedMissingDocs;
+ size_t appliedOps;
+
+ // Temporary fetch for things like fetching remote optime, or tail
+ std::unique_ptr<Fetcher> tmpFetcher;
+ TimestampStatus getLatestOplogTimestamp(ReplicationExecutor* exec,
+ HostAndPort source,
+ const NamespaceString& oplogNS);
+ void setStatus(const Status& s);
+ void setStatus(const CBHStatus& s);
+ void _setTimestampStatus(const QueryResponseStatus& fetchResult,
+ Fetcher::NextAction* nextAction,
+ TimestampStatus* status);
+};
+
+// Initial Sync state
+TimestampStatus InitialSyncState::getLatestOplogTimestamp(ReplicationExecutor* exec,
+ HostAndPort source,
+ const NamespaceString& oplogNS) {
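+ // Fetch the single newest oplog entry by scanning in reverse natural order.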
+ BSONObj query =
+ BSON("find" << oplogNS.coll() << "sort" << BSON("$natural" << -1) << "limit" << 1);
+
+ TimestampStatus timestampStatus(ErrorCodes::BadValue, "");
+ Fetcher f(exec,
+ source,
+ oplogNS.db().toString(),
+ query,
+ stdx::bind(&InitialSyncState::_setTimestampStatus,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ &timestampStatus));
+ Status s = f.schedule();
+ if (!s.isOK()) {
+ return TimestampStatus(s);
}
- // Initial Sync
- Status DatabasesCloner::start() {
- _active = true;
+ // wait for fetcher to get the oplog position.
+ f.wait();
+ return timestampStatus;
+}
- if (!_status.isOK() && _status.code() != ErrorCodes::NotYetInitialized) {
- return _status;
+void InitialSyncState::_setTimestampStatus(const QueryResponseStatus& fetchResult,
+ Fetcher::NextAction* nextAction,
+ TimestampStatus* status) {
+ if (!fetchResult.isOK()) {
+ *status = TimestampStatus(fetchResult.getStatus());
+ } else {
+ // TODO: Set _beginTimestamp from first doc "ts" field.
+ const auto docs = fetchResult.getValue().documents;
+ const auto hasDoc = docs.begin() != docs.end();
+ if (!hasDoc || !docs.begin()->hasField("ts")) {
+ *status = TimestampStatus(ErrorCodes::FailedToParse,
+ "Could not find an oplog entry with 'ts' field.");
+ } else {
+ *status = TimestampStatus(docs.begin()->getField("ts").timestamp());
}
+ }
+}
- _status = Status::OK();
-
- log() << "starting cloning of all databases";
- // Schedule listDatabase command which will kick off the database cloner per result db.
- Request listDBsReq(_source, "admin", BSON("listDatabases" << true));
- CBHStatus s = _exec->scheduleRemoteCommand(
- listDBsReq,
- stdx::bind(&DatabasesCloner::_onListDatabaseFinish,
- this,
- stdx::placeholders::_1));
- if (!s.isOK()) {
- _setStatus(s);
- _failed();
- }
+void InitialSyncState::setStatus(const Status& s) {
+ status = s;
+}
+void InitialSyncState::setStatus(const CBHStatus& s) {
+ setStatus(s.getStatus());
+}
- _doNextActions();
+// Initial Sync
+Status DatabasesCloner::start() {
+ _active = true;
+ if (!_status.isOK() && _status.code() != ErrorCodes::NotYetInitialized) {
return _status;
}
- void DatabasesCloner::_onListDatabaseFinish(const CommandCallbackArgs& cbd) {
- const Status respStatus = cbd.response.getStatus();
- if (!respStatus.isOK()) {
- // TODO: retry internally?
- _setStatus(respStatus);
- _doNextActions();
- return;
- }
+ _status = Status::OK();
+
+ log() << "starting cloning of all databases";
+ // Schedule the listDatabases command, which kicks off a database cloner per returned db.
+ Request listDBsReq(_source, "admin", BSON("listDatabases" << true));
+ CBHStatus s = _exec->scheduleRemoteCommand(
+ listDBsReq,
+ stdx::bind(&DatabasesCloner::_onListDatabaseFinish, this, stdx::placeholders::_1));
+ if (!s.isOK()) {
+ _setStatus(s);
+ _failed();
+ }
- const auto respBSON = cbd.response.getValue().data;
-
- // There should not be any cloners yet
- invariant(_databaseCloners.size() == 0);
-
- const auto okElem = respBSON["ok"];
- if (okElem.trueValue()) {
- const auto dbsElem = respBSON["databases"].Obj();
- BSONForEach(arrayElement, dbsElem) {
- const BSONObj dbBSON = arrayElement.Obj();
- const std::string name = dbBSON["name"].str();
- ++_clonersActive;
- std::shared_ptr<DatabaseCloner> dbCloner{nullptr};
- try {
- dbCloner.reset(new DatabaseCloner(
- _exec,
- _source,
- name,
- BSONObj(), // do not filter database out.
- [](const BSONObj&) { return true; }, // clone all dbs.
- _storage, // use storage provided.
- [](const Status& status, const NamespaceString& srcNss) {
- if (status.isOK()) {
- log() << "collection clone finished: " << srcNss;
- }
- else {
- log() << "collection clone for '"
- << srcNss << "' failed due to "
- << status.toString();
- }
- },
- [=](const Status& status) {
- _onEachDBCloneFinish(status, name);
- }));
- }
- catch (...) {
- // error creating, fails below.
- }
+ _doNextActions();
- Status s = dbCloner ? dbCloner->start() : Status(ErrorCodes::UnknownError, "Bad!");
+ return _status;
+}
- if (!s.isOK()) {
- std::string err = str::stream() << "could not create cloner for database: "
- << name << " due to: " << s.toString();
- _setStatus(Status(ErrorCodes::InitialSyncFailure, err));
- error() << err;
- break; // exit for_each loop
- }
+void DatabasesCloner::_onListDatabaseFinish(const CommandCallbackArgs& cbd) {
+ const Status respStatus = cbd.response.getStatus();
+ if (!respStatus.isOK()) {
+ // TODO: retry internally?
+ _setStatus(respStatus);
+ _doNextActions();
+ return;
+ }
- // add cloner to list.
- _databaseCloners.push_back(dbCloner);
+ const auto respBSON = cbd.response.getValue().data;
+
+ // There should not be any cloners yet
+ invariant(_databaseCloners.size() == 0);
+
+ const auto okElem = respBSON["ok"];
+ if (okElem.trueValue()) {
+ const auto dbsElem = respBSON["databases"].Obj();
+ BSONForEach(arrayElement, dbsElem) {
+ const BSONObj dbBSON = arrayElement.Obj();
+ const std::string name = dbBSON["name"].str();
+ ++_clonersActive;
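+ // Balanced by the decrement in _onEachDBCloneFinish.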
+ std::shared_ptr<DatabaseCloner> dbCloner{nullptr};
+ try {
+ dbCloner.reset(new DatabaseCloner(
+ _exec,
+ _source,
+ name,
+ BSONObj(), // do not filter database out.
+ [](const BSONObj&) { return true; }, // clone all dbs.
+ _storage, // use storage provided.
+ [](const Status& status, const NamespaceString& srcNss) {
+ if (status.isOK()) {
+ log() << "collection clone finished: " << srcNss;
+ } else {
+ log() << "collection clone for '" << srcNss << "' failed due to "
+ << status.toString();
+ }
+ },
+ [=](const Status& status) { _onEachDBCloneFinish(status, name); }));
+ } catch (...) {
+ // Error creating the cloner; handled by the failure path below.
}
- }
- else {
- _setStatus(Status(ErrorCodes::InitialSyncFailure,
- "failed to clone databases due to failed server response."));
- }
- // Move on to the next steps in the process.
- _doNextActions();
- }
+ Status s = dbCloner ? dbCloner->start() : Status(ErrorCodes::UnknownError, "Failed to create DatabaseCloner.");
- void DatabasesCloner::_onEachDBCloneFinish(const Status& status, const std::string name) {
- auto clonersLeft = --_clonersActive;
+ if (!s.isOK()) {
+ std::string err = str::stream() << "could not create cloner for database: " << name
+ << " due to: " << s.toString();
+ _setStatus(Status(ErrorCodes::InitialSyncFailure, err));
+ error() << err;
+ break; // exit for_each loop
+ }
- if (status.isOK()) {
- log() << "database clone finished: " << name;
- }
- else {
- log() << "database clone failed due to "
- << status.toString();
- _setStatus(status);
+ // add cloner to list.
+ _databaseCloners.push_back(dbCloner);
}
+ } else {
+ _setStatus(Status(ErrorCodes::InitialSyncFailure,
+ "failed to clone databases due to failed server response."));
+ }
- if (clonersLeft == 0) {
- _active = false;
- // All cloners are done, trigger event.
- log() << "all database clones finished, calling _finishFn";
- _finishFn(_status);
- }
+ // Move on to the next steps in the process.
+ _doNextActions();
+}
- _doNextActions();
- }
+void DatabasesCloner::_onEachDBCloneFinish(const Status& status, const std::string name) {
+ auto clonersLeft = --_clonersActive;
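+ // The cloner that drops this count to zero is the last one and fires _finishFn below.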
- void DatabasesCloner::_doNextActions() {
- // If we are no longer active or we had an error, stop doing more
- if (!(_active && _status.isOK())) {
- if (!_status.isOK()) {
- // trigger failed state
- _failed();
- }
- return;
- }
+ if (status.isOK()) {
+ log() << "database clone finished: " << name;
+ } else {
+ log() << "database clone failed due to " << status.toString();
+ _setStatus(status);
}
- void DatabasesCloner::_failed() {
- // TODO: cancel outstanding work, like any cloners active
- invariant(_finishFn);
+ if (clonersLeft == 0) {
+ _active = false;
+ // All cloners are done, trigger event.
+ log() << "all database clones finished, calling _finishFn";
_finishFn(_status);
}
- // Data Replicator
- DataReplicator::DataReplicator(DataReplicatorOptions opts,
- ReplicationExecutor* exec,
- ReplicationCoordinator* replCoord)
- : DataReplicator(opts,
+ _doNextActions();
+}
+
+void DatabasesCloner::_doNextActions() {
+ // If we are no longer active or we had an error, stop doing more
+ if (!(_active && _status.isOK())) {
+ if (!_status.isOK()) {
+ // trigger failed state
+ _failed();
+ }
+ return;
+ }
+}
+
+void DatabasesCloner::_failed() {
+ // TODO: cancel outstanding work, like any cloners active
+ invariant(_finishFn);
+ _finishFn(_status);
+}
+
+// Data Replicator
+DataReplicator::DataReplicator(DataReplicatorOptions opts,
+ ReplicationExecutor* exec,
+ ReplicationCoordinator* replCoord)
+ : DataReplicator(
+ opts,
exec,
replCoord,
// TODO: replace this with a method in the replication coordinator.
- [replCoord] (const Timestamp& ts) { replCoord->setMyLastOptime(OpTime(ts, 0)); }) {
- }
+ [replCoord](const Timestamp& ts) { replCoord->setMyLastOptime(OpTime(ts, 0)); }) {}
+
+DataReplicator::DataReplicator(DataReplicatorOptions opts, ReplicationExecutor* exec)
+ : DataReplicator(opts, exec, nullptr, [](const Timestamp& ts) {}) {}
+
+DataReplicator::DataReplicator(DataReplicatorOptions opts,
+ ReplicationExecutor* exec,
+ ReplicationCoordinator* replCoord,
+ OnBatchCompleteFn batchCompletedFn)
+ : _opts(opts),
+ _exec(exec),
+ _replCoord(replCoord),
+ _state(DataReplicatorState::Uninitialized),
+ _fetcherPaused(false),
+ _reporterPaused(false),
+ _applierActive(false),
+ _applierPaused(false),
+ _batchCompletedFn(batchCompletedFn),
+ _oplogBuffer(kOplogBufferSize, &getSize) {}
+
+DataReplicator::~DataReplicator() {
+ DESTRUCTOR_GUARD(_cancelAllHandles_inlock(); _waitOnAll_inlock(););
+}
- DataReplicator::DataReplicator(DataReplicatorOptions opts,
- ReplicationExecutor* exec)
- : DataReplicator(opts, exec, nullptr, [] (const Timestamp& ts) {}) {
+Status DataReplicator::start() {
+ UniqueLock lk(_mutex);
+ if (_state != DataReplicatorState::Uninitialized) {
+ return Status(ErrorCodes::IllegalOperation,
+ str::stream() << "Already started in another state: " << toString(_state));
}
- DataReplicator::DataReplicator(DataReplicatorOptions opts,
- ReplicationExecutor* exec,
- ReplicationCoordinator* replCoord,
- OnBatchCompleteFn batchCompletedFn)
- : _opts(opts),
- _exec(exec),
- _replCoord(replCoord),
- _state(DataReplicatorState::Uninitialized),
- _fetcherPaused(false),
- _reporterPaused(false),
- _applierActive(false),
- _applierPaused(false),
- _batchCompletedFn(batchCompletedFn),
- _oplogBuffer(kOplogBufferSize, &getSize) {
- }
+ _state = DataReplicatorState::Steady;
+ _applierPaused = false;
+ _fetcherPaused = false;
+ _reporterPaused = false;
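+ // Kick off the steady-state loop (fetcher, applier, reporter).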
+ _doNextActions_Steady_inlock();
+ return Status::OK();
+}
- DataReplicator::~DataReplicator() {
- DESTRUCTOR_GUARD(
- _cancelAllHandles_inlock();
- _waitOnAll_inlock();
- );
- }
+Status DataReplicator::shutdown() {
+ return _shutdown();
+}
- Status DataReplicator::start() {
- UniqueLock lk(_mutex);
- if (_state != DataReplicatorState::Uninitialized) {
- return Status(ErrorCodes::IllegalOperation,
- str::stream() << "Already started in another state: "
- << toString(_state));
- }
+Status DataReplicator::pause() {
+ _pauseApplier();
+ return Status::OK();
+}
- _state = DataReplicatorState::Steady;
- _applierPaused = false;
- _fetcherPaused = false;
- _reporterPaused = false;
- _doNextActions_Steady_inlock();
- return Status::OK();
- }
+DataReplicatorState DataReplicator::getState() const {
+ LockGuard lk(_mutex);
+ return _state;
+}
- Status DataReplicator::shutdown() {
- return _shutdown();
- }
+Timestamp DataReplicator::getLastTimestampFetched() const {
+ LockGuard lk(_mutex);
+ return _lastTimestampFetched;
+}
- Status DataReplicator::pause() {
- _pauseApplier();
- return Status::OK();
+std::string DataReplicator::getDiagnosticString() const {
+ LockGuard lk(_mutex);
+ str::stream out;
+ out << "DataReplicator -"
+ << " opts: " << _opts.toString() << " oplogFetcher: " << _fetcher->toString()
+ << " opsBuffered: " << _oplogBuffer.size() << " state: " << toString(_state);
+ switch (_state) {
+ case DataReplicatorState::InitialSync:
+ out << " opsAppied: " << _initialSyncState->appliedOps
+ << " status: " << _initialSyncState->status.toString();
+ break;
+ case DataReplicatorState::Steady:
+ // TODO: add more here
+ break;
+ case DataReplicatorState::Rollback:
+ // TODO: add more here
+ break;
+ default:
+ break;
}
- DataReplicatorState DataReplicator::getState() const {
- LockGuard lk(_mutex);
- return _state;
- }
+ return out;
+}
- Timestamp DataReplicator::getLastTimestampFetched() const {
- LockGuard lk(_mutex);
- return _lastTimestampFetched;
+Status DataReplicator::resume(bool wait) {
+ CBHStatus handle = _exec->scheduleWork(
+ stdx::bind(&DataReplicator::_resumeFinish, this, stdx::placeholders::_1));
+ const Status status = handle.getStatus();
+ if (wait && status.isOK()) {
+ _exec->wait(handle.getValue());
}
+ return status;
+}
- std::string DataReplicator::getDiagnosticString() const {
- LockGuard lk(_mutex);
- str::stream out;
- out << "DataReplicator -"
- << " opts: " << _opts.toString()
- << " oplogFetcher: " << _fetcher->toString()
- << " opsBuffered: " << _oplogBuffer.size()
- << " state: " << toString(_state);
- switch (_state) {
- case DataReplicatorState::InitialSync:
- out << " opsAppied: " << _initialSyncState->appliedOps
- << " status: " << _initialSyncState->status.toString();
- break;
- case DataReplicatorState::Steady:
- // TODO: add more here
- break;
- case DataReplicatorState::Rollback:
- // TODO: add more here
- break;
- default:
- break;
- }
+void DataReplicator::_resumeFinish(CallbackArgs cbData) {
+ UniqueLock lk(_mutex);
+ _fetcherPaused = _applierPaused = false;
+ lk.unlock();
- return out;
- }
+ _doNextActions();
+}
- Status DataReplicator::resume(bool wait) {
- CBHStatus handle = _exec->scheduleWork(stdx::bind(&DataReplicator::_resumeFinish,
- this,
- stdx::placeholders::_1));
- const Status status = handle.getStatus();
- if (wait && status.isOK()) {
- _exec->wait(handle.getValue());
- }
- return status;
- }
+void DataReplicator::_pauseApplier() {
+ LockGuard lk(_mutex);
+ if (_applier)
+ _applier->wait();
+ _applierPaused = true;
+ _applier.reset();
+}
- void DataReplicator::_resumeFinish(CallbackArgs cbData) {
- UniqueLock lk(_mutex);
- _fetcherPaused = _applierPaused = false;
- lk.unlock();
+Timestamp DataReplicator::_applyUntil(Timestamp untilTimestamp) {
+ // TODO: block until all oplog buffer application is done to the given optime
+ return Timestamp();
+}
- _doNextActions();
- }
+Timestamp DataReplicator::_applyUntilAndPause(Timestamp untilTimestamp) {
+ //_run(&_pauseApplier);
+ return _applyUntil(untilTimestamp);
+}
- void DataReplicator::_pauseApplier() {
- LockGuard lk(_mutex);
- if (_applier)
- _applier->wait();
+TimestampStatus DataReplicator::flushAndPause() {
+ //_run(&_pauseApplier);
+ UniqueLock lk(_mutex);
+ if (_applierActive) {
_applierPaused = true;
- _applier.reset();
+ lk.unlock();
+ _applier->wait();
+ lk.lock();
}
+ return TimestampStatus(_lastTimestampApplied);
+}
- Timestamp DataReplicator::_applyUntil(Timestamp untilTimestamp) {
- // TODO: block until all oplog buffer application is done to the given optime
- return Timestamp();
+void DataReplicator::_resetState_inlock(Timestamp lastAppliedOptime) {
+ invariant(!_anyActiveHandles_inlock());
+ _lastTimestampApplied = _lastTimestampFetched = lastAppliedOptime;
+ _oplogBuffer.clear();
+}
+
+void DataReplicator::slavesHaveProgressed() {
+ if (_reporter) {
+ _reporter->trigger();
}
+}
- Timestamp DataReplicator::_applyUntilAndPause(Timestamp untilTimestamp) {
- //_run(&_pauseApplier);
- return _applyUntil(untilTimestamp);
+void DataReplicator::_setInitialSyncStorageInterface(CollectionCloner::StorageInterface* si) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _storage = si;
+ if (_initialSyncState) {
+ _initialSyncState->dbsCloner.setStorageInterface(_storage);
}
+}
- TimestampStatus DataReplicator::flushAndPause() {
- //_run(&_pauseApplier);
- UniqueLock lk(_mutex);
- if (_applierActive) {
- _applierPaused = true;
- lk.unlock();
- _applier->wait();
- lk.lock();
- }
- return TimestampStatus(_lastTimestampApplied);
+TimestampStatus DataReplicator::resync() {
+ _shutdown();
+ // Drop user databases, then run initialSync().
+ CBHStatus cbh = _exec->scheduleDBWork(
+ [&](const CallbackArgs& cbData) { _storage->dropUserDatabases(cbData.txn); });
+
+ if (!cbh.isOK()) {
+ return TimestampStatus(cbh.getStatus());
}
- void DataReplicator::_resetState_inlock(Timestamp lastAppliedOptime) {
- invariant(!_anyActiveHandles_inlock());
- _lastTimestampApplied = _lastTimestampFetched = lastAppliedOptime;
- _oplogBuffer.clear();
- }
+ _exec->wait(cbh.getValue());
- void DataReplicator::slavesHaveProgressed() {
- if (_reporter) {
- _reporter->trigger();
- }
+ TimestampStatus status = initialSync();
+ if (status.isOK()) {
+ _resetState_inlock(status.getValue());
}
+ return status;
+}
- void DataReplicator::_setInitialSyncStorageInterface(CollectionCloner::StorageInterface* si) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _storage = si;
- if (_initialSyncState) {
- _initialSyncState->dbsCloner.setStorageInterface(_storage);
+TimestampStatus DataReplicator::initialSync() {
+ Timer t;
+ UniqueLock lk(_mutex);
+ if (_state != DataReplicatorState::Uninitialized) {
+ if (_state == DataReplicatorState::InitialSync)
+ return TimestampStatus(ErrorCodes::InvalidRoleModification,
+ (str::stream() << "Already doing initial sync;try resync"));
+ else {
+ return TimestampStatus(
+ ErrorCodes::AlreadyInitialized,
+ (str::stream() << "Cannot do initial sync in " << toString(_state) << " state."));
}
}
- TimestampStatus DataReplicator::resync() {
- _shutdown();
- // Drop databases and do initialSync();
- CBHStatus cbh = _exec->scheduleDBWork([&](const CallbackArgs& cbData) {
- _storage->dropUserDatabases(cbData.txn);
- });
+ _state = DataReplicatorState::InitialSync;
- if (!cbh.isOK()) {
- return TimestampStatus(cbh.getStatus());
- }
+ // The reporter is paused for the duration of the initial sync, so cancel just in case.
+ if (_reporter) {
+ _reporter->cancel();
+ }
+ _reporterPaused = true;
+ _applierPaused = true;
- _exec->wait(cbh.getValue());
+ // TODO: set minvalid doc initial sync state.
- TimestampStatus status = initialSync();
- if (status.isOK()) {
- _resetState_inlock(status.getValue());
+ const int maxFailedAttempts = 10;
+ int failedAttempts = 0;
+ Status attemptErrorStatus(Status::OK());
+ while (failedAttempts < maxFailedAttempts) {
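+ // Each iteration below is one complete initial sync attempt: choose a source, clone, apply.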
+ // For testing: this failpoint fails the attempt as if no valid sync source were available.
+ if (MONGO_FAIL_POINT(failInitialSyncWithBadHost)) {
+ attemptErrorStatus = Status(ErrorCodes::InvalidSyncSource, "no sync source available.");
}
- return status;
- }
-
- TimestampStatus DataReplicator::initialSync() {
- Timer t;
- UniqueLock lk(_mutex);
- if (_state != DataReplicatorState::Uninitialized) {
- if (_state == DataReplicatorState::InitialSync)
- return TimestampStatus(ErrorCodes::InvalidRoleModification,
- (str::stream() << "Already doing initial sync;try resync"));
- else {
- return TimestampStatus(ErrorCodes::AlreadyInitialized,
- (str::stream() << "Cannot do initial sync in "
- << toString(_state) << " state."));
+
+ Event initialSyncFinishEvent;
+ if (attemptErrorStatus.isOK() && _syncSource.empty()) {
+ attemptErrorStatus = _ensureGoodSyncSource_inlock();
+ }
+ if (attemptErrorStatus.isOK()) {
+ StatusWith<Event> status = _exec->makeEvent();
+ if (!status.isOK()) {
+ attemptErrorStatus = status.getStatus();
+ } else {
+ initialSyncFinishEvent = status.getValue();
}
}
- _state = DataReplicatorState::InitialSync;
+ if (attemptErrorStatus.isOK()) {
+ invariant(initialSyncFinishEvent.isValid());
+ _initialSyncState.reset(new InitialSyncState(
+ DatabasesCloner(
+ _exec,
+ _syncSource,
+ stdx::bind(&DataReplicator::_onDataClonerFinish, this, stdx::placeholders::_1)),
+ initialSyncFinishEvent));
- // The reporter is paused for the duration of the initial sync, so cancel just in case.
- if (_reporter) {
- _reporter->cancel();
+ _initialSyncState->dbsCloner.setStorageInterface(_storage);
+ const NamespaceString ns(_opts.remoteOplogNS);
+ TimestampStatus tsStatus =
+ _initialSyncState->getLatestOplogTimestamp(_exec, _syncSource, ns);
+ attemptErrorStatus = tsStatus.getStatus();
+ if (attemptErrorStatus.isOK()) {
+ _initialSyncState->beginTimestamp = tsStatus.getValue();
+ _fetcher.reset(new OplogFetcher(_exec,
+ _initialSyncState->beginTimestamp,
+ _syncSource,
+ _opts.remoteOplogNS,
+ stdx::bind(&DataReplicator::_onOplogFetchFinish,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2)));
+ _scheduleFetch_inlock();
+ lk.unlock();
+ _initialSyncState->dbsCloner.start(); // When the cloner is done, the applier starts.
+ invariant(_initialSyncState->finishEvent.isValid());
+ _exec->waitForEvent(_initialSyncState->finishEvent);
+ attemptErrorStatus = _initialSyncState->status;
+
+ // Re-lock DataReplicator Internals
+ lk.lock();
+ }
}
- _reporterPaused = true;
- _applierPaused = true;
- // TODO: set minvalid doc initial sync state.
+ if (attemptErrorStatus.isOK()) {
+ break; // success
+ }
- const int maxFailedAttempts = 10;
- int failedAttempts = 0;
- Status attemptErrorStatus(Status::OK());
- while (failedAttempts < maxFailedAttempts) {
- // For testing, we may want to fail if we receive a getmore.
- if (MONGO_FAIL_POINT(failInitialSyncWithBadHost)) {
- attemptErrorStatus = Status(ErrorCodes::InvalidSyncSource, "no sync source avail.");
- }
+ ++failedAttempts;
- Event initialSyncFinishEvent;
- if (attemptErrorStatus.isOK() && _syncSource.empty()) {
- attemptErrorStatus = _ensureGoodSyncSource_inlock();
- }
- else if(attemptErrorStatus.isOK()) {
- StatusWith<Event> status = _exec->makeEvent();
- if (!status.isOK()) {
- attemptErrorStatus = status.getStatus();
- } else {
- initialSyncFinishEvent = status.getValue();
- }
- }
+ error() << "Initial sync attempt failed -- attempts left: "
+ << (maxFailedAttempts - failedAttempts) << " cause: " << attemptErrorStatus;
- if (attemptErrorStatus.isOK()) {
- invariant(initialSyncFinishEvent.isValid());
- _initialSyncState.reset(new InitialSyncState(
- DatabasesCloner(_exec,
- _syncSource,
- stdx::bind(&DataReplicator::_onDataClonerFinish,
- this,
- stdx::placeholders::_1)),
- initialSyncFinishEvent));
-
- _initialSyncState->dbsCloner.setStorageInterface(_storage);
- const NamespaceString ns(_opts.remoteOplogNS);
- TimestampStatus tsStatus = _initialSyncState->getLatestOplogTimestamp(
- _exec,
- _syncSource,
- ns);
- attemptErrorStatus = tsStatus.getStatus();
- if (attemptErrorStatus.isOK()) {
- _initialSyncState->beginTimestamp = tsStatus.getValue();
- _fetcher.reset(new OplogFetcher(_exec,
- _initialSyncState->beginTimestamp,
- _syncSource,
- _opts.remoteOplogNS,
- stdx::bind(&DataReplicator::_onOplogFetchFinish,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2)));
- _scheduleFetch_inlock();
- lk.unlock();
- _initialSyncState->dbsCloner.start(); // When the cloner is done applier starts.
- invariant(_initialSyncState->finishEvent.isValid());
- _exec->waitForEvent(_initialSyncState->finishEvent);
- attemptErrorStatus = _initialSyncState->status;
-
- // Re-lock DataReplicator Internals
- lk.lock();
- }
- }
+ // Sleep for retry time
+ lk.unlock();
+ sleepmillis(_opts.initialSyncRetryWait.count());
+ lk.lock();
- if (attemptErrorStatus.isOK()) {
- break; // success
- }
+ // No need to print a stack
+ if (failedAttempts >= maxFailedAttempts) {
+ const std::string err =
+ "The maximum number of retries"
+ " have been exhausted for initial sync.";
+ severe() << err;
+ return Status(ErrorCodes::InitialSyncFailure, err);
+ }
+ }
- ++failedAttempts;
+ // Success, cleanup
+ // TODO: re-enable, find blocking call from tests
+ /*
+ _cancelAllHandles_inlock();
+ _waitOnAll_inlock();
- error() << "Initial sync attempt failed -- attempts left: "
- << (maxFailedAttempts - failedAttempts) << " cause: "
- << attemptErrorStatus;
+ _reporterPaused = false;
+ _fetcherPaused = false;
+ _fetcher.reset(nullptr);
+ _tmpFetcher.reset(nullptr);
+ _applierPaused = false;
+ _applier.reset(nullptr);
+ _applierActive = false;
+ _initialSyncState.reset(nullptr);
+ _oplogBuffer.clear();
+ _resetState_inlock(_lastTimestampApplied);
+ */
+ log() << "Initial sync took: " << t.millis() << " milliseconds.";
+ return TimestampStatus(_lastTimestampApplied);
+}
- // Sleep for retry time
- lk.unlock();
- sleepmillis(_opts.initialSyncRetryWait.count());
- lk.lock();
+void DataReplicator::_onDataClonerFinish(const Status& status) {
+ log() << "data clone finished, status: " << status.toString();
+ if (!status.isOK()) {
+ // Initial sync failed during cloning of databases.
+ _initialSyncState->setStatus(status);
+ _exec->signalEvent(_initialSyncState->finishEvent);
+ return;
+ }
- // No need to print a stack
- if (failedAttempts >= maxFailedAttempts) {
- const std::string err = "The maximum number of retries"
- " have been exhausted for initial sync.";
- severe() << err;
- return Status(ErrorCodes::InitialSyncFailure, err);
- }
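+ // Re-query the newest oplog entry; its "ts" becomes the stopTimestamp in _onApplierReadyStart.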
+ BSONObj query = BSON("find" << _opts.remoteOplogNS.coll() << "sort" << BSON("$natural" << -1)
+ << "limit" << 1);
+
+ TimestampStatus timestampStatus(ErrorCodes::BadValue, "");
+ _tmpFetcher.reset(new QueryFetcher(_exec,
+ _syncSource,
+ _opts.remoteOplogNS,
+ query,
+ stdx::bind(&DataReplicator::_onApplierReadyStart,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2)));
+ Status s = _tmpFetcher->schedule();
+ if (!s.isOK()) {
+ _initialSyncState->setStatus(s);
+ }
+}
+
+void DataReplicator::_onApplierReadyStart(const QueryResponseStatus& fetchResult,
+ NextAction* nextAction) {
+ // Data clone done, move on to apply.
+ TimestampStatus ts(ErrorCodes::OplogStartMissing, "");
+ _initialSyncState->_setTimestampStatus(fetchResult, nextAction, &ts);
+ if (ts.isOK()) {
+ // TODO: set minvalid?
+ LockGuard lk(_mutex);
+ _initialSyncState->stopTimestamp = ts.getValue();
+ if (_lastTimestampApplied < ts.getValue()) {
+ log() << "waiting for applier to run until ts: " << ts.getValue();
}
+ invariant(_applierPaused);
+ _applierPaused = false;
+ _doNextActions_InitialSync_inlock();
+ } else {
+ _initialSyncState->setStatus(ts.getStatus());
+ _doNextActions();
+ }
+}
- // Success, cleanup
- // TODO: re-enable, find blocking call from tests
-/*
- _cancelAllHandles_inlock();
- _waitOnAll_inlock();
+bool DataReplicator::_anyActiveHandles_inlock() const {
+ return _applierActive || (_fetcher && _fetcher->isActive()) ||
+ (_initialSyncState && _initialSyncState->dbsCloner.isActive()) ||
+ (_reporter && _reporter->isActive());
+}
- _reporterPaused = false;
- _fetcherPaused = false;
- _fetcher.reset(nullptr);
- _tmpFetcher.reset(nullptr);
- _applierPaused = false;
- _applier.reset(nullptr);
- _applierActive = false;
- _initialSyncState.reset(nullptr);
- _oplogBuffer.clear();
- _resetState_inlock(_lastTimestampApplied);
-*/
- log() << "Initial sync took: " << t.millis() << " milliseconds.";
- return TimestampStatus(_lastTimestampApplied);
- }
-
- void DataReplicator::_onDataClonerFinish(const Status& status) {
- log() << "data clone finished, status: " << status.toString();
- if (!status.isOK()) {
- // Iniitial sync failed during cloning of databases
- _initialSyncState->setStatus(status);
- _exec->signalEvent(_initialSyncState->finishEvent);
- return;
- }
+void DataReplicator::_cancelAllHandles_inlock() {
+ if (_fetcher)
+ _fetcher->cancel();
+ if (_applier)
+ _applier->cancel();
+ if (_reporter)
+ _reporter->cancel();
+ if (_initialSyncState && _initialSyncState->dbsCloner.isActive())
+ _initialSyncState->dbsCloner.cancel();
+}
- BSONObj query = BSON("find" << _opts.remoteOplogNS.coll() <<
- "sort" << BSON ("$natural" << -1) <<
- "limit" << 1);
-
- TimestampStatus timestampStatus(ErrorCodes::BadValue, "");
- _tmpFetcher.reset(new QueryFetcher(_exec,
- _syncSource,
- _opts.remoteOplogNS,
- query,
- stdx::bind(&DataReplicator::_onApplierReadyStart,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2)));
- Status s = _tmpFetcher->schedule();
- if (!s.isOK()) {
- _initialSyncState->setStatus(s);
- }
- }
+void DataReplicator::_waitOnAll_inlock() {
+ if (_fetcher)
+ _fetcher->wait();
+ if (_applier)
+ _applier->wait();
+ if (_reporter)
+ _reporter->wait();
+ if (_initialSyncState)
+ _initialSyncState->dbsCloner.wait();
+}
- void DataReplicator::_onApplierReadyStart(const QueryResponseStatus& fetchResult,
- NextAction* nextAction) {
- // Data clone done, move onto apply.
- TimestampStatus ts(ErrorCodes::OplogStartMissing, "");
- _initialSyncState->_setTimestampStatus(fetchResult, nextAction, &ts);
- if (ts.isOK()) {
- // TODO: set minvalid?
- LockGuard lk(_mutex);
- _initialSyncState->stopTimestamp = ts.getValue();
- if (_lastTimestampApplied < ts.getValue()) {
- log() << "waiting for applier to run until ts: " << ts.getValue();
- }
- invariant(_applierPaused);
- _applierPaused = false;
- _doNextActions_InitialSync_inlock();
- }
- else {
- _initialSyncState->setStatus(ts.getStatus());
- _doNextActions();
+void DataReplicator::_doNextActions() {
+ // Can be in one of 3 main states/modes (DataReplicatorState):
+ // 1.) Initial Sync
+ // 2.) Rollback
+ // 3.) Steady (Replication)
+
+ // Check for shutdown flag, signal event
+ LockGuard lk(_mutex);
+ if (_onShutdown.isValid()) {
+ if (!_anyActiveHandles_inlock()) {
+ _exec->signalEvent(_onShutdown);
+ _state = DataReplicatorState::Uninitialized;
}
+ return;
}
- bool DataReplicator::_anyActiveHandles_inlock() const {
- return _applierActive ||
- (_fetcher && _fetcher->isActive()) ||
- (_initialSyncState && _initialSyncState->dbsCloner.isActive()) ||
- (_reporter && _reporter->isActive());
+ // Do work for the current state
+ switch (_state) {
+ case DataReplicatorState::Rollback:
+ _doNextActions_Rollback_inlock();
+ break;
+ case DataReplicatorState::InitialSync:
+ _doNextActions_InitialSync_inlock();
+ break;
+ case DataReplicatorState::Steady:
+ _doNextActions_Steady_inlock();
+ break;
+ default:
+ return;
}
- void DataReplicator::_cancelAllHandles_inlock() {
- if (_fetcher)
- _fetcher->cancel();
- if (_applier)
- _applier->cancel();
- if (_reporter)
- _reporter->cancel();
- if (_initialSyncState && _initialSyncState->dbsCloner.isActive())
- _initialSyncState->dbsCloner.cancel();
- }
+ // transition when needed
+ _changeStateIfNeeded();
+}
- void DataReplicator::_waitOnAll_inlock() {
- if (_fetcher)
- _fetcher->wait();
- if (_applier)
- _applier->wait();
- if (_reporter)
- _reporter->wait();
- if (_initialSyncState)
- _initialSyncState->dbsCloner.wait();
+void DataReplicator::_doNextActions_InitialSync_inlock() {
+ if (!_initialSyncState) {
+ // TODO: Is this an error case? Reset to Uninitialized for now.
+ _state = DataReplicatorState::Uninitialized;
+ log() << "_initialSyncState is not set, resetting state to Uninitialized";
+ return;
}
- void DataReplicator::_doNextActions() {
- // Can be in one of 3 main states/modes (DataReplicatiorState):
- // 1.) Initial Sync
- // 2.) Rollback
- // 3.) Steady (Replication)
-
- // Check for shutdown flag, signal event
- LockGuard lk(_mutex);
- if (_onShutdown.isValid()) {
- if(!_anyActiveHandles_inlock()) {
- _exec->signalEvent(_onShutdown);
+ if (!_initialSyncState->dbsCloner.isActive()) {
+ if (!_initialSyncState->dbsCloner.getStatus().isOK()) {
+ // TODO: Initial sync failed
+ } else {
+ if (!_lastTimestampApplied.isNull() &&
+ _lastTimestampApplied >= _initialSyncState->stopTimestamp) {
+ invariant(_initialSyncState->finishEvent.isValid());
+ log() << "Applier done, initial sync done, end timestamp: "
+ << _initialSyncState->stopTimestamp
+ << " , last applier: " << _lastTimestampApplied;
_state = DataReplicatorState::Uninitialized;
- }
- return;
- }
-
- // Do work for the current state
- switch (_state) {
- case DataReplicatorState::Rollback:
- _doNextActions_Rollback_inlock();
- break;
- case DataReplicatorState::InitialSync:
- _doNextActions_InitialSync_inlock();
- break;
- case DataReplicatorState::Steady:
+ _initialSyncState->setStatus(Status::OK());
+ _exec->signalEvent(_initialSyncState->finishEvent);
+ } else {
+ // Run steady state events to fetch/apply.
_doNextActions_Steady_inlock();
- break;
- default:
- return;
+ }
}
-
- // transition when needed
- _changeStateIfNeeded();
}
+}
- void DataReplicator::_doNextActions_InitialSync_inlock() {
- if (!_initialSyncState) {
- // TODO: Error case?, reset to uninit'd
- _state = DataReplicatorState::Uninitialized;
- log() << "_initialSyncState, so resetting state to Uninitialized";
- return;
- }
+void DataReplicator::_doNextActions_Rollback_inlock() {
+ // TODO: check rollback state and do next actions
+ // move from one rollback phase to the next via scheduled work in exec
+}
- if (!_initialSyncState->dbsCloner.isActive()) {
- if (!_initialSyncState->dbsCloner.getStatus().isOK()) {
- // TODO: Initial sync failed
- }
- else {
- if (!_lastTimestampApplied.isNull() &&
- _lastTimestampApplied >= _initialSyncState->stopTimestamp) {
- invariant(_initialSyncState->finishEvent.isValid());
- log() << "Applier done, initial sync done, end timestamp: "
- << _initialSyncState->stopTimestamp << " , last applier: "
- << _lastTimestampApplied;
- _state = DataReplicatorState::Uninitialized;
- _initialSyncState->setStatus(Status::OK());
- _exec->signalEvent(_initialSyncState->finishEvent);
- }
- else {
- // Run steady state events to fetch/apply.
- _doNextActions_Steady_inlock();
- }
+void DataReplicator::_doNextActions_Steady_inlock() {
+ // Check sync source is still good.
+ if (_syncSource.empty()) {
+ _syncSource = _replCoord->chooseNewSyncSource();
+ }
+ if (_syncSource.empty()) {
+ // No sync source, reschedule check
+ Date_t when = _exec->now() + _opts.syncSourceRetryWait;
+ // schedule self-callback w/executor
+ // to try to get a new sync source in a bit
+ auto checkSyncSource = [this](const executor::TaskExecutor::CallbackArgs& cba) {
+ if (cba.status.code() == ErrorCodes::CallbackCanceled) {
+ return;
}
+ _doNextActions();
+ };
+ _exec->scheduleWorkAt(when, checkSyncSource);
+ } else {
+ // Check if active fetch, if not start one
+ if (!_fetcher || !_fetcher->isActive()) {
+ _scheduleFetch_inlock();
}
}
- void DataReplicator::_doNextActions_Rollback_inlock() {
- // TODO: check rollback state and do next actions
- // move from rollback phase to rollback phase via scheduled work in exec
+ // Check if no active apply and ops to apply
+ if (!_applierActive && _oplogBuffer.size()) {
+ _scheduleApplyBatch_inlock();
}
- void DataReplicator::_doNextActions_Steady_inlock() {
- // Check sync source is still good.
- if (_syncSource.empty()) {
- _syncSource = _replCoord->chooseNewSyncSource();
- }
- if (_syncSource.empty()) {
- // No sync source, reschedule check
- Date_t when = _exec->now() + _opts.syncSourceRetryWait;
- // schedule self-callback w/executor
- // to try to get a new sync source in a bit
- auto checkSyncSource = [this] (const executor::TaskExecutor::CallbackArgs& cba) {
- if (cba.status.code() == ErrorCodes::CallbackCanceled) {
- return;
- }
- _doNextActions();
- };
- _exec->scheduleWorkAt(when, checkSyncSource);
- } else {
- // Check if active fetch, if not start one
- if (!_fetcher || !_fetcher->isActive()) {
- _scheduleFetch_inlock();
- }
- }
+ if (!_reporterPaused && (!_reporter || !_reporter->getStatus().isOK())) {
+ // TODO get reporter in good shape
+ _reporter.reset(new Reporter(_exec, _replCoord, _syncSource));
+ }
+}
- // Check if no active apply and ops to apply
- if (!_applierActive && _oplogBuffer.size()) {
- _scheduleApplyBatch_inlock();
- }
+Operations DataReplicator::_getNextApplierBatch_inlock() {
+ // Return a new batch of ops to apply.
+ // TODO: limit the batch like SyncTail::tryPopAndWaitForMore
+ Operations ops;
+ BSONObj op;
+ while (_oplogBuffer.tryPop(op)) {
+ ops.push_back(op);
+ }
+ return ops;
+}
- if (!_reporterPaused && (!_reporter || !_reporter->getStatus().isOK())) {
- // TODO get reporter in good shape
- _reporter.reset(new Reporter(_exec, _replCoord, _syncSource));
- }
+void DataReplicator::_onApplyBatchFinish(const CallbackArgs& cbData,
+ const TimestampStatus& ts,
+ const Operations& ops,
+ const size_t numApplied) {
+ invariant(cbData.status.isOK());
+ UniqueLock lk(_mutex);
+ if (_initialSyncState) {
+ _initialSyncState->appliedOps += numApplied;
+ }
+ if (!ts.isOK()) {
+ _handleFailedApplyBatch(ts, ops);
+ return;
}
- Operations DataReplicator::_getNextApplierBatch_inlock() {
- // Return a new batch of ops to apply.
- // TODO: limit the batch like SyncTail::tryPopAndWaitForMore
- Operations ops;
- BSONObj op;
- while(_oplogBuffer.tryPop(op)) {
- ops.push_back(op);
- }
- return ops;
+ _lastTimestampApplied = ts.getValue();
+ lk.unlock();
+
+ if (_batchCompletedFn) {
+ _batchCompletedFn(ts.getValue());
+ }
+ // TODO: move the reporter to the replication coordinator and set _batchCompletedFn to a
+ // function in the replCoord.
+ if (_reporter) {
+ _reporter->trigger();
}
- void DataReplicator::_onApplyBatchFinish(const CallbackArgs& cbData,
- const TimestampStatus& ts,
- const Operations& ops,
- const size_t numApplied) {
- invariant(cbData.status.isOK());
- UniqueLock lk(_mutex);
- if (_initialSyncState) {
- _initialSyncState->appliedOps += numApplied;
- }
- if (!ts.isOK()) {
- _handleFailedApplyBatch(ts, ops);
- return;
- }
+ _doNextActions();
+}
- _lastTimestampApplied = ts.getValue();
- lk.unlock();
+void DataReplicator::_handleFailedApplyBatch(const TimestampStatus& ts, const Operations& ops) {
+ switch (_state) {
+ case DataReplicatorState::InitialSync:
+ // TODO: fetch missing doc, and retry.
+ _scheduleApplyAfterFetch(ops);
+ break;
+ case DataReplicatorState::Rollback:
+ // TODO: nothing? Falls through to the fatal default for now.
+ default:
+ // fatal
+ fassert(28666, ts.getStatus());
+ }
+}
- if (_batchCompletedFn) {
- _batchCompletedFn(ts.getValue());
- }
- // TODO: move the reporter to the replication coordinator and set _batchCompletedFn to a
- // function in the replCoord.
- if (_reporter) {
- _reporter->trigger();
- }
+void DataReplicator::_scheduleApplyAfterFetch(const Operations& ops) {
+ ++_initialSyncState->fetchedMissingDocs;
+ // TODO: check collection.isCapped, like SyncTail::getMissingDoc
+ const BSONObj failedOplogEntry = *ops.begin();
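+ // For update oplog entries, the "o2" field carries the _id of the missing document.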
+ const BSONElement missingIdElem = failedOplogEntry.getFieldDotted("o2._id");
+ const NamespaceString nss(ops.begin()->getField("ns").str());
+ const BSONObj query = BSON("find" << nss.coll() << "filter" << missingIdElem.wrap());
+ _tmpFetcher.reset(new QueryFetcher(_exec,
+ _syncSource,
+ nss,
+ query,
+ stdx::bind(&DataReplicator::_onMissingFetched,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ ops,
+ nss)));
+ Status s = _tmpFetcher->schedule();
+ if (!s.isOK()) {
+ // record error and take next step based on it.
+ _initialSyncState->setStatus(s);
+ _doNextActions();
+ }
+}
+void DataReplicator::_onMissingFetched(const QueryResponseStatus& fetchResult,
+ Fetcher::NextAction* nextAction,
+ const Operations& ops,
+ const NamespaceString nss) {
+ if (!fetchResult.isOK()) {
+ // TODO: do retries on network issues, like SyncTail::getMissingDoc
+ _initialSyncState->setStatus(fetchResult.getStatus());
_doNextActions();
+ return;
+ } else if (!fetchResult.getValue().documents.size()) {
+ // TODO: skip apply for this doc, like multiInitialSyncApply?
+ _initialSyncState->setStatus(
+ Status(ErrorCodes::InitialSyncFailure, "missing doc not found"));
+ _doNextActions();
+ return;
}
- void DataReplicator::_handleFailedApplyBatch(const TimestampStatus& ts, const Operations& ops) {
- switch (_state) {
- case DataReplicatorState::InitialSync:
- // TODO: fetch missing doc, and retry.
- _scheduleApplyAfterFetch(ops);
- break;
- case DataReplicatorState::Rollback:
- // TODO: nothing?
- default:
- // fatal
- fassert(28666, ts.getStatus());
- }
+ const BSONObj missingDoc = *fetchResult.getValue().documents.begin();
+ Status rs{Status::OK()};
+ auto s = _exec->scheduleDBWork(
+ ([&](const CallbackArgs& cd) { rs = _storage->insertMissingDoc(cd.txn, nss, missingDoc); }),
+ nss,
+ MODE_IX);
+ if (!s.isOK()) {
+ _initialSyncState->setStatus(s);
+ _doNextActions();
+ return;
}
- void DataReplicator::_scheduleApplyAfterFetch(const Operations& ops) {
- ++_initialSyncState->fetchedMissingDocs;
- // TODO: check collection.isCapped, like SyncTail::getMissingDoc
- const BSONObj failedOplogEntry = *ops.begin();
- const BSONElement missingIdElem = failedOplogEntry.getFieldDotted("o2._id");
- const NamespaceString nss(ops.begin()->getField("ns").str());
- const BSONObj query = BSON("find" << nss.coll() << "filter" << missingIdElem.wrap());
- _tmpFetcher.reset(new QueryFetcher(_exec, _syncSource, nss, query,
- stdx::bind(&DataReplicator::_onMissingFetched,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- ops,
- nss)));
- Status s = _tmpFetcher->schedule();
- if (!s.isOK()) {
- // record error and take next step based on it.
- _initialSyncState->setStatus(s);
- _doNextActions();
- }
+ _exec->wait(s.getValue());
+ if (!rs.isOK()) {
+ _initialSyncState->setStatus(rs);
+ _doNextActions();
+ return;
}
- void DataReplicator::_onMissingFetched(const QueryResponseStatus& fetchResult,
- Fetcher::NextAction* nextAction,
- const Operations& ops,
- const NamespaceString nss) {
- if (!fetchResult.isOK()) {
- // TODO: do retries on network issues, like SyncTail::getMissingDoc
- _initialSyncState->setStatus(fetchResult.getStatus());
- _doNextActions();
- return;
- } else if (!fetchResult.getValue().documents.size()) {
- // TODO: skip apply for this doc, like multiInitialSyncApply?
- _initialSyncState->setStatus(Status(ErrorCodes::InitialSyncFailure,
- "missing doc not found"));
- _doNextActions();
- return;
- }
+ LockGuard lk(_mutex);
+ auto status = _scheduleApplyBatch_inlock(ops);
+ if (!status.isOK()) {
+ _initialSyncState->setStatus(status);
+ _exec->signalEvent(_initialSyncState->finishEvent);
+ }
+}
- const BSONObj missingDoc = *fetchResult.getValue().documents.begin();
- Status rs{Status::OK()};
- auto s = _exec->scheduleDBWork(([&](const CallbackArgs& cd) {
- rs = _storage->insertMissingDoc(cd.txn, nss, missingDoc);
- }),
- nss,
- MODE_IX);
- if (!s.isOK()) {
- _initialSyncState->setStatus(s);
- _doNextActions();
- return;
- }
+Status DataReplicator::_scheduleApplyBatch() {
+ LockGuard lk(_mutex);
+ return _scheduleApplyBatch_inlock();
+}
- _exec->wait(s.getValue());
- if (!rs.isOK()) {
- _initialSyncState->setStatus(rs);
- _doNextActions();
- return;
- }
+Status DataReplicator::_scheduleApplyBatch_inlock() {
+ if (!_applierPaused && !_applierActive) {
+ _applierActive = true;
+ const Operations ops = _getNextApplierBatch_inlock();
+ invariant(ops.size());
+ invariant(_opts.applierFn);
+ invariant(!(_applier && _applier->isActive()));
+ return _scheduleApplyBatch_inlock(ops);
+ }
+ return Status::OK();
+}
- LockGuard lk(_mutex);
- auto status = _scheduleApplyBatch_inlock(ops);
+Status DataReplicator::_scheduleApplyBatch_inlock(const Operations& ops) {
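+ // Completion callback for the Applier: hand the batch result back to the executor thread.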
+ auto lambda = [this](const TimestampStatus& ts, const Operations& theOps) {
+ CBHStatus status = _exec->scheduleWork(stdx::bind(&DataReplicator::_onApplyBatchFinish,
+ this,
+ stdx::placeholders::_1,
+ ts,
+ theOps,
+ theOps.size()));
if (!status.isOK()) {
+ LockGuard lk(_mutex);
_initialSyncState->setStatus(status);
_exec->signalEvent(_initialSyncState->finishEvent);
+ return;
}
- }
+ // block until callback done.
+ _exec->wait(status.getValue());
+ };
- Status DataReplicator::_scheduleApplyBatch() {
- LockGuard lk(_mutex);
- return _scheduleApplyBatch_inlock();
- }
+ _applier.reset(new Applier(_exec, ops, _opts.applierFn, lambda));
+ return _applier->start();
+}
- Status DataReplicator::_scheduleApplyBatch_inlock() {
- if (!_applierPaused && !_applierActive) {
- _applierActive = true;
- const Operations ops = _getNextApplierBatch_inlock();
- invariant(ops.size());
- invariant(_opts.applierFn);
- invariant(!(_applier && _applier->isActive()));
- return _scheduleApplyBatch_inlock(ops);
+Status DataReplicator::_scheduleFetch() {
+ LockGuard lk(_mutex);
+ return _scheduleFetch_inlock();
+}
+
+Status DataReplicator::_ensureGoodSyncSource_inlock() {
+ if (_syncSource.empty()) {
+ if (_replCoord) {
+ _syncSource = _replCoord->chooseNewSyncSource();
+ if (!_syncSource.empty()) {
+ return Status::OK();
+ }
+ } else {
+ _syncSource = _opts.syncSource; // set this back to the options source
}
- return Status::OK();
- }
- Status DataReplicator::_scheduleApplyBatch_inlock(const Operations& ops) {
- auto lambda = [this] (const TimestampStatus& ts, const Operations& theOps) {
- CBHStatus status = _exec->scheduleWork(stdx::bind(&DataReplicator::_onApplyBatchFinish,
- this,
- stdx::placeholders::_1,
- ts,
- theOps,
- theOps.size()));
- if (!status.isOK()) {
- LockGuard lk(_mutex);
- _initialSyncState->setStatus(status);
- _exec->signalEvent(_initialSyncState->finishEvent);
- return;
- }
- // block until callback done.
- _exec->wait(status.getValue());
- };
-
- _applier.reset(new Applier(_exec, ops, _opts.applierFn, lambda));
- return _applier->start();
- }
-
- Status DataReplicator::_scheduleFetch() {
- LockGuard lk(_mutex);
- return _scheduleFetch_inlock();
+ return Status{ErrorCodes::InvalidSyncSource, "No valid sync source."};
}
+ return Status::OK();
+}
- Status DataReplicator::_ensureGoodSyncSource_inlock() {
- if (_syncSource.empty()) {
- if (_replCoord) {
- _syncSource = _replCoord->chooseNewSyncSource();
- if (!_syncSource.empty()) {
- return Status::OK();
- }
- } else {
- _syncSource = _opts.syncSource; // set this back to the options source
+Status DataReplicator::_scheduleFetch_inlock() {
+ if (!_fetcher) {
+ if (!_ensureGoodSyncSource_inlock().isOK()) {
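+ // No usable sync source yet; schedule another _doNextActions pass to retry selection.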
+ auto status = _exec->scheduleWork([this](const CallbackArgs&) { _doNextActions(); });
+ if (!status.isOK()) {
+ return status.getStatus();
}
+ }
- return Status{ErrorCodes::InvalidSyncSource, "No valid sync source."};
+ const auto startOptime =
+ _replCoord ? _replCoord->getMyLastOptime().getTimestamp() : _opts.startOptime;
+ const auto remoteOplogNS = _opts.remoteOplogNS;
+
+ // TODO: add query options await_data, oplog_replay
+ _fetcher.reset(new OplogFetcher(_exec,
+ startOptime,
+ _syncSource,
+ remoteOplogNS,
+ stdx::bind(&DataReplicator::_onOplogFetchFinish,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2)));
+ }
+ if (!_fetcher->isActive()) {
+ Status status = _fetcher->schedule();
+ if (!status.isOK()) {
+ return status;
}
- return Status::OK();
}
+ return Status::OK();
+}
- Status DataReplicator::_scheduleFetch_inlock() {
- if (!_fetcher) {
- if (!_ensureGoodSyncSource_inlock().isOK()) {
- auto status = _exec->scheduleWork([this](const CallbackArgs&){ _doNextActions(); });
- if (!status.isOK()) {
- return status.getStatus();
- }
- }
-
- const auto startOptime = _replCoord ? _replCoord->getMyLastOptime().getTimestamp()
- : _opts.startOptime;
- const auto remoteOplogNS = _opts.remoteOplogNS;
+Status DataReplicator::_scheduleReport() {
+ // TODO
+ return Status::OK();
+}
- // TODO: add query options await_data, oplog_replay
- _fetcher.reset(new OplogFetcher(_exec,
- startOptime,
- _syncSource,
- remoteOplogNS,
- stdx::bind(&DataReplicator::_onOplogFetchFinish,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2)));
+void DataReplicator::_changeStateIfNeeded() {
+ // TODO
+}
- }
- if (!_fetcher->isActive()) {
- Status status = _fetcher->schedule();
- if (!status.isOK()) {
- return status;
- }
- }
- return Status::OK();
+Status DataReplicator::scheduleShutdown() {
+ auto eventStatus = _exec->makeEvent();
+ if (!eventStatus.isOK()) {
+ return eventStatus.getStatus();
}
- Status DataReplicator::_scheduleReport() {
- // TODO
- return Status::OK();
+ {
+ LockGuard lk(_mutex);
+ invariant(!_onShutdown.isValid());
+ _onShutdown = eventStatus.getValue();
+ _cancelAllHandles_inlock();
}
- void DataReplicator::_changeStateIfNeeded() {
- // TODO
+ // Schedule _doNextActions in case nothing is active to trigger the _onShutdown event.
+ auto scheduleResult = _exec->scheduleWork([this](const CallbackArgs&) { _doNextActions(); });
+ if (scheduleResult.isOK()) {
+ return Status::OK();
}
+ return scheduleResult.getStatus();
+}
- Status DataReplicator::scheduleShutdown() {
- auto eventStatus = _exec->makeEvent();
- if (!eventStatus.isOK()) {
- return eventStatus.getStatus();
- }
-
- {
- LockGuard lk(_mutex);
- invariant(!_onShutdown.isValid());
- _onShutdown = eventStatus.getValue();
- _cancelAllHandles_inlock();
- }
-
- // Schedule _doNextActions in case nothing is active to trigger the _onShutdown event.
- auto scheduleResult = _exec->scheduleWork([this] (const CallbackArgs&) {
- _doNextActions();
- });
- if (scheduleResult.isOK()) {
- return Status::OK();
- }
- return scheduleResult.getStatus();
+void DataReplicator::waitForShutdown() {
+ Event onShutdown;
+ {
+ LockGuard lk(_mutex);
+ invariant(_onShutdown.isValid());
+ onShutdown = _onShutdown;
}
-
- void DataReplicator::waitForShutdown() {
- Event onShutdown;
- {
- LockGuard lk(_mutex);
- invariant(_onShutdown.isValid());
- onShutdown = _onShutdown;
- }
- _exec->waitForEvent(onShutdown);
- {
- LockGuard lk(_mutex);
- invariant(!_fetcher->isActive());
- invariant(!_applierActive);
- invariant(!_reporter->isActive());
- }
+ _exec->waitForEvent(onShutdown);
+ {
+ LockGuard lk(_mutex);
+ invariant(!_fetcher->isActive());
+ invariant(!_applierActive);
+ invariant(!_reporter->isActive());
}
+}
- Status DataReplicator::_shutdown() {
- auto status = scheduleShutdown();
- if (status.isOK()) {
- waitForShutdown();
- }
- return status;
+Status DataReplicator::_shutdown() {
+ auto status = scheduleShutdown();
+ if (status.isOK()) {
+ waitForShutdown();
}
+ return status;
+}
- void DataReplicator::_onOplogFetchFinish(const StatusWith<Fetcher::QueryResponse>& fetchResult,
- Fetcher::NextAction* nextAction) {
- const Status status = fetchResult.getStatus();
- if (status.code() == ErrorCodes::CallbackCanceled)
- return;
- if (status.isOK()) {
- const auto& docs = fetchResult.getValue().documents;
- if (docs.begin() != docs.end()) {
- LockGuard lk(_mutex);
- std::for_each(docs.cbegin(),
- docs.cend(),
- [&](const BSONObj& doc) {
- _oplogBuffer.push(doc);
- });
- auto doc = docs.rbegin();
- BSONElement tsElem(doc->getField("ts"));
- while(tsElem.eoo() && doc != docs.rend()) {
- tsElem = (doc++)->getField("ts");
- }
-
- if (!tsElem.eoo()) {
- _lastTimestampFetched = tsElem.timestamp();
- } else {
- warning() <<
- "Did not find a 'ts' timestamp field in any of the fetched documents";
- }
+void DataReplicator::_onOplogFetchFinish(const StatusWith<Fetcher::QueryResponse>& fetchResult,
+ Fetcher::NextAction* nextAction) {
+ const Status status = fetchResult.getStatus();
+ if (status.code() == ErrorCodes::CallbackCanceled)
+ return;
+ if (status.isOK()) {
+ const auto& docs = fetchResult.getValue().documents;
+ if (docs.begin() != docs.end()) {
+ LockGuard lk(_mutex);
+ std::for_each(
+ docs.cbegin(), docs.cend(), [&](const BSONObj& doc) { _oplogBuffer.push(doc); });
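+ // Walk backwards from the newest document to find the most recent "ts" field.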
+ auto doc = docs.rbegin();
+ BSONElement tsElem(doc->getField("ts"));
+ while (tsElem.eoo() && doc != docs.rend()) {
+ tsElem = (doc++)->getField("ts");
}
- if (*nextAction == Fetcher::NextAction::kNoAction) {
- // TODO: create new fetcher?, with new query from where we left off -- d'tor fetcher
+ if (!tsElem.eoo()) {
+ _lastTimestampFetched = tsElem.timestamp();
+ } else {
+ warning() << "Did not find a 'ts' timestamp field in any of the fetched documents";
}
}
+ if (*nextAction == Fetcher::NextAction::kNoAction) {
+ // TODO: create a new fetcher with a new query from where we left off; destroy the old fetcher.
+ }
+ }
- if (!status.isOK()) {
- // Got an error, now decide what to do...
- switch (status.code()) {
- case ErrorCodes::OplogStartMissing: {
- // possible rollback
- _rollbackCommonOptime = findCommonPoint(_syncSource, _lastTimestampApplied);
- if (_rollbackCommonOptime.isNull()) {
- auto s = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
- if (!s) {
- error() << "Failed to transition to RECOVERING when "
- "we couldn't find oplog start position ("
- << _fetcher->getStartTimestamp().toString()
- << ") from sync source: "
- << _syncSource.toString();
- }
- Date_t until{_exec->now() +
- _opts.blacklistSyncSourcePenaltyForOplogStartMissing};
- _replCoord->blacklistSyncSource(_syncSource, until);
+ if (!status.isOK()) {
+ // Got an error, now decide what to do...
+ switch (status.code()) {
+ case ErrorCodes::OplogStartMissing: {
+ // possible rollback
+ _rollbackCommonOptime = findCommonPoint(_syncSource, _lastTimestampApplied);
+ if (_rollbackCommonOptime.isNull()) {
+ auto s = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
+ if (!s) {
+ error() << "Failed to transition to RECOVERING when "
+ "we couldn't find oplog start position ("
+ << _fetcher->getStartTimestamp().toString()
+ << ") from sync source: " << _syncSource.toString();
}
- else {
- // TODO: cleanup state/restart -- set _lastApplied, and other stuff
- }
- break;
- }
- case ErrorCodes::InvalidSyncSource:
- // Error, sync source
- // fallthrough
- default:
Date_t until{_exec->now() +
- _opts.blacklistSyncSourcePenaltyForNetworkConnectionError};
+ _opts.blacklistSyncSourcePenaltyForOplogStartMissing};
_replCoord->blacklistSyncSource(_syncSource, until);
+ } else {
+ // TODO: cleanup state/restart -- set _lastApplied, and other stuff
+ }
+ break;
}
- LockGuard lk(_mutex);
- _syncSource = HostAndPort();
+ case ErrorCodes::InvalidSyncSource:
+ // Error, sync source
+ // fallthrough
+ default:
+ Date_t until{_exec->now() +
+ _opts.blacklistSyncSourcePenaltyForNetworkConnectionError};
+ _replCoord->blacklistSyncSource(_syncSource, until);
}
-
- _doNextActions();
+ LockGuard lk(_mutex);
+ _syncSource = HostAndPort();
}
-} // namespace repl
-} // namespace mongo
+
+ _doNextActions();
+}
+} // namespace repl
+} // namespace mongo
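
For reference, the batch handling in _onOplogFetchFinish() above pushes every fetched document onto the oplog buffer and then scans the batch from the back for the newest 'ts' field. A minimal standalone sketch of that reverse scan, with std::map standing in for BSON documents (an illustrative analogue, not MongoDB code):

#include <map>
#include <optional>
#include <string>
#include <vector>

using Doc = std::map<std::string, long long>;

// Walk the batch from newest to oldest and return the first "ts" value found,
// mirroring the rbegin()/rend() loop in _onOplogFetchFinish().
std::optional<long long> lastTimestampFetched(const std::vector<Doc>& batch) {
    for (auto it = batch.rbegin(); it != batch.rend(); ++it) {
        auto ts = it->find("ts");
        if (ts != it->end()) {
            return ts->second;
        }
    }
    return std::nullopt;  // corresponds to the "Did not find a 'ts' ..." warning path
}
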
diff --git a/src/mongo/db/repl/data_replicator.h b/src/mongo/db/repl/data_replicator.h
index 8f1717db09a..3bfce3e9a93 100644
--- a/src/mongo/db/repl/data_replicator.h
+++ b/src/mongo/db/repl/data_replicator.h
@@ -72,7 +72,7 @@ struct InitialSyncState;
/** State for decision tree */
enum class DataReplicatorState {
- Steady, // Default
+ Steady, // Default
InitialSync,
Rollback,
Uninitialized,
@@ -81,11 +81,7 @@ enum class DataReplicatorState {
std::string toString(DataReplicatorState s);
// TBD -- ignore for now
-enum class DataReplicatorScope {
- ReplicateAll,
- ReplicateDB,
- ReplicateCollection
-};
+enum class DataReplicatorScope { ReplicateAll, ReplicateDB, ReplicateCollection };
struct DataReplicatorOptions {
// Error and retry values
@@ -103,12 +99,11 @@ struct DataReplicatorOptions {
DataReplicatorScope scope = DataReplicatorScope::ReplicateAll;
std::string scopeNS;
BSONObj filterCriteria;
- HostAndPort syncSource; // for use without replCoord -- maybe some kind of rsMonitor/interface
+ HostAndPort syncSource; // for use without replCoord -- maybe some kind of rsMonitor/interface
// TODO: replace with real applier function
- Applier::ApplyOperationFn applierFn = [] (OperationContext*, const BSONObj&) -> Status {
- return Status::OK();
- };
+ Applier::ApplyOperationFn applierFn =
+ [](OperationContext*, const BSONObj&) -> Status { return Status::OK(); };
std::string toString() const {
return str::stream() << "DataReplicatorOptions -- "
@@ -129,7 +124,7 @@ struct DataReplicatorOptions {
class DataReplicator {
public:
/** Function to call when a batch is applied. */
- using OnBatchCompleteFn = stdx::function<void (const Timestamp&)>;
+ using OnBatchCompleteFn = stdx::function<void(const Timestamp&)>;
DataReplicator(DataReplicatorOptions opts,
ReplicationExecutor* exec,
@@ -137,8 +132,7 @@ public:
/**
* Used by non-replication coordinator processes, like sharding.
*/
- DataReplicator(DataReplicatorOptions opts,
- ReplicationExecutor* exec);
+ DataReplicator(DataReplicatorOptions opts, ReplicationExecutor* exec);
/**
* Used for testing.
@@ -165,7 +159,7 @@ public:
void waitForShutdown();
// Resumes apply replication events from the oplog
- Status resume(bool wait=false);
+ Status resume(bool wait = false);
// Pauses replication and application
Status pause();
@@ -189,11 +183,12 @@ public:
// For testing only
void _resetState_inlock(Timestamp lastAppliedOptime);
- void __setSourceForTesting(HostAndPort src) { _syncSource = src; }
+ void __setSourceForTesting(HostAndPort src) {
+ _syncSource = src;
+ }
void _setInitialSyncStorageInterface(CollectionCloner::StorageInterface* si);
private:
-
// Returns OK when there is a good syncSource at _syncSource.
Status _ensureGoodSyncSource_inlock();
@@ -263,39 +258,39 @@ private:
// (I) Independently synchronized, see member variable comment.
// Protects member data of this ReplicationCoordinator.
- mutable stdx::mutex _mutex; // (S)
- DataReplicatorState _state; // (MX)
+ mutable stdx::mutex _mutex; // (S)
+ DataReplicatorState _state; // (MX)
// initial sync state
- std::unique_ptr<InitialSyncState> _initialSyncState; // (M)
- CollectionCloner::StorageInterface* _storage; // (M)
+ std::unique_ptr<InitialSyncState> _initialSyncState; // (M)
+ CollectionCloner::StorageInterface* _storage; // (M)
// set during scheduling and onFinish
- bool _fetcherPaused; // (X)
- std::unique_ptr<OplogFetcher> _fetcher; // (S)
- std::unique_ptr<QueryFetcher> _tmpFetcher; // (S)
+ bool _fetcherPaused; // (X)
+ std::unique_ptr<OplogFetcher> _fetcher; // (S)
+ std::unique_ptr<QueryFetcher> _tmpFetcher; // (S)
- bool _reporterPaused; // (M)
- Handle _reporterHandle; // (M)
- std::unique_ptr<Reporter> _reporter; // (M)
+ bool _reporterPaused; // (M)
+ Handle _reporterHandle; // (M)
+ std::unique_ptr<Reporter> _reporter; // (M)
- bool _applierActive; // (M)
- bool _applierPaused; // (X)
- std::unique_ptr<Applier> _applier; // (M)
- OnBatchCompleteFn _batchCompletedFn; // (M)
+ bool _applierActive; // (M)
+ bool _applierPaused; // (X)
+ std::unique_ptr<Applier> _applier; // (M)
+ OnBatchCompleteFn _batchCompletedFn; // (M)
- HostAndPort _syncSource; // (M)
- Timestamp _lastTimestampFetched; // (MX)
- Timestamp _lastTimestampApplied; // (MX)
- BlockingQueue<BSONObj> _oplogBuffer; // (M)
+ HostAndPort _syncSource; // (M)
+ Timestamp _lastTimestampFetched; // (MX)
+ Timestamp _lastTimestampApplied; // (MX)
+ BlockingQueue<BSONObj> _oplogBuffer; // (M)
// Shutdown
- Event _onShutdown; // (M)
+ Event _onShutdown; // (M)
// Rollback stuff
- Timestamp _rollbackCommonOptime; // (MX)
+ Timestamp _rollbackCommonOptime; // (MX)
};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
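
The single-letter legend above ((S), (M), (MX), (I)) records which members require _mutex. A standalone sketch of the (M) discipline, using std::mutex directly instead of the stdx wrappers (a simplified illustration with invented names, not the real class):

#include <mutex>
#include <string>

class SyncSourceHolder {
public:
    void set(std::string src) {
        std::lock_guard<std::mutex> lk(_mutex);  // lock before writing (M) data
        _syncSource = std::move(src);
    }
    std::string get() const {
        std::lock_guard<std::mutex> lk(_mutex);  // lock before reading it, too
        return _syncSource;
    }

private:
    mutable std::mutex _mutex;  // (S) usable without any locking
    std::string _syncSource;    // (M) all access guarded by _mutex
};
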
diff --git a/src/mongo/db/repl/data_replicator_test.cpp b/src/mongo/db/repl/data_replicator_test.cpp
index 0b8aa6887f5..de8f2da9f12 100644
--- a/src/mongo/db/repl/data_replicator_test.cpp
+++ b/src/mongo/db/repl/data_replicator_test.cpp
@@ -53,599 +53,590 @@
#include "mongo/unittest/unittest.h"
namespace {
- using namespace mongo;
- using namespace mongo::repl;
- using executor::NetworkInterfaceMock;
- using LockGuard = stdx::lock_guard<stdx::mutex>;
- using UniqueLock = stdx::unique_lock<stdx::mutex>;
- using mutex = stdx::mutex;
-
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
+using namespace mongo;
+using namespace mongo::repl;
+using executor::NetworkInterfaceMock;
+using LockGuard = stdx::lock_guard<stdx::mutex>;
+using UniqueLock = stdx::unique_lock<stdx::mutex>;
+using mutex = stdx::mutex;
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+const HostAndPort target("localhost", -1);
+
+class DataReplicatorTest : public ReplicationExecutorTest {
+public:
+ DataReplicatorTest() {}
+
+ void postExecutorThreadLaunch() override{};
+
+ void reset() {
+ // clear/reset state
}
- const HostAndPort target("localhost", -1);
- class DataReplicatorTest : public ReplicationExecutorTest {
- public:
-
- DataReplicatorTest() {}
-
- void postExecutorThreadLaunch() override {};
-
- void reset() {
- // clear/reset state
- }
-
- void createDataReplicator(DataReplicatorOptions opts) {
- _dr.reset(new DataReplicator(opts, &(getExecutor()), _repl.get()));
- _dr->__setSourceForTesting(target);
- }
-
- void createDataReplicator(DataReplicatorOptions opts,
- DataReplicator::OnBatchCompleteFn batchCompletedFn) {
- _dr.reset(new DataReplicator(opts, &(getExecutor()), _repl.get(), batchCompletedFn));
- _dr->__setSourceForTesting(target);
- }
-
- void scheduleNetworkResponse(const BSONObj& obj) {
- NetworkInterfaceMock* net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- scheduleNetworkResponse(net->getNextReadyRequest(), obj);
- }
-
- void scheduleNetworkResponse(NetworkInterfaceMock::NetworkOperationIterator noi,
- const BSONObj& obj) {
- NetworkInterfaceMock* net = getNet();
- Milliseconds millis(0);
- RemoteCommandResponse response(obj, millis);
- ReplicationExecutor::ResponseStatus responseStatus(response);
- net->scheduleResponse(noi, net->now(), responseStatus);
- }
-
- void scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
- NetworkInterfaceMock* net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- ReplicationExecutor::ResponseStatus responseStatus(code, reason);
- net->scheduleResponse(net->getNextReadyRequest(), net->now(), responseStatus);
- }
-
- void processNetworkResponse(const BSONObj& obj) {
- scheduleNetworkResponse(obj);
- finishProcessingNetworkResponse();
- }
-
- void processNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
- scheduleNetworkResponse(code, reason);
- finishProcessingNetworkResponse();
- }
-
- void finishProcessingNetworkResponse() {
- getNet()->runReadyNetworkOperations();
- ASSERT_FALSE(getNet()->hasReadyRequests());
- }
-
- DataReplicator& getDR() { return *_dr; }
- ReplicationCoordinator& getRepl() { return *_repl; }
-
- protected:
-
- void setUp() override {
- ReplicationExecutorTest::setUp();
- reset();
-
- _settings.replSet = "foo"; // We are a replica set :)
- _repl.reset(new ReplicationCoordinatorMock(_settings));
- launchExecutorThread();
- DataReplicatorOptions options;
- options.initialSyncRetryWait = Milliseconds(0);
- createDataReplicator(options);
- }
+ void createDataReplicator(DataReplicatorOptions opts) {
+ _dr.reset(new DataReplicator(opts, &(getExecutor()), _repl.get()));
+ _dr->__setSourceForTesting(target);
+ }
- void tearDown() override {
- ReplicationExecutorTest::tearDown();
- _dr.reset();
- _repl.reset();
- // Executor may still invoke callback before shutting down.
- }
+ void createDataReplicator(DataReplicatorOptions opts,
+ DataReplicator::OnBatchCompleteFn batchCompletedFn) {
+ _dr.reset(new DataReplicator(opts, &(getExecutor()), _repl.get(), batchCompletedFn));
+ _dr->__setSourceForTesting(target);
+ }
- private:
- std::unique_ptr<DataReplicator> _dr;
- std::unique_ptr<ReplicationCoordinator> _repl;
- ReplSettings _settings;
+ void scheduleNetworkResponse(const BSONObj& obj) {
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ scheduleNetworkResponse(net->getNextReadyRequest(), obj);
+ }
- };
+ void scheduleNetworkResponse(NetworkInterfaceMock::NetworkOperationIterator noi,
+ const BSONObj& obj) {
+ NetworkInterfaceMock* net = getNet();
+ Milliseconds millis(0);
+ RemoteCommandResponse response(obj, millis);
+ ReplicationExecutor::ResponseStatus responseStatus(response);
+ net->scheduleResponse(noi, net->now(), responseStatus);
+ }
- TEST_F(DataReplicatorTest, CreateDestroy) {
+ void scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ ReplicationExecutor::ResponseStatus responseStatus(code, reason);
+ net->scheduleResponse(net->getNextReadyRequest(), net->now(), responseStatus);
}
- TEST_F(DataReplicatorTest, StartOk) {
- ASSERT_EQ(getDR().start().code(), ErrorCodes::OK);
+ void processNetworkResponse(const BSONObj& obj) {
+ scheduleNetworkResponse(obj);
+ finishProcessingNetworkResponse();
}
- TEST_F(DataReplicatorTest, CannotInitialSyncAfterStart) {
- ASSERT_EQ(getDR().start().code(), ErrorCodes::OK);
- ASSERT_EQ(getDR().initialSync(), ErrorCodes::AlreadyInitialized);
+ void processNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
+ scheduleNetworkResponse(code, reason);
+ finishProcessingNetworkResponse();
}
- // Used to run a Initial Sync in a separate thread, to avoid blocking test execution.
- class InitialSyncBackgroundRunner {
- public:
+ void finishProcessingNetworkResponse() {
+ getNet()->runReadyNetworkOperations();
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ }
- InitialSyncBackgroundRunner(DataReplicator* dr) :
- _dr(dr),
- _result(Status(ErrorCodes::BadValue, "failed to set status")) {}
+ DataReplicator& getDR() {
+ return *_dr;
+ }
+ ReplicationCoordinator& getRepl() {
+ return *_repl;
+ }
- // Could block if _sgr has not finished
- TimestampStatus getResult() {
- _thread->join();
- return _result;
- }
+protected:
+ void setUp() override {
+ ReplicationExecutorTest::setUp();
+ reset();
+
+ _settings.replSet = "foo"; // We are a replica set :)
+ _repl.reset(new ReplicationCoordinatorMock(_settings));
+ launchExecutorThread();
+ DataReplicatorOptions options;
+ options.initialSyncRetryWait = Milliseconds(0);
+ createDataReplicator(options);
+ }
- void run() {
- _thread.reset(new stdx::thread(stdx::bind(&InitialSyncBackgroundRunner::_run, this)));
- sleepmillis(2); // sleep to let new thread run initialSync so it schedules work
- }
+ void tearDown() override {
+ ReplicationExecutorTest::tearDown();
+ _dr.reset();
+ _repl.reset();
+ // Executor may still invoke callback before shutting down.
+ }
- private:
+private:
+ std::unique_ptr<DataReplicator> _dr;
+ std::unique_ptr<ReplicationCoordinator> _repl;
+ ReplSettings _settings;
+};
+
+TEST_F(DataReplicatorTest, CreateDestroy) {}
+
+TEST_F(DataReplicatorTest, StartOk) {
+ ASSERT_EQ(getDR().start().code(), ErrorCodes::OK);
+}
+
+TEST_F(DataReplicatorTest, CannotInitialSyncAfterStart) {
+ ASSERT_EQ(getDR().start().code(), ErrorCodes::OK);
+ ASSERT_EQ(getDR().initialSync(), ErrorCodes::AlreadyInitialized);
+}
+
+// Used to run an initial sync in a separate thread to avoid blocking test execution.
+class InitialSyncBackgroundRunner {
+public:
+ InitialSyncBackgroundRunner(DataReplicator* dr)
+ : _dr(dr), _result(Status(ErrorCodes::BadValue, "failed to set status")) {}
+
+    // Could block if the initial sync thread has not finished
+ TimestampStatus getResult() {
+ _thread->join();
+ return _result;
+ }
- void _run() {
- setThreadName("InitialSyncRunner");
- log() << "starting initial sync";
- _result = _dr->initialSync(); // blocking
- }
+ void run() {
+ _thread.reset(new stdx::thread(stdx::bind(&InitialSyncBackgroundRunner::_run, this)));
+ sleepmillis(2); // sleep to let new thread run initialSync so it schedules work
+ }
- DataReplicator* _dr;
- TimestampStatus _result;
- std::unique_ptr<stdx::thread> _thread;
- };
+private:
+ void _run() {
+ setThreadName("InitialSyncRunner");
+ log() << "starting initial sync";
+ _result = _dr->initialSync(); // blocking
+ }
- class InitialSyncTest : public DataReplicatorTest {
- public:
- InitialSyncTest()
- : _insertCollectionFn([&](OperationContext* txn,
- const NamespaceString& theNss,
- const std::vector<BSONObj>& theDocuments) {
- log() << "insertDoc for " << theNss.toString();
- LockGuard lk(_collectionCountMutex);
- ++(_collectionCounts[theNss.toString()]);
- return Status::OK();
- }),
- _beginCollectionFn([&](OperationContext* txn,
- const NamespaceString& theNss,
- const CollectionOptions& theOptions,
- const std::vector<BSONObj>& theIndexSpecs) {
- log() << "beginCollection for " << theNss.toString();
- LockGuard lk(_collectionCountMutex);
- _collectionCounts[theNss.toString()] = 0;
- return Status::OK();
- }) {};
-
- protected:
-
- void setStorageFuncs(ClonerStorageInterfaceMock::InsertCollectionFn ins,
- ClonerStorageInterfaceMock::BeginCollectionFn beg) {
- _insertCollectionFn = ins;
- _beginCollectionFn = beg;
- }
+ DataReplicator* _dr;
+ TimestampStatus _result;
+ std::unique_ptr<stdx::thread> _thread;
+};
+
+class InitialSyncTest : public DataReplicatorTest {
+public:
+ InitialSyncTest()
+ : _insertCollectionFn([&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const std::vector<BSONObj>& theDocuments) {
+ log() << "insertDoc for " << theNss.toString();
+ LockGuard lk(_collectionCountMutex);
+ ++(_collectionCounts[theNss.toString()]);
+ return Status::OK();
+ }),
+ _beginCollectionFn([&](OperationContext* txn,
+ const NamespaceString& theNss,
+ const CollectionOptions& theOptions,
+ const std::vector<BSONObj>& theIndexSpecs) {
+ log() << "beginCollection for " << theNss.toString();
+ LockGuard lk(_collectionCountMutex);
+ _collectionCounts[theNss.toString()] = 0;
+ return Status::OK();
+ }){};
+
+protected:
+ void setStorageFuncs(ClonerStorageInterfaceMock::InsertCollectionFn ins,
+ ClonerStorageInterfaceMock::BeginCollectionFn beg) {
+ _insertCollectionFn = ins;
+ _beginCollectionFn = beg;
+ }
- void setResponses(std::vector<BSONObj> resps) {
- _responses = resps;
- }
+ void setResponses(std::vector<BSONObj> resps) {
+ _responses = resps;
+ }
- void startSync() {
- DataReplicator* dr = &(getDR());
+ void startSync() {
+ DataReplicator* dr = &(getDR());
- _storage.beginCollectionFn = _beginCollectionFn;
- _storage.insertDocumentsFn = _insertCollectionFn;
- _storage.insertMissingDocFn = [&] (OperationContext* txn,
- const NamespaceString& nss,
- const BSONObj& doc) {
+ _storage.beginCollectionFn = _beginCollectionFn;
+ _storage.insertDocumentsFn = _insertCollectionFn;
+ _storage.insertMissingDocFn =
+ [&](OperationContext* txn, const NamespaceString& nss, const BSONObj& doc) {
return Status::OK();
};
- dr->_setInitialSyncStorageInterface(&_storage);
- _isbr.reset(new InitialSyncBackgroundRunner(dr));
- _isbr->run();
- }
+ dr->_setInitialSyncStorageInterface(&_storage);
+ _isbr.reset(new InitialSyncBackgroundRunner(dr));
+ _isbr->run();
+ }
- void playResponses() {
- // TODO: Handle network responses
- NetworkInterfaceMock* net = getNet();
- int processedRequests(0);
- const int expectedResponses(_responses.size());
-
- //counter for oplog entries
- int c(0);
- while (true) {
- net->enterNetwork();
- if (!net->hasReadyRequests() && processedRequests < expectedResponses) {
- net->exitNetwork();
- continue;
- }
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
-
- const BSONObj reqBSON = noi->getRequest().cmdObj;
- const BSONElement cmdElem = reqBSON.firstElement();
- const bool isGetMore =
- cmdElem.fieldNameStringData().equalCaseInsensitive("getmore");
- const long long cursorId = cmdElem.numberLong();
- if (isGetMore && cursorId == 1LL) {
- // process getmore requests from the oplog fetcher
- auto respBSON = fromjson(
- str::stream() << "{ok:1, cursor:{id:1, ns:'local.oplog.rs', nextBatch:["
- "{ts:Timestamp(" << ++c << ",1), h:1, ns:'test.a', v:2, op:'u', o2:{_id:"
- << c << "}, o:{$set:{a:1}}}"
- "]}}");
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(RemoteCommandResponse(respBSON,
- Milliseconds(10))));
- net->runReadyNetworkOperations();
- net->exitNetwork();
- continue;
- }
- else if (isGetMore) {
- // TODO: return more data
- }
-
- // process fixed set of responses
- log() << "processing network request: "
- << noi->getRequest().dbname << "." << noi->getRequest().cmdObj.toString();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(
- RemoteCommandResponse(_responses[processedRequests],
- Milliseconds(10))));
+ void playResponses() {
+ // TODO: Handle network responses
+ NetworkInterfaceMock* net = getNet();
+ int processedRequests(0);
+ const int expectedResponses(_responses.size());
+
+ // counter for oplog entries
+ int c(0);
+ while (true) {
+ net->enterNetwork();
+ if (!net->hasReadyRequests() && processedRequests < expectedResponses) {
+ net->exitNetwork();
+ continue;
+ }
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+
+ const BSONObj reqBSON = noi->getRequest().cmdObj;
+ const BSONElement cmdElem = reqBSON.firstElement();
+ const bool isGetMore = cmdElem.fieldNameStringData().equalCaseInsensitive("getmore");
+ const long long cursorId = cmdElem.numberLong();
+ if (isGetMore && cursorId == 1LL) {
+ // process getmore requests from the oplog fetcher
+ auto respBSON = fromjson(str::stream()
+ << "{ok:1, cursor:{id:1, ns:'local.oplog.rs', nextBatch:["
+ "{ts:Timestamp(" << ++c
+ << ",1), h:1, ns:'test.a', v:2, op:'u', o2:{_id:" << c
+ << "}, o:{$set:{a:1}}}"
+ "]}}");
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ ResponseStatus(RemoteCommandResponse(respBSON, Milliseconds(10))));
net->runReadyNetworkOperations();
net->exitNetwork();
- if (++processedRequests >= expectedResponses) {
- log() << "done processing expected requests ";
- break; // once we have processed all requests, continue;
- }
+ continue;
+ } else if (isGetMore) {
+ // TODO: return more data
}
- net->enterNetwork();
- if (net->hasReadyRequests()) {
- log() << "There are unexpected requests left";
- log() << "next cmd: " << net->getNextReadyRequest()->getRequest().cmdObj.toString();
- ASSERT_FALSE(net->hasReadyRequests());
- }
+ // process fixed set of responses
+ log() << "processing network request: " << noi->getRequest().dbname << "."
+ << noi->getRequest().cmdObj.toString();
+ net->scheduleResponse(noi,
+ net->now(),
+ ResponseStatus(RemoteCommandResponse(
+ _responses[processedRequests], Milliseconds(10))));
+ net->runReadyNetworkOperations();
net->exitNetwork();
+ if (++processedRequests >= expectedResponses) {
+            log() << "done processing expected requests";
+            break;  // all expected requests have been processed
+ }
}
- void verifySync(Status s = Status::OK()) {
- verifySync(_isbr->getResult().getStatus().code());
- }
-
- void verifySync(ErrorCodes::Error code) {
- // Check result
- ASSERT_EQ(_isbr->getResult().getStatus().code(), code) << "status codes differ";
- }
-
- std::map<std::string, int> getLocalCollectionCounts() {
- return _collectionCounts;
+ net->enterNetwork();
+ if (net->hasReadyRequests()) {
+ log() << "There are unexpected requests left";
+ log() << "next cmd: " << net->getNextReadyRequest()->getRequest().cmdObj.toString();
+ ASSERT_FALSE(net->hasReadyRequests());
}
-
- private:
- ClonerStorageInterfaceMock::InsertCollectionFn _insertCollectionFn;
- ClonerStorageInterfaceMock::BeginCollectionFn _beginCollectionFn;
- std::vector<BSONObj> _responses;
- std::unique_ptr<InitialSyncBackgroundRunner> _isbr;
- std::map<std::string, int> _collectionCounts; // counts of inserts during cloning
- mutex _collectionCountMutex; // used to protect the collectionCount map
- ClonerStorageInterfaceMock _storage;
- };
-
- TEST_F(InitialSyncTest, Complete) {
- /**
- * Initial Sync will issue these query/commands
- * - startTS = oplog.rs->find().sort({$natural:-1}).limit(-1).next()["ts"]
- * - listDatabases (foreach db do below)
- * -- cloneDatabase (see DatabaseCloner tests).
- * - endTS = oplog.rs->find().sort({$natural:-1}).limit(-1).next()["ts"]
- * - ops = oplog.rs->find({ts:{$gte: startTS}}) (foreach op)
- * -- if local doc is missing, getCollection(op.ns).findOne(_id:op.o2._id)
- * - if any retries were done in the previous loop, endTS query again for minvalid
- *
- */
-
- const std::vector<BSONObj> responses = {
- // get latest oplog ts
- fromjson(
- "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
- "]}}"),
- // oplog fetcher find
- fromjson(
- "{ok:1, cursor:{id:1, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
- "]}}"),
-// Clone Start
- // listDatabases
- fromjson("{ok:1, databases:[{name:'a'}]}"),
- // listCollections for "a"
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.$cmd.listCollections', firstBatch:["
- "{name:'a', options:{}} "
- "]}}"),
- // listIndexes:a
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.$cmd.listIndexes.a', firstBatch:["
- "{v:1, key:{_id:1}, name:'_id_', ns:'a.a'}"
- "]}}"),
- // find:a
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.a', firstBatch:["
- "{_id:1, a:1} "
- "]}}"),
-// Clone Done
- // get latest oplog ts
- fromjson(
- "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(2,2), h:1, ns:'b.c', v:2, op:'i', o:{_id:1, c:1}}"
- "]}}"),
-// Applier starts ...
- };
- startSync();
- setResponses(responses);
- playResponses();
- verifySync();
+ net->exitNetwork();
}
- TEST_F(InitialSyncTest, MissingDocOnApplyCompletes) {
-
- DataReplicatorOptions opts;
- int applyCounter{0};
- opts.applierFn = [&] (OperationContext* txn, const BSONObj& op) {
- if (++applyCounter == 1) {
- return Status(ErrorCodes::NoMatchingDocument, "failed: missing doc.");
- }
- return Status::OK();
- };
- createDataReplicator(opts);
-
- const std::vector<BSONObj> responses = {
- // get latest oplog ts
- fromjson(
- "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
- "]}}"),
- // oplog fetcher find
- fromjson(
- "{ok:1, cursor:{id:1, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'u', o2:{_id:1}, o:{$set:{a:1}}}"
- "]}}"),
-// Clone Start
- // listDatabases
- fromjson("{ok:1, databases:[{name:'a'}]}"),
- // listCollections for "a"
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.$cmd.listCollections', firstBatch:["
- "{name:'a', options:{}} "
- "]}}"),
- // listIndexes:a
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.$cmd.listIndexes.a', firstBatch:["
- "{v:1, key:{_id:1}, name:'_id_', ns:'a.a'}"
- "]}}"),
- // find:a -- empty
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.a', firstBatch:[]}}"),
-// Clone Done
- // get latest oplog ts
- fromjson(
- "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(2,2), h:1, ns:'b.c', v:2, op:'i', o:{_id:1, c:1}}"
- "]}}"),
-// Applier starts ...
- // missing doc fetch -- find:a {_id:1}
- fromjson(
- "{ok:1, cursor:{id:0, ns:'a.a', firstBatch:["
- "{_id:1, a:1} "
- "]}}"),
- };
- startSync();
- setResponses(responses);
- playResponses();
- verifySync(ErrorCodes::OK);
+    void verifySync(Status s = Status::OK()) {
+        verifySync(s.code());  // check the runner's result against the expected status
+    }
- TEST_F(InitialSyncTest, Failpoint) {
- mongo::getGlobalFailPointRegistry()->
- getFailPoint("failInitialSyncWithBadHost")->
- setMode(FailPoint::alwaysOn);
-
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
-
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
- Timestamp time1(100, 1);
- OpTime opTime1(time1, OpTime::kDefaultTerm);
- getRepl().setMyLastOptime(opTime1);
- ASSERT(getRepl().setFollowerMode(MemberState::RS_SECONDARY));
-
- DataReplicator* dr = &(getDR());
- InitialSyncBackgroundRunner isbr(dr);
- isbr.run();
- ASSERT_EQ(isbr.getResult().getStatus().code(), ErrorCodes::InitialSyncFailure);
-
- mongo::getGlobalFailPointRegistry()->
- getFailPoint("failInitialSyncWithBadHost")->
- setMode(FailPoint::off);
+ void verifySync(ErrorCodes::Error code) {
+ // Check result
+ ASSERT_EQ(_isbr->getResult().getStatus().code(), code) << "status codes differ";
}
- TEST_F(InitialSyncTest, FailsOnClone) {
- const std::vector<BSONObj> responses = {
- // get latest oplog ts
- fromjson(
- "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
- "]}}"),
- // oplog fetcher find
- fromjson(
- "{ok:1, cursor:{id:1, ns:'local.oplog.rs', firstBatch:["
- "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
- "]}}"),
-// Clone Start
- // listDatabases
- fromjson("{ok:0}")
- };
- startSync();
- setResponses(responses);
- playResponses();
- verifySync(ErrorCodes::InitialSyncFailure);
+ std::map<std::string, int> getLocalCollectionCounts() {
+ return _collectionCounts;
}
- class SteadyStateTest : public DataReplicatorTest {
- protected:
- void _testOplogStartMissing(const BSONObj& oplogFetcherResponse) {
- DataReplicator& dr = getDR();
- auto net = getNet();
- net->enterNetwork();
- ASSERT_OK(dr.start());
-
- ASSERT_TRUE(net->hasReadyRequests());
- auto noi = net->getNextReadyRequest();
- scheduleNetworkResponse(noi, oplogFetcherResponse);
- net->runReadyNetworkOperations();
- ASSERT_EQUALS(MemberState(MemberState::RS_RECOVERING).toString(),
- getRepl().getMemberState().toString());
+private:
+ ClonerStorageInterfaceMock::InsertCollectionFn _insertCollectionFn;
+ ClonerStorageInterfaceMock::BeginCollectionFn _beginCollectionFn;
+ std::vector<BSONObj> _responses;
+ std::unique_ptr<InitialSyncBackgroundRunner> _isbr;
+ std::map<std::string, int> _collectionCounts; // counts of inserts during cloning
+ mutex _collectionCountMutex; // used to protect the collectionCount map
+ ClonerStorageInterfaceMock _storage;
+};
+
+TEST_F(InitialSyncTest, Complete) {
+ /**
+     * Initial sync will issue these queries and commands:
+ * - startTS = oplog.rs->find().sort({$natural:-1}).limit(-1).next()["ts"]
+ * - listDatabases (foreach db do below)
+ * -- cloneDatabase (see DatabaseCloner tests).
+ * - endTS = oplog.rs->find().sort({$natural:-1}).limit(-1).next()["ts"]
+ * - ops = oplog.rs->find({ts:{$gte: startTS}}) (foreach op)
+ * -- if local doc is missing, getCollection(op.ns).findOne(_id:op.o2._id)
+ * - if any retries were done in the previous loop, endTS query again for minvalid
+ *
+ */
+
+ const std::vector<BSONObj> responses = {
+ // get latest oplog ts
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
+ "]}}"),
+ // oplog fetcher find
+ fromjson(
+ "{ok:1, cursor:{id:1, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
+ "]}}"),
+ // Clone Start
+ // listDatabases
+ fromjson("{ok:1, databases:[{name:'a'}]}"),
+ // listCollections for "a"
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'a.$cmd.listCollections', firstBatch:["
+ "{name:'a', options:{}} "
+ "]}}"),
+ // listIndexes:a
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'a.$cmd.listIndexes.a', firstBatch:["
+ "{v:1, key:{_id:1}, name:'_id_', ns:'a.a'}"
+ "]}}"),
+ // find:a
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'a.a', firstBatch:["
+ "{_id:1, a:1} "
+ "]}}"),
+ // Clone Done
+ // get latest oplog ts
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(2,2), h:1, ns:'b.c', v:2, op:'i', o:{_id:1, c:1}}"
+ "]}}"),
+ // Applier starts ...
+ };
+ startSync();
+ setResponses(responses);
+ playResponses();
+ verifySync();
+}
+
+TEST_F(InitialSyncTest, MissingDocOnApplyCompletes) {
+ DataReplicatorOptions opts;
+ int applyCounter{0};
+ opts.applierFn = [&](OperationContext* txn, const BSONObj& op) {
+ if (++applyCounter == 1) {
+ return Status(ErrorCodes::NoMatchingDocument, "failed: missing doc.");
}
-
+ return Status::OK();
};
-
- TEST_F(SteadyStateTest, StartWhenInSteadyState) {
- DataReplicator& dr = getDR();
- ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
- ASSERT_OK(dr.start());
- ASSERT_EQUALS(toString(DataReplicatorState::Steady), toString(dr.getState()));
- ASSERT_EQUALS(ErrorCodes::IllegalOperation, dr.start().code());
- }
-
- TEST_F(SteadyStateTest, ShutdownAfterStart) {
+ createDataReplicator(opts);
+
+ const std::vector<BSONObj> responses = {
+ // get latest oplog ts
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
+ "]}}"),
+ // oplog fetcher find
+ fromjson(
+ "{ok:1, cursor:{id:1, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'u', o2:{_id:1}, o:{$set:{a:1}}}"
+ "]}}"),
+ // Clone Start
+ // listDatabases
+ fromjson("{ok:1, databases:[{name:'a'}]}"),
+ // listCollections for "a"
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'a.$cmd.listCollections', firstBatch:["
+ "{name:'a', options:{}} "
+ "]}}"),
+ // listIndexes:a
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'a.$cmd.listIndexes.a', firstBatch:["
+ "{v:1, key:{_id:1}, name:'_id_', ns:'a.a'}"
+ "]}}"),
+ // find:a -- empty
+ fromjson("{ok:1, cursor:{id:0, ns:'a.a', firstBatch:[]}}"),
+ // Clone Done
+ // get latest oplog ts
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(2,2), h:1, ns:'b.c', v:2, op:'i', o:{_id:1, c:1}}"
+ "]}}"),
+ // Applier starts ...
+ // missing doc fetch -- find:a {_id:1}
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'a.a', firstBatch:["
+ "{_id:1, a:1} "
+ "]}}"),
+ };
+ startSync();
+ setResponses(responses);
+ playResponses();
+ verifySync(ErrorCodes::OK);
+}
+
+TEST_F(InitialSyncTest, Failpoint) {
+ mongo::getGlobalFailPointRegistry()
+ ->getFailPoint("failInitialSyncWithBadHost")
+ ->setMode(FailPoint::alwaysOn);
+
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+ Timestamp time1(100, 1);
+ OpTime opTime1(time1, OpTime::kDefaultTerm);
+ getRepl().setMyLastOptime(opTime1);
+ ASSERT(getRepl().setFollowerMode(MemberState::RS_SECONDARY));
+
+ DataReplicator* dr = &(getDR());
+ InitialSyncBackgroundRunner isbr(dr);
+ isbr.run();
+ ASSERT_EQ(isbr.getResult().getStatus().code(), ErrorCodes::InitialSyncFailure);
+
+ mongo::getGlobalFailPointRegistry()
+ ->getFailPoint("failInitialSyncWithBadHost")
+ ->setMode(FailPoint::off);
+}
+
+TEST_F(InitialSyncTest, FailsOnClone) {
+ const std::vector<BSONObj> responses = {
+ // get latest oplog ts
+ fromjson(
+ "{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
+ "]}}"),
+ // oplog fetcher find
+ fromjson(
+ "{ok:1, cursor:{id:1, ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:1, ns:'a.a', v:2, op:'i', o:{_id:1, a:1}}"
+ "]}}"),
+ // Clone Start
+ // listDatabases
+ fromjson("{ok:0}")};
+ startSync();
+ setResponses(responses);
+ playResponses();
+ verifySync(ErrorCodes::InitialSyncFailure);
+}
+
+class SteadyStateTest : public DataReplicatorTest {
+protected:
+ void _testOplogStartMissing(const BSONObj& oplogFetcherResponse) {
DataReplicator& dr = getDR();
- ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
auto net = getNet();
net->enterNetwork();
ASSERT_OK(dr.start());
- ASSERT_TRUE(net->hasReadyRequests());
- getExecutor().shutdown();
- ASSERT_EQUALS(toString(DataReplicatorState::Steady), toString(dr.getState()));
- ASSERT_EQUALS(ErrorCodes::IllegalOperation, dr.start().code());
- }
- TEST_F(SteadyStateTest, RequestShutdownAfterStart) {
- DataReplicator& dr = getDR();
- ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
- auto net = getNet();
- net->enterNetwork();
- ASSERT_OK(dr.start());
ASSERT_TRUE(net->hasReadyRequests());
- ASSERT_EQUALS(toString(DataReplicatorState::Steady), toString(dr.getState()));
- // Simulating an invalid remote oplog query response. This will invalidate the existing
- // sync source but that's fine because we're not testing oplog processing.
- scheduleNetworkResponse(BSON("ok" << 0));
+ auto noi = net->getNextReadyRequest();
+ scheduleNetworkResponse(noi, oplogFetcherResponse);
net->runReadyNetworkOperations();
- ASSERT_OK(dr.scheduleShutdown());
- net->exitNetwork(); // runs work item scheduled in 'scheduleShutdown()).
- dr.waitForShutdown();
- ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
- }
-
- TEST_F(SteadyStateTest, RemoteOplogEmpty) {
- _testOplogStartMissing(
- fromjson("{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch: []}}"));
- }
-
- TEST_F(SteadyStateTest, RemoteOplogFirstOperationMissingTimestamp) {
- _testOplogStartMissing(
- fromjson("{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch: [{}]}}"));
- }
-
- TEST_F(SteadyStateTest, RemoteOplogFirstOperationTimestampDoesNotMatch) {
- _testOplogStartMissing(
- fromjson("{ok:1, cursor:{id:0, ns:'local.oplog.rs', "
- "firstBatch: [{ts:Timestamp(1,1)}]}}"));
+ ASSERT_EQUALS(MemberState(MemberState::RS_RECOVERING).toString(),
+ getRepl().getMemberState().toString());
}
-
- TEST_F(SteadyStateTest, ApplyOneOperation) {
- auto operationToApply = BSON("op" << "a" << "ts" << Timestamp(Seconds(123), 0));
- stdx::mutex mutex;
- unittest::Barrier barrier(2U);
- Timestamp lastTimestampApplied;
- BSONObj operationApplied;
- auto batchCompletedFn = [&] (const Timestamp& ts) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- lastTimestampApplied = ts;
- barrier.countDownAndWait();
- };
- DataReplicatorOptions opts;
- opts.applierFn = [&] (OperationContext* txn, const BSONObj& op) {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- operationApplied = op;
- barrier.countDownAndWait();
- return Status::OK();
- };
- createDataReplicator(opts, batchCompletedFn);
-
- auto& repl = getRepl();
- repl.setMyLastOptime(OpTime(operationToApply["ts"].timestamp(), 0));
- ASSERT_TRUE(repl.setFollowerMode(MemberState::RS_SECONDARY));
-
- auto net = getNet();
- net->enterNetwork();
-
- auto& dr = getDR();
- ASSERT_OK(dr.start());
-
- ASSERT_TRUE(net->hasReadyRequests());
- {
- auto networkRequest = net->getNextReadyRequest();
- auto commandResponse = BSON(
- "ok" << 1 <<
- "cursor" << BSON(
- "id" << 0LL <<
- "ns" << "local.oplog.rs" <<
- "firstBatch" << BSON_ARRAY(operationToApply)));
- scheduleNetworkResponse(networkRequest, commandResponse);
- }
- net->runReadyNetworkOperations();
-
- // Wait for applier function.
+};
+
+TEST_F(SteadyStateTest, StartWhenInSteadyState) {
+ DataReplicator& dr = getDR();
+ ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
+ ASSERT_OK(dr.start());
+ ASSERT_EQUALS(toString(DataReplicatorState::Steady), toString(dr.getState()));
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation, dr.start().code());
+}
+
+TEST_F(SteadyStateTest, ShutdownAfterStart) {
+ DataReplicator& dr = getDR();
+ ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
+ auto net = getNet();
+ net->enterNetwork();
+ ASSERT_OK(dr.start());
+ ASSERT_TRUE(net->hasReadyRequests());
+ getExecutor().shutdown();
+ ASSERT_EQUALS(toString(DataReplicatorState::Steady), toString(dr.getState()));
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation, dr.start().code());
+}
+
+TEST_F(SteadyStateTest, RequestShutdownAfterStart) {
+ DataReplicator& dr = getDR();
+ ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
+ auto net = getNet();
+ net->enterNetwork();
+ ASSERT_OK(dr.start());
+ ASSERT_TRUE(net->hasReadyRequests());
+ ASSERT_EQUALS(toString(DataReplicatorState::Steady), toString(dr.getState()));
+ // Simulating an invalid remote oplog query response. This will invalidate the existing
+ // sync source but that's fine because we're not testing oplog processing.
+ scheduleNetworkResponse(BSON("ok" << 0));
+ net->runReadyNetworkOperations();
+ ASSERT_OK(dr.scheduleShutdown());
+    net->exitNetwork();  // runs the work item scheduled in 'scheduleShutdown()'.
+ dr.waitForShutdown();
+ ASSERT_EQUALS(toString(DataReplicatorState::Uninitialized), toString(dr.getState()));
+}
+
+TEST_F(SteadyStateTest, RemoteOplogEmpty) {
+ _testOplogStartMissing(fromjson("{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch: []}}"));
+}
+
+TEST_F(SteadyStateTest, RemoteOplogFirstOperationMissingTimestamp) {
+ _testOplogStartMissing(
+ fromjson("{ok:1, cursor:{id:0, ns:'local.oplog.rs', firstBatch: [{}]}}"));
+}
+
+TEST_F(SteadyStateTest, RemoteOplogFirstOperationTimestampDoesNotMatch) {
+ _testOplogStartMissing(fromjson(
+ "{ok:1, cursor:{id:0, ns:'local.oplog.rs', "
+ "firstBatch: [{ts:Timestamp(1,1)}]}}"));
+}
+
+TEST_F(SteadyStateTest, ApplyOneOperation) {
+ auto operationToApply = BSON("op"
+ << "a"
+ << "ts" << Timestamp(Seconds(123), 0));
+ stdx::mutex mutex;
+ unittest::Barrier barrier(2U);
+ Timestamp lastTimestampApplied;
+ BSONObj operationApplied;
+ auto batchCompletedFn = [&](const Timestamp& ts) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ lastTimestampApplied = ts;
barrier.countDownAndWait();
- ASSERT_EQUALS(operationToApply["ts"].timestamp(), dr.getLastTimestampFetched());
- // Run scheduleWork() work item scheduled in DataReplicator::_onApplyBatchFinish().
- net->exitNetwork();
-
- // Wait for batch completion callback.
+ };
+ DataReplicatorOptions opts;
+ opts.applierFn = [&](OperationContext* txn, const BSONObj& op) {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ operationApplied = op;
barrier.countDownAndWait();
+ return Status::OK();
+ };
+ createDataReplicator(opts, batchCompletedFn);
+
+ auto& repl = getRepl();
+ repl.setMyLastOptime(OpTime(operationToApply["ts"].timestamp(), 0));
+ ASSERT_TRUE(repl.setFollowerMode(MemberState::RS_SECONDARY));
+
+ auto net = getNet();
+ net->enterNetwork();
+
+ auto& dr = getDR();
+ ASSERT_OK(dr.start());
+
+ ASSERT_TRUE(net->hasReadyRequests());
+ {
+ auto networkRequest = net->getNextReadyRequest();
+ auto commandResponse = BSON(
+ "ok" << 1 << "cursor" << BSON("id" << 0LL << "ns"
+ << "local.oplog.rs"
+ << "firstBatch" << BSON_ARRAY(operationToApply)));
+ scheduleNetworkResponse(networkRequest, commandResponse);
+ }
+ net->runReadyNetworkOperations();
+
+ // Wait for applier function.
+ barrier.countDownAndWait();
+ ASSERT_EQUALS(operationToApply["ts"].timestamp(), dr.getLastTimestampFetched());
+ // Run scheduleWork() work item scheduled in DataReplicator::_onApplyBatchFinish().
+ net->exitNetwork();
+
+ // Wait for batch completion callback.
+ barrier.countDownAndWait();
+
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ repl.getMemberState().toString());
+ {
+ stdx::lock_guard<stdx::mutex> lock(mutex);
+ ASSERT_EQUALS(operationToApply, operationApplied);
+ ASSERT_EQUALS(operationToApply["ts"].timestamp(), lastTimestampApplied);
+ }
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- repl.getMemberState().toString());
- {
- stdx::lock_guard<stdx::mutex> lock(mutex);
- ASSERT_EQUALS(operationToApply, operationApplied);
- ASSERT_EQUALS(operationToApply["ts"].timestamp(), lastTimestampApplied);
- }
-
- // Ensure that we send position information upstream after completing batch.
- net->enterNetwork();
- ASSERT_TRUE(net->hasReadyRequests());
- {
- auto networkRequest = net->getNextReadyRequest();
- auto commandRequest = networkRequest->getRequest();
- ASSERT_EQUALS("admin", commandRequest.dbname);
- const auto& cmdObj = commandRequest.cmdObj;
- ASSERT_EQUALS(std::string("replSetUpdatePosition"), cmdObj.firstElementFieldName());
- }
+ // Ensure that we send position information upstream after completing batch.
+ net->enterNetwork();
+ ASSERT_TRUE(net->hasReadyRequests());
+ {
+ auto networkRequest = net->getNextReadyRequest();
+ auto commandRequest = networkRequest->getRequest();
+ ASSERT_EQUALS("admin", commandRequest.dbname);
+ const auto& cmdObj = commandRequest.cmdObj;
+ ASSERT_EQUALS(std::string("replSetUpdatePosition"), cmdObj.firstElementFieldName());
}
+}
-} // namespace
+} // namespace
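
The fixture above drives everything through NetworkInterfaceMock: the code under test queues outbound requests, and the test pairs each one with a canned response before pumping runReadyNetworkOperations(). A toy standalone analogue of that pattern (the names and types here are invented for illustration; the real mock is far richer):

#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <utility>

struct MockNetwork {
    using Callback = std::function<void(const std::string&)>;
    std::queue<std::pair<std::string, Callback>> requests;  // command + completion
    std::queue<std::string> responses;                      // canned replies

    bool hasReadyRequests() const { return !requests.empty(); }
    void scheduleResponse(std::string resp) { responses.push(std::move(resp)); }

    // Deliver one canned response per outstanding request, like the
    // fixture's runReadyNetworkOperations() loop.
    void runReadyNetworkOperations() {
        while (!requests.empty() && !responses.empty()) {
            auto [cmd, cb] = std::move(requests.front());
            requests.pop();
            std::cout << "processing network request: " << cmd << '\n';
            cb(responses.front());
            responses.pop();
        }
    }
};

int main() {
    MockNetwork net;
    net.requests.push({"{find: 'local.oplog.rs'}", [](const std::string& resp) {
        std::cout << "response: " << resp << '\n';
    }});
    net.scheduleResponse("{ok:1, cursor:{id:0, firstBatch:[]}}");
    net.runReadyNetworkOperations();
}
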
diff --git a/src/mongo/db/repl/database_cloner.cpp b/src/mongo/db/repl/database_cloner.cpp
index b96ab403169..b0c0bddcd01 100644
--- a/src/mongo/db/repl/database_cloner.cpp
+++ b/src/mongo/db/repl/database_cloner.cpp
@@ -47,294 +47,287 @@ namespace repl {
namespace {
- const char* kNameFieldName = "name";
- const char* kOptionsFieldName = "options";
-
- /**
- * Default listCollections predicate.
- */
- bool acceptAllPred(const BSONObj&) {
- return true;
- }
+const char* kNameFieldName = "name";
+const char* kOptionsFieldName = "options";
- /**
- * Creates a listCollections command obj with an optional filter.
- */
- BSONObj createListCollectionsCommandObject(const BSONObj& filter) {
- BSONObjBuilder output;
- output.append("listCollections", 1);
- if (!filter.isEmpty()) {
- output.append("filter", filter);
- }
- return output.obj();
- }
+/**
+ * Default listCollections predicate.
+ */
+bool acceptAllPred(const BSONObj&) {
+ return true;
+}
-} // namespace
-
- DatabaseCloner::DatabaseCloner(ReplicationExecutor* executor,
- const HostAndPort& source,
- const std::string& dbname,
- const BSONObj& listCollectionsFilter,
- const ListCollectionsPredicateFn& listCollectionsPred,
- CollectionCloner::StorageInterface* si,
- const CollectionCallbackFn& collWork,
- const CallbackFn& onCompletion)
- : _executor(executor),
- _source(source),
- _dbname(dbname),
- _listCollectionsFilter(listCollectionsFilter),
- _listCollectionsPredicate(listCollectionsPred ? listCollectionsPred : acceptAllPred),
- _storageInterface(si),
- _collectionWork(collWork),
- _onCompletion(onCompletion),
- _active(false),
- _listCollectionsFetcher(_executor,
- _source,
- _dbname,
- createListCollectionsCommandObject(_listCollectionsFilter),
- stdx::bind(&DatabaseCloner::_listCollectionsCallback,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- stdx::placeholders::_3)),
- _scheduleDbWorkFn([this](const ReplicationExecutor::CallbackFn& work) {
- return _executor->scheduleDBWork(work);
- }),
- _startCollectionCloner([](CollectionCloner& cloner) { return cloner.start(); }) {
-
- uassert(ErrorCodes::BadValue, "null replication executor", executor);
- uassert(ErrorCodes::BadValue, "empty database name", !dbname.empty());
- uassert(ErrorCodes::BadValue, "storage interface cannot be null", si);
- uassert(ErrorCodes::BadValue, "collection callback function cannot be null", collWork);
- uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
+/**
+ * Creates a listCollections command obj with an optional filter.
+ */
+BSONObj createListCollectionsCommandObject(const BSONObj& filter) {
+ BSONObjBuilder output;
+ output.append("listCollections", 1);
+ if (!filter.isEmpty()) {
+ output.append("filter", filter);
}
-
- DatabaseCloner::~DatabaseCloner() {
- DESTRUCTOR_GUARD(
- cancel();
- wait();
- );
+ return output.obj();
+}
+
+} // namespace
+
+DatabaseCloner::DatabaseCloner(ReplicationExecutor* executor,
+ const HostAndPort& source,
+ const std::string& dbname,
+ const BSONObj& listCollectionsFilter,
+ const ListCollectionsPredicateFn& listCollectionsPred,
+ CollectionCloner::StorageInterface* si,
+ const CollectionCallbackFn& collWork,
+ const CallbackFn& onCompletion)
+ : _executor(executor),
+ _source(source),
+ _dbname(dbname),
+ _listCollectionsFilter(listCollectionsFilter),
+ _listCollectionsPredicate(listCollectionsPred ? listCollectionsPred : acceptAllPred),
+ _storageInterface(si),
+ _collectionWork(collWork),
+ _onCompletion(onCompletion),
+ _active(false),
+ _listCollectionsFetcher(_executor,
+ _source,
+ _dbname,
+ createListCollectionsCommandObject(_listCollectionsFilter),
+ stdx::bind(&DatabaseCloner::_listCollectionsCallback,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ stdx::placeholders::_3)),
+ _scheduleDbWorkFn([this](const ReplicationExecutor::CallbackFn& work) {
+ return _executor->scheduleDBWork(work);
+ }),
+ _startCollectionCloner([](CollectionCloner& cloner) { return cloner.start(); }) {
+ uassert(ErrorCodes::BadValue, "null replication executor", executor);
+ uassert(ErrorCodes::BadValue, "empty database name", !dbname.empty());
+ uassert(ErrorCodes::BadValue, "storage interface cannot be null", si);
+ uassert(ErrorCodes::BadValue, "collection callback function cannot be null", collWork);
+ uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
+}
+
+DatabaseCloner::~DatabaseCloner() {
+ DESTRUCTOR_GUARD(cancel(); wait(););
+}
+
+const std::vector<BSONObj>& DatabaseCloner::getCollectionInfos() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _collectionInfos;
+}
+
+std::string DatabaseCloner::getDiagnosticString() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ str::stream output;
+ output << "DatabaseCloner";
+ output << " executor: " << _executor->getDiagnosticString();
+ output << " source: " << _source.toString();
+ output << " database: " << _dbname;
+    output << " listCollections filter: " << _listCollectionsFilter;
+ output << " active: " << _active;
+ output << " collection info objects (empty if listCollections is in progress): "
+ << _collectionInfos.size();
+ return output;
+}
+
+bool DatabaseCloner::isActive() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _active;
+}
+
+Status DatabaseCloner::start() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+ if (_active) {
+ return Status(ErrorCodes::IllegalOperation, "database cloner already started");
}
- const std::vector<BSONObj>& DatabaseCloner::getCollectionInfos() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _collectionInfos;
+ Status scheduleResult = _listCollectionsFetcher.schedule();
+ if (!scheduleResult.isOK()) {
+ return scheduleResult;
}
- std::string DatabaseCloner::getDiagnosticString() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- str::stream output;
- output << "DatabaseCloner";
- output << " executor: " << _executor->getDiagnosticString();
- output << " source: " << _source.toString();
- output << " database: " << _dbname;
- output << " listCollections filter" << _listCollectionsFilter;
- output << " active: " << _active;
- output << " collection info objects (empty if listCollections is in progress): "
- << _collectionInfos.size();
- return output;
- }
+ _active = true;
- bool DatabaseCloner::isActive() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _active;
- }
+ return Status::OK();
+}
- Status DatabaseCloner::start() {
+void DatabaseCloner::cancel() {
+ {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_active) {
- return Status(ErrorCodes::IllegalOperation, "database cloner already started");
- }
-
- Status scheduleResult = _listCollectionsFetcher.schedule();
- if (!scheduleResult.isOK()) {
- return scheduleResult;
+ if (!_active) {
+ return;
}
-
- _active = true;
-
- return Status::OK();
}
- void DatabaseCloner::cancel() {
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _listCollectionsFetcher.cancel();
+}
- if (!_active) {
- return;
- }
- }
+void DatabaseCloner::wait() {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _condition.wait(lk, [this]() { return !_active; });
+}
- _listCollectionsFetcher.cancel();
- }
+void DatabaseCloner::setScheduleDbWorkFn(const CollectionCloner::ScheduleDbWorkFn& work) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
- void DatabaseCloner::wait() {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _condition.wait(lk, [this]() { return !_active; });
- }
+ _scheduleDbWorkFn = work;
+}
- void DatabaseCloner::setScheduleDbWorkFn(const CollectionCloner::ScheduleDbWorkFn& work) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+void DatabaseCloner::setStartCollectionClonerFn(
+ const StartCollectionClonerFn& startCollectionCloner) {
+ _startCollectionCloner = startCollectionCloner;
+}
- _scheduleDbWorkFn = work;
+void DatabaseCloner::_listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& result,
+ Fetcher::NextAction* nextAction,
+ BSONObjBuilder* getMoreBob) {
+ if (!result.isOK()) {
+ _finishCallback(result.getStatus());
+ return;
}
- void DatabaseCloner::setStartCollectionClonerFn(
- const StartCollectionClonerFn& startCollectionCloner) {
-
- _startCollectionCloner = startCollectionCloner;
+ auto batchData(result.getValue());
+ auto&& documents = batchData.documents;
+
+    // This callback may run once per batch, so _collectionInfos may need to grow.
+ _collectionInfos.reserve(_collectionInfos.size() + documents.size());
+ std::copy_if(documents.begin(),
+ documents.end(),
+ std::back_inserter(_collectionInfos),
+ _listCollectionsPredicate);
+
+ // The fetcher will continue to call with kGetMore until an error or the last batch.
+ if (*nextAction == Fetcher::NextAction::kGetMore) {
+ invariant(getMoreBob);
+ getMoreBob->append("getMore", batchData.cursorId);
+ getMoreBob->append("collection", batchData.nss.coll());
+ return;
}
- void DatabaseCloner::_listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& result,
- Fetcher::NextAction* nextAction,
- BSONObjBuilder* getMoreBob) {
+ // Nothing to do for an empty database.
+ if (_collectionInfos.empty()) {
+ _finishCallback(Status::OK());
+ return;
+ }
- if (!result.isOK()) {
- _finishCallback(result.getStatus());
+ _collectionNamespaces.reserve(_collectionInfos.size());
+ std::set<std::string> seen;
+ for (auto&& info : _collectionInfos) {
+ BSONElement nameElement = info.getField(kNameFieldName);
+ if (nameElement.eoo()) {
+ _finishCallback(Status(ErrorCodes::FailedToParse,
+ str::stream() << "collection info must contain '"
+ << kNameFieldName << "' "
+ << "field : " << info));
return;
}
-
- auto batchData(result.getValue());
- auto&& documents = batchData.documents;
-
- // We may be called with multiple batches leading to a need to grow _collectionInfos.
- _collectionInfos.reserve(_collectionInfos.size() + documents.size());
- std::copy_if(documents.begin(), documents.end(),
- std::back_inserter(_collectionInfos),
- _listCollectionsPredicate);
-
- // The fetcher will continue to call with kGetMore until an error or the last batch.
- if (*nextAction == Fetcher::NextAction::kGetMore) {
- invariant(getMoreBob);
- getMoreBob->append("getMore", batchData.cursorId);
- getMoreBob->append("collection", batchData.nss.coll());
+ if (nameElement.type() != mongo::String) {
+ _finishCallback(Status(ErrorCodes::TypeMismatch,
+ str::stream() << "'" << kNameFieldName
+ << "' field must be a string: " << info));
return;
}
-
- // Nothing to do for an empty database.
- if (_collectionInfos.empty()) {
- _finishCallback(Status::OK());
+ const std::string collectionName = nameElement.String();
+ if (seen.find(collectionName) != seen.end()) {
+ _finishCallback(Status(ErrorCodes::DuplicateKey,
+ str::stream()
+ << "collection info contains duplicate collection name "
+ << "'" << collectionName << "': " << info));
return;
}
- _collectionNamespaces.reserve(_collectionInfos.size());
- std::set<std::string> seen;
- for (auto&& info : _collectionInfos) {
- BSONElement nameElement = info.getField(kNameFieldName);
- if (nameElement.eoo()) {
- _finishCallback(Status(ErrorCodes::FailedToParse, str::stream() <<
- "collection info must contain '" << kNameFieldName << "' " <<
- "field : " << info));
- return;
- }
- if (nameElement.type() != mongo::String) {
- _finishCallback(Status(ErrorCodes::TypeMismatch, str::stream() <<
- "'" << kNameFieldName << "' field must be a string: " << info));
- return;
- }
- const std::string collectionName = nameElement.String();
- if (seen.find(collectionName) != seen.end()) {
- _finishCallback(Status(ErrorCodes::DuplicateKey, str::stream() <<
- "collection info contains duplicate collection name " <<
- "'" << collectionName << "': " << info));
- return;
- }
-
- BSONElement optionsElement = info.getField(kOptionsFieldName);
- if (optionsElement.eoo()) {
- _finishCallback(Status(ErrorCodes::FailedToParse, str::stream() <<
- "collection info must contain '" << kOptionsFieldName << "' " <<
- "field : " << info));
- return;
- }
- if (!optionsElement.isABSONObj()) {
- _finishCallback(Status(ErrorCodes::TypeMismatch, str::stream() <<
- "'" << kOptionsFieldName << "' field must be an object: " <<
- info));
- return;
- }
- const BSONObj optionsObj = optionsElement.Obj();
- CollectionOptions options;
- Status parseStatus = options.parse(optionsObj);
- if (!parseStatus.isOK()) {
- _finishCallback(parseStatus);
- return;
- }
- seen.insert(collectionName);
-
- _collectionNamespaces.emplace_back(_dbname, collectionName);
- auto&& nss = *_collectionNamespaces.crbegin();
-
- try {
- _collectionCloners.emplace_back(
- _executor,
- _source,
- nss,
- options,
- stdx::bind(&DatabaseCloner::_collectionClonerCallback,
- this,
- stdx::placeholders::_1,
- nss),
- _storageInterface);
- }
- catch (const UserException& ex) {
- _finishCallback(ex.toStatus());
- return;
- }
+ BSONElement optionsElement = info.getField(kOptionsFieldName);
+ if (optionsElement.eoo()) {
+ _finishCallback(Status(ErrorCodes::FailedToParse,
+ str::stream() << "collection info must contain '"
+ << kOptionsFieldName << "' "
+ << "field : " << info));
+ return;
}
-
- for (auto&& collectionCloner : _collectionCloners) {
- collectionCloner.setScheduleDbWorkFn(_scheduleDbWorkFn);
+ if (!optionsElement.isABSONObj()) {
+ _finishCallback(Status(ErrorCodes::TypeMismatch,
+ str::stream() << "'" << kOptionsFieldName
+ << "' field must be an object: " << info));
+ return;
}
+ const BSONObj optionsObj = optionsElement.Obj();
+ CollectionOptions options;
+ Status parseStatus = options.parse(optionsObj);
+ if (!parseStatus.isOK()) {
+ _finishCallback(parseStatus);
+ return;
+ }
+ seen.insert(collectionName);
+
+ _collectionNamespaces.emplace_back(_dbname, collectionName);
+ auto&& nss = *_collectionNamespaces.crbegin();
+
+ try {
+ _collectionCloners.emplace_back(
+ _executor,
+ _source,
+ nss,
+ options,
+ stdx::bind(
+ &DatabaseCloner::_collectionClonerCallback, this, stdx::placeholders::_1, nss),
+ _storageInterface);
+ } catch (const UserException& ex) {
+ _finishCallback(ex.toStatus());
+ return;
+ }
+ }
+
+ for (auto&& collectionCloner : _collectionCloners) {
+ collectionCloner.setScheduleDbWorkFn(_scheduleDbWorkFn);
+ }
- // Start first collection cloner.
- _currentCollectionClonerIter = _collectionCloners.begin();
+ // Start first collection cloner.
+ _currentCollectionClonerIter = _collectionCloners.begin();
- LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace();
+ LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace();
+ Status startStatus = _startCollectionCloner(*_currentCollectionClonerIter);
+ if (!startStatus.isOK()) {
+ LOG(1) << " failed to start collection cloning on "
+ << _currentCollectionClonerIter->getSourceNamespace() << ": " << startStatus;
+ _finishCallback(startStatus);
+ return;
+ }
+}
+
+void DatabaseCloner::_collectionClonerCallback(const Status& status, const NamespaceString& nss) {
+ // Forward collection cloner result to caller.
+ // Failure to clone a collection does not stop the database cloner
+ // from cloning the rest of the collections in the listCollections result.
+ _collectionWork(status, nss);
+
+    _currentCollectionClonerIter++;
+
+    if (_currentCollectionClonerIter != _collectionCloners.end()) {
+        // Dereference only after the end() check; once the last cloner
+        // completes, the incremented iterator is past the end of the list.
+        LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace();
Status startStatus = _startCollectionCloner(*_currentCollectionClonerIter);
if (!startStatus.isOK()) {
LOG(1) << " failed to start collection cloning on "
- << _currentCollectionClonerIter->getSourceNamespace()
- << ": " << startStatus;
+ << _currentCollectionClonerIter->getSourceNamespace() << ": " << startStatus;
_finishCallback(startStatus);
return;
}
+ return;
}
- void DatabaseCloner::_collectionClonerCallback(const Status& status,
- const NamespaceString& nss) {
- // Forward collection cloner result to caller.
- // Failure to clone a collection does not stop the database cloner
- // from cloning the rest of the collections in the listCollections result.
- _collectionWork(status, nss);
-
- _currentCollectionClonerIter++;
-
- LOG(1) << " cloning collection " << _currentCollectionClonerIter->getSourceNamespace();
-
- if (_currentCollectionClonerIter != _collectionCloners.end()) {
- Status startStatus = _startCollectionCloner(*_currentCollectionClonerIter);
- if (!startStatus.isOK()) {
- LOG(1) << " failed to start collection cloning on "
- << _currentCollectionClonerIter->getSourceNamespace()
- << ": " << startStatus;
- _finishCallback(startStatus);
- return;
- }
- return;
- }
-
- _finishCallback(Status::OK());
- }
+ _finishCallback(Status::OK());
+}
- void DatabaseCloner::_finishCallback(const Status& status) {
- _onCompletion(status);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _active = false;
- _condition.notify_all();
- }
+void DatabaseCloner::_finishCallback(const Status& status) {
+ _onCompletion(status);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _active = false;
+ _condition.notify_all();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
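The control flow in _collectionClonerCallback above is a classic serial chain: each collection cloner's completion handler forwards the result, advances the list iterator, and starts the next cloner, reporting overall success once the iterator reaches end(). The standalone sketch below reproduces that shape in plain C++; the Task and SerialRunner names are hypothetical, and the work runs inline rather than on a replication executor:

    #include <functional>
    #include <iostream>
    #include <list>
    #include <string>

    struct Task {
        std::string name;
        // The real cloner schedules asynchronous work here; this sketch runs inline.
        void start(const std::function<void(const std::string&)>& onDone) {
            onDone(name);
        }
    };

    class SerialRunner {
    public:
        explicit SerialRunner(std::list<Task> tasks) : _tasks(std::move(tasks)) {}

        void run() {
            _current = _tasks.begin();
            _startNext();
        }

    private:
        void _startNext() {
            if (_current == _tasks.end()) {
                std::cout << "all collections cloned\n";  // i.e. _finishCallback(Status::OK())
                return;
            }
            _current->start([this](const std::string& name) {
                std::cout << "cloned " << name << '\n';  // i.e. _collectionWork(status, nss)
                ++_current;  // advance first, then check before dereferencing
                _startNext();
            });
        }

        std::list<Task> _tasks;
        std::list<Task>::iterator _current;
    };

    int main() {
        SerialRunner runner({{"db.a"}, {"db.b"}});
        runner.run();
    }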
diff --git a/src/mongo/db/repl/database_cloner.h b/src/mongo/db/repl/database_cloner.h
index f1171bf5e4b..954f816cdaa 100644
--- a/src/mongo/db/repl/database_cloner.h
+++ b/src/mongo/db/repl/database_cloner.h
@@ -47,158 +47,157 @@
namespace mongo {
namespace repl {
- class DatabaseCloner : public BaseCloner {
- MONGO_DISALLOW_COPYING(DatabaseCloner);
- public:
-
- /**
- * Predicate used on the collection info objects returned by listCollections.
- * Each collection info is represented by a document in the following format:
- * {
- * name: <collection name>,
- * options: <collection options>
- * }
- *
- * Returns true if the collection described by the info object should be cloned.
- * Returns false if the collection should be ignored.
- */
- using ListCollectionsPredicateFn = stdx::function<bool (const BSONObj&)>;
-
- /**
- * Callback function to report progress of collection cloning. Arguments are:
- * - status from the collection cloner's 'onCompletion' callback.
- * - source namespace of the collection cloner that completed (or failed).
- *
- * Called exactly once for every collection cloner started by the the database cloner.
- */
- using CollectionCallbackFn = stdx::function<void (const Status&, const NamespaceString&)>;
-
- /**
- * Type of function to start a collection cloner.
- */
- using StartCollectionClonerFn = stdx::function<Status (CollectionCloner&)>;
-
- /**
- * Creates DatabaseCloner task in inactive state. Use start() to activate cloner.
- *
- * The cloner calls 'onCompletion' when the database cloning has completed or failed.
- *
- * 'onCompletion' will be called exactly once.
- *
- * Takes ownership of the passed StorageInterface object.
- */
- DatabaseCloner(ReplicationExecutor* executor,
- const HostAndPort& source,
- const std::string& dbname,
- const BSONObj& listCollectionsFilter,
- const ListCollectionsPredicateFn& listCollectionsPredicate,
- CollectionCloner::StorageInterface* storageInterface,
- const CollectionCallbackFn& collectionWork,
- const CallbackFn& onCompletion);
-
- virtual ~DatabaseCloner();
-
- /**
- * Returns collection info objects read from listCollections result.
- * This will return an empty vector until we have processed the last
- * batch of results from listCollections.
- */
- const std::vector<BSONObj>& getCollectionInfos() const;
-
- std::string getDiagnosticString() const override;
-
- bool isActive() const override;
-
- Status start() override;
-
- void cancel() override;
-
- void wait() override;
-
- //
- // Testing only functions below.
- //
-
- /**
- * Overrides how executor schedules database work.
- *
- * For testing only.
- */
- void setScheduleDbWorkFn(const CollectionCloner::ScheduleDbWorkFn& scheduleDbWorkFn);
-
- /**
- * Overrides how executor starts a collection cloner.
- *
- * For testing only
- */
- void setStartCollectionClonerFn(const StartCollectionClonerFn& startCollectionCloner);
-
- private:
-
- /**
- * Read collection names and options from listCollections result.
- */
- void _listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
- Fetcher::NextAction* nextAction,
- BSONObjBuilder* getMoreBob);
-
- /**
- * Forwards collection cloner result to client.
- * Starts a new cloner on a different collection.
- */
- void _collectionClonerCallback(const Status& status, const NamespaceString& nss);
-
- /**
- * Reports completion status.
- * Sets cloner to inactive.
- */
- void _finishCallback(const Status& status);
-
- // Not owned by us.
- ReplicationExecutor* _executor;
-
- HostAndPort _source;
- std::string _dbname;
- BSONObj _listCollectionsFilter;
- ListCollectionsPredicateFn _listCollectionsPredicate;
- CollectionCloner::StorageInterface* _storageInterface;
-
- // Invoked once for every successfully started collection cloner.
- CollectionCallbackFn _collectionWork;
-
- // Invoked once when cloning completes or fails.
- CallbackFn _onCompletion;
-
- // Protects member data of this database cloner.
- mutable stdx::mutex _mutex;
-
- mutable stdx::condition_variable _condition;
-
- // _active is true when database cloner is started.
- bool _active;
-
- // Fetcher instance for running listCollections command.
- Fetcher _listCollectionsFetcher;
-
- // Collection info objects returned from listCollections.
- // Format of each document:
- // {
- // name: <collection name>,
- // options: <collection options>
- // }
- // Holds all collection infos from listCollections.
- std::vector<BSONObj> _collectionInfos;
+class DatabaseCloner : public BaseCloner {
+ MONGO_DISALLOW_COPYING(DatabaseCloner);
+
+public:
+ /**
+ * Predicate used on the collection info objects returned by listCollections.
+ * Each collection info is represented by a document in the following format:
+ * {
+ * name: <collection name>,
+ * options: <collection options>
+ * }
+ *
+ * Returns true if the collection described by the info object should be cloned.
+ * Returns false if the collection should be ignored.
+ */
+ using ListCollectionsPredicateFn = stdx::function<bool(const BSONObj&)>;
+
+ /**
+ * Callback function to report progress of collection cloning. Arguments are:
+ * - status from the collection cloner's 'onCompletion' callback.
+ * - source namespace of the collection cloner that completed (or failed).
+ *
+     * Called exactly once for every collection cloner started by the database cloner.
+ */
+ using CollectionCallbackFn = stdx::function<void(const Status&, const NamespaceString&)>;
+
+ /**
+ * Type of function to start a collection cloner.
+ */
+ using StartCollectionClonerFn = stdx::function<Status(CollectionCloner&)>;
+
+ /**
+ * Creates DatabaseCloner task in inactive state. Use start() to activate cloner.
+ *
+ * The cloner calls 'onCompletion' when the database cloning has completed or failed.
+ *
+ * 'onCompletion' will be called exactly once.
+ *
+ * Takes ownership of the passed StorageInterface object.
+ */
+ DatabaseCloner(ReplicationExecutor* executor,
+ const HostAndPort& source,
+ const std::string& dbname,
+ const BSONObj& listCollectionsFilter,
+ const ListCollectionsPredicateFn& listCollectionsPredicate,
+ CollectionCloner::StorageInterface* storageInterface,
+ const CollectionCallbackFn& collectionWork,
+ const CallbackFn& onCompletion);
+
+ virtual ~DatabaseCloner();
+
+ /**
+ * Returns collection info objects read from listCollections result.
+ * This will return an empty vector until we have processed the last
+ * batch of results from listCollections.
+ */
+ const std::vector<BSONObj>& getCollectionInfos() const;
+
+ std::string getDiagnosticString() const override;
+
+ bool isActive() const override;
+
+ Status start() override;
+
+ void cancel() override;
+
+ void wait() override;
+
+ //
+ // Testing only functions below.
+ //
+
+ /**
+     * Overrides how the executor schedules database work.
+ *
+ * For testing only.
+ */
+ void setScheduleDbWorkFn(const CollectionCloner::ScheduleDbWorkFn& scheduleDbWorkFn);
+
+ /**
+     * Overrides how the executor starts a collection cloner.
+     *
+     * For testing only.
+ */
+ void setStartCollectionClonerFn(const StartCollectionClonerFn& startCollectionCloner);
+
+private:
+ /**
+ * Read collection names and options from listCollections result.
+ */
+ void _listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult,
+ Fetcher::NextAction* nextAction,
+ BSONObjBuilder* getMoreBob);
+
+ /**
+ * Forwards collection cloner result to client.
+ * Starts a new cloner on a different collection.
+ */
+ void _collectionClonerCallback(const Status& status, const NamespaceString& nss);
+
+ /**
+ * Reports completion status.
+ * Sets cloner to inactive.
+ */
+ void _finishCallback(const Status& status);
+
+ // Not owned by us.
+ ReplicationExecutor* _executor;
+
+ HostAndPort _source;
+ std::string _dbname;
+ BSONObj _listCollectionsFilter;
+ ListCollectionsPredicateFn _listCollectionsPredicate;
+ CollectionCloner::StorageInterface* _storageInterface;
+
+ // Invoked once for every successfully started collection cloner.
+ CollectionCallbackFn _collectionWork;
+
+ // Invoked once when cloning completes or fails.
+ CallbackFn _onCompletion;
+
+ // Protects member data of this database cloner.
+ mutable stdx::mutex _mutex;
+
+ mutable stdx::condition_variable _condition;
+
+ // _active is true when database cloner is started.
+ bool _active;
+
+ // Fetcher instance for running listCollections command.
+ Fetcher _listCollectionsFetcher;
+
+ // Collection info objects returned from listCollections.
+ // Format of each document:
+ // {
+ // name: <collection name>,
+ // options: <collection options>
+ // }
+ // Holds all collection infos from listCollections.
+ std::vector<BSONObj> _collectionInfos;
+
+ std::vector<NamespaceString> _collectionNamespaces;
+
+ std::list<CollectionCloner> _collectionCloners;
+ std::list<CollectionCloner>::iterator _currentCollectionClonerIter;
- std::vector<NamespaceString> _collectionNamespaces;
+ // Function for scheduling database work using the executor.
+ CollectionCloner::ScheduleDbWorkFn _scheduleDbWorkFn;
- std::list<CollectionCloner> _collectionCloners;
- std::list<CollectionCloner>::iterator _currentCollectionClonerIter;
+ StartCollectionClonerFn _startCollectionCloner;
+};
- // Function for scheduling database work using the executor.
- CollectionCloner::ScheduleDbWorkFn _scheduleDbWorkFn;
-
- StartCollectionClonerFn _startCollectionCloner;
- };
-
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
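The interface above is driven the same way the unit tests in database_cloner_test.cpp drive it: construct with callbacks, call start(), and wait() for completion. A minimal usage sketch, assuming a ReplicationExecutor and a storage interface are already set up; the executor, storageInterface, and host values here are illustrative, not part of the API:

    DatabaseCloner cloner(
        &executor,
        HostAndPort("donor.example.net", 27017),
        "mydb",
        BSONObj(),                                     // no listCollections filter
        DatabaseCloner::ListCollectionsPredicateFn(),  // clone every collection
        storageInterface,
        [](const Status& status, const NamespaceString& nss) {
            log() << "collection " << nss.ns() << " finished: " << status;
        },
        [](const Status& status) { log() << "database clone finished: " << status; });

    uassertStatusOK(cloner.start());  // kicks off the listCollections fetch
    cloner.wait();                    // returns once 'onCompletion' has fired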
diff --git a/src/mongo/db/repl/database_cloner_test.cpp b/src/mongo/db/repl/database_cloner_test.cpp
index 1cb772bc898..3683e24eae3 100644
--- a/src/mongo/db/repl/database_cloner_test.cpp
+++ b/src/mongo/db/repl/database_cloner_test.cpp
@@ -39,463 +39,494 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
-
- const std::string dbname("db");
-
- class DatabaseClonerTest : public BaseClonerTest {
- public:
-
- DatabaseClonerTest();
- void collectionWork(const Status& status, const NamespaceString& sourceNss);
- void clear() override;
- BaseCloner* getCloner() const override;
-
- protected:
-
- void setUp() override;
- void tearDown() override;
-
- std::list<std::pair<Status, NamespaceString> > collectionWorkResults;
- std::unique_ptr<DatabaseCloner> databaseCloner;
- };
-
- DatabaseClonerTest::DatabaseClonerTest()
- : collectionWorkResults(),
- databaseCloner() { }
-
- void DatabaseClonerTest::collectionWork(const Status& status, const NamespaceString& srcNss) {
- collectionWorkResults.emplace_back(status, srcNss);
- }
-
- void DatabaseClonerTest::setUp() {
- BaseClonerTest::setUp();
- collectionWorkResults.clear();
- databaseCloner.reset(new DatabaseCloner(&getExecutor(),
- target,
- dbname,
- BSONObj(),
- DatabaseCloner::ListCollectionsPredicateFn(),
- storageInterface.get(),
- stdx::bind(&DatabaseClonerTest::collectionWork,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2),
- stdx::bind(&DatabaseClonerTest::setStatus,
- this,
- stdx::placeholders::_1)));
- }
-
- void DatabaseClonerTest::tearDown() {
- BaseClonerTest::tearDown();
- databaseCloner.reset();
- collectionWorkResults.clear();
- }
-
- void DatabaseClonerTest::clear() {
- }
-
- BaseCloner* DatabaseClonerTest::getCloner() const {
- return databaseCloner.get();
- }
-
- TEST_F(DatabaseClonerTest, InvalidConstruction) {
- ReplicationExecutor& executor = getExecutor();
-
- const BSONObj filter;
- DatabaseCloner::ListCollectionsPredicateFn pred;
- CollectionCloner::StorageInterface* si = storageInterface.get();
- namespace stdxph = stdx::placeholders;
- const DatabaseCloner::CollectionCallbackFn ccb =
- stdx::bind(&DatabaseClonerTest::collectionWork, this, stdxph::_1, stdxph::_2);
-
- const auto& cb = [](const Status&) { FAIL("should not reach here"); };
-
- // Null executor.
- ASSERT_THROWS(DatabaseCloner(nullptr, target, dbname, filter, pred, si, ccb, cb),
+using namespace mongo;
+using namespace mongo::repl;
+
+const std::string dbname("db");
+
+class DatabaseClonerTest : public BaseClonerTest {
+public:
+ DatabaseClonerTest();
+ void collectionWork(const Status& status, const NamespaceString& sourceNss);
+ void clear() override;
+ BaseCloner* getCloner() const override;
+
+protected:
+ void setUp() override;
+ void tearDown() override;
+
+ std::list<std::pair<Status, NamespaceString>> collectionWorkResults;
+ std::unique_ptr<DatabaseCloner> databaseCloner;
+};
+
+DatabaseClonerTest::DatabaseClonerTest() : collectionWorkResults(), databaseCloner() {}
+
+void DatabaseClonerTest::collectionWork(const Status& status, const NamespaceString& srcNss) {
+ collectionWorkResults.emplace_back(status, srcNss);
+}
+
+void DatabaseClonerTest::setUp() {
+ BaseClonerTest::setUp();
+ collectionWorkResults.clear();
+ databaseCloner.reset(new DatabaseCloner(
+ &getExecutor(),
+ target,
+ dbname,
+ BSONObj(),
+ DatabaseCloner::ListCollectionsPredicateFn(),
+ storageInterface.get(),
+ stdx::bind(&DatabaseClonerTest::collectionWork,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2),
+ stdx::bind(&DatabaseClonerTest::setStatus, this, stdx::placeholders::_1)));
+}
+
+void DatabaseClonerTest::tearDown() {
+ BaseClonerTest::tearDown();
+ databaseCloner.reset();
+ collectionWorkResults.clear();
+}
+
+void DatabaseClonerTest::clear() {}
+
+BaseCloner* DatabaseClonerTest::getCloner() const {
+ return databaseCloner.get();
+}
+
+TEST_F(DatabaseClonerTest, InvalidConstruction) {
+ ReplicationExecutor& executor = getExecutor();
+
+ const BSONObj filter;
+ DatabaseCloner::ListCollectionsPredicateFn pred;
+ CollectionCloner::StorageInterface* si = storageInterface.get();
+ namespace stdxph = stdx::placeholders;
+ const DatabaseCloner::CollectionCallbackFn ccb =
+ stdx::bind(&DatabaseClonerTest::collectionWork, this, stdxph::_1, stdxph::_2);
+
+ const auto& cb = [](const Status&) { FAIL("should not reach here"); };
+
+ // Null executor.
+ ASSERT_THROWS(DatabaseCloner(nullptr, target, dbname, filter, pred, si, ccb, cb),
+ UserException);
+
+ // Empty database name
+ ASSERT_THROWS(DatabaseCloner(&executor, target, "", filter, pred, si, ccb, cb), UserException);
+
+ // Callback function cannot be null.
+ {
+ DatabaseCloner::CallbackFn ncb;
+ ASSERT_THROWS(DatabaseCloner(&executor, target, dbname, filter, pred, si, ccb, ncb),
UserException);
-
- // Empty database name
- ASSERT_THROWS(DatabaseCloner(&executor, target, "", filter, pred, si, ccb, cb),
- UserException);
-
- // Callback function cannot be null.
- {
- DatabaseCloner::CallbackFn ncb;
- ASSERT_THROWS(DatabaseCloner(&executor, target, dbname, filter, pred, si, ccb, ncb),
- UserException);
- }
-
- // Storage interface cannot be null.
- {
- CollectionCloner::StorageInterface* nsi = nullptr;
- ASSERT_THROWS(DatabaseCloner(&executor, target, dbname, filter, pred, nsi, ccb, cb),
- UserException);
- }
-
- // CollectionCallbackFn function cannot be null.
- {
- DatabaseCloner::CollectionCallbackFn nccb;
- ASSERT_THROWS(DatabaseCloner(&executor, target, dbname, filter, pred, si, nccb, cb),
- UserException);
- }
- }
-
- TEST_F(DatabaseClonerTest, ClonerLifeCycle) {
- testLifeCycle();
}
- TEST_F(DatabaseClonerTest, FirstRemoteCommandWithoutFilter) {
- ASSERT_OK(databaseCloner->start());
-
- auto net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- NetworkOperationIterator noi = net->getNextReadyRequest();
- auto&& noiRequest = noi->getRequest();
- ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
- ASSERT_EQUALS("listCollections", std::string(noiRequest.cmdObj.firstElementFieldName()));
- ASSERT_EQUALS(1, noiRequest.cmdObj.firstElement().numberInt());
- ASSERT_FALSE(noiRequest.cmdObj.hasField("filter"));
- ASSERT_FALSE(net->hasReadyRequests());
- ASSERT_TRUE(databaseCloner->isActive());
+ // Storage interface cannot be null.
+ {
+ CollectionCloner::StorageInterface* nsi = nullptr;
+ ASSERT_THROWS(DatabaseCloner(&executor, target, dbname, filter, pred, nsi, ccb, cb),
+ UserException);
}
- TEST_F(DatabaseClonerTest, FirstRemoteCommandWithFilter) {
- const BSONObj listCollectionsFilter = BSON("name" << "coll");
- databaseCloner.reset(new DatabaseCloner(&getExecutor(),
- target,
- dbname,
- listCollectionsFilter,
- DatabaseCloner::ListCollectionsPredicateFn(),
- storageInterface.get(),
- stdx::bind(&DatabaseClonerTest::collectionWork,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2),
- stdx::bind(&DatabaseClonerTest::setStatus,
- this,
- stdx::placeholders::_1)));
- ASSERT_OK(databaseCloner->start());
-
- auto net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- NetworkOperationIterator noi = net->getNextReadyRequest();
- auto&& noiRequest = noi->getRequest();
- ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
- ASSERT_EQUALS("listCollections", std::string(noiRequest.cmdObj.firstElementFieldName()));
- ASSERT_EQUALS(1, noiRequest.cmdObj.firstElement().numberInt());
- BSONElement filterElement = noiRequest.cmdObj.getField("filter");
- ASSERT_TRUE(filterElement.isABSONObj());
- ASSERT_EQUALS(listCollectionsFilter, filterElement.Obj());
- ASSERT_FALSE(net->hasReadyRequests());
- ASSERT_TRUE(databaseCloner->isActive());
+ // CollectionCallbackFn function cannot be null.
+ {
+ DatabaseCloner::CollectionCallbackFn nccb;
+ ASSERT_THROWS(DatabaseCloner(&executor, target, dbname, filter, pred, si, nccb, cb),
+ UserException);
}
-
- TEST_F(DatabaseClonerTest, InvalidListCollectionsFilter) {
- ASSERT_OK(databaseCloner->start());
-
- processNetworkResponse(
- BSON("ok" << 0 << "errmsg" << "unknown operator" << "code" << ErrorCodes::BadValue));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
- ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, ClonerLifeCycle) {
+ testLifeCycle();
+}
+
+TEST_F(DatabaseClonerTest, FirstRemoteCommandWithoutFilter) {
+ ASSERT_OK(databaseCloner->start());
+
+ auto net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ NetworkOperationIterator noi = net->getNextReadyRequest();
+ auto&& noiRequest = noi->getRequest();
+ ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
+ ASSERT_EQUALS("listCollections", std::string(noiRequest.cmdObj.firstElementFieldName()));
+ ASSERT_EQUALS(1, noiRequest.cmdObj.firstElement().numberInt());
+ ASSERT_FALSE(noiRequest.cmdObj.hasField("filter"));
+ ASSERT_FALSE(net->hasReadyRequests());
+ ASSERT_TRUE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, FirstRemoteCommandWithFilter) {
+ const BSONObj listCollectionsFilter = BSON("name"
+ << "coll");
+ databaseCloner.reset(new DatabaseCloner(
+ &getExecutor(),
+ target,
+ dbname,
+ listCollectionsFilter,
+ DatabaseCloner::ListCollectionsPredicateFn(),
+ storageInterface.get(),
+ stdx::bind(&DatabaseClonerTest::collectionWork,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2),
+ stdx::bind(&DatabaseClonerTest::setStatus, this, stdx::placeholders::_1)));
+ ASSERT_OK(databaseCloner->start());
+
+ auto net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ NetworkOperationIterator noi = net->getNextReadyRequest();
+ auto&& noiRequest = noi->getRequest();
+ ASSERT_EQUALS(nss.db().toString(), noiRequest.dbname);
+ ASSERT_EQUALS("listCollections", std::string(noiRequest.cmdObj.firstElementFieldName()));
+ ASSERT_EQUALS(1, noiRequest.cmdObj.firstElement().numberInt());
+ BSONElement filterElement = noiRequest.cmdObj.getField("filter");
+ ASSERT_TRUE(filterElement.isABSONObj());
+ ASSERT_EQUALS(listCollectionsFilter, filterElement.Obj());
+ ASSERT_FALSE(net->hasReadyRequests());
+ ASSERT_TRUE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, InvalidListCollectionsFilter) {
+ ASSERT_OK(databaseCloner->start());
+
+ processNetworkResponse(BSON("ok" << 0 << "errmsg"
+ << "unknown operator"
+ << "code" << ErrorCodes::BadValue));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+// A database may have no collections. Nothing to do for the database cloner.
+TEST_F(DatabaseClonerTest, ListCollectionsReturnedNoCollections) {
+ ASSERT_OK(databaseCloner->start());
+
+ // Keep going even if initial batch is empty.
+ processNetworkResponse(createListCollectionsResponse(1, BSONArray()));
+
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(databaseCloner->isActive());
+
+ // Final batch is also empty. Database cloner should stop and return a successful status.
+ processNetworkResponse(createListCollectionsResponse(0, BSONArray(), "nextBatch"));
+
+ ASSERT_OK(getStatus());
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, ListCollectionsPredicate) {
+ DatabaseCloner::ListCollectionsPredicateFn pred =
+ [](const BSONObj& info) { return info["name"].String() != "b"; };
+ databaseCloner.reset(new DatabaseCloner(
+ &getExecutor(),
+ target,
+ dbname,
+ BSONObj(),
+ pred,
+ storageInterface.get(),
+ stdx::bind(&DatabaseClonerTest::collectionWork,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2),
+ stdx::bind(&DatabaseClonerTest::setStatus, this, stdx::placeholders::_1)));
+ ASSERT_OK(databaseCloner->start());
+
+ const std::vector<BSONObj> sourceInfos = {BSON("name"
+ << "a"
+ << "options" << BSONObj()),
+ BSON("name"
+ << "b"
+ << "options" << BSONObj()),
+ BSON("name"
+ << "c"
+ << "options" << BSONObj())};
+ processNetworkResponse(createListCollectionsResponse(
+ 0, BSON_ARRAY(sourceInfos[0] << sourceInfos[1] << sourceInfos[2])));
+
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(databaseCloner->isActive());
+
+ const std::vector<BSONObj>& collectionInfos = databaseCloner->getCollectionInfos();
+ ASSERT_EQUALS(2U, collectionInfos.size());
+ ASSERT_EQUALS(sourceInfos[0], collectionInfos[0]);
+ ASSERT_EQUALS(sourceInfos[2], collectionInfos[1]);
+}
+
+TEST_F(DatabaseClonerTest, ListCollectionsMultipleBatches) {
+ ASSERT_OK(databaseCloner->start());
+
+ const std::vector<BSONObj> sourceInfos = {BSON("name"
+ << "a"
+ << "options" << BSONObj()),
+ BSON("name"
+ << "b"
+ << "options" << BSONObj())};
+ processNetworkResponse(createListCollectionsResponse(1, BSON_ARRAY(sourceInfos[0])));
+
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(databaseCloner->isActive());
+
+ {
+ const std::vector<BSONObj>& collectionInfos = databaseCloner->getCollectionInfos();
+ ASSERT_EQUALS(1U, collectionInfos.size());
+ ASSERT_EQUALS(sourceInfos[0], collectionInfos[0]);
}
- // A database may have no collections. Nothing to do for the database cloner.
- TEST_F(DatabaseClonerTest, ListCollectionsReturnedNoCollections) {
- ASSERT_OK(databaseCloner->start());
-
- // Keep going even if initial batch is empty.
- processNetworkResponse(createListCollectionsResponse(1, BSONArray()));
-
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(databaseCloner->isActive());
-
- // Final batch is also empty. Database cloner should stop and return a successful status.
- processNetworkResponse(createListCollectionsResponse(0, BSONArray(), "nextBatch"));
-
- ASSERT_OK(getStatus());
- ASSERT_FALSE(databaseCloner->isActive());
- }
+ processNetworkResponse(
+ createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[1]), "nextBatch"));
- TEST_F(DatabaseClonerTest, ListCollectionsPredicate) {
- DatabaseCloner::ListCollectionsPredicateFn pred = [](const BSONObj& info) {
- return info["name"].String() != "b";
- };
- databaseCloner.reset(new DatabaseCloner(&getExecutor(),
- target,
- dbname,
- BSONObj(),
- pred,
- storageInterface.get(),
- stdx::bind(&DatabaseClonerTest::collectionWork,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2),
- stdx::bind(&DatabaseClonerTest::setStatus,
- this,
- stdx::placeholders::_1)));
- ASSERT_OK(databaseCloner->start());
-
- const std::vector<BSONObj> sourceInfos = {
- BSON("name" << "a" << "options" << BSONObj()),
- BSON("name" << "b" << "options" << BSONObj()),
- BSON("name" << "c" << "options" << BSONObj())};
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[0] <<
- sourceInfos[1] <<
- sourceInfos[2])));
-
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(databaseCloner->isActive());
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(databaseCloner->isActive());
+ {
const std::vector<BSONObj>& collectionInfos = databaseCloner->getCollectionInfos();
ASSERT_EQUALS(2U, collectionInfos.size());
ASSERT_EQUALS(sourceInfos[0], collectionInfos[0]);
- ASSERT_EQUALS(sourceInfos[2], collectionInfos[1]);
+ ASSERT_EQUALS(sourceInfos[1], collectionInfos[1]);
}
-
- TEST_F(DatabaseClonerTest, ListCollectionsMultipleBatches) {
- ASSERT_OK(databaseCloner->start());
-
- const std::vector<BSONObj> sourceInfos = {
- BSON("name" << "a" << "options" << BSONObj()),
- BSON("name" << "b" << "options" << BSONObj())};
- processNetworkResponse(createListCollectionsResponse(1, BSON_ARRAY(sourceInfos[0])));
-
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(databaseCloner->isActive());
-
- {
- const std::vector<BSONObj>& collectionInfos = databaseCloner->getCollectionInfos();
- ASSERT_EQUALS(1U, collectionInfos.size());
- ASSERT_EQUALS(sourceInfos[0], collectionInfos[0]);
- }
-
- processNetworkResponse(
- createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[1]), "nextBatch"));
-
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(databaseCloner->isActive());
-
- {
- const std::vector<BSONObj>& collectionInfos = databaseCloner->getCollectionInfos();
- ASSERT_EQUALS(2U, collectionInfos.size());
- ASSERT_EQUALS(sourceInfos[0], collectionInfos[0]);
- ASSERT_EQUALS(sourceInfos[1], collectionInfos[1]);
- }
- }
-
- TEST_F(DatabaseClonerTest, CollectionInfoNameFieldMissing) {
- ASSERT_OK(databaseCloner->start());
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("options" << BSONObj()))));
- ASSERT_EQUALS(ErrorCodes::FailedToParse, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "must contain 'name' field");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, CollectionInfoNameNotAString) {
- ASSERT_OK(databaseCloner->start());
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << 123 << "options" << BSONObj()))));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "'name' field must be a string");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, CollectionInfoNameEmpty) {
- ASSERT_OK(databaseCloner->start());
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "" << "options" << BSONObj()))));
- ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "invalid collection namespace: db.");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, CollectionInfoNameDuplicate) {
- ASSERT_OK(databaseCloner->start());
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "a" << "options" << BSONObj()) <<
- BSON("name" << "a" << "options" << BSONObj()))));
- ASSERT_EQUALS(ErrorCodes::DuplicateKey, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "duplicate collection name 'a'");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, CollectionInfoOptionsFieldMissing) {
- ASSERT_OK(databaseCloner->start());
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(BSON("name" << "a"))));
- ASSERT_EQUALS(ErrorCodes::FailedToParse, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "must contain 'options' field");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, CollectionInfoOptionsNotAnObject) {
- ASSERT_OK(databaseCloner->start());
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "a" << "options" << 123))));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "'options' field must be an object");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, InvalidCollectionOptions) {
- ASSERT_OK(databaseCloner->start());
-
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "a" << "options" << BSON("storageEngine" << 1)))));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, ListCollectionsReturnsEmptyCollectionName) {
- databaseCloner.reset(new DatabaseCloner(&getExecutor(),
- target,
- dbname,
- BSONObj(),
- DatabaseCloner::ListCollectionsPredicateFn(),
- storageInterface.get(),
- stdx::bind(&DatabaseClonerTest::collectionWork,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2),
- stdx::bind(&DatabaseClonerTest::setStatus,
- this,
- stdx::placeholders::_1)));
- ASSERT_OK(databaseCloner->start());
-
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "" << "options" << BSONObj()))));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
- ASSERT_STRING_CONTAINS(getStatus().reason(), "invalid collection namespace: db.");
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, StartFirstCollectionClonerFailed) {
- ASSERT_OK(databaseCloner->start());
-
- databaseCloner->setStartCollectionClonerFn([](CollectionCloner& cloner) {
+}
+
+TEST_F(DatabaseClonerTest, CollectionInfoNameFieldMissing) {
+ ASSERT_OK(databaseCloner->start());
+ processNetworkResponse(
+ createListCollectionsResponse(0, BSON_ARRAY(BSON("options" << BSONObj()))));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "must contain 'name' field");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, CollectionInfoNameNotAString) {
+ ASSERT_OK(databaseCloner->start());
+ processNetworkResponse(createListCollectionsResponse(
+ 0, BSON_ARRAY(BSON("name" << 123 << "options" << BSONObj()))));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "'name' field must be a string");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, CollectionInfoNameEmpty) {
+ ASSERT_OK(databaseCloner->start());
+ processNetworkResponse(
+ createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << ""
+ << "options" << BSONObj()))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "invalid collection namespace: db.");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, CollectionInfoNameDuplicate) {
+ ASSERT_OK(databaseCloner->start());
+ processNetworkResponse(
+ createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << "a"
+ << "options" << BSONObj())
+ << BSON("name"
+ << "a"
+ << "options" << BSONObj()))));
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "duplicate collection name 'a'");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, CollectionInfoOptionsFieldMissing) {
+ ASSERT_OK(databaseCloner->start());
+ processNetworkResponse(createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << "a"))));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "must contain 'options' field");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, CollectionInfoOptionsNotAnObject) {
+ ASSERT_OK(databaseCloner->start());
+ processNetworkResponse(createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << "a"
+ << "options" << 123))));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "'options' field must be an object");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, InvalidCollectionOptions) {
+ ASSERT_OK(databaseCloner->start());
+
+ processNetworkResponse(
+ createListCollectionsResponse(
+ 0,
+ BSON_ARRAY(BSON("name"
+ << "a"
+ << "options" << BSON("storageEngine" << 1)))));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, ListCollectionsReturnsEmptyCollectionName) {
+ databaseCloner.reset(new DatabaseCloner(
+ &getExecutor(),
+ target,
+ dbname,
+ BSONObj(),
+ DatabaseCloner::ListCollectionsPredicateFn(),
+ storageInterface.get(),
+ stdx::bind(&DatabaseClonerTest::collectionWork,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2),
+ stdx::bind(&DatabaseClonerTest::setStatus, this, stdx::placeholders::_1)));
+ ASSERT_OK(databaseCloner->start());
+
+ processNetworkResponse(
+ createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << ""
+ << "options" << BSONObj()))));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, getStatus().code());
+ ASSERT_STRING_CONTAINS(getStatus().reason(), "invalid collection namespace: db.");
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, StartFirstCollectionClonerFailed) {
+ ASSERT_OK(databaseCloner->start());
+
+ databaseCloner->setStartCollectionClonerFn(
+ [](CollectionCloner& cloner) { return Status(ErrorCodes::OperationFailed, ""); });
+
+ processNetworkResponse(
+ createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << "a"
+ << "options" << BSONObj()))));
+
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, StartSecondCollectionClonerFailed) {
+ ASSERT_OK(databaseCloner->start());
+
+    // Replace the scheduleDbWork function so that all callbacks (including
+    // exclusive tasks) will run through the network interface.
+ auto&& executor = getExecutor();
+ databaseCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
+ return executor.scheduleWork(workFn);
+ });
+
+ databaseCloner->setStartCollectionClonerFn([](CollectionCloner& cloner) {
+ if (cloner.getSourceNamespace().coll() == "b") {
return Status(ErrorCodes::OperationFailed, "");
- });
-
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "a" << "options" << BSONObj()))));
-
- ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, StartSecondCollectionClonerFailed) {
- ASSERT_OK(databaseCloner->start());
-
- // Replace scheduleDbWork function so that all callbacks (including exclusive tasks)
- // will run through network interface.
- auto&& executor = getExecutor();
- databaseCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
- return executor.scheduleWork(workFn);
- });
-
- databaseCloner->setStartCollectionClonerFn([](CollectionCloner& cloner) {
- if (cloner.getSourceNamespace().coll() == "b") {
- return Status(ErrorCodes::OperationFailed, "");
- }
- return cloner.start();
- });
-
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(
- BSON("name" << "a" << "options" << BSONObj()) <<
- BSON("name" << "b" << "options" << BSONObj()))));
-
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- processNetworkResponse(createCursorResponse(0, BSONArray()));
-
- ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
- ASSERT_FALSE(databaseCloner->isActive());
- }
-
- TEST_F(DatabaseClonerTest, FirstCollectionListIndexesFailed) {
- ASSERT_OK(databaseCloner->start());
-
- // Replace scheduleDbWork function so that all callbacks (including exclusive tasks)
- // will run through network interface.
- auto&& executor = getExecutor();
- databaseCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
- return executor.scheduleWork(workFn);
- });
-
- const std::vector<BSONObj> sourceInfos = {
- BSON("name" << "a" << "options" << BSONObj()),
- BSON("name" << "b" << "options" << BSONObj())};
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[0] <<
- sourceInfos[1])));
-
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(databaseCloner->isActive());
-
- // Collection cloners are run serially for now.
- // This affects the order of the network responses.
- processNetworkResponse(
- BSON("ok" << 0 << "errmsg" << "" << "code" << ErrorCodes::NamespaceNotFound));
-
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- processNetworkResponse(createCursorResponse(0, BSONArray()));
-
- ASSERT_OK(getStatus());
- ASSERT_FALSE(databaseCloner->isActive());
-
- ASSERT_EQUALS(2U, collectionWorkResults.size());
- {
- auto i = collectionWorkResults.cbegin();
- ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, i->first.code());
- ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "a").ns());
- i++;
- ASSERT_OK(i->first);
- ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "b").ns());
}
+ return cloner.start();
+ });
+
+ processNetworkResponse(
+ createListCollectionsResponse(0,
+ BSON_ARRAY(BSON("name"
+ << "a"
+ << "options" << BSONObj())
+ << BSON("name"
+ << "b"
+ << "options" << BSONObj()))));
+
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createCursorResponse(0, BSONArray()));
+
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, getStatus().code());
+ ASSERT_FALSE(databaseCloner->isActive());
+}
+
+TEST_F(DatabaseClonerTest, FirstCollectionListIndexesFailed) {
+ ASSERT_OK(databaseCloner->start());
+
+    // Replace the scheduleDbWork function so that all callbacks (including
+    // exclusive tasks) will run through the network interface.
+ auto&& executor = getExecutor();
+ databaseCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
+ return executor.scheduleWork(workFn);
+ });
+
+ const std::vector<BSONObj> sourceInfos = {BSON("name"
+ << "a"
+ << "options" << BSONObj()),
+ BSON("name"
+ << "b"
+ << "options" << BSONObj())};
+ processNetworkResponse(
+ createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[0] << sourceInfos[1])));
+
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(databaseCloner->isActive());
+
+ // Collection cloners are run serially for now.
+ // This affects the order of the network responses.
+ processNetworkResponse(BSON("ok" << 0 << "errmsg"
+ << ""
+ << "code" << ErrorCodes::NamespaceNotFound));
+
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createCursorResponse(0, BSONArray()));
+
+ ASSERT_OK(getStatus());
+ ASSERT_FALSE(databaseCloner->isActive());
+
+ ASSERT_EQUALS(2U, collectionWorkResults.size());
+ {
+ auto i = collectionWorkResults.cbegin();
+ ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, i->first.code());
+ ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "a").ns());
+ i++;
+ ASSERT_OK(i->first);
+ ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "b").ns());
}
-
- TEST_F(DatabaseClonerTest, CreateCollections) {
- ASSERT_OK(databaseCloner->start());
-
- // Replace scheduleDbWork function so that all callbacks (including exclusive tasks)
- // will run through network interface.
- auto&& executor = getExecutor();
- databaseCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
- return executor.scheduleWork(workFn);
- });
-
- const std::vector<BSONObj> sourceInfos = {
- BSON("name" << "a" << "options" << BSONObj()),
- BSON("name" << "b" << "options" << BSONObj())};
- processNetworkResponse(createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[0] <<
- sourceInfos[1])));
-
- ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
- ASSERT_TRUE(databaseCloner->isActive());
-
- // Collection cloners are run serially for now.
- // This affects the order of the network responses.
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- processNetworkResponse(createCursorResponse(0, BSONArray()));
-
- processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
- processNetworkResponse(createCursorResponse(0, BSONArray()));
-
- ASSERT_OK(getStatus());
- ASSERT_FALSE(databaseCloner->isActive());
-
- ASSERT_EQUALS(2U, collectionWorkResults.size());
- {
- auto i = collectionWorkResults.cbegin();
- ASSERT_OK(i->first);
- ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "a").ns());
- i++;
- ASSERT_OK(i->first);
- ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "b").ns());
- }
+}
+
+TEST_F(DatabaseClonerTest, CreateCollections) {
+ ASSERT_OK(databaseCloner->start());
+
+    // Replace the scheduleDbWork function so that all callbacks (including
+    // exclusive tasks) will run through the network interface.
+ auto&& executor = getExecutor();
+ databaseCloner->setScheduleDbWorkFn([&](const ReplicationExecutor::CallbackFn& workFn) {
+ return executor.scheduleWork(workFn);
+ });
+
+ const std::vector<BSONObj> sourceInfos = {BSON("name"
+ << "a"
+ << "options" << BSONObj()),
+ BSON("name"
+ << "b"
+ << "options" << BSONObj())};
+ processNetworkResponse(
+ createListCollectionsResponse(0, BSON_ARRAY(sourceInfos[0] << sourceInfos[1])));
+
+ ASSERT_EQUALS(getDetectableErrorStatus(), getStatus());
+ ASSERT_TRUE(databaseCloner->isActive());
+
+ // Collection cloners are run serially for now.
+ // This affects the order of the network responses.
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createCursorResponse(0, BSONArray()));
+
+ processNetworkResponse(createListIndexesResponse(0, BSON_ARRAY(idIndexSpec)));
+ processNetworkResponse(createCursorResponse(0, BSONArray()));
+
+ ASSERT_OK(getStatus());
+ ASSERT_FALSE(databaseCloner->isActive());
+
+ ASSERT_EQUALS(2U, collectionWorkResults.size());
+ {
+ auto i = collectionWorkResults.cbegin();
+ ASSERT_OK(i->first);
+ ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "a").ns());
+ i++;
+ ASSERT_OK(i->first);
+ ASSERT_EQUALS(i->second.ns(), NamespaceString(dbname, "b").ns());
}
+}
-} // namespace
+} // namespace
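Every processNetworkResponse() call above feeds the cloner a cursor-style command reply. The exact helper is defined in the base fixture, but the shape it presumably assembles looks like the sketch below: a non-zero cursor id tells the Fetcher to issue a getMore, and the batch field is named firstBatch on the first reply and nextBatch on subsequent ones (the values here are illustrative):

    BSONObj response =
        BSON("cursor" << BSON("id" << 1LL  // non-zero id: more batches follow
                                   << "ns"
                                   << "db.$cmd.listCollections"
                                   << "firstBatch"
                                   << BSON_ARRAY(BSON("name"
                                                      << "a"
                                                      << "options" << BSONObj())))
                      << "ok" << 1);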
diff --git a/src/mongo/db/repl/database_task.cpp b/src/mongo/db/repl/database_task.cpp
index 716155a3716..b19bf201b5d 100644
--- a/src/mongo/db/repl/database_task.cpp
+++ b/src/mongo/db/repl/database_task.cpp
@@ -38,63 +38,66 @@
namespace mongo {
namespace repl {
- // static
- DatabaseTask::Task DatabaseTask::makeGlobalExclusiveLockTask(const Task& task) {
- invariant(task);
- DatabaseTask::Task newTask = [task](OperationContext* txn, const Status& status) {
- if (!status.isOK()) {
- return task(txn, status);
- }
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lock(txn->lockState());
- return task(txn, status);
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "globalExclusiveLockTask", "global");
- MONGO_UNREACHABLE;
- };
- return newTask;
- }
+// static
+DatabaseTask::Task DatabaseTask::makeGlobalExclusiveLockTask(const Task& task) {
+ invariant(task);
+ DatabaseTask::Task newTask = [task](OperationContext* txn, const Status& status) {
+ if (!status.isOK()) {
+ return task(txn, status);
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lock(txn->lockState());
+ return task(txn, status);
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "globalExclusiveLockTask", "global");
+ MONGO_UNREACHABLE;
+ };
+ return newTask;
+}
- // static
- DatabaseTask::Task DatabaseTask::makeDatabaseLockTask(const Task& task,
- const std::string& databaseName,
- LockMode mode) {
- invariant(task);
- DatabaseTask::Task newTask = [=](OperationContext* txn, const Status& status) {
- if (!status.isOK()) {
- return task(txn, status);
- }
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- LockMode permissiveLockMode = isSharedLockMode(mode) ? MODE_IS : MODE_IX;
- ScopedTransaction transaction(txn, permissiveLockMode);
- Lock::DBLock lock(txn->lockState(), databaseName, mode);
- return task(txn, status);
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "databaseLockTask", databaseName);
- MONGO_UNREACHABLE;
- };
- return newTask;
- }
+// static
+DatabaseTask::Task DatabaseTask::makeDatabaseLockTask(const Task& task,
+ const std::string& databaseName,
+ LockMode mode) {
+ invariant(task);
+ DatabaseTask::Task newTask = [=](OperationContext* txn, const Status& status) {
+ if (!status.isOK()) {
+ return task(txn, status);
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ LockMode permissiveLockMode = isSharedLockMode(mode) ? MODE_IS : MODE_IX;
+ ScopedTransaction transaction(txn, permissiveLockMode);
+ Lock::DBLock lock(txn->lockState(), databaseName, mode);
+ return task(txn, status);
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "databaseLockTask", databaseName);
+ MONGO_UNREACHABLE;
+ };
+ return newTask;
+}
- // static
- DatabaseTask::Task DatabaseTask::makeCollectionLockTask(const Task& task,
- const NamespaceString& nss,
- LockMode mode) {
- invariant(task);
- DatabaseTask::Task newTask = [=](OperationContext* txn, const Status& status) {
- if (!status.isOK()) {
- return task(txn, status);
- }
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- LockMode permissiveLockMode = isSharedLockMode(mode) ? MODE_IS : MODE_IX;
- ScopedTransaction transaction(txn, permissiveLockMode);
- Lock::DBLock lock(txn->lockState(), nss.db(), permissiveLockMode);
- Lock::CollectionLock collectionLock(txn->lockState(), nss.toString(), mode);
- return task(txn, status);
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "collectionLockTask", nss.toString());
- MONGO_UNREACHABLE;
- };
- return newTask;
- }
+// static
+DatabaseTask::Task DatabaseTask::makeCollectionLockTask(const Task& task,
+ const NamespaceString& nss,
+ LockMode mode) {
+ invariant(task);
+ DatabaseTask::Task newTask = [=](OperationContext* txn, const Status& status) {
+ if (!status.isOK()) {
+ return task(txn, status);
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ LockMode permissiveLockMode = isSharedLockMode(mode) ? MODE_IS : MODE_IX;
+ ScopedTransaction transaction(txn, permissiveLockMode);
+ Lock::DBLock lock(txn->lockState(), nss.db(), permissiveLockMode);
+ Lock::CollectionLock collectionLock(txn->lockState(), nss.toString(), mode);
+ return task(txn, status);
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "collectionLockTask", nss.toString());
+ MONGO_UNREACHABLE;
+ };
+ return newTask;
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
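All three factories above are the same decorator: the wrapper forwards an error status to the task untouched, and otherwise runs the task while holding the appropriate lock inside a write-conflict retry loop. Stripped of the MongoDB lock types and retry macros, the shape reduces to the dependency-free sketch below, with std::mutex standing in for Lock::GlobalWrite, Lock::DBLock, and Lock::CollectionLock:

    #include <functional>
    #include <mutex>

    using Task = std::function<void(bool ok)>;

    Task makeLockedTask(const Task& task, std::mutex& lock) {
        return [task, &lock](bool ok) {
            if (!ok) {
                return task(ok);  // error path: run the task without taking the lock
            }
            std::lock_guard<std::mutex> guard(lock);  // stands in for the real lock guards
            task(ok);
        };
    }

    int main() {
        std::mutex m;
        Task wrapped = makeLockedTask([](bool ok) { /* guarded work */ }, m);
        wrapped(true);   // runs while holding the lock
        wrapped(false);  // error status is forwarded; no lock is taken
    }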
diff --git a/src/mongo/db/repl/database_task.h b/src/mongo/db/repl/database_task.h
index 5a4f09dfdae..29f10f2902c 100644
--- a/src/mongo/db/repl/database_task.h
+++ b/src/mongo/db/repl/database_task.h
@@ -36,40 +36,36 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class DatabaseTask{
- private:
- DatabaseTask();
+class DatabaseTask {
+private:
+ DatabaseTask();
- public:
+public:
+ using Task = TaskRunner::Task;
- using Task = TaskRunner::Task;
+ /**
+ * Creates a task wrapper that runs the target task inside a global exclusive lock.
+ */
+ static Task makeGlobalExclusiveLockTask(const Task& task);
- /**
- * Creates a task wrapper that runs the target task inside a global exclusive lock.
- */
- static Task makeGlobalExclusiveLockTask(const Task& task);
+ /**
+ * Creates a task wrapper that runs the target task inside a database lock.
+ */
+ static Task makeDatabaseLockTask(const Task& task,
+ const std::string& databaseName,
+ LockMode mode);
- /**
- * Creates a task wrapper that runs the target task inside a database lock.
- */
- static Task makeDatabaseLockTask(const Task& task,
- const std::string& databaseName,
- LockMode mode);
+ /**
+ * Creates a task wrapper that runs the target task inside a collection lock.
+     * The task acquires the database lock before attempting to lock the collection.
+     * Do not use in combination with makeDatabaseLockTask().
+ */
+ static Task makeCollectionLockTask(const Task& task, const NamespaceString& nss, LockMode mode);
+};
- /**
- * Creates a task wrapper that runs the target task inside a collection lock.
- * Task acquires database lock before attempting to lock collection. Do not
- * use in combination with makeDatabaseLockTask().
- */
- static Task makeCollectionLockTask(const Task& task,
- const NamespaceString& nss,
- LockMode mode);
-
- };
-
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
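A usage sketch that mirrors the tests which follow: wrap a task so it runs while holding a MODE_X collection lock, then hand the wrapper to a TaskRunner. The taskRunner variable is assumed to exist (DatabaseTaskTest below supplies it via a fixture):

    auto task = [](OperationContext* txn, const Status& status) {
        if (!status.isOK()) {
            return TaskRunner::NextAction::kInvalid;  // runner reported an error; do nothing
        }
        // 'txn' now holds mydb.mycoll in MODE_X.
        return TaskRunner::NextAction::kCancel;
    };
    taskRunner.schedule(
        DatabaseTask::makeCollectionLockTask(task, NamespaceString("mydb", "mycoll"), MODE_X));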
diff --git a/src/mongo/db/repl/database_task_test.cpp b/src/mongo/db/repl/database_task_test.cpp
index ef27f8ba93d..25a1b01178c 100644
--- a/src/mongo/db/repl/database_task_test.cpp
+++ b/src/mongo/db/repl/database_task_test.cpp
@@ -37,149 +37,147 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
-
- const std::string databaseName = "mydb";
- const std::string collectionName = "mycoll";
- const NamespaceString nss(databaseName, collectionName);
-
- class DatabaseTaskTest : public TaskRunnerTest {
- public:
- OperationContext* createOperationContext() const override;
+using namespace mongo;
+using namespace mongo::repl;
+
+const std::string databaseName = "mydb";
+const std::string collectionName = "mycoll";
+const NamespaceString nss(databaseName, collectionName);
+
+class DatabaseTaskTest : public TaskRunnerTest {
+public:
+ OperationContext* createOperationContext() const override;
+};
+
+OperationContext* DatabaseTaskTest::createOperationContext() const {
+ return new OperationContextReplMock();
+}
+
+TEST_F(DatabaseTaskTest, TaskRunnerErrorStatus) {
+ // Should not attempt to acquire lock on error status from task runner.
+ auto task = [](OperationContext* txn, const Status& status) {
+ ASSERT_FALSE(txn);
+ ASSERT_EQUALS(ErrorCodes::BadValue, status.code());
+ return TaskRunner::NextAction::kInvalid;
};
-
- OperationContext* DatabaseTaskTest::createOperationContext() const {
- return new OperationContextReplMock();
- }
-
- TEST_F(DatabaseTaskTest, TaskRunnerErrorStatus) {
- // Should not attempt to acquire lock on error status from task runner.
- auto task = [](OperationContext* txn, const Status& status) {
- ASSERT_FALSE(txn);
- ASSERT_EQUALS(ErrorCodes::BadValue, status.code());
- return TaskRunner::NextAction::kInvalid;
- };
- auto testLockTask = [](DatabaseTask::Task task) {
- ASSERT_TRUE(TaskRunner::NextAction::kInvalid ==
- task(nullptr, Status(ErrorCodes::BadValue, "")));
- };
- testLockTask(DatabaseTask::makeGlobalExclusiveLockTask(task));
- testLockTask(DatabaseTask::makeDatabaseLockTask(task, databaseName, MODE_X));
- testLockTask(DatabaseTask::makeCollectionLockTask(task, nss, MODE_X));
- }
-
- TEST_F(DatabaseTaskTest, RunGlobalExclusiveLockTask) {
- stdx::mutex mutex;
- bool called = false;
- OperationContext* txn = nullptr;
- bool lockIsW = false;
- Status status = getDetectableErrorStatus();
- // Task returning 'void' implies NextAction::NoAction.
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- called = true;
- txn = theTxn;
- lockIsW = txn->lockState()->isW();
- status = theStatus;
- return TaskRunner::NextAction::kCancel;
- };
- getTaskRunner().schedule(DatabaseTask::makeGlobalExclusiveLockTask(task));
- getThreadPool().join();
- ASSERT_FALSE(getTaskRunner().isActive());
-
+ auto testLockTask = [](DatabaseTask::Task task) {
+ ASSERT_TRUE(TaskRunner::NextAction::kInvalid ==
+ task(nullptr, Status(ErrorCodes::BadValue, "")));
+ };
+ testLockTask(DatabaseTask::makeGlobalExclusiveLockTask(task));
+ testLockTask(DatabaseTask::makeDatabaseLockTask(task, databaseName, MODE_X));
+ testLockTask(DatabaseTask::makeCollectionLockTask(task, nss, MODE_X));
+}
+
+TEST_F(DatabaseTaskTest, RunGlobalExclusiveLockTask) {
+ stdx::mutex mutex;
+ bool called = false;
+ OperationContext* txn = nullptr;
+ bool lockIsW = false;
+ Status status = getDetectableErrorStatus();
+ // Task returning 'void' implies NextAction::NoAction.
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_TRUE(called);
- ASSERT(txn);
- ASSERT_TRUE(lockIsW);
- ASSERT_OK(status);
- }
-
- void _testRunDatabaseLockTask(DatabaseTaskTest& test, LockMode mode) {
- stdx::mutex mutex;
- bool called = false;
- OperationContext* txn = nullptr;
- bool isDatabaseLockedForMode = false;
- Status status = test.getDetectableErrorStatus();
- // Task returning 'void' implies NextAction::NoAction.
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- called = true;
- txn = theTxn;
- isDatabaseLockedForMode = txn->lockState()->isDbLockedForMode(databaseName, mode);
- status = theStatus;
- return TaskRunner::NextAction::kCancel;
- };
- test.getTaskRunner().schedule(
- DatabaseTask::makeDatabaseLockTask(task, databaseName, mode));
- test.getThreadPool().join();
- ASSERT_FALSE(test.getTaskRunner().isActive());
-
+ called = true;
+ txn = theTxn;
+ lockIsW = txn->lockState()->isW();
+ status = theStatus;
+ return TaskRunner::NextAction::kCancel;
+ };
+ getTaskRunner().schedule(DatabaseTask::makeGlobalExclusiveLockTask(task));
+ getThreadPool().join();
+ ASSERT_FALSE(getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_TRUE(called);
+ ASSERT(txn);
+ ASSERT_TRUE(lockIsW);
+ ASSERT_OK(status);
+}
+
+void _testRunDatabaseLockTask(DatabaseTaskTest& test, LockMode mode) {
+ stdx::mutex mutex;
+ bool called = false;
+ OperationContext* txn = nullptr;
+ bool isDatabaseLockedForMode = false;
+ Status status = test.getDetectableErrorStatus();
+ // Task returning 'void' implies NextAction::NoAction.
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_TRUE(called);
- ASSERT(txn);
- ASSERT_TRUE(isDatabaseLockedForMode);
- ASSERT_OK(status);
- }
-
- TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeX) {
- _testRunDatabaseLockTask(*this, MODE_X);
- }
-
- TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeS) {
- _testRunDatabaseLockTask(*this, MODE_S);
- }
-
- TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeIX) {
- _testRunDatabaseLockTask(*this, MODE_IX);
- }
-
- TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeIS) {
- _testRunDatabaseLockTask(*this, MODE_IS);
- }
-
- void _testRunCollectionLockTask(DatabaseTaskTest& test, LockMode mode) {
- stdx::mutex mutex;
- bool called = false;
- OperationContext* txn = nullptr;
- bool isCollectionLockedForMode = false;
- Status status = test.getDetectableErrorStatus();
- // Task returning 'void' implies NextAction::NoAction.
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- called = true;
- txn = theTxn;
- isCollectionLockedForMode =
- txn->lockState()->isCollectionLockedForMode(nss.toString(), mode);
- status = theStatus;
- return TaskRunner::NextAction::kCancel;
- };
- test.getTaskRunner().schedule(
- DatabaseTask::makeCollectionLockTask(task, nss, mode));
- test.getThreadPool().join();
- ASSERT_FALSE(test.getTaskRunner().isActive());
-
+ called = true;
+ txn = theTxn;
+ isDatabaseLockedForMode = txn->lockState()->isDbLockedForMode(databaseName, mode);
+ status = theStatus;
+ return TaskRunner::NextAction::kCancel;
+ };
+ test.getTaskRunner().schedule(DatabaseTask::makeDatabaseLockTask(task, databaseName, mode));
+ test.getThreadPool().join();
+ ASSERT_FALSE(test.getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_TRUE(called);
+ ASSERT(txn);
+ ASSERT_TRUE(isDatabaseLockedForMode);
+ ASSERT_OK(status);
+}
+
+TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeX) {
+ _testRunDatabaseLockTask(*this, MODE_X);
+}
+
+TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeS) {
+ _testRunDatabaseLockTask(*this, MODE_S);
+}
+
+TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeIX) {
+ _testRunDatabaseLockTask(*this, MODE_IX);
+}
+
+TEST_F(DatabaseTaskTest, RunDatabaseLockTaskModeIS) {
+ _testRunDatabaseLockTask(*this, MODE_IS);
+}
+
+void _testRunCollectionLockTask(DatabaseTaskTest& test, LockMode mode) {
+ stdx::mutex mutex;
+ bool called = false;
+ OperationContext* txn = nullptr;
+ bool isCollectionLockedForMode = false;
+ Status status = test.getDetectableErrorStatus();
+ // Task returning 'void' implies NextAction::NoAction.
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_TRUE(called);
- ASSERT(txn);
- ASSERT_TRUE(isCollectionLockedForMode);
- ASSERT_OK(status);
- }
-
- TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeX) {
- _testRunCollectionLockTask(*this, MODE_X);
- }
-
- TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeS) {
- _testRunCollectionLockTask(*this, MODE_S);
- }
-
- TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeIX) {
- _testRunCollectionLockTask(*this, MODE_IX);
- }
-
- TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeIS) {
- _testRunCollectionLockTask(*this, MODE_IS);
- }
-
-} // namespace
+ called = true;
+ txn = theTxn;
+ isCollectionLockedForMode =
+ txn->lockState()->isCollectionLockedForMode(nss.toString(), mode);
+ status = theStatus;
+ return TaskRunner::NextAction::kCancel;
+ };
+ test.getTaskRunner().schedule(DatabaseTask::makeCollectionLockTask(task, nss, mode));
+ test.getThreadPool().join();
+ ASSERT_FALSE(test.getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_TRUE(called);
+ ASSERT(txn);
+ ASSERT_TRUE(isCollectionLockedForMode);
+ ASSERT_OK(status);
+}
+
+TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeX) {
+ _testRunCollectionLockTask(*this, MODE_X);
+}
+
+TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeS) {
+ _testRunCollectionLockTask(*this, MODE_S);
+}
+
+TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeIX) {
+ _testRunCollectionLockTask(*this, MODE_IX);
+}
+
+TEST_F(DatabaseTaskTest, RunCollectionLockTaskModeIS) {
+ _testRunCollectionLockTask(*this, MODE_IS);
+}
+
+} // namespace
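
The pattern these tests exercise (wrap a TaskRunner::Task so it runs with a
lock already held, then return kCancel to release the worker thread) is the
intended way to consume DatabaseTask. A minimal sketch under the same
assumptions as the fixture above; the 'runner' object and the "mydb" name are
illustrative:

    // Run one callback under a database exclusive lock, then let the
    // worker thread go, exactly as the tests above do.
    auto task = [](OperationContext* txn,
                   const Status& status) -> TaskRunner::NextAction {
        if (!status.isOK()) {
            return TaskRunner::NextAction::kCancel;  // scheduling failed; bail
        }
        invariant(txn->lockState()->isDbLockedForMode("mydb", MODE_X));
        // ... work that requires the database lock goes here ...
        return TaskRunner::NextAction::kCancel;
    };
    runner.schedule(DatabaseTask::makeDatabaseLockTask(task, "mydb", MODE_X));
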
diff --git a/src/mongo/db/repl/elect_cmd_runner.cpp b/src/mongo/db/repl/elect_cmd_runner.cpp
index c80badf684f..c0d958c428e 100644
--- a/src/mongo/db/repl/elect_cmd_runner.cpp
+++ b/src/mongo/db/repl/elect_cmd_runner.cpp
@@ -42,119 +42,108 @@
namespace mongo {
namespace repl {
- ElectCmdRunner::Algorithm::Algorithm(
- const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- OID round)
- : _actualResponses(0),
- _sufficientResponsesReceived(false),
- _rsConfig(rsConfig),
- _selfIndex(selfIndex),
- _targets(targets),
- _round(round) {
-
- // Vote for ourselves, first.
- _receivedVotes = _rsConfig.getMemberAt(_selfIndex).getNumVotes();
+ElectCmdRunner::Algorithm::Algorithm(const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ OID round)
+ : _actualResponses(0),
+ _sufficientResponsesReceived(false),
+ _rsConfig(rsConfig),
+ _selfIndex(selfIndex),
+ _targets(targets),
+ _round(round) {
+ // Vote for ourselves, first.
+ _receivedVotes = _rsConfig.getMemberAt(_selfIndex).getNumVotes();
+}
+
+ElectCmdRunner::Algorithm::~Algorithm() {}
+
+std::vector<RemoteCommandRequest> ElectCmdRunner::Algorithm::getRequests() const {
+ const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
+ std::vector<RemoteCommandRequest> requests;
+ BSONObjBuilder electCmdBuilder;
+ electCmdBuilder.append("replSetElect", 1);
+ electCmdBuilder.append("set", _rsConfig.getReplSetName());
+ electCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
+ electCmdBuilder.append("whoid", selfConfig.getId());
+ electCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
+ electCmdBuilder.append("round", _round);
+ const BSONObj replSetElectCmd = electCmdBuilder.obj();
+
+ // Schedule a RemoteCommandRequest for each non-DOWN node
+ for (std::vector<HostAndPort>::const_iterator it = _targets.begin(); it != _targets.end();
+ ++it) {
+ invariant(*it != selfConfig.getHostAndPort());
+ requests.push_back(RemoteCommandRequest(
+ *it,
+ "admin",
+ replSetElectCmd,
+ Milliseconds(30 * 1000))); // trying to match current Socket timeout
}
- ElectCmdRunner::Algorithm::~Algorithm() {}
-
- std::vector<RemoteCommandRequest>
- ElectCmdRunner::Algorithm::getRequests() const {
-
- const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
- std::vector<RemoteCommandRequest> requests;
- BSONObjBuilder electCmdBuilder;
- electCmdBuilder.append("replSetElect", 1);
- electCmdBuilder.append("set", _rsConfig.getReplSetName());
- electCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
- electCmdBuilder.append("whoid", selfConfig.getId());
- electCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
- electCmdBuilder.append("round", _round);
- const BSONObj replSetElectCmd = electCmdBuilder.obj();
-
- // Schedule a RemoteCommandRequest for each non-DOWN node
- for (std::vector<HostAndPort>::const_iterator it = _targets.begin();
- it != _targets.end();
- ++it) {
-
- invariant(*it != selfConfig.getHostAndPort());
- requests.push_back(RemoteCommandRequest(
- *it,
- "admin",
- replSetElectCmd,
- Milliseconds(30*1000))); // trying to match current Socket timeout
- }
-
- return requests;
- }
+ return requests;
+}
- bool ElectCmdRunner::Algorithm::hasReceivedSufficientResponses() const {
- if (_sufficientResponsesReceived) {
- return true;
- }
- if (_receivedVotes >= _rsConfig.getMajorityVoteCount()) {
- return true;
- }
- if (_receivedVotes < 0) {
- return true;
- }
- if (_actualResponses == _targets.size()) {
- return true;
- }
- return false;
+bool ElectCmdRunner::Algorithm::hasReceivedSufficientResponses() const {
+ if (_sufficientResponsesReceived) {
+ return true;
}
-
- void ElectCmdRunner::Algorithm::processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
-
- ++_actualResponses;
-
- if (response.isOK()) {
- BSONObj res = response.getValue().data;
- log() << "received " << res["vote"] << " votes from " << request.target;
- LOG(1) << "full elect res: " << res.toString();
- BSONElement vote(res["vote"]);
- if (vote.type() != mongo::NumberInt) {
- error() << "wrong type for vote argument in replSetElect command: " <<
- typeName(vote.type());
- _sufficientResponsesReceived = true;
- return;
- }
-
- _receivedVotes += vote._numberInt();
- }
- else {
- warning() << "elect command to " << request.target << " failed: " <<
- response.getStatus();
- }
+ if (_receivedVotes >= _rsConfig.getMajorityVoteCount()) {
+ return true;
}
-
- ElectCmdRunner::ElectCmdRunner() : _isCanceled(false) {}
- ElectCmdRunner::~ElectCmdRunner() {}
-
- StatusWith<ReplicationExecutor::EventHandle> ElectCmdRunner::start(
- ReplicationExecutor* executor,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion) {
-
- _algorithm.reset(new Algorithm(currentConfig, selfIndex, targets, OID::gen()));
- _runner.reset(new ScatterGatherRunner(_algorithm.get()));
- return _runner->start(executor, onCompletion);
+ if (_receivedVotes < 0) {
+ return true;
}
-
- void ElectCmdRunner::cancel(ReplicationExecutor* executor) {
- _isCanceled = true;
- _runner->cancel(executor);
+ if (_actualResponses == _targets.size()) {
+ return true;
}
+ return false;
+}
+
+void ElectCmdRunner::Algorithm::processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ ++_actualResponses;
+
+ if (response.isOK()) {
+ BSONObj res = response.getValue().data;
+ log() << "received " << res["vote"] << " votes from " << request.target;
+ LOG(1) << "full elect res: " << res.toString();
+ BSONElement vote(res["vote"]);
+ if (vote.type() != mongo::NumberInt) {
+ error() << "wrong type for vote argument in replSetElect command: "
+ << typeName(vote.type());
+ _sufficientResponsesReceived = true;
+ return;
+ }
- int ElectCmdRunner::getReceivedVotes() const {
- return _algorithm->getReceivedVotes();
+ _receivedVotes += vote._numberInt();
+ } else {
+ warning() << "elect command to " << request.target << " failed: " << response.getStatus();
}
-
-} // namespace repl
-} // namespace mongo
+}
+
+ElectCmdRunner::ElectCmdRunner() : _isCanceled(false) {}
+ElectCmdRunner::~ElectCmdRunner() {}
+
+StatusWith<ReplicationExecutor::EventHandle> ElectCmdRunner::start(
+ ReplicationExecutor* executor,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion) {
+ _algorithm.reset(new Algorithm(currentConfig, selfIndex, targets, OID::gen()));
+ _runner.reset(new ScatterGatherRunner(_algorithm.get()));
+ return _runner->start(executor, onCompletion);
+}
+
+void ElectCmdRunner::cancel(ReplicationExecutor* executor) {
+ _isCanceled = true;
+ _runner->cancel(executor);
+}
+
+int ElectCmdRunner::getReceivedVotes() const {
+ return _algorithm->getReceivedVotes();
+}
+
+} // namespace repl
+} // namespace mongo
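
For intuition on the early returns in hasReceivedSufficientResponses(): with
three voting members, getMajorityVoteCount() is 2, so a single yea reply wins
the election immediately, and a single nay reply (vote: -10000) drives the
tally negative and ends the scatter-gather just as fast. The same arithmetic
in isolation (plain C++; the constants mirror the tests further below):

    int receivedVotes = 1;   // voted for ourselves first (getNumVotes() == 1)
    const int majority = 2;  // majority vote count of a three-voter set
    receivedVotes += 1;      // one remote "vote: 1" reply
    bool done = (receivedVotes >= majority) || (receivedVotes < 0);  // won
    // A "vote: -10000" reply instead gives 1 - 10000 < 0: done, and lost.
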
diff --git a/src/mongo/db/repl/elect_cmd_runner.h b/src/mongo/db/repl/elect_cmd_runner.h
index 2a9a7bab435..21295991f26 100644
--- a/src/mongo/db/repl/elect_cmd_runner.h
+++ b/src/mongo/db/repl/elect_cmd_runner.h
@@ -38,88 +38,91 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- class ReplicaSetConfig;
- class ScatterGatherRunner;
+class ReplicaSetConfig;
+class ScatterGatherRunner;
- class ElectCmdRunner {
- MONGO_DISALLOW_COPYING(ElectCmdRunner);
+class ElectCmdRunner {
+ MONGO_DISALLOW_COPYING(ElectCmdRunner);
+
+public:
+ class Algorithm : public ScatterGatherAlgorithm {
public:
- class Algorithm : public ScatterGatherAlgorithm {
- public:
- Algorithm(const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- OID round);
-
- virtual ~Algorithm();
- virtual std::vector<RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response);
- virtual bool hasReceivedSufficientResponses() const;
-
- int getReceivedVotes() const { return _receivedVotes; }
-
- private:
- // Tally of the number of received votes for this election.
- int _receivedVotes;
-
- // Number of responses received so far.
- size_t _actualResponses;
-
- bool _sufficientResponsesReceived;
-
- const ReplicaSetConfig _rsConfig;
- const int _selfIndex;
- const std::vector<HostAndPort> _targets;
- const OID _round;
- };
-
- ElectCmdRunner();
- ~ElectCmdRunner();
-
- /**
- * Begins the process of sending replSetElect commands to all non-DOWN nodes
- * in currentConfig.
- *
- * Returned handle can be used to schedule a callback when the process is complete.
- */
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
-
- /**
- * Informs the ElectCmdRunner to cancel further processing. The "executor"
- * argument must point to the same executor passed to "start()".
- *
- * Like start(), this method must run in the executor context.
- */
- void cancel(ReplicationExecutor* executor);
-
- /**
- * Returns the number of received votes. Only valid to call after
- * the event handle returned from start() has been signaled, which guarantees that
- * the vote count will no longer be touched by callbacks.
- */
- int getReceivedVotes() const;
-
- /**
- * Returns true if cancel() was called on this instance.
- */
- bool isCanceled() const { return _isCanceled; }
+ Algorithm(const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ OID round);
+
+ virtual ~Algorithm();
+ virtual std::vector<RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response);
+ virtual bool hasReceivedSufficientResponses() const;
+
+ int getReceivedVotes() const {
+ return _receivedVotes;
+ }
private:
- std::unique_ptr<Algorithm> _algorithm;
- std::unique_ptr<ScatterGatherRunner> _runner;
- bool _isCanceled;
+ // Tally of the number of received votes for this election.
+ int _receivedVotes;
+
+ // Number of responses received so far.
+ size_t _actualResponses;
+
+ bool _sufficientResponsesReceived;
+
+ const ReplicaSetConfig _rsConfig;
+ const int _selfIndex;
+ const std::vector<HostAndPort> _targets;
+ const OID _round;
};
+ ElectCmdRunner();
+ ~ElectCmdRunner();
+
+ /**
+ * Begins the process of sending replSetElect commands to all non-DOWN nodes
+ * in currentConfig.
+ *
+ * Returned handle can be used to schedule a callback when the process is complete.
+ */
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the ElectCmdRunner to cancel further processing. The "executor"
+ * argument must point to the same executor passed to "start()".
+ *
+ * Like start(), this method must run in the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+ /**
+ * Returns the number of received votes. Only valid to call after
+ * the event handle returned from start() has been signaled, which guarantees that
+ * the vote count will no longer be touched by callbacks.
+ */
+ int getReceivedVotes() const;
+
+ /**
+ * Returns true if cancel() was called on this instance.
+ */
+ bool isCanceled() const {
+ return _isCanceled;
+ }
+
+private:
+ std::unique_ptr<Algorithm> _algorithm;
+ std::unique_ptr<ScatterGatherRunner> _runner;
+ bool _isCanceled;
+};
}
}
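
Read together, the contract above is: call start() from a callback running in
the executor, wait on the returned event handle, and only then read
getReceivedVotes(). A condensed sketch; 'executor', 'config', 'selfIndex' and
'hosts' stand for values set up as in the test fixture below:

    ElectCmdRunner electCmdRunner;
    // Inside an executor callback (see electCmdRunnerRunner below):
    StatusWith<ReplicationExecutor::EventHandle> evh =
        electCmdRunner.start(executor, config, selfIndex, hosts);
    ASSERT_OK(evh.getStatus());
    // Back on the test thread, once the handle is known to be valid:
    executor->waitForEvent(evh.getValue());
    int votes = electCmdRunner.getReceivedVotes();  // now stable
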
diff --git a/src/mongo/db/repl/elect_cmd_runner_test.cpp b/src/mongo/db/repl/elect_cmd_runner_test.cpp
index c029994f30d..91f2bbaea5a 100644
--- a/src/mongo/db/repl/elect_cmd_runner_test.cpp
+++ b/src/mongo/db/repl/elect_cmd_runner_test.cpp
@@ -47,379 +47,369 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- class ElectCmdRunnerTest : public mongo::unittest::Test {
- public:
- void startTest(ElectCmdRunner* electCmdRunner,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
-
- void waitForTest();
-
- void electCmdRunnerRunner(const ReplicationExecutor::CallbackArgs& data,
- ElectCmdRunner* electCmdRunner,
- StatusWith<ReplicationExecutor::EventHandle>* evh,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
-
- NetworkInterfaceMock* _net;
- StorageInterfaceMock* _storage;
- std::unique_ptr<ReplicationExecutor> _executor;
- std::unique_ptr<stdx::thread> _executorThread;
-
- private:
- void setUp();
- void tearDown();
-
- ReplicationExecutor::EventHandle _allDoneEvent;
- };
-
- void ElectCmdRunnerTest::setUp() {
- _net = new NetworkInterfaceMock;
- _storage = new StorageInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng seed */));
- _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
- }
-
- void ElectCmdRunnerTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+using executor::NetworkInterfaceMock;
+
+class ElectCmdRunnerTest : public mongo::unittest::Test {
+public:
+ void startTest(ElectCmdRunner* electCmdRunner,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+
+ void waitForTest();
+
+ void electCmdRunnerRunner(const ReplicationExecutor::CallbackArgs& data,
+ ElectCmdRunner* electCmdRunner,
+ StatusWith<ReplicationExecutor::EventHandle>* evh,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+
+ NetworkInterfaceMock* _net;
+ StorageInterfaceMock* _storage;
+ std::unique_ptr<ReplicationExecutor> _executor;
+ std::unique_ptr<stdx::thread> _executorThread;
+
+private:
+ void setUp();
+ void tearDown();
+
+ ReplicationExecutor::EventHandle _allDoneEvent;
+};
+
+void ElectCmdRunnerTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _storage = new StorageInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng seed */));
+ _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+}
+
+void ElectCmdRunnerTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+
+const BSONObj makeElectRequest(const ReplicaSetConfig& rsConfig, int selfIndex) {
+ const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
+ return BSON("replSetElect" << 1 << "set" << rsConfig.getReplSetName() << "who"
+ << myConfig.getHostAndPort().toString() << "whoid"
+ << myConfig.getId() << "cfgver" << rsConfig.getConfigVersion()
+ << "round" << 380865962699346850ll);
+}
+
+BSONObj stripRound(const BSONObj& orig) {
+ BSONObjBuilder builder;
+ for (BSONObjIterator iter(orig); iter.more(); iter.next()) {
+ BSONElement e = *iter;
+ if (e.fieldNameStringData() == "round") {
+ continue;
+ }
+ builder.append(e);
}
+ return builder.obj();
+}
+
+// This is necessary because the run method must be scheduled in the Replication Executor
+// for correct concurrency operation.
+void ElectCmdRunnerTest::electCmdRunnerRunner(const ReplicationExecutor::CallbackArgs& data,
+ ElectCmdRunner* electCmdRunner,
+ StatusWith<ReplicationExecutor::EventHandle>* evh,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ invariant(data.status.isOK());
+ ReplicationExecutor* executor = dynamic_cast<ReplicationExecutor*>(data.executor);
+ ASSERT(executor);
+ *evh = electCmdRunner->start(executor, currentConfig, selfIndex, hosts);
+}
+
+void ElectCmdRunnerTest::startTest(ElectCmdRunner* electCmdRunner,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
+ StatusWith<ReplicationExecutor::CallbackHandle> cbh =
+ _executor->scheduleWork(stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
+ this,
+ stdx::placeholders::_1,
+ electCmdRunner,
+ &evh,
+ currentConfig,
+ selfIndex,
+ hosts));
+ ASSERT_OK(cbh.getStatus());
+ _executor->wait(cbh.getValue());
+ ASSERT_OK(evh.getStatus());
+ _allDoneEvent = evh.getValue();
+}
+
+void ElectCmdRunnerTest::waitForTest() {
+ _executor->waitForEvent(_allDoneEvent);
+}
+
+TEST_F(ElectCmdRunnerTest, OneNode) {
+ // Only one node in the config.
+ const ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ ElectCmdRunner electCmdRunner;
+ startTest(&electCmdRunner, config, 0, hosts);
+ waitForTest();
+ ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+}
+
+TEST_F(ElectCmdRunnerTest, TwoNodes) {
+ // Two nodes, we are node h1.
+ const ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj electRequest = makeElectRequest(config, 0);
+
+ ElectCmdRunner electCmdRunner;
+ startTest(&electCmdRunner, config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(stripRound(electRequest), stripRound(noi->getRequest().cmdObj));
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(
+ BSON("ok" << 1 << "vote" << 1 << "round" << 380865962699346850ll), Milliseconds(8))));
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitForTest();
+ ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 2);
+}
+
+TEST_F(ElectCmdRunnerTest, ShuttingDown) {
+ // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ ElectCmdRunner electCmdRunner;
+ StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
+ StatusWith<ReplicationExecutor::CallbackHandle> cbh =
+ _executor->scheduleWork(stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
+ this,
+ stdx::placeholders::_1,
+ &electCmdRunner,
+ &evh,
+ config,
+ 0,
+ hosts));
+ ASSERT_OK(cbh.getStatus());
+ _executor->wait(cbh.getValue());
+ ASSERT_OK(evh.getStatus());
+ _executor->shutdown();
+ _executor->waitForEvent(evh.getValue());
+ ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+}
+
+class ElectScatterGatherTest : public mongo::unittest::Test {
+public:
+ virtual void start(const BSONObj& configObj) {
+ int selfConfigIndex = 0;
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
- }
+ config.initialize(configObj);
- const BSONObj makeElectRequest(const ReplicaSetConfig& rsConfig,
- int selfIndex) {
- const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
- return BSON("replSetElect" << 1 <<
- "set" << rsConfig.getReplSetName() <<
- "who" << myConfig.getHostAndPort().toString() <<
- "whoid" << myConfig.getId() <<
- "cfgver" << rsConfig.getConfigVersion() <<
- "round" << 380865962699346850ll);
- }
-
- BSONObj stripRound(const BSONObj& orig) {
- BSONObjBuilder builder;
- for (BSONObjIterator iter(orig); iter.more(); iter.next()) {
- BSONElement e = *iter;
- if (e.fieldNameStringData() == "round") {
- continue;
- }
- builder.append(e);
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
+ mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- return builder.obj();
- }
- // This is necessary because the run method must be scheduled in the Replication Executor
- // for correct concurrency operation.
- void ElectCmdRunnerTest::electCmdRunnerRunner(
- const ReplicationExecutor::CallbackArgs& data,
- ElectCmdRunner* electCmdRunner,
- StatusWith<ReplicationExecutor::EventHandle>* evh,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
-
- invariant(data.status.isOK());
- ReplicationExecutor* executor = dynamic_cast<ReplicationExecutor*>(data.executor);
- ASSERT(executor);
- *evh = electCmdRunner->start(
- executor,
- currentConfig,
- selfIndex,
- hosts);
+ _checker.reset(new ElectCmdRunner::Algorithm(config, selfConfigIndex, hosts, OID()));
}
- void ElectCmdRunnerTest::startTest(ElectCmdRunner* electCmdRunner,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
-
- StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
- StatusWith<ReplicationExecutor::CallbackHandle> cbh =
- _executor->scheduleWork(
- stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
- this,
- stdx::placeholders::_1,
- electCmdRunner,
- &evh,
- currentConfig,
- selfIndex,
- hosts));
- ASSERT_OK(cbh.getStatus());
- _executor->wait(cbh.getValue());
- ASSERT_OK(evh.getStatus());
- _allDoneEvent = evh.getValue();
+ virtual void tearDown() {
+ _checker.reset(NULL);
}
- void ElectCmdRunnerTest::waitForTest() {
- _executor->waitForEvent(_allDoneEvent);
+protected:
+ bool hasReceivedSufficientResponses() {
+ return _checker->hasReceivedSufficientResponses();
}
- TEST_F(ElectCmdRunnerTest, OneNode) {
- // Only one node in the config.
- const ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- ElectCmdRunner electCmdRunner;
- startTest(&electCmdRunner, config, 0, hosts);
- waitForTest();
- ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+ int getReceivedVotes() {
+ return _checker->getReceivedVotes();
}
- TEST_F(ElectCmdRunnerTest, TwoNodes) {
- // Two nodes, we are node h1.
- const ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj electRequest = makeElectRequest(config, 0);
-
- ElectCmdRunner electCmdRunner;
- startTest(&electCmdRunner, config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(stripRound(electRequest), stripRound(noi->getRequest().cmdObj));
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1 <<
- "vote" << 1 <<
- "round" << 380865962699346850ll),
- Milliseconds(8))));
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitForTest();
- ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 2);
+ void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
+ _checker->processResponse(request, response);
}
- TEST_F(ElectCmdRunnerTest, ShuttingDown) {
- // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- ElectCmdRunner electCmdRunner;
- StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
- StatusWith<ReplicationExecutor::CallbackHandle> cbh =
- _executor->scheduleWork(
- stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
- this,
- stdx::placeholders::_1,
- &electCmdRunner,
- &evh,
- config,
- 0,
- hosts));
- ASSERT_OK(cbh.getStatus());
- _executor->wait(cbh.getValue());
- ASSERT_OK(evh.getStatus());
- _executor->shutdown();
- _executor->waitForEvent(evh.getValue());
- ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+ RemoteCommandRequest requestFrom(std::string hostname) {
+ return RemoteCommandRequest(HostAndPort(hostname),
+ "", // the non-hostname fields do not matter for Elect
+ BSONObj(),
+ Milliseconds(0));
}
- class ElectScatterGatherTest : public mongo::unittest::Test {
- public:
- virtual void start(const BSONObj& configObj) {
- int selfConfigIndex = 0;
-
- ReplicaSetConfig config;
- config.initialize(configObj);
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
-
- _checker.reset(new ElectCmdRunner::Algorithm(config,
- selfConfigIndex,
- hosts,
- OID()));
- }
-
- virtual void tearDown() {
- _checker.reset(NULL);
- }
-
- protected:
- bool hasReceivedSufficientResponses() {
- return _checker->hasReceivedSufficientResponses();
- }
-
- int getReceivedVotes() {
- return _checker->getReceivedVotes();
- }
-
- void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
- _checker->processResponse(request, response);
- }
-
- RemoteCommandRequest requestFrom(std::string hostname) {
- return RemoteCommandRequest(HostAndPort(hostname),
- "", // the non-hostname fields do not matter for Elect
- BSONObj(),
- Milliseconds(0));
- }
-
- ResponseStatus badResponseStatus() {
- return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
- }
-
- ResponseStatus wrongTypeForVoteField() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << std::string("yea")),
- Milliseconds(10)));
- }
-
- ResponseStatus voteYea() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 1),
- Milliseconds(10)));
- }
-
- ResponseStatus voteNay() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << -10000),
- Milliseconds(10)));
- }
-
- ResponseStatus abstainFromVoting() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 0),
- Milliseconds(10)));
- }
-
- BSONObj threeNodesTwoArbitersConfig() {
- return BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1" << "arbiterOnly" << true) <<
- BSON("_id" << 2 << "host" << "host2" << "arbiterOnly" << true)));
- }
-
- BSONObj basicThreeNodeConfig() {
- return BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1") <<
- BSON("_id" << 2 << "host" << "host2")));
- }
-
- private:
- unique_ptr<ElectCmdRunner::Algorithm> _checker;
- };
-
- TEST_F(ElectScatterGatherTest, NodeRespondsWithBadVoteType) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), wrongTypeForVoteField());
- ASSERT_TRUE(hasReceivedSufficientResponses());
-        ASSERT_EQUALS(1, getReceivedVotes()); // 1 because we have 1 vote and voted for ourselves
+ ResponseStatus badResponseStatus() {
+ return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
}
- TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatus) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host3"), abstainFromVoting());
- ASSERT_TRUE(hasReceivedSufficientResponses());
-        ASSERT_EQUALS(1, getReceivedVotes()); // 1 because we have 1 vote and voted for ourselves
+ ResponseStatus wrongTypeForVoteField() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("vote" << std::string("yea")), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYea) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteYea());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(2, getReceivedVotes());
+ ResponseStatus voteYea() {
+ return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 1), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYea) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteNay());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(-9999, getReceivedVotes());
+ ResponseStatus voteNay() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("vote" << -10000), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, BothNodesAbstainFromVoting) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), abstainFromVoting());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host3"), abstainFromVoting());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, getReceivedVotes());
+ ResponseStatus abstainFromVoting() {
+ return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 0), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatusArbiters) {
- start(threeNodesTwoArbitersConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host3"), abstainFromVoting());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, getReceivedVotes()); // 1 because we have 1 vote and voted for ourself
+ BSONObj threeNodesTwoArbitersConfig() {
+ return BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "host2"
+ << "arbiterOnly" << true)));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYeaArbiters) {
- start(threeNodesTwoArbitersConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteYea());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(2, getReceivedVotes());
+ BSONObj basicThreeNodeConfig() {
+ return BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1") << BSON("_id" << 2 << "host"
+ << "host2")));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYeaArbiters) {
- start(threeNodesTwoArbitersConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteNay());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(-9999, getReceivedVotes());
- }
+private:
+ unique_ptr<ElectCmdRunner::Algorithm> _checker;
+};
+
+TEST_F(ElectScatterGatherTest, NodeRespondsWithBadVoteType) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), wrongTypeForVoteField());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+    ASSERT_EQUALS(1, getReceivedVotes());  // 1 because we have 1 vote and voted for ourselves
+}
+
+TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatus) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host3"), abstainFromVoting());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+    ASSERT_EQUALS(1, getReceivedVotes());  // 1 because we have 1 vote and voted for ourselves
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYea) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteYea());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(2, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYea) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteNay());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(-9999, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, BothNodesAbstainFromVoting) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), abstainFromVoting());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host3"), abstainFromVoting());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatusArbiters) {
+ start(threeNodesTwoArbitersConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host3"), abstainFromVoting());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+    ASSERT_EQUALS(1, getReceivedVotes());  // 1 because we have 1 vote and voted for ourselves
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYeaArbiters) {
+ start(threeNodesTwoArbitersConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteYea());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(2, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYeaArbiters) {
+ start(threeNodesTwoArbitersConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteNay());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(-9999, getReceivedVotes());
+}
} // namespace
} // namespace repl
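
The enterNetwork()/exitNetwork() bracketing in TwoNodes above is the standard
NetworkInterfaceMock protocol and is worth reading as a unit. Condensed, with
'when' and 'responseObj' as illustrative stand-ins:

    _net->enterNetwork();
    auto noi = _net->getNextReadyRequest();  // the pending replSetElect
    _net->scheduleResponse(
        noi,
        when,
        ResponseStatus(RemoteCommandResponse(responseObj, Milliseconds(8))));
    _net->runUntil(when);  // advance the mock clock; the reply is delivered
    _net->exitNetwork();
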
diff --git a/src/mongo/db/repl/election_winner_declarer.cpp b/src/mongo/db/repl/election_winner_declarer.cpp
index 04175335cbd..55016dc373a 100644
--- a/src/mongo/db/repl/election_winner_declarer.cpp
+++ b/src/mongo/db/repl/election_winner_declarer.cpp
@@ -41,86 +41,80 @@
namespace mongo {
namespace repl {
- ElectionWinnerDeclarer::Algorithm::Algorithm(const std::string& setName,
- long long winnerId,
- long long term,
- const std::vector<HostAndPort>& targets) :
- _setName(setName),
- _winnerId(winnerId),
- _term(term),
- _targets(targets) {}
-
- ElectionWinnerDeclarer::Algorithm::~Algorithm() {}
-
- std::vector<RemoteCommandRequest>
- ElectionWinnerDeclarer::Algorithm::getRequests() const {
- BSONObjBuilder declareElectionWinnerCmdBuilder;
- declareElectionWinnerCmdBuilder.append("replSetDeclareElectionWinner", 1);
- declareElectionWinnerCmdBuilder.append("setName", _setName);
- declareElectionWinnerCmdBuilder.append("winnerId", _winnerId);
- declareElectionWinnerCmdBuilder.append("term", _term);
- const BSONObj declareElectionWinnerCmd = declareElectionWinnerCmdBuilder.obj();
-
- std::vector<RemoteCommandRequest> requests;
- for (const auto& target : _targets) {
- requests.push_back(RemoteCommandRequest(
- target,
- "admin",
- declareElectionWinnerCmd,
- Milliseconds(30*1000))); // trying to match current Socket timeout
- }
-
- return requests;
+ElectionWinnerDeclarer::Algorithm::Algorithm(const std::string& setName,
+ long long winnerId,
+ long long term,
+ const std::vector<HostAndPort>& targets)
+ : _setName(setName), _winnerId(winnerId), _term(term), _targets(targets) {}
+
+ElectionWinnerDeclarer::Algorithm::~Algorithm() {}
+
+std::vector<RemoteCommandRequest> ElectionWinnerDeclarer::Algorithm::getRequests() const {
+ BSONObjBuilder declareElectionWinnerCmdBuilder;
+ declareElectionWinnerCmdBuilder.append("replSetDeclareElectionWinner", 1);
+ declareElectionWinnerCmdBuilder.append("setName", _setName);
+ declareElectionWinnerCmdBuilder.append("winnerId", _winnerId);
+ declareElectionWinnerCmdBuilder.append("term", _term);
+ const BSONObj declareElectionWinnerCmd = declareElectionWinnerCmdBuilder.obj();
+
+ std::vector<RemoteCommandRequest> requests;
+ for (const auto& target : _targets) {
+ requests.push_back(RemoteCommandRequest(
+ target,
+ "admin",
+ declareElectionWinnerCmd,
+ Milliseconds(30 * 1000))); // trying to match current Socket timeout
}
- void ElectionWinnerDeclarer::Algorithm::processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
- _responsesProcessed++;
- if (!response.isOK()) { // failed response
- log() << "ElectionWinnerDeclarer: Got failed response from " << request.target
- << ": " << response.getStatus();
- return;
- }
-
- Status cmdResponseStatus = getStatusFromCommandResult(response.getValue().data);
- if (!cmdResponseStatus.isOK()) { // disagreement response
- _failed = true;
- _status = cmdResponseStatus;
- log() << "ElectionWinnerDeclarer: Got error response from " << request.target
- << " with term: " << response.getValue().data["term"].Number()
- << " and error: " << cmdResponseStatus;
- }
- }
-
- bool ElectionWinnerDeclarer::Algorithm::hasReceivedSufficientResponses() const {
- return _failed || _responsesProcessed == static_cast<int>(_targets.size());
- }
-
- ElectionWinnerDeclarer::ElectionWinnerDeclarer() : _isCanceled(false) {}
- ElectionWinnerDeclarer::~ElectionWinnerDeclarer() {}
+ return requests;
+}
- StatusWith<ReplicationExecutor::EventHandle> ElectionWinnerDeclarer::start(
- ReplicationExecutor* executor,
- const std::string& setName,
- long long winnerId,
- long long term,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion) {
-
- _algorithm.reset(new Algorithm(setName, winnerId, term, targets));
- _runner.reset(new ScatterGatherRunner(_algorithm.get()));
- return _runner->start(executor, onCompletion);
- }
-
- void ElectionWinnerDeclarer::cancel(ReplicationExecutor* executor) {
- _isCanceled = true;
- _runner->cancel(executor);
+void ElectionWinnerDeclarer::Algorithm::processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ _responsesProcessed++;
+ if (!response.isOK()) { // failed response
+ log() << "ElectionWinnerDeclarer: Got failed response from " << request.target << ": "
+ << response.getStatus();
+ return;
}
- Status ElectionWinnerDeclarer::getStatus() const {
- return _algorithm->getStatus();
+ Status cmdResponseStatus = getStatusFromCommandResult(response.getValue().data);
+ if (!cmdResponseStatus.isOK()) { // disagreement response
+ _failed = true;
+ _status = cmdResponseStatus;
+ log() << "ElectionWinnerDeclarer: Got error response from " << request.target
+ << " with term: " << response.getValue().data["term"].Number()
+ << " and error: " << cmdResponseStatus;
}
-
-} // namespace repl
-} // namespace mongo
+}
+
+bool ElectionWinnerDeclarer::Algorithm::hasReceivedSufficientResponses() const {
+ return _failed || _responsesProcessed == static_cast<int>(_targets.size());
+}
+
+ElectionWinnerDeclarer::ElectionWinnerDeclarer() : _isCanceled(false) {}
+ElectionWinnerDeclarer::~ElectionWinnerDeclarer() {}
+
+StatusWith<ReplicationExecutor::EventHandle> ElectionWinnerDeclarer::start(
+ ReplicationExecutor* executor,
+ const std::string& setName,
+ long long winnerId,
+ long long term,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion) {
+ _algorithm.reset(new Algorithm(setName, winnerId, term, targets));
+ _runner.reset(new ScatterGatherRunner(_algorithm.get()));
+ return _runner->start(executor, onCompletion);
+}
+
+void ElectionWinnerDeclarer::cancel(ReplicationExecutor* executor) {
+ _isCanceled = true;
+ _runner->cancel(executor);
+}
+
+Status ElectionWinnerDeclarer::getStatus() const {
+ return _algorithm->getStatus();
+}
+
+} // namespace repl
+} // namespace mongo
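
Two properties of this algorithm are easy to miss in the diff. First, every
target receives the same command; its shape, with illustrative values matching
the tests below, is:

    BSON("replSetDeclareElectionWinner" << 1 << "setName"
                                        << "rs0"  // illustrative
                                        << "winnerId" << 0LL << "term" << 1LL)

Second, only a well-formed disagreement (ok: 0) sets _failed; transport-level
failures are merely logged and counted, so a run can still finish with
Status::OK() even if some nodes never respond (the
FinishWithOnlyMissingResponses test below pins this down).
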
diff --git a/src/mongo/db/repl/election_winner_declarer.h b/src/mongo/db/repl/election_winner_declarer.h
index 9745ed4ce20..8c5fa995880 100644
--- a/src/mongo/db/repl/election_winner_declarer.h
+++ b/src/mongo/db/repl/election_winner_declarer.h
@@ -39,89 +39,90 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- class ScatterGatherRunner;
- class ReplSetDeclareElectionWinnerArgs;
+class ScatterGatherRunner;
+class ReplSetDeclareElectionWinnerArgs;
- class ElectionWinnerDeclarer {
- MONGO_DISALLOW_COPYING(ElectionWinnerDeclarer);
- public:
-
- class Algorithm : public ScatterGatherAlgorithm {
- public:
- Algorithm(const std::string& setName,
- long long winnerId,
- long long term,
- const std::vector<HostAndPort>& targets);
- virtual ~Algorithm();
- virtual std::vector<RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response);
- virtual bool hasReceivedSufficientResponses() const;
-
- /**
- * Returns a Status indicating what if anything went wrong while declaring the
- * election winner.
- *
-             * It is invalid to call this before hasReceivedSufficientResponses returns true.
- */
- Status getStatus() const { return _status; }
-
- private:
- const std::string _setName;
- const long long _winnerId;
- const long long _term;
- const std::vector<HostAndPort> _targets;
- bool _failed = false;
- long long _responsesProcessed = 0;
- Status _status = Status::OK();
- };
-
- ElectionWinnerDeclarer();
- virtual ~ElectionWinnerDeclarer();
-
- /**
- * Begins the process of sending replSetDeclareElectionWinner commands to all non-DOWN nodes
- * in currentConfig, with the intention of alerting them of a new primary.
- *
- * evh can be used to schedule a callback when the process is complete.
- * This function must be run in the executor, as it must be synchronous with the command
- * callbacks that it schedules.
- * If this function returns Status::OK(), evh is then guaranteed to be signaled.
- **/
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const std::string& setName,
- long long winnerId,
- long long term,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
+class ElectionWinnerDeclarer {
+ MONGO_DISALLOW_COPYING(ElectionWinnerDeclarer);
- /**
- * Informs the ElectionWinnerDeclarer to cancel further processing. The "executor"
- * argument must point to the same executor passed to "start()".
- *
- * Like start(), this method must run in the executor context.
- */
- void cancel(ReplicationExecutor* executor);
+public:
+ class Algorithm : public ScatterGatherAlgorithm {
+ public:
+ Algorithm(const std::string& setName,
+ long long winnerId,
+ long long term,
+ const std::vector<HostAndPort>& targets);
+ virtual ~Algorithm();
+ virtual std::vector<RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response);
+ virtual bool hasReceivedSufficientResponses() const;
/**
- * Returns a Status from the ElectionWinnerDeclarer::algorithm which indicates what
- * if anything went wrong while declaring the election winner.
+ * Returns a Status indicating what if anything went wrong while declaring the
+ * election winner.
*
- * It is invalid to call this before the ElectionWinnerDeclarer::algorithm finishes running.
+         * It is invalid to call this before hasReceivedSufficientResponses returns true.
*/
- Status getStatus() const;
+ Status getStatus() const {
+ return _status;
+ }
private:
- std::unique_ptr<Algorithm> _algorithm;
- std::unique_ptr<ScatterGatherRunner> _runner;
- bool _isCanceled = false;
+ const std::string _setName;
+ const long long _winnerId;
+ const long long _term;
+ const std::vector<HostAndPort> _targets;
+ bool _failed = false;
+ long long _responsesProcessed = 0;
+ Status _status = Status::OK();
};
+ ElectionWinnerDeclarer();
+ virtual ~ElectionWinnerDeclarer();
+
+ /**
+ * Begins the process of sending replSetDeclareElectionWinner commands to all non-DOWN nodes
+ * in currentConfig, with the intention of alerting them of a new primary.
+ *
+ * evh can be used to schedule a callback when the process is complete.
+ * This function must be run in the executor, as it must be synchronous with the command
+ * callbacks that it schedules.
+ * If this function returns Status::OK(), evh is then guaranteed to be signaled.
+ **/
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const std::string& setName,
+ long long winnerId,
+ long long term,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the ElectionWinnerDeclarer to cancel further processing. The "executor"
+ * argument must point to the same executor passed to "start()".
+ *
+ * Like start(), this method must run in the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+ /**
+ * Returns a Status from the ElectionWinnerDeclarer::algorithm which indicates what
+ * if anything went wrong while declaring the election winner.
+ *
+ * It is invalid to call this before the ElectionWinnerDeclarer::algorithm finishes running.
+ */
+ Status getStatus() const;
+
+private:
+ std::unique_ptr<Algorithm> _algorithm;
+ std::unique_ptr<ScatterGatherRunner> _runner;
+ bool _isCanceled = false;
+};
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/election_winner_declarer_test.cpp b/src/mongo/db/repl/election_winner_declarer_test.cpp
index dfb278f69a1..04177148453 100644
--- a/src/mongo/db/repl/election_winner_declarer_test.cpp
+++ b/src/mongo/db/repl/election_winner_declarer_test.cpp
@@ -44,190 +44,178 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
- using unittest::assertGet;
+using executor::NetworkInterfaceMock;
+using unittest::assertGet;
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+
+class ElectionWinnerDeclarerTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ std::string setName = "rs0";
+ long long winnerId = 0;
+ long long term = 1;
+ std::vector<HostAndPort> hosts = {
+ HostAndPort("host0"), HostAndPort("host1"), HostAndPort("host2")};
+
+ _declarer.reset(new ElectionWinnerDeclarer::Algorithm(setName, winnerId, term, hosts));
+ }
+
+ virtual void tearDown() {
+ _declarer.reset(NULL);
+ }
+
+protected:
+ int64_t countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
}
+ bool hasReceivedSufficientResponses() {
+ return _declarer->hasReceivedSufficientResponses();
+ }
- class ElectionWinnerDeclarerTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- std::string setName = "rs0";
- long long winnerId = 0;
- long long term = 1;
- std::vector<HostAndPort> hosts = {HostAndPort("host0"),
- HostAndPort("host1"),
- HostAndPort("host2")};
-
- _declarer.reset(new ElectionWinnerDeclarer::Algorithm(setName,
- winnerId,
- term,
- hosts));
- }
-
- virtual void tearDown() {
- _declarer.reset(NULL);
- }
-
- protected:
- int64_t countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
-
- bool hasReceivedSufficientResponses() {
- return _declarer->hasReceivedSufficientResponses();
- }
-
- Status getStatus() {
- return _declarer->getStatus();
- }
-
- void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
- _declarer->processResponse(request, response);
- }
-
- RemoteCommandRequest requestFrom(std::string hostname) {
- return RemoteCommandRequest(HostAndPort(hostname),
- "", // fields do not matter in ElectionWinnerDeclarer
- BSONObj(),
- Milliseconds(0));
- }
-
- ResponseStatus badResponseStatus() {
- return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
- }
-
- ResponseStatus staleTermResponse() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 0
- << "code" << ErrorCodes::BadValue
- << "errmsg"
- << "term has already passed"
- << "term" << 3),
- Milliseconds(10)));
- }
-
- ResponseStatus alreadyAnotherPrimaryResponse() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 0
- << "code" << ErrorCodes::BadValue
- << "errmsg"
- << "term already has a primary"
- << "term" << 1),
- Milliseconds(10)));
- }
-
- ResponseStatus differentConfigVersionResponse() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 0
- << "code" << ErrorCodes::BadValue
- << "errmsg"
- << "config version does not match"
- << "term" << 1),
- Milliseconds(10)));
- }
-
- ResponseStatus differentSetNameResponse() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 0
- << "code" << ErrorCodes::BadValue
- << "errmsg"
- << "replSet name does not match"
- << "term" << 1),
- Milliseconds(10)));
- }
-
- ResponseStatus goodResponse() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1
- << "term" << 1),
- Milliseconds(10)));
- }
-
- private:
- unique_ptr<ElectionWinnerDeclarer::Algorithm> _declarer;
-
- };
-
- TEST_F(ElectionWinnerDeclarerTest, FinishWithOnlyGoodResponses) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host0"), goodResponse());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), goodResponse());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), goodResponse());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_OK(getStatus());
+ Status getStatus() {
+ return _declarer->getStatus();
}
- TEST_F(ElectionWinnerDeclarerTest, FailedDueToStaleTerm) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host0"), goodResponse());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), staleTermResponse());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got error response from host1"));
- stopCapturingLogMessages();
- ASSERT_EQUALS(getStatus().reason(), "term has already passed");
+ void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
+ _declarer->processResponse(request, response);
}
- TEST_F(ElectionWinnerDeclarerTest, FailedDueToAnotherPrimary) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host0"), goodResponse());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), alreadyAnotherPrimaryResponse());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got error response from host1"));
- stopCapturingLogMessages();
- ASSERT_EQUALS(getStatus().reason(), "term already has a primary");
+ RemoteCommandRequest requestFrom(std::string hostname) {
+ return RemoteCommandRequest(HostAndPort(hostname),
+ "", // fields do not matter in ElectionWinnerDeclarer
+ BSONObj(),
+ Milliseconds(0));
}
- TEST_F(ElectionWinnerDeclarerTest, FailedDueToDifferentSetName) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host0"), goodResponse());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), differentSetNameResponse());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got error response from host1"));
- stopCapturingLogMessages();
- ASSERT_EQUALS(getStatus().reason(), "replSet name does not match");
+ ResponseStatus badResponseStatus() {
+ return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
}
- TEST_F(ElectionWinnerDeclarerTest, FinishWithOnlyGoodResponsesAndMissingNode) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host0"), goodResponse());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), goodResponse());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
- stopCapturingLogMessages();
- ASSERT_OK(getStatus());
+ ResponseStatus staleTermResponse() {
+ return ResponseStatus(NetworkInterfaceMock::Response(
+ BSON("ok" << 0 << "code" << ErrorCodes::BadValue << "errmsg"
+ << "term has already passed"
+ << "term" << 3),
+ Milliseconds(10)));
}
- TEST_F(ElectionWinnerDeclarerTest, FinishWithOnlyMissingResponses) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host0"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host0"));
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host2"));
- stopCapturingLogMessages();
- ASSERT_OK(getStatus());
+ ResponseStatus alreadyAnotherPrimaryResponse() {
+ return ResponseStatus(NetworkInterfaceMock::Response(
+ BSON("ok" << 0 << "code" << ErrorCodes::BadValue << "errmsg"
+ << "term already has a primary"
+ << "term" << 1),
+ Milliseconds(10)));
}
+ ResponseStatus differentConfigVersionResponse() {
+ return ResponseStatus(NetworkInterfaceMock::Response(
+ BSON("ok" << 0 << "code" << ErrorCodes::BadValue << "errmsg"
+ << "config version does not match"
+ << "term" << 1),
+ Milliseconds(10)));
+ }
+
+ ResponseStatus differentSetNameResponse() {
+ return ResponseStatus(NetworkInterfaceMock::Response(
+ BSON("ok" << 0 << "code" << ErrorCodes::BadValue << "errmsg"
+ << "replSet name does not match"
+ << "term" << 1),
+ Milliseconds(10)));
+ }
+
+ ResponseStatus goodResponse() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("ok" << 1 << "term" << 1), Milliseconds(10)));
+ }
+
+private:
+ unique_ptr<ElectionWinnerDeclarer::Algorithm> _declarer;
+};
+
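+// The tests below drive ElectionWinnerDeclarer::Algorithm directly through the
+// fixture above, feeding it canned responses; no executor or mock network is needed.
+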
+TEST_F(ElectionWinnerDeclarerTest, FinishWithOnlyGoodResponses) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host0"), goodResponse());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), goodResponse());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), goodResponse());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_OK(getStatus());
+}
+
+TEST_F(ElectionWinnerDeclarerTest, FailedDueToStaleTerm) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host0"), goodResponse());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), staleTermResponse());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got error response from host1"));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(getStatus().reason(), "term has already passed");
+}
+
+TEST_F(ElectionWinnerDeclarerTest, FailedDueToAnotherPrimary) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host0"), goodResponse());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), alreadyAnotherPrimaryResponse());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got error response from host1"));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(getStatus().reason(), "term already has a primary");
+}
+
+TEST_F(ElectionWinnerDeclarerTest, FailedDueToDifferentSetName) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host0"), goodResponse());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), differentSetNameResponse());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got error response from host1"));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(getStatus().reason(), "replSet name does not match");
+}
+
+TEST_F(ElectionWinnerDeclarerTest, FinishWithOnlyGoodResponsesAndMissingNode) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host0"), goodResponse());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), goodResponse());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
+ stopCapturingLogMessages();
+ ASSERT_OK(getStatus());
+}
+
+TEST_F(ElectionWinnerDeclarerTest, FinishWithOnlyMissingResponses) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host0"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host0"));
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host2"));
+ stopCapturingLogMessages();
+ ASSERT_OK(getStatus());
+}
+
} // namespace
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/freshness_checker.cpp b/src/mongo/db/repl/freshness_checker.cpp
index 17a501f2ce4..ca1d665dd81 100644
--- a/src/mongo/db/repl/freshness_checker.cpp
+++ b/src/mongo/db/repl/freshness_checker.cpp
@@ -46,192 +46,178 @@
namespace mongo {
namespace repl {
- FreshnessChecker::Algorithm::Algorithm(
- Timestamp lastOpTimeApplied,
- const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets) :
- _responsesProcessed(0),
- _failedVoterResponses(0),
- _lastOpTimeApplied(lastOpTimeApplied),
- _rsConfig(rsConfig),
- _selfIndex(selfIndex),
- _targets(targets),
- _votingTargets(0),
- _losableVoters(0),
- _myVote(0),
- _abortReason(None) {
-
- // Count voting targets (since the targets could be a subset of members).
- for (std::vector<HostAndPort>::const_iterator it = _targets.begin();
- it != _targets.end();
- ++it) {
- const MemberConfig* member = _rsConfig.findMemberByHostAndPort(*it);
- if (member && member->isVoter())
- ++_votingTargets;
- }
-
- _myVote = _rsConfig.getMemberAt(_selfIndex).isVoter() ? 1 : 0;
- _losableVoters = std::max(0,
- ((_votingTargets + _myVote) - _rsConfig.getMajorityVoteCount()));
-
- }
-
- FreshnessChecker::Algorithm::~Algorithm() {}
-
- std::vector<RemoteCommandRequest>
- FreshnessChecker::Algorithm::getRequests() const {
- const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
-
- // gather all not-down nodes, get their fullnames(or hostandport's)
- // schedule fresh command for each node
- BSONObjBuilder freshCmdBuilder;
- freshCmdBuilder.append("replSetFresh", 1);
- freshCmdBuilder.append("set", _rsConfig.getReplSetName());
- freshCmdBuilder.append("opTime", Date_t::fromMillisSinceEpoch(_lastOpTimeApplied.asLL()));
- freshCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
- freshCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
- freshCmdBuilder.append("id", selfConfig.getId());
- const BSONObj replSetFreshCmd = freshCmdBuilder.obj();
-
- std::vector<RemoteCommandRequest> requests;
- for (std::vector<HostAndPort>::const_iterator it = _targets.begin();
- it != _targets.end();
- ++it) {
- invariant(*it != selfConfig.getHostAndPort());
- requests.push_back(RemoteCommandRequest(
- *it,
- "admin",
- replSetFreshCmd,
- Milliseconds(30*1000))); // trying to match current Socket timeout
- }
-
- return requests;
+FreshnessChecker::Algorithm::Algorithm(Timestamp lastOpTimeApplied,
+ const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets)
+ : _responsesProcessed(0),
+ _failedVoterResponses(0),
+ _lastOpTimeApplied(lastOpTimeApplied),
+ _rsConfig(rsConfig),
+ _selfIndex(selfIndex),
+ _targets(targets),
+ _votingTargets(0),
+ _losableVoters(0),
+ _myVote(0),
+ _abortReason(None) {
+ // Count voting targets (since the targets could be a subset of members).
+ for (std::vector<HostAndPort>::const_iterator it = _targets.begin(); it != _targets.end();
+ ++it) {
+ const MemberConfig* member = _rsConfig.findMemberByHostAndPort(*it);
+ if (member && member->isVoter())
+ ++_votingTargets;
}
- bool FreshnessChecker::Algorithm::hadTooManyFailedVoterResponses() const {
- const bool tooManyLostVoters = (_failedVoterResponses > _losableVoters);
-
- LOG(3) << "hadTooManyFailedVoterResponses(" << tooManyLostVoters << ") = "
- << _failedVoterResponses << " failed responses <"
- << " (" << _votingTargets << " total voters - "
- << _rsConfig.getMajorityVoteCount() << " majority voters - me ("
- << _myVote << ")) -- losableVotes: " << _losableVoters;
- return tooManyLostVoters;
+ _myVote = _rsConfig.getMemberAt(_selfIndex).isVoter() ? 1 : 0;
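+ // _losableVoters is the number of voter responses that may fail while still
+ // leaving a reachable majority. Illustrative numbers: with 4 voting targets,
+ // our own vote, and a majority of 3, (4 + 1) - 3 = 2 voters are losable.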
+ _losableVoters = std::max(0, ((_votingTargets + _myVote) - _rsConfig.getMajorityVoteCount()));
+}
+
+FreshnessChecker::Algorithm::~Algorithm() {}
+
+std::vector<RemoteCommandRequest> FreshnessChecker::Algorithm::getRequests() const {
+ const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
+
+ // Gather all not-down nodes and their full names (HostAndPort strings), then
+ // schedule a replSetFresh command for each one.
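+ // The assembled command looks like (illustrative values):
+ // { replSetFresh: 1, set: "rs0", opTime: <Date>, who: "h0:27017", cfgver: 1, id: 1 }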
+ BSONObjBuilder freshCmdBuilder;
+ freshCmdBuilder.append("replSetFresh", 1);
+ freshCmdBuilder.append("set", _rsConfig.getReplSetName());
+ freshCmdBuilder.append("opTime", Date_t::fromMillisSinceEpoch(_lastOpTimeApplied.asLL()));
+ freshCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
+ freshCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
+ freshCmdBuilder.append("id", selfConfig.getId());
+ const BSONObj replSetFreshCmd = freshCmdBuilder.obj();
+
+ std::vector<RemoteCommandRequest> requests;
+ for (std::vector<HostAndPort>::const_iterator it = _targets.begin(); it != _targets.end();
+ ++it) {
+ invariant(*it != selfConfig.getHostAndPort());
+ requests.push_back(RemoteCommandRequest(
+ *it,
+ "admin",
+ replSetFreshCmd,
+ Milliseconds(30 * 1000))); // trying to match current Socket timeout
}
- bool FreshnessChecker::Algorithm::_isVotingMember(const HostAndPort hap) const {
- const MemberConfig* member = _rsConfig.findMemberByHostAndPort(hap);
- invariant(member);
- return member->isVoter();
- }
-
- void FreshnessChecker::Algorithm::processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
- ++_responsesProcessed;
- bool votingMember = _isVotingMember(request.target);
-
- Status status = Status::OK();
-
- if (!response.isOK() ||
- !((status = getStatusFromCommandResult(response.getValue().data)).isOK())) {
- if (votingMember) {
- ++_failedVoterResponses;
- if (hadTooManyFailedVoterResponses()) {
- _abortReason = QuorumUnreachable;
- }
- }
- if (!response.isOK()) { // network/executor error
- LOG(2) << "FreshnessChecker: Got failed response from " << request.target;
- }
- else { // command error, like unauth
- LOG(2) << "FreshnessChecker: Got error response from " << request.target
- << " :" << status;
+ return requests;
+}
+
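+// "Too many" means more failed voter responses than the allowance computed in the
+// constructor; once that is exceeded, a majority is no longer reachable and
+// processResponse() aborts with QuorumUnreachable.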
+bool FreshnessChecker::Algorithm::hadTooManyFailedVoterResponses() const {
+ const bool tooManyLostVoters = (_failedVoterResponses > _losableVoters);
+
+ LOG(3) << "hadTooManyFailedVoterResponses(" << tooManyLostVoters
+ << ") = " << _failedVoterResponses << " failed responses <"
+ << " (" << _votingTargets << " total voters - " << _rsConfig.getMajorityVoteCount()
+ << " majority voters - me (" << _myVote << ")) -- losableVotes: " << _losableVoters;
+ return tooManyLostVoters;
+}
+
+bool FreshnessChecker::Algorithm::_isVotingMember(const HostAndPort hap) const {
+ const MemberConfig* member = _rsConfig.findMemberByHostAndPort(hap);
+ invariant(member);
+ return member->isVoter();
+}
+
+void FreshnessChecker::Algorithm::processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ ++_responsesProcessed;
+ bool votingMember = _isVotingMember(request.target);
+
+ Status status = Status::OK();
+
+ if (!response.isOK() ||
+ !((status = getStatusFromCommandResult(response.getValue().data)).isOK())) {
+ if (votingMember) {
+ ++_failedVoterResponses;
+ if (hadTooManyFailedVoterResponses()) {
+ _abortReason = QuorumUnreachable;
}
- return;
}
-
- const BSONObj res = response.getValue().data;
-
- LOG(2) << "FreshnessChecker: Got response from " << request.target
- << " of " << res;
-
- if (res["fresher"].trueValue()) {
- log() << "not electing self, we are not freshest";
- _abortReason = FresherNodeFound;
- return;
- }
-
- if (res["opTime"].type() != mongo::Date) {
- error() << "wrong type for opTime argument in replSetFresh response: " <<
- typeName(res["opTime"].type());
- _abortReason = FresherNodeFound;
- return;
- }
- Timestamp remoteTime(res["opTime"].date());
- if (remoteTime == _lastOpTimeApplied) {
- _abortReason = FreshnessTie;
- }
- if (remoteTime > _lastOpTimeApplied) {
- // something really wrong (rogue command?)
- _abortReason = FresherNodeFound;
- return;
- }
-
- if (res["veto"].trueValue()) {
- BSONElement msg = res["errmsg"];
- if (msg.type() == String) {
- log() << "not electing self, " << request.target.toString() <<
- " would veto with '" << msg.String() << "'";
- }
- else {
- log() << "not electing self, " << request.target.toString() <<
- " would veto";
- }
- _abortReason = FresherNodeFound;
- return;
+ if (!response.isOK()) { // network/executor error
+ LOG(2) << "FreshnessChecker: Got failed response from " << request.target;
+ } else { // command error, like unauth
+ LOG(2) << "FreshnessChecker: Got error response from " << request.target << " :"
+ << status;
}
+ return;
}
- bool FreshnessChecker::Algorithm::hasReceivedSufficientResponses() const {
- return (_abortReason != None && _abortReason != FreshnessTie) ||
- (_responsesProcessed == static_cast<int>(_targets.size()));
- }
+ const BSONObj res = response.getValue().data;
- FreshnessChecker::ElectionAbortReason FreshnessChecker::Algorithm::shouldAbortElection() const {
- return _abortReason;
- }
+ LOG(2) << "FreshnessChecker: Got response from " << request.target << " of " << res;
- FreshnessChecker::ElectionAbortReason FreshnessChecker::shouldAbortElection() const {
- return _algorithm->shouldAbortElection();
+ if (res["fresher"].trueValue()) {
+ log() << "not electing self, we are not freshest";
+ _abortReason = FresherNodeFound;
+ return;
}
- long long FreshnessChecker::getOriginalConfigVersion() const {
- return _originalConfigVersion;
+ if (res["opTime"].type() != mongo::Date) {
+ error() << "wrong type for opTime argument in replSetFresh response: "
+ << typeName(res["opTime"].type());
+ _abortReason = FresherNodeFound;
+ return;
}
-
- FreshnessChecker::FreshnessChecker() : _isCanceled(false) {}
- FreshnessChecker::~FreshnessChecker() {}
-
- StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
- ReplicationExecutor* executor,
- const Timestamp& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion) {
-
- _originalConfigVersion = currentConfig.getConfigVersion();
- _algorithm.reset(new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets));
- _runner.reset(new ScatterGatherRunner(_algorithm.get()));
- return _runner->start(executor, onCompletion);
+ Timestamp remoteTime(res["opTime"].date());
+ if (remoteTime == _lastOpTimeApplied) {
+ _abortReason = FreshnessTie;
}
-
- void FreshnessChecker::cancel(ReplicationExecutor* executor) {
- _isCanceled = true;
- _runner->cancel(executor);
+ if (remoteTime > _lastOpTimeApplied) {
+ // something really wrong (rogue command?)
+ _abortReason = FresherNodeFound;
+ return;
}
-} // namespace repl
-} // namespace mongo
+ if (res["veto"].trueValue()) {
+ BSONElement msg = res["errmsg"];
+ if (msg.type() == String) {
+ log() << "not electing self, " << request.target.toString() << " would veto with '"
+ << msg.String() << "'";
+ } else {
+ log() << "not electing self, " << request.target.toString() << " would veto";
+ }
+ _abortReason = FresherNodeFound;
+ return;
+ }
+}
+
+bool FreshnessChecker::Algorithm::hasReceivedSufficientResponses() const {
+ return (_abortReason != None && _abortReason != FreshnessTie) ||
+ (_responsesProcessed == static_cast<int>(_targets.size()));
+}
+
+FreshnessChecker::ElectionAbortReason FreshnessChecker::Algorithm::shouldAbortElection() const {
+ return _abortReason;
+}
+
+FreshnessChecker::ElectionAbortReason FreshnessChecker::shouldAbortElection() const {
+ return _algorithm->shouldAbortElection();
+}
+
+long long FreshnessChecker::getOriginalConfigVersion() const {
+ return _originalConfigVersion;
+}
+
+FreshnessChecker::FreshnessChecker() : _isCanceled(false) {}
+FreshnessChecker::~FreshnessChecker() {}
+
+StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
+ ReplicationExecutor* executor,
+ const Timestamp& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion) {
+ _originalConfigVersion = currentConfig.getConfigVersion();
+ _algorithm.reset(new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets));
+ _runner.reset(new ScatterGatherRunner(_algorithm.get()));
+ return _runner->start(executor, onCompletion);
+}
+
+void FreshnessChecker::cancel(ReplicationExecutor* executor) {
+ _isCanceled = true;
+ _runner->cancel(executor);
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/freshness_checker.h b/src/mongo/db/repl/freshness_checker.h
index 188e67bd953..11515bfc0ad 100644
--- a/src/mongo/db/repl/freshness_checker.h
+++ b/src/mongo/db/repl/freshness_checker.h
@@ -38,127 +38,128 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- class ReplicaSetConfig;
- class ScatterGatherRunner;
+class ReplicaSetConfig;
+class ScatterGatherRunner;
- class FreshnessChecker {
- MONGO_DISALLOW_COPYING(FreshnessChecker);
+class FreshnessChecker {
+ MONGO_DISALLOW_COPYING(FreshnessChecker);
+
+public:
+ enum ElectionAbortReason {
+ None = 0,
+ FresherNodeFound, // Freshness check found fresher node
+ FreshnessTie, // Freshness check resulted in one or more nodes with our lastAppliedOpTime
+ QuorumUnavailable, // Not enough up voters
+ QuorumUnreachable // Too many failed voter responses
+ };
+
+ class Algorithm : public ScatterGatherAlgorithm {
public:
- enum ElectionAbortReason {
- None = 0,
- FresherNodeFound, // Freshness check found fresher node
- FreshnessTie, // Freshness check resulted in one or more nodes with our lastAppliedOpTime
- QuorumUnavailable, // Not enough up voters
- QuorumUnreachable // Too many failed voter responses
- };
-
- class Algorithm : public ScatterGatherAlgorithm {
- public:
- Algorithm(Timestamp lastOpTimeApplied,
- const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets);
- virtual ~Algorithm();
- virtual std::vector<RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response);
- virtual bool hasReceivedSufficientResponses() const;
- ElectionAbortReason shouldAbortElection() const;
-
- private:
- // Returns true if the number of failed votes is over _losableVotes()
- bool hadTooManyFailedVoterResponses() const;
-
- // Returns true if the member, by host and port, has a vote.
- bool _isVotingMember(const HostAndPort host) const;
-
- // Number of responses received so far.
- int _responsesProcessed;
-
- // Number of failed voter responses so far.
- int _failedVoterResponses;
-
- // Last Timestamp applied by the caller; used in the Fresh command
- const Timestamp _lastOpTimeApplied;
-
- // Config to use for this check
- const ReplicaSetConfig _rsConfig;
-
- // Our index position in _rsConfig
- const int _selfIndex;
-
- // The UP members we are checking
- const std::vector<HostAndPort> _targets;
-
- // Number of voting targets
- int _votingTargets;
-
- // Number of voting nodes which can error
- int _losableVoters;
-
- // 1 if I have a vote, otherwise 0
- int _myVote;
-
- // Reason to abort, start with None
- ElectionAbortReason _abortReason;
-
- };
-
- FreshnessChecker();
- virtual ~FreshnessChecker();
-
- /**
- * Begins the process of sending replSetFresh commands to all non-DOWN nodes
- * in currentConfig, with the intention of determining whether the current node
- * is freshest.
- * evh can be used to schedule a callback when the process is complete.
- * This function must be run in the executor, as it must be synchronous with the command
- * callbacks that it schedules.
- * If this function returns Status::OK(), evh is then guaranteed to be signaled.
- **/
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const Timestamp& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
-
- /**
- * Informs the freshness checker to cancel further processing. The "executor"
- * argument must point to the same executor passed to "start()".
- *
- * Like start(), this method must run in the executor context.
- */
- void cancel(ReplicationExecutor* executor);
-
- /**
- * Returns true if cancel() was called on this instance.
- */
- bool isCanceled() const { return _isCanceled; }
-
- /**
- * 'None' if the election should continue, otherwise the reason to abort
- */
+ Algorithm(Timestamp lastOpTimeApplied,
+ const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets);
+ virtual ~Algorithm();
+ virtual std::vector<RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response);
+ virtual bool hasReceivedSufficientResponses() const;
ElectionAbortReason shouldAbortElection() const;
- /**
- * Returns the config version supplied in the config when start() was called.
- * Useful for determining if the the config version has changed.
- */
- long long getOriginalConfigVersion() const;
-
private:
- std::unique_ptr<Algorithm> _algorithm;
- std::unique_ptr<ScatterGatherRunner> _runner;
- long long _originalConfigVersion;
- bool _isCanceled;
+ // Returns true if the number of failed voter responses exceeds _losableVoters.
+ bool hadTooManyFailedVoterResponses() const;
+
+ // Returns true if the member, by host and port, has a vote.
+ bool _isVotingMember(const HostAndPort host) const;
+
+ // Number of responses received so far.
+ int _responsesProcessed;
+
+ // Number of failed voter responses so far.
+ int _failedVoterResponses;
+
+ // Last Timestamp applied by the caller; used in the replSetFresh command
+ const Timestamp _lastOpTimeApplied;
+
+ // Config to use for this check
+ const ReplicaSetConfig _rsConfig;
+
+ // Our index position in _rsConfig
+ const int _selfIndex;
+
+ // The UP members we are checking
+ const std::vector<HostAndPort> _targets;
+
+ // Number of voting targets
+ int _votingTargets;
+
+ // Number of voting nodes which can error
+ int _losableVoters;
+
+ // 1 if I have a vote, otherwise 0
+ int _myVote;
+
+ // Reason to abort; starts as None
+ ElectionAbortReason _abortReason;
};
+ FreshnessChecker();
+ virtual ~FreshnessChecker();
+
+ /**
+ * Begins the process of sending replSetFresh commands to all non-DOWN nodes
+ * in currentConfig, with the intention of determining whether the current node
+ * is freshest.
+ * evh can be used to schedule a callback when the process is complete.
+ * This function must be run in the executor, as it must be synchronous with the command
+ * callbacks that it schedules.
+ * If this function returns Status::OK(), evh is then guaranteed to be signaled.
+ **/
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const Timestamp& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the freshness checker to cancel further processing. The "executor"
+ * argument must point to the same executor passed to "start()".
+ *
+ * Like start(), this method must run in the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+ /**
+ * Returns true if cancel() was called on this instance.
+ */
+ bool isCanceled() const {
+ return _isCanceled;
+ }
+
+ /**
+ * 'None' if the election should continue, otherwise the reason to abort
+ */
+ ElectionAbortReason shouldAbortElection() const;
+
+ /**
+ * Returns the config version supplied in the config when start() was called.
+ * Useful for determining if the config version has changed.
+ */
+ long long getOriginalConfigVersion() const;
+
+private:
+ std::unique_ptr<Algorithm> _algorithm;
+ std::unique_ptr<ScatterGatherRunner> _runner;
+ long long _originalConfigVersion;
+ bool _isCanceled;
+};
+
} // namespace repl
} // namespace mongo
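For orientation, here is a minimal usage sketch of the FreshnessChecker API declared above, modeled on the test runner in the next file; the wrapper function, the lambda, and assertGet (a unittest helper used by the tests) are illustrative stand-ins for caller-specific plumbing:

    // Sketch only: start() and cancel() must themselves run inside the executor,
    // so the start() call is scheduled as executor work.
    void runFreshnessCheck(ReplicationExecutor* executor,
                           FreshnessChecker* checker,
                           const Timestamp& lastApplied,
                           const ReplicaSetConfig& config,
                           int selfIndex,
                           const std::vector<HostAndPort>& targets) {
        ReplicationExecutor::EventHandle done;
        executor->wait(assertGet(executor->scheduleWork(
            [&](const ReplicationExecutor::CallbackArgs& cbData) {
                invariant(cbData.status.isOK());
                done = assertGet(checker->start(
                    dynamic_cast<ReplicationExecutor*>(cbData.executor),
                    lastApplied, config, selfIndex, targets));
            })));
        executor->waitForEvent(done);  // guaranteed to be signaled if start() returned OK
        if (checker->shouldAbortElection() != FreshnessChecker::None) {
            // A fresher node was found, a tie occurred, or the quorum was unreachable.
        }
    }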
diff --git a/src/mongo/db/repl/freshness_checker_test.cpp b/src/mongo/db/repl/freshness_checker_test.cpp
index 82a266b2910..fb08c072acd 100644
--- a/src/mongo/db/repl/freshness_checker_test.cpp
+++ b/src/mongo/db/repl/freshness_checker_test.cpp
@@ -48,1034 +48,991 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
- using unittest::assertGet;
-
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
+using executor::NetworkInterfaceMock;
+using unittest::assertGet;
+
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+class FreshnessCheckerTest : public mongo::unittest::Test {
+protected:
+ void startTest(const Timestamp& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+ void waitOnChecker();
+ FreshnessChecker::ElectionAbortReason shouldAbortElection() const;
+
+ int64_t countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
}
- class FreshnessCheckerTest : public mongo::unittest::Test {
- protected:
- void startTest(const Timestamp& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
- void waitOnChecker();
- FreshnessChecker::ElectionAbortReason shouldAbortElection() const;
-
- int64_t countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
-
- NetworkInterfaceMock* _net;
- StorageInterfaceMock* _storage;
- std::unique_ptr<ReplicationExecutor> _executor;
- std::unique_ptr<stdx::thread> _executorThread;
-
- private:
- void freshnessCheckerRunner(const ReplicationExecutor::CallbackArgs& data,
- const Timestamp& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
- void setUp();
- void tearDown();
-
- std::unique_ptr<FreshnessChecker> _checker;
- ReplicationExecutor::EventHandle _checkerDoneEvent;
- };
-
- void FreshnessCheckerTest::setUp() {
- _net = new NetworkInterfaceMock;
- _storage = new StorageInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng seed */));
- _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
- _checker.reset(new FreshnessChecker);
+ NetworkInterfaceMock* _net;
+ StorageInterfaceMock* _storage;
+ std::unique_ptr<ReplicationExecutor> _executor;
+ std::unique_ptr<stdx::thread> _executorThread;
+
+private:
+ void freshnessCheckerRunner(const ReplicationExecutor::CallbackArgs& data,
+ const Timestamp& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+ void setUp();
+ void tearDown();
+
+ std::unique_ptr<FreshnessChecker> _checker;
+ ReplicationExecutor::EventHandle _checkerDoneEvent;
+};
+
+void FreshnessCheckerTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _storage = new StorageInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng seed */));
+ _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+ _checker.reset(new FreshnessChecker);
+}
+
+void FreshnessCheckerTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+void FreshnessCheckerTest::waitOnChecker() {
+ _executor->waitForEvent(_checkerDoneEvent);
+}
+
+FreshnessChecker::ElectionAbortReason FreshnessCheckerTest::shouldAbortElection() const {
+ return _checker->shouldAbortElection();
+}
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+
+const BSONObj makeFreshRequest(const ReplicaSetConfig& rsConfig,
+ Timestamp lastOpTimeApplied,
+ int selfIndex) {
+ const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
+ return BSON("replSetFresh" << 1 << "set" << rsConfig.getReplSetName() << "opTime"
+ << Date_t::fromMillisSinceEpoch(lastOpTimeApplied.asLL()) << "who"
+ << myConfig.getHostAndPort().toString() << "cfgver"
+ << rsConfig.getConfigVersion() << "id" << myConfig.getId());
+}
+
+// This is necessary because the run method must be scheduled in the ReplicationExecutor
+// so that it runs synchronously with the command callbacks it schedules.
+void FreshnessCheckerTest::freshnessCheckerRunner(const ReplicationExecutor::CallbackArgs& data,
+ const Timestamp& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ invariant(data.status.isOK());
+ ReplicationExecutor* executor = dynamic_cast<ReplicationExecutor*>(data.executor);
+ ASSERT(executor);
+ StatusWith<ReplicationExecutor::EventHandle> evh =
+ _checker->start(executor, lastOpTimeApplied, currentConfig, selfIndex, hosts);
+ _checkerDoneEvent = assertGet(evh);
+}
+
+void FreshnessCheckerTest::startTest(const Timestamp& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ _executor->wait(
+ assertGet(_executor->scheduleWork(stdx::bind(&FreshnessCheckerTest::freshnessCheckerRunner,
+ this,
+ stdx::placeholders::_1,
+ lastOpTimeApplied,
+ currentConfig,
+ selfIndex,
+ hosts))));
+}
+
+TEST_F(FreshnessCheckerTest, TwoNodes) {
+ // Two nodes, we are node h1. We are freshest, but we tie with h2.
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(0, 0), 0);
+
+ startTest(Timestamp(0, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "opTime" << Date_t()),
+ Milliseconds(8))));
}
-
- void FreshnessCheckerTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
+}
+
+TEST_F(FreshnessCheckerTest, ShuttingDown) {
+ // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ startTest(Timestamp(0, 0), config, 0, hosts);
+ _executor->shutdown();
+ waitOnChecker();
+
+ // This seems less than ideal, but if we are shutting down, the next phase of election
+ // cannot proceed anyway.
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::None);
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshest) {
+ // other responds as fresher than us
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "fresher" << true
+ << "opTime" << Date_t()),
+ Milliseconds(8))));
}
-
- void FreshnessCheckerTest::waitOnChecker() {
- _executor->waitForEvent(_checkerDoneEvent);
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTime) {
+ // other responds with a later optime than ours
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(0, 0), 0);
+
+ startTest(Timestamp(0, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(10, 0).asLL())),
+ Milliseconds(8))));
}
-
- FreshnessChecker::ElectionAbortReason FreshnessCheckerTest::shouldAbortElection() const {
- return _checker->shouldAbortElection();
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
+
+TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponse) {
+ // other responds with "opTime" field of non-Date value, causing not freshest
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "opTime" << 3),
+ Milliseconds(8))));
}
-
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "wrong type for opTime argument in replSetFresh "
+ "response: NumberInt32"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectVetoed) {
+ // other responds with veto
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "veto" << true << "errmsg"
+ << "I'd rather you didn't"
+ << "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(0, 0).asLL())),
+ Milliseconds(8))));
}
-
- const BSONObj makeFreshRequest(const ReplicaSetConfig& rsConfig,
- Timestamp lastOpTimeApplied,
- int selfIndex) {
- const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
- return BSON("replSetFresh" << 1 <<
- "set" << rsConfig.getReplSetName() <<
- "opTime" << Date_t::fromMillisSinceEpoch(lastOpTimeApplied.asLL()) <<
- "who" << myConfig.getHostAndPort().toString() <<
- "cfgver" << rsConfig.getConfigVersion() <<
- "id" << myConfig.getId());
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "not electing self, h1:27017 would veto with "
+ "'I'd rather you didn't'"));
+}
+
+int findIdForMember(const ReplicaSetConfig& rsConfig, const HostAndPort& host) {
+ const MemberConfig* member = rsConfig.findMemberByHostAndPort(host);
+ ASSERT_TRUE(member != NULL) << "No host named " << host.toString() << " in config";
+ return member->getId();
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestManyNodes) {
+ // one other responds as fresher than us
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- // This is necessary because the run method must be scheduled in the Replication Executor
- // for correct concurrency operation.
- void FreshnessCheckerTest::freshnessCheckerRunner(
- const ReplicationExecutor::CallbackArgs& data,
- const Timestamp& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
-
- invariant(data.status.isOK());
- ReplicationExecutor* executor = dynamic_cast<ReplicationExecutor*>(data.executor);
- ASSERT(executor);
- StatusWith<ReplicationExecutor::EventHandle> evh = _checker->start(executor,
- lastOpTimeApplied,
- currentConfig,
- selfIndex,
- hosts);
- _checkerDoneEvent = assertGet(evh);
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(0, 0).asLL());
+ if (target.host() == "h1") {
+ responseBuilder << "fresher" << true;
+ }
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
}
-
- void FreshnessCheckerTest::startTest(const Timestamp& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
- _executor->wait(
- assertGet(
- _executor->scheduleWork(
- stdx::bind(&FreshnessCheckerTest::freshnessCheckerRunner,
- this,
- stdx::placeholders::_1,
- lastOpTimeApplied,
- currentConfig,
- selfIndex,
- hosts))));
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTimeManyNodes) {
+ // one other responds with a later optime than ours
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ if (HostAndPort("h0") == mem->getHostAndPort()) {
+ continue;
+ }
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, TwoNodes) {
- // Two nodes, we are node h1. We are freshest, but we tie with h2.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(0,0), 0);
-
- startTest(Timestamp(0, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ if (target.host() == "h4") {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(20, 0).asLL());
_net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "opTime" << Date_t()),
- Milliseconds(8))));
+ noi,
+ startDate + Milliseconds(20),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
+ } else {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(10, 0).asLL());
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
}
-
- TEST_F(FreshnessCheckerTest, ShuttingDown) {
- // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- startTest(
- Timestamp(0, 0),
- config,
- 0,
- hosts);
- _executor->shutdown();
- waitOnChecker();
-
- // This seems less than ideal, but if we are shutting down, the next phase of election
- // cannot proceed anyway.
- ASSERT_EQUALS(shouldAbortElection(),FreshnessChecker::None);
-
+ _net->runUntil(startDate + Milliseconds(10));
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ ASSERT_EQUALS(0, countLogLinesContaining("not electing self, we are not freshest"));
+ _net->runUntil(startDate + Milliseconds(20));
+ ASSERT_EQUALS(startDate + Milliseconds(20), _net->now());
+ _net->exitNetwork();
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
+
+TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponseManyNodes) {
+ // one other responds with "opTime" field of non-Date value, causing not freshest
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshest) {
- // other responds as fresher than us
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "fresher" << true <<
- "opTime" << Date_t()),
- Milliseconds(8))));
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1;
+ if (target.host() == "h1") {
+ responseBuilder << "opTime" << 3;
+ } else {
+ responseBuilder << "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(0, 0).asLL());
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
+ }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "wrong type for opTime argument in replSetFresh "
+ "response: NumberInt32"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectVetoedManyNodes) {
+ // one other responds with veto
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTime) {
- // other responds with a later optime than ours
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(0,0), 0);
-
- startTest(Timestamp(0, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(
- Timestamp(10,0).asLL())),
- Milliseconds(8))));
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(0, 0).asLL());
+ if (target.host() == "h1") {
+ responseBuilder << "veto" << true << "errmsg"
+ << "I'd rather you didn't";
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
}
-
- TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponse) {
- // other responds with "opTime" field of non-Date value, causing not freshest
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "opTime" << 3),
- Milliseconds(8))));
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "not electing self, h1:27017 would veto with "
+ "'I'd rather you didn't'"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectVetoedAndTiedFreshnessManyNodes) {
+ // one other responds with veto and another responds with tie
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ if (HostAndPort("h0") == mem->getHostAndPort()) {
+ continue;
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
-
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("wrong type for opTime argument in replSetFresh "
- "response: NumberInt32"));
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectVetoed) {
- // other responds with veto
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10, 0), 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ if (target.host() == "h4") {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "veto" << true
+ << "errmsg"
+ << "I'd rather you didn't"
+ << "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(10, 0).asLL());
_net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "veto" << true <<
- "errmsg" << "I'd rather you didn't" <<
- "opTime" << Date_t::fromMillisSinceEpoch(
- Timestamp(0,0).asLL())),
- Milliseconds(8))));
+ noi,
+ startDate + Milliseconds(20),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
+ } else {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(10, 0).asLL());
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
}
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
-
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, h1:27017 would veto with "
- "'I'd rather you didn't'"));
}
-
- int findIdForMember(const ReplicaSetConfig& rsConfig, const HostAndPort& host) {
- const MemberConfig* member = rsConfig.findMemberByHostAndPort(host);
- ASSERT_TRUE(member != NULL) << "No host named " << host.toString() << " in config";
- return member->getId();
+ _net->runUntil(startDate + Milliseconds(10));
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ ASSERT_EQUALS(0,
+ countLogLinesContaining(
+ "not electing self, h4:27017 would veto with '"
+ "errmsg: \"I'd rather you didn't\"'"));
+ _net->runUntil(startDate + Milliseconds(20));
+ ASSERT_EQUALS(startDate + Milliseconds(20), _net->now());
+ _net->exitNetwork();
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "not electing self, h4:27017 would veto with "
+ "'I'd rather you didn't'"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectManyNodesNotAllRespond) {
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestManyNodes) {
- // one other responds as fresher than us
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
-
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
+ const Timestamp lastOpTimeApplied(10, 0);
+ const BSONObj freshRequest = makeFreshRequest(config, lastOpTimeApplied, 0);
+
+ startTest(Timestamp(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ if (target.host() == "h2" || target.host() == "h3") {
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
+ } else {
BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(0,0).asLL());
- if (target.host() == "h1") {
- responseBuilder << "fresher" << true;
- }
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(0, 0).asLL());
_net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
- }
-
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTimeManyNodes) {
- // one other responds with a later optime than ours
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- if (HostAndPort("h0") == mem->getHostAndPort()) {
- continue;
- }
- hosts.push_back(mem->getHostAndPort());
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(responseBuilder.obj(), Milliseconds(8))));
}
-
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
-
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- if (target.host() == "h4") {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(20,0).asLL());
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(20),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- else {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(10,0).asLL());
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + Milliseconds(10));
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- ASSERT_EQUALS(0, countLogLinesContaining("not electing self, we are not freshest"));
- _net->runUntil(startDate + Milliseconds(20));
- ASSERT_EQUALS(startDate + Milliseconds(20), _net->now());
- _net->exitNetwork();
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
}
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
+ waitOnChecker();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::None);
+}
+
+class FreshnessScatterGatherTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ int selfConfigIndex = 0;
+ Timestamp lastOpTimeApplied(100, 0);
- TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponseManyNodes) {
- // one other responds with "opTime" field of non-Date value, causing not freshest
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1") << BSON("_id" << 2 << "host"
+ << "host2"))));
std::vector<HostAndPort> hosts;
for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
+ mem != config.membersEnd();
+ ++mem) {
hosts.push_back(mem->getHostAndPort());
}
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1;
- if (target.host() == "h1") {
- responseBuilder << "opTime" << 3;
- }
- else {
- responseBuilder << "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(0,0).asLL());
- }
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("wrong type for opTime argument in replSetFresh "
- "response: NumberInt32"));
+ _checker.reset(
+ new FreshnessChecker::Algorithm(lastOpTimeApplied, config, selfConfigIndex, hosts));
}
- TEST_F(FreshnessCheckerTest, ElectVetoedManyNodes) {
- // one other responds with veto
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
+ virtual void tearDown() {
+ _checker.reset(NULL);
+ }
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
+protected:
+ bool hasReceivedSufficientResponses() {
+ return _checker->hasReceivedSufficientResponses();
+ }
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(0,0).asLL());
- if (target.host() == "h1") {
- responseBuilder << "veto" << true << "errmsg" << "I'd rather you didn't";
- }
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, h1:27017 would veto with "
- "'I'd rather you didn't'"));
+ void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
+ _checker->processResponse(request, response);
}
- TEST_F(FreshnessCheckerTest, ElectVetoedAndTiedFreshnessManyNodes) {
- // one other responds with veto and another responds with tie
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- if (HostAndPort("h0") == mem->getHostAndPort()) {
- continue;
- }
- hosts.push_back(mem->getHostAndPort());
- }
+ FreshnessChecker::ElectionAbortReason shouldAbortElection() const {
+ return _checker->shouldAbortElection();
+ }
- const BSONObj freshRequest = makeFreshRequest(config, Timestamp(10,0), 0);
+ ResponseStatus lessFresh() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.appendDate("opTime", Date_t::fromMillisSinceEpoch(Timestamp(10, 0).asLL()));
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
+ ResponseStatus moreFreshViaOpTime() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.appendDate("opTime", Date_t::fromMillisSinceEpoch(Timestamp(110, 0).asLL()));
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- if (target.host() == "h4") {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "veto" << true <<
- "errmsg" << "I'd rather you didn't" <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(10,0).asLL());
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(20),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- else {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(10,0).asLL());
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + Milliseconds(10));
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- ASSERT_EQUALS(0, countLogLinesContaining("not electing self, h4:27017 would veto with '"
- "errmsg: \"I'd rather you didn't\"'"));
- _net->runUntil(startDate + Milliseconds(20));
- ASSERT_EQUALS(startDate + Milliseconds(20), _net->now());
- _net->exitNetwork();
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, h4:27017 would veto with "
- "'I'd rather you didn't'"));
+ ResponseStatus wrongTypeForOpTime() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.append("opTime", std::string("several minutes ago"));
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
}
- TEST_F(FreshnessCheckerTest, ElectManyNodesNotAllRespond) {
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
+ ResponseStatus unauthorized() {
+ BSONObjBuilder bb;
+ bb.append("ok", 0.0);
+ bb.append("code", ErrorCodes::Unauthorized);
+ bb.append("errmsg", "Unauthorized");
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
+ ResponseStatus tiedForFreshness() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.appendDate("opTime", Date_t::fromMillisSinceEpoch(Timestamp(100, 0).asLL()));
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- const Timestamp lastOpTimeApplied(10,0);
- const BSONObj freshRequest = makeFreshRequest(config, lastOpTimeApplied, 0);
-
- startTest(Timestamp(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- if (target.host() == "h2" || target.host() == "h3") {
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
- else {
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t::fromMillisSinceEpoch(Timestamp(0,0).asLL());
- _net->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + Milliseconds(10));
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), _net->now());
- waitOnChecker();
- ASSERT_EQUALS(shouldAbortElection(),FreshnessChecker::None);
+ ResponseStatus moreFresh() {
+ return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1.0 << "fresher" << true),
+ Milliseconds(10)));
+ }
+
+ ResponseStatus veto() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("ok" << 1.0 << "veto" << true << "errmsg"
+ << "vetoed!"),
+ Milliseconds(10)));
}
- class FreshnessScatterGatherTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- int selfConfigIndex = 0;
- Timestamp lastOpTimeApplied(100, 0);
-
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1") <<
- BSON("_id" << 2 << "host" << "host2"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
-
- _checker.reset(new FreshnessChecker::Algorithm(lastOpTimeApplied,
- config,
- selfConfigIndex,
- hosts));
+ RemoteCommandRequest requestFrom(std::string hostname) {
+ return RemoteCommandRequest(HostAndPort(hostname),
+ "", // the non-hostname fields do not matter in Freshness
+ BSONObj(),
+ Milliseconds(0));
+ }
- }
+private:
+ unique_ptr<FreshnessChecker::Algorithm> _checker;
+};
- virtual void tearDown() {
- _checker.reset(NULL);
- }
+TEST_F(FreshnessScatterGatherTest, BothNodesLessFresh) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- protected:
- bool hasReceivedSufficientResponses() {
- return _checker->hasReceivedSufficientResponses();
- }
+ processResponse(requestFrom("host1"), lessFresh());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
- _checker->processResponse(request, response);
- }
+ processResponse(requestFrom("host2"), lessFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::None);
+}
- FreshnessChecker::ElectionAbortReason shouldAbortElection() const {
- return _checker->shouldAbortElection();
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeFresher) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus lessFresh() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.appendDate("opTime", Date_t::fromMillisSinceEpoch(Timestamp(10, 0).asLL()));
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), moreFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- ResponseStatus moreFreshViaOpTime() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.appendDate("opTime", Date_t::fromMillisSinceEpoch(Timestamp(110, 0).asLL()));
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeFresherViaOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus wrongTypeForOpTime() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.append("opTime", std::string("several minutes ago"));
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), moreFreshViaOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- ResponseStatus unauthorized() {
- BSONObjBuilder bb;
- bb.append("ok", 0.0);
- bb.append("code", ErrorCodes::Unauthorized);
- bb.append("errmsg", "Unauthorized");
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeVetoes) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus tiedForFreshness() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.appendDate("opTime", Date_t::fromMillisSinceEpoch(Timestamp(100, 0).asLL()));
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), veto());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- ResponseStatus moreFresh() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1.0 <<
- "fresher" << true),
- Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus veto() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1.0 <<
- "veto" << true <<
- "errmsg" << "vetoed!"),
- Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- RemoteCommandRequest requestFrom(std::string hostname) {
- return RemoteCommandRequest(HostAndPort(hostname),
- "", // the non-hostname fields do not matter in Freshness
- BSONObj(),
- Milliseconds(0));
- }
- private:
- unique_ptr<FreshnessChecker::Algorithm> _checker;
- };
-
- TEST_F(FreshnessScatterGatherTest, BothNodesLessFresh) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), lessFresh());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), lessFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(),FreshnessChecker::None);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedForFreshness) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeFresher) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), moreFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeFresherViaOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), moreFreshViaOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), lessFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeVetoes) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), veto());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresher) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedForFreshness) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), moreFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- processResponse(requestFrom("host2"), lessFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresherViaOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresher) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), moreFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), moreFreshViaOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresherViaOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondVetoes) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), moreFreshViaOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondVetoes) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), veto());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- processResponse(requestFrom("host2"), veto());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeLessFreshAndSecondWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, FirstNodeLessFreshAndSecondWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), lessFresh());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), lessFresh());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, SecondNodeTiedAndFirstWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, SecondNodeTiedAndFirstWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueNetworkErrors) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueNetworkErrors) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"),
- ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"),
- ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
- }
+ processResponse(requestFrom("host2"), ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
+}
- TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueToUnauthorized) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueToUnauthorized) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), unauthorized());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), unauthorized());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), unauthorized());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
- }
+ processResponse(requestFrom("host2"), unauthorized());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
+}
} // namespace
} // namespace repl
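
Every test in the reformatted file above drives the mock network with the same virtual-clock pattern: enter the network, schedule responses against pending requests, advance the clock, then exit. A minimal sketch of that pattern, assuming the NetworkInterfaceMock, ResponseStatus, and RemoteCommandResponse types used throughout the file (the free function itself is hypothetical, not part of this patch):

    // Hypothetical helper: deliver one canned reply to the next pending
    // replSetFresh request, 10ms of virtual time after it was sent.
    void deliverOneResponse(NetworkInterfaceMock* net, const BSONObj& reply) {
        net->enterNetwork();  // take exclusive control of the mock network
        const Date_t start = net->now();
        const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        net->scheduleResponse(
            noi,
            start + Milliseconds(10),
            ResponseStatus(RemoteCommandResponse(reply, Milliseconds(8))));
        net->runUntil(start + Milliseconds(10));  // advance the virtual clock
        net->exitNetwork();  // the response callback has now run
    }
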
diff --git a/src/mongo/db/repl/handshake_args.cpp b/src/mongo/db/repl/handshake_args.cpp
index db815ee3aa2..2ceae3df86e 100644
--- a/src/mongo/db/repl/handshake_args.cpp
+++ b/src/mongo/db/repl/handshake_args.cpp
@@ -40,75 +40,65 @@ namespace repl {
namespace {
- const std::string kRIDFieldName = "handshake";
- // TODO(danneberg) remove after 3.0 since this field is only allowed for backwards compatibility
- const std::string kOldMemberConfigFieldName = "config";
- const std::string kMemberIdFieldName = "member";
-
- const std::string kLegalHandshakeFieldNames[] = {
- kRIDFieldName,
- kOldMemberConfigFieldName,
- kMemberIdFieldName
- };
-
-} // namespace
-
- HandshakeArgs::HandshakeArgs() :
- _hasRid(false),
- _hasMemberId(false),
- _rid(OID()),
- _memberId(-1) {}
-
- Status HandshakeArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("HandshakeArgs",
- argsObj,
- kLegalHandshakeFieldNames);
- if (!status.isOK())
+const std::string kRIDFieldName = "handshake";
+// TODO(danneberg) remove after 3.0 since this field is only allowed for backwards compatibility
+const std::string kOldMemberConfigFieldName = "config";
+const std::string kMemberIdFieldName = "member";
+
+const std::string kLegalHandshakeFieldNames[] = {
+ kRIDFieldName, kOldMemberConfigFieldName, kMemberIdFieldName};
+
+} // namespace
+
+HandshakeArgs::HandshakeArgs() : _hasRid(false), _hasMemberId(false), _rid(OID()), _memberId(-1) {}
+
+Status HandshakeArgs::initialize(const BSONObj& argsObj) {
+ Status status = bsonCheckOnlyHasFields("HandshakeArgs", argsObj, kLegalHandshakeFieldNames);
+ if (!status.isOK())
+ return status;
+
+ BSONElement oid;
+ status = bsonExtractTypedField(argsObj, kRIDFieldName, jstOID, &oid);
+ if (!status.isOK())
+ return status;
+ _rid = oid.OID();
+ _hasRid = true;
+
+ status = bsonExtractIntegerField(argsObj, kMemberIdFieldName, &_memberId);
+ if (!status.isOK()) {
+ // field not necessary for master/slave, so do not return a NoSuchKey error
+ if (status != ErrorCodes::NoSuchKey) {
return status;
-
- BSONElement oid;
- status = bsonExtractTypedField(argsObj, kRIDFieldName, jstOID, &oid);
- if (!status.isOK())
- return status;
- _rid = oid.OID();
- _hasRid = true;
-
- status = bsonExtractIntegerField(argsObj, kMemberIdFieldName, &_memberId);
- if (!status.isOK()) {
- // field not necessary for master slave, do not return NoSuchKey Error
- if (status != ErrorCodes::NoSuchKey) {
- return status;
- }
- _memberId = -1;
- }
- else {
- _hasMemberId = true;
}
-
- return Status::OK();
- }
-
- bool HandshakeArgs::isInitialized() const {
- return _hasRid;
- }
-
- void HandshakeArgs::setRid(const OID& newVal) {
- _rid = newVal;
- _hasRid = true;
- }
-
- void HandshakeArgs::setMemberId(long long newVal) {
- _memberId = newVal;
+ _memberId = -1;
+ } else {
_hasMemberId = true;
}
- BSONObj HandshakeArgs::toBSON() const {
- invariant(isInitialized());
- BSONObjBuilder builder;
- builder.append(kRIDFieldName, _rid);
- builder.append(kMemberIdFieldName, _memberId);
- return builder.obj();
- }
+ return Status::OK();
+}
+
+bool HandshakeArgs::isInitialized() const {
+ return _hasRid;
+}
+
+void HandshakeArgs::setRid(const OID& newVal) {
+ _rid = newVal;
+ _hasRid = true;
+}
+
+void HandshakeArgs::setMemberId(long long newVal) {
+ _memberId = newVal;
+ _hasMemberId = true;
+}
+
+BSONObj HandshakeArgs::toBSON() const {
+ invariant(isInitialized());
+ BSONObjBuilder builder;
+ builder.append(kRIDFieldName, _rid);
+ builder.append(kMemberIdFieldName, _memberId);
+ return builder.obj();
+}
} // namespace repl
} // namespace mongo
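
The parsing rules encoded in initialize() above, restated as a short illustrative sketch (not part of this patch; OID::gen() and invariant() are the usual helpers from the MongoDB tree):

    // "handshake" (an OID) is mandatory; "member" is optional; anything
    // else is rejected by bsonCheckOnlyHasFields().
    void handshakeParsingExamples() {
        HandshakeArgs full;
        invariant(full.initialize(BSON("handshake" << OID::gen() << "member" << 1)).isOK());
        invariant(full.hasMemberId());

        HandshakeArgs noMember;  // missing "member" is tolerated, for master/slave senders
        invariant(noMember.initialize(BSON("handshake" << OID::gen())).isOK());
        invariant(!noMember.hasMemberId());

        HandshakeArgs unknownField;  // an unrecognized field fails validation
        invariant(!unknownField.initialize(BSON("handshake" << OID::gen() << "bogus" << 1)).isOK());
    }
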
diff --git a/src/mongo/db/repl/handshake_args.h b/src/mongo/db/repl/handshake_args.h
index b0d442aaaf6..b83bef87842 100644
--- a/src/mongo/db/repl/handshake_args.h
+++ b/src/mongo/db/repl/handshake_args.h
@@ -32,64 +32,72 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
+/**
+ * Arguments to the handshake command.
+ */
+class HandshakeArgs {
+public:
+ HandshakeArgs();
+
+ /**
+ * Initializes this HandshakeArgs from the contents of args.
+ */
+ Status initialize(const BSONObj& argsObj);
+
+ /**
+ * Returns true if all required fields have been initialized.
+ */
+ bool isInitialized() const;
+
+ /**
+ * Gets the _id of the sender in their ReplSetConfig.
+ */
+ long long getMemberId() const {
+ return _memberId;
+ }
+
/**
- * Arguments to the handshake command.
+ * Gets the unique identifier of the sender, which is used to track replication progress.
*/
- class HandshakeArgs {
- public:
- HandshakeArgs();
-
- /**
- * Initializes this HandshakeArgs from the contents of args.
- */
- Status initialize(const BSONObj& argsObj);
-
- /**
- * Returns true if all required fields have been initialized.
- */
- bool isInitialized() const;
-
- /**
- * Gets the _id of the sender in their ReplSetConfig.
- */
- long long getMemberId() const { return _memberId; }
-
- /**
- * Gets the unique identifier of the sender, which is used to track replication progress.
- */
- OID getRid() const { return _rid; }
-
- /**
- * The below methods check whether or not value in the method name has been set.
- */
- bool hasRid() { return _hasRid; };
- bool hasMemberId() { return _hasMemberId; };
-
- /**
- * The below methods set the value in the method name to 'newVal'.
- */
- void setRid(const OID& newVal);
- void setMemberId(long long newVal);
-
- /**
- * Returns a BSONified version of the object.
- * Should only be called if the mandatory fields have been set.
- * Optional fields are only included if they have been set.
- */
- BSONObj toBSON() const;
-
- private:
- bool _hasRid;
- bool _hasMemberId;
-
- // look at the body of the isInitialized() function to see which fields are mandatory
- OID _rid;
- long long _memberId;
+ OID getRid() const {
+ return _rid;
+ }
+
+ /**
+ * The below methods check whether or not the value named in the method has been set.
+ */
+ bool hasRid() {
+ return _hasRid;
};
+ bool hasMemberId() {
+ return _hasMemberId;
+ };
+
+ /**
+ * The below methods set the value in the method name to 'newVal'.
+ */
+ void setRid(const OID& newVal);
+ void setMemberId(long long newVal);
+
+ /**
+ * Returns a BSONified version of the object.
+ * Should only be called if the mandatory fields have been set.
+ * Optional fields are only included if they have been set.
+ */
+ BSONObj toBSON() const;
+
+private:
+ bool _hasRid;
+ bool _hasMemberId;
+
+ // look at the body of the isInitialized() function to see which fields are mandatory
+ OID _rid;
+ long long _memberId;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
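
Read together with handshake_args.cpp above, the build side of the API is symmetric. A hedged sketch of constructing a handshake document with the setters (the function is illustrative; the field names come from the constants in the .cpp):

    BSONObj buildHandshake(const OID& rid, long long memberId) {
        HandshakeArgs args;
        args.setRid(rid);            // mandatory: satisfies isInitialized()
        args.setMemberId(memberId);  // the optional "member" field
        return args.toBSON();        // {handshake: <rid>, member: <memberId>}
    }
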
diff --git a/src/mongo/db/repl/heartbeat_response_action.cpp b/src/mongo/db/repl/heartbeat_response_action.cpp
index 49ed33c4780..385e250329d 100644
--- a/src/mongo/db/repl/heartbeat_response_action.cpp
+++ b/src/mongo/db/repl/heartbeat_response_action.cpp
@@ -33,41 +33,41 @@
namespace mongo {
namespace repl {
- HeartbeatResponseAction HeartbeatResponseAction::makeNoAction() {
- return HeartbeatResponseAction();
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeNoAction() {
+ return HeartbeatResponseAction();
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeReconfigAction() {
- HeartbeatResponseAction result;
- result._action = Reconfig;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeReconfigAction() {
+ HeartbeatResponseAction result;
+ result._action = Reconfig;
+ return result;
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeElectAction() {
- HeartbeatResponseAction result;
- result._action = StartElection;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeElectAction() {
+ HeartbeatResponseAction result;
+ result._action = StartElection;
+ return result;
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeStepDownSelfAction(int primaryIndex) {
- HeartbeatResponseAction result;
- result._action = StepDownSelf;
- result._primaryIndex = primaryIndex;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeStepDownSelfAction(int primaryIndex) {
+ HeartbeatResponseAction result;
+ result._action = StepDownSelf;
+ result._primaryIndex = primaryIndex;
+ return result;
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeStepDownRemoteAction(int primaryIndex) {
- HeartbeatResponseAction result;
- result._action = StepDownRemotePrimary;
- result._primaryIndex = primaryIndex;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeStepDownRemoteAction(int primaryIndex) {
+ HeartbeatResponseAction result;
+ result._action = StepDownRemotePrimary;
+ result._primaryIndex = primaryIndex;
+ return result;
+}
- HeartbeatResponseAction::HeartbeatResponseAction() : _action(NoAction), _primaryIndex(-1) {}
+HeartbeatResponseAction::HeartbeatResponseAction() : _action(NoAction), _primaryIndex(-1) {}
- void HeartbeatResponseAction::setNextHeartbeatStartDate(Date_t when) {
- _nextHeartbeatStartDate = when;
- }
+void HeartbeatResponseAction::setNextHeartbeatStartDate(Date_t when) {
+ _nextHeartbeatStartDate = when;
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/heartbeat_response_action.h b/src/mongo/db/repl/heartbeat_response_action.h
index 55c2d459920..f45b3668a91 100644
--- a/src/mongo/db/repl/heartbeat_response_action.h
+++ b/src/mongo/db/repl/heartbeat_response_action.h
@@ -33,88 +33,88 @@
namespace mongo {
namespace repl {
+/**
+ * Description of actions taken in response to a heartbeat.
+ *
+ * This includes when to schedule the next heartbeat to a target, and any other actions to
+ * take, such as scheduling an election or stepping down as primary.
+ */
+class HeartbeatResponseAction {
+public:
+ /**
+ * Actions taken based on heartbeat responses
+ */
+ enum Action { NoAction, Reconfig, StartElection, StepDownSelf, StepDownRemotePrimary };
+
+ /**
+ * Makes a new action representing doing nothing.
+ */
+ static HeartbeatResponseAction makeNoAction();
+
+ /**
+ * Makes a new action representing the instruction to reconfigure the current node.
+ */
+ static HeartbeatResponseAction makeReconfigAction();
+
+ /**
+ * Makes a new action telling the current node to attempt to elect itself primary.
+ */
+ static HeartbeatResponseAction makeElectAction();
+
+ /**
+ * Makes a new action telling the current node to step down as primary.
+ *
+ * It is an error to call this with primaryIndex != the index of the current node.
+ */
+ static HeartbeatResponseAction makeStepDownSelfAction(int primaryIndex);
+
/**
- * Description of actions taken in response to a heartbeat.
+ * Makes a new action telling the current node to ask the specified remote node to step
+ * down as primary.
*
- * This includes when to schedule the next heartbeat to a target, and any other actions to
- * take, such as scheduling an election or stepping down as primary.
+ * It is an error to call this with primaryIndex == the index of the current node.
+ */
+ static HeartbeatResponseAction makeStepDownRemoteAction(int primaryIndex);
+
+ /**
+ * Construct an action with unspecified action and a next heartbeat start date in the
+ * past.
+ */
+ HeartbeatResponseAction();
+
+ /**
+ * Sets the date at which the next heartbeat should be scheduled.
+ */
+ void setNextHeartbeatStartDate(Date_t when);
+
+ /**
+ * Gets the action type of this action.
+ */
+ Action getAction() const {
+ return _action;
+ }
+
+ /**
+ * Gets the time at which the next heartbeat should be scheduled. If the
+ * time is not in the future, the next heartbeat should be scheduled immediately.
+ */
+ Date_t getNextHeartbeatStartDate() const {
+ return _nextHeartbeatStartDate;
+ }
+
+ /**
+ * If getAction() returns StepDownSelf or StepDownRemotePrimary, this is the index
+ * in the current replica set config of the node that ought to step down.
*/
- class HeartbeatResponseAction {
- public:
- /**
- * Actions taken based on heartbeat responses
- */
- enum Action {
- NoAction,
- Reconfig,
- StartElection,
- StepDownSelf,
- StepDownRemotePrimary
- };
-
- /**
- * Makes a new action representing doing nothing.
- */
- static HeartbeatResponseAction makeNoAction();
-
- /**
- * Makes a new action representing the instruction to reconfigure the current node.
- */
- static HeartbeatResponseAction makeReconfigAction();
-
- /**
- * Makes a new action telling the current node to attempt to elect itself primary.
- */
- static HeartbeatResponseAction makeElectAction();
-
- /**
- * Makes a new action telling the current node to step down as primary.
- *
- * It is an error to call this with primaryIndex != the index of the current node.
- */
- static HeartbeatResponseAction makeStepDownSelfAction(int primaryIndex);
-
- /**
- * Makes a new action telling the current node to ask the specified remote node to step
- * down as primary.
- *
- * It is an error to call this with primaryIndex == the index of the current node.
- */
- static HeartbeatResponseAction makeStepDownRemoteAction(int primaryIndex);
-
- /**
- * Construct an action with unspecified action and a next heartbeat start date in the
- * past.
- */
- HeartbeatResponseAction();
-
- /**
- * Sets the date at which the next heartbeat should be scheduled.
- */
- void setNextHeartbeatStartDate(Date_t when);
-
- /**
- * Gets the action type of this action.
- */
- Action getAction() const { return _action; }
-
- /**
- * Gets the time at which the next heartbeat should be scheduled. If the
- * time is not in the future, the next heartbeat should be scheduled immediately.
- */
- Date_t getNextHeartbeatStartDate() const { return _nextHeartbeatStartDate; }
-
- /**
- * If getAction() returns StepDownSelf or StepDownPrimary, this is the index
- * in the current replica set config of the node that ought to step down.
- */
- int getPrimaryConfigIndex() const { return _primaryIndex; }
-
- private:
- Action _action;
- int _primaryIndex;
- Date_t _nextHeartbeatStartDate;
- };
+ int getPrimaryConfigIndex() const {
+ return _primaryIndex;
+ }
+
+private:
+ Action _action;
+ int _primaryIndex;
+ Date_t _nextHeartbeatStartDate;
+};
} // namespace repl
} // namespace mongo
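
The factory methods above are the only way to obtain a non-trivial action, which keeps the (action, primaryIndex) pairing consistent. A sketch of typical construction at a heartbeat-processing site (the surrounding decision logic is a simplification for illustration, not the real topology coordinator):

    HeartbeatResponseAction onHeartbeat(bool remoteIsStalePrimary, int remoteIndex, Date_t now) {
        HeartbeatResponseAction action = remoteIsStalePrimary
            ? HeartbeatResponseAction::makeStepDownRemoteAction(remoteIndex)
            : HeartbeatResponseAction::makeNoAction();
        action.setNextHeartbeatStartDate(now + Milliseconds(2000));  // schedule the next beat
        return action;
    }
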
diff --git a/src/mongo/db/repl/initial_sync.cpp b/src/mongo/db/repl/initial_sync.cpp
index 494094862ba..0c6695c15a3 100644
--- a/src/mongo/db/repl/initial_sync.cpp
+++ b/src/mongo/db/repl/initial_sync.cpp
@@ -39,24 +39,23 @@
namespace mongo {
namespace repl {
- unsigned replSetForceInitialSyncFailure = 0;
-
- InitialSync::InitialSync(BackgroundSyncInterface *q) :
- SyncTail(q, multiInitialSyncApply) {}
-
- InitialSync::~InitialSync() {}
-
- /* initial oplog application, during initial sync, after cloning.
- */
- void InitialSync::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
- if (replSetForceInitialSyncFailure > 0) {
- log() << "test code invoked, forced InitialSync failure: "
- << replSetForceInitialSyncFailure;
- replSetForceInitialSyncFailure--;
- throw DBException("forced error",0);
- }
- _applyOplogUntil(txn, endOpTime);
+unsigned replSetForceInitialSyncFailure = 0;
+
+InitialSync::InitialSync(BackgroundSyncInterface* q) : SyncTail(q, multiInitialSyncApply) {}
+
+InitialSync::~InitialSync() {}
+
+/* Initial oplog application, during initial sync, after cloning. */
+void InitialSync::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
+ if (replSetForceInitialSyncFailure > 0) {
+ log() << "test code invoked, forced InitialSync failure: "
+ << replSetForceInitialSyncFailure;
+ replSetForceInitialSyncFailure--;
+ throw DBException("forced error", 0);
}
+ _applyOplogUntil(txn, endOpTime);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/initial_sync.h b/src/mongo/db/repl/initial_sync.h
index 41c1310656c..9fd435341c1 100644
--- a/src/mongo/db/repl/initial_sync.h
+++ b/src/mongo/db/repl/initial_sync.h
@@ -33,28 +33,30 @@
namespace mongo {
namespace repl {
- class BackgroundSyncInterface;
+class BackgroundSyncInterface;
+
+/**
+ * Initial clone and sync
+ */
+class InitialSync : public SyncTail {
+public:
+ virtual ~InitialSync();
+ InitialSync(BackgroundSyncInterface* q);
/**
- * Initial clone and sync
+ * Applies oplog entries up to endOpTime, fetching missing documents as needed.
*/
- class InitialSync : public SyncTail {
- public:
- virtual ~InitialSync();
- InitialSync(BackgroundSyncInterface *q);
-
- /**
- * applies up to endOpTime, fetching missing documents as needed.
- */
- void oplogApplication(OperationContext* txn, const OpTime& endOpTime);
-
- // Initial sync will ignore all journal requirement flags and doesn't wait until
- // operations are durable before updating the last OpTime.
- virtual bool supportsWaitingUntilDurable() { return false; }
- };
-
- // Used for ReplSetTest testing.
- extern unsigned replSetForceInitialSyncFailure;
-
-} // namespace repl
-} // namespace mongo
+ void oplogApplication(OperationContext* txn, const OpTime& endOpTime);
+
+ // Initial sync will ignore all journal requirement flags and doesn't wait until
+ // operations are durable before updating the last OpTime.
+ virtual bool supportsWaitingUntilDurable() {
+ return false;
+ }
+};
+
+// Used for ReplSetTest testing.
+extern unsigned replSetForceInitialSyncFailure;
+
+} // namespace repl
+} // namespace mongo
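
replSetForceInitialSyncFailure above is a plain counter, so each forced failure consumes one unit. A sketch of how a test might exercise it, given the oplogApplication() behavior in initial_sync.cpp (the helper function and its retry policy are assumptions for illustration):

    void forceOneInitialSyncFailure(OperationContext* txn, InitialSync* syncer, const OpTime& end) {
        replSetForceInitialSyncFailure = 1;  // next oplogApplication() throws "forced error"
        try {
            syncer->oplogApplication(txn, end);
        } catch (const DBException&) {
            // The counter was decremented to 0, so the retry proceeds normally.
            syncer->oplogApplication(txn, end);
        }
    }
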
diff --git a/src/mongo/db/repl/is_master_response.cpp b/src/mongo/db/repl/is_master_response.cpp
index ab38c295d14..4a4b1394670 100644
--- a/src/mongo/db/repl/is_master_response.cpp
+++ b/src/mongo/db/repl/is_master_response.cpp
@@ -42,415 +42,417 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kIsMasterFieldName = "ismaster";
- const std::string kSecondaryFieldName = "secondary";
- const std::string kSetNameFieldName = "setName";
- const std::string kSetVersionFieldName = "setVersion";
- const std::string kHostsFieldName = "hosts";
- const std::string kPassivesFieldName = "passives";
- const std::string kArbitersFieldName = "arbiters";
- const std::string kPrimaryFieldName = "primary";
- const std::string kArbiterOnlyFieldName = "arbiterOnly";
- const std::string kPassiveFieldName = "passive";
- const std::string kHiddenFieldName = "hidden";
- const std::string kBuildIndexesFieldName = "buildIndexes";
- const std::string kSlaveDelayFieldName = "slaveDelay";
- const std::string kTagsFieldName = "tags";
- const std::string kMeFieldName = "me";
- const std::string kElectionIdFieldName = "electionId";
-
- // field name constants that don't directly correspond to member variables
- const std::string kInfoFieldName = "info";
- const std::string kIsReplicaSetFieldName = "isreplicaset";
- const std::string kErrmsgFieldName = "errmsg";
- const std::string kCodeFieldName = "code";
+const std::string kIsMasterFieldName = "ismaster";
+const std::string kSecondaryFieldName = "secondary";
+const std::string kSetNameFieldName = "setName";
+const std::string kSetVersionFieldName = "setVersion";
+const std::string kHostsFieldName = "hosts";
+const std::string kPassivesFieldName = "passives";
+const std::string kArbitersFieldName = "arbiters";
+const std::string kPrimaryFieldName = "primary";
+const std::string kArbiterOnlyFieldName = "arbiterOnly";
+const std::string kPassiveFieldName = "passive";
+const std::string kHiddenFieldName = "hidden";
+const std::string kBuildIndexesFieldName = "buildIndexes";
+const std::string kSlaveDelayFieldName = "slaveDelay";
+const std::string kTagsFieldName = "tags";
+const std::string kMeFieldName = "me";
+const std::string kElectionIdFieldName = "electionId";
+
+// field name constants that don't directly correspond to member variables
+const std::string kInfoFieldName = "info";
+const std::string kIsReplicaSetFieldName = "isreplicaset";
+const std::string kErrmsgFieldName = "errmsg";
+const std::string kCodeFieldName = "code";
} // namespace
- IsMasterResponse::IsMasterResponse() :
- _isMaster(false),
- _isMasterSet(false),
- _secondary(false),
- _isSecondarySet(false),
- _setNameSet(false),
- _setVersion(0),
- _setVersionSet(false),
- _hostsSet(false),
- _passivesSet(false),
- _arbitersSet(false),
- _primarySet(false),
- _arbiterOnly(false),
- _arbiterOnlySet(false),
- _passive(false),
- _passiveSet(false),
- _hidden(false),
- _hiddenSet(false),
- _buildIndexes(true),
- _buildIndexesSet(false),
- _slaveDelay(0),
- _slaveDelaySet(false),
- _tagsSet(false),
- _meSet(false),
- _electionId(OID()),
- _configSet(true),
- _shutdownInProgress(false)
- {}
-
- void IsMasterResponse::addToBSON(BSONObjBuilder* builder) const {
- if (_shutdownInProgress) {
- builder->append(kCodeFieldName, ErrorCodes::ShutdownInProgress);
- builder->append(kErrmsgFieldName, "replication shutdown in progress");
- return;
- }
+IsMasterResponse::IsMasterResponse()
+ : _isMaster(false),
+ _isMasterSet(false),
+ _secondary(false),
+ _isSecondarySet(false),
+ _setNameSet(false),
+ _setVersion(0),
+ _setVersionSet(false),
+ _hostsSet(false),
+ _passivesSet(false),
+ _arbitersSet(false),
+ _primarySet(false),
+ _arbiterOnly(false),
+ _arbiterOnlySet(false),
+ _passive(false),
+ _passiveSet(false),
+ _hidden(false),
+ _hiddenSet(false),
+ _buildIndexes(true),
+ _buildIndexesSet(false),
+ _slaveDelay(0),
+ _slaveDelaySet(false),
+ _tagsSet(false),
+ _meSet(false),
+ _electionId(OID()),
+ _configSet(true),
+ _shutdownInProgress(false) {}
+
+void IsMasterResponse::addToBSON(BSONObjBuilder* builder) const {
+ if (_shutdownInProgress) {
+ builder->append(kCodeFieldName, ErrorCodes::ShutdownInProgress);
+ builder->append(kErrmsgFieldName, "replication shutdown in progress");
+ return;
+ }
- if (!_configSet) {
- builder->append(kIsMasterFieldName, false);
- builder->append(kSecondaryFieldName, false);
- builder->append(kInfoFieldName, "Does not have a valid replica set config");
- builder->append(kIsReplicaSetFieldName , true);
- return;
- }
+ if (!_configSet) {
+ builder->append(kIsMasterFieldName, false);
+ builder->append(kSecondaryFieldName, false);
+ builder->append(kInfoFieldName, "Does not have a valid replica set config");
+ builder->append(kIsReplicaSetFieldName, true);
+ return;
+ }
- invariant(_setNameSet);
- builder->append(kSetNameFieldName, _setName);
- invariant(_setVersionSet);
- builder->append(kSetVersionFieldName, static_cast<int>(_setVersion));
- invariant(_isMasterSet);
- builder->append(kIsMasterFieldName, _isMaster);
- invariant(_isSecondarySet);
- builder->append(kSecondaryFieldName, _secondary);
-
- if (_hostsSet) {
- std::vector<std::string> hosts;
- for (size_t i = 0; i < _hosts.size(); ++i) {
- hosts.push_back(_hosts[i].toString());
- }
- builder->append(kHostsFieldName, hosts);
+ invariant(_setNameSet);
+ builder->append(kSetNameFieldName, _setName);
+ invariant(_setVersionSet);
+ builder->append(kSetVersionFieldName, static_cast<int>(_setVersion));
+ invariant(_isMasterSet);
+ builder->append(kIsMasterFieldName, _isMaster);
+ invariant(_isSecondarySet);
+ builder->append(kSecondaryFieldName, _secondary);
+
+ if (_hostsSet) {
+ std::vector<std::string> hosts;
+ for (size_t i = 0; i < _hosts.size(); ++i) {
+ hosts.push_back(_hosts[i].toString());
}
- if (_passivesSet) {
- std::vector<std::string> passives;
- for (size_t i = 0; i < _passives.size(); ++i) {
- passives.push_back(_passives[i].toString());
- }
- builder->append(kPassivesFieldName, passives);
+ builder->append(kHostsFieldName, hosts);
+ }
+ if (_passivesSet) {
+ std::vector<std::string> passives;
+ for (size_t i = 0; i < _passives.size(); ++i) {
+ passives.push_back(_passives[i].toString());
}
- if (_arbitersSet) {
- std::vector<std::string> arbiters;
- for (size_t i = 0; i < _arbiters.size(); ++i) {
- arbiters.push_back(_arbiters[i].toString());
- }
- builder->append(kArbitersFieldName, arbiters);
+ builder->append(kPassivesFieldName, passives);
+ }
+ if (_arbitersSet) {
+ std::vector<std::string> arbiters;
+ for (size_t i = 0; i < _arbiters.size(); ++i) {
+ arbiters.push_back(_arbiters[i].toString());
}
- if (_primarySet)
- builder->append(kPrimaryFieldName, _primary.toString());
- if (_arbiterOnlySet)
- builder->append(kArbiterOnlyFieldName, _arbiterOnly);
- if (_passiveSet)
- builder->append(kPassiveFieldName, _passive);
- if (_hiddenSet)
- builder->append(kHiddenFieldName, _hidden);
- if (_buildIndexesSet)
- builder->append(kBuildIndexesFieldName, _buildIndexes);
- if (_slaveDelaySet)
- builder->appendIntOrLL(kSlaveDelayFieldName, durationCount<Seconds>(_slaveDelay));
- if (_tagsSet) {
- BSONObjBuilder tags(builder->subobjStart(kTagsFieldName));
- for (unordered_map<std::string, std::string>::const_iterator it = _tags.begin();
- it != _tags.end(); ++it) {
- tags.append(it->first, it->second);
- }
+ builder->append(kArbitersFieldName, arbiters);
+ }
+ if (_primarySet)
+ builder->append(kPrimaryFieldName, _primary.toString());
+ if (_arbiterOnlySet)
+ builder->append(kArbiterOnlyFieldName, _arbiterOnly);
+ if (_passiveSet)
+ builder->append(kPassiveFieldName, _passive);
+ if (_hiddenSet)
+ builder->append(kHiddenFieldName, _hidden);
+ if (_buildIndexesSet)
+ builder->append(kBuildIndexesFieldName, _buildIndexes);
+ if (_slaveDelaySet)
+ builder->appendIntOrLL(kSlaveDelayFieldName, durationCount<Seconds>(_slaveDelay));
+ if (_tagsSet) {
+ BSONObjBuilder tags(builder->subobjStart(kTagsFieldName));
+ for (unordered_map<std::string, std::string>::const_iterator it = _tags.begin();
+ it != _tags.end();
+ ++it) {
+ tags.append(it->first, it->second);
+ }
+ }
+ invariant(_meSet);
+ builder->append(kMeFieldName, _me.toString());
+ if (_electionId.isSet())
+ builder->append(kElectionIdFieldName, _electionId);
+}
+
+BSONObj IsMasterResponse::toBSON() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.obj();
+}
+
+Status IsMasterResponse::initialize(const BSONObj& doc) {
+ Status status = bsonExtractBooleanField(doc, kIsMasterFieldName, &_isMaster);
+ if (!status.isOK()) {
+ return status;
+ }
+ _isMasterSet = true;
+ status = bsonExtractBooleanField(doc, kSecondaryFieldName, &_secondary);
+ if (!status.isOK()) {
+ return status;
+ }
+ _isSecondarySet = true;
+ if (doc.hasField(kInfoFieldName)) {
+ if (_isMaster || _secondary || !doc.hasField(kIsReplicaSetFieldName) ||
+ !doc[kIsReplicaSetFieldName].booleanSafe()) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "Expected presence of \"" << kInfoFieldName
+ << "\" field to indicate no valid config loaded, but other "
+ "fields weren't as we expected");
+ }
+ _configSet = false;
+ return Status::OK();
+ } else {
+ if (doc.hasField(kIsReplicaSetFieldName)) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "Found \"" << kIsReplicaSetFieldName
+ << "\" field which should indicate that no valid config "
+ "is loaded, but we didn't also have an \""
+ << kInfoFieldName << "\" field as we expected");
}
- invariant(_meSet);
- builder->append(kMeFieldName, _me.toString());
- if (_electionId.isSet())
- builder->append(kElectionIdFieldName, _electionId);
}
- BSONObj IsMasterResponse::toBSON() const {
- BSONObjBuilder builder;
- addToBSON(&builder);
- return builder.obj();
+ status = bsonExtractStringField(doc, kSetNameFieldName, &_setName);
+ if (!status.isOK()) {
+ return status;
+ }
+ _setNameSet = true;
+ status = bsonExtractIntegerField(doc, kSetVersionFieldName, &_setVersion);
+ if (!status.isOK()) {
+ return status;
}
+ _setVersionSet = true;
- Status IsMasterResponse::initialize(const BSONObj& doc) {
- Status status = bsonExtractBooleanField(doc, kIsMasterFieldName, &_isMaster);
- if (!status.isOK()) {
- return status;
- }
- _isMasterSet = true;
- status = bsonExtractBooleanField(doc, kSecondaryFieldName, &_secondary);
+ if (doc.hasField(kHostsFieldName)) {
+ BSONElement hostsElement;
+ status = bsonExtractTypedField(doc, kHostsFieldName, Array, &hostsElement);
if (!status.isOK()) {
return status;
}
- _isSecondarySet = true;
- if (doc.hasField(kInfoFieldName)) {
- if (_isMaster ||
- _secondary ||
- !doc.hasField(kIsReplicaSetFieldName) ||
- !doc[kIsReplicaSetFieldName].booleanSafe()) {
- return Status(ErrorCodes::FailedToParse,
- str::stream() << "Expected presence of \"" << kInfoFieldName <<
- "\" field to indicate no valid config loaded, but other "
- "fields weren't as we expected");
- }
- _configSet = false;
- return Status::OK();
- }
- else {
- if (doc.hasField(kIsReplicaSetFieldName)) {
- return Status(ErrorCodes::FailedToParse,
- str::stream() << "Found \"" << kIsReplicaSetFieldName <<
- "\" field which should indicate that no valid config "
- "is loaded, but we didn't also have an \"" <<
- kInfoFieldName << "\" field as we expected");
+ for (BSONObjIterator it(hostsElement.Obj()); it.more();) {
+ BSONElement hostElement = it.next();
+ if (hostElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kHostsFieldName
+ << "\" array of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(hostElement.type()));
}
+ _hosts.push_back(HostAndPort(hostElement.String()));
}
+ _hostsSet = true;
+ }
- status = bsonExtractStringField(doc, kSetNameFieldName, &_setName);
- if (!status.isOK()) {
- return status;
- }
- _setNameSet = true;
- status = bsonExtractIntegerField(doc, kSetVersionFieldName, &_setVersion);
+ if (doc.hasField(kPassivesFieldName)) {
+ BSONElement passivesElement;
+ status = bsonExtractTypedField(doc, kPassivesFieldName, Array, &passivesElement);
if (!status.isOK()) {
return status;
}
- _setVersionSet = true;
-
- if (doc.hasField(kHostsFieldName)) {
- BSONElement hostsElement;
- status = bsonExtractTypedField(doc, kHostsFieldName, Array, &hostsElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(hostsElement.Obj()); it.more();) {
- BSONElement hostElement = it.next();
- if (hostElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kHostsFieldName <<
- "\" array of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(hostElement.type()));
- }
- _hosts.push_back(HostAndPort(hostElement.String()));
- }
- _hostsSet = true;
- }
-
- if (doc.hasField(kPassivesFieldName)) {
- BSONElement passivesElement;
- status = bsonExtractTypedField(doc, kPassivesFieldName, Array, &passivesElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(passivesElement.Obj()); it.more();) {
- BSONElement passiveElement = it.next();
- if (passiveElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kPassivesFieldName <<
- "\" array of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(passiveElement.type()));
- }
- _passives.push_back(HostAndPort(passiveElement.String()));
- }
- _passivesSet = true;
- }
-
- if (doc.hasField(kArbitersFieldName)) {
- BSONElement arbitersElement;
- status = bsonExtractTypedField(doc, kArbitersFieldName, Array, &arbitersElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(arbitersElement.Obj()); it.more();) {
- BSONElement arbiterElement = it.next();
- if (arbiterElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kArbitersFieldName <<
- "\" array of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(arbiterElement.type()));
- }
- _arbiters.push_back(HostAndPort(arbiterElement.String()));
- }
- _arbitersSet = true;
- }
-
- if (doc.hasField(kPrimaryFieldName)) {
- std::string primaryString;
- status = bsonExtractStringField(doc, kPrimaryFieldName, &primaryString);
- if (!status.isOK()) {
- return status;
- }
- _primary = HostAndPort(primaryString);
- _primarySet = true;
- }
-
- if (doc.hasField(kArbiterOnlyFieldName)) {
- status = bsonExtractBooleanField(doc, kArbiterOnlyFieldName, &_arbiterOnly);
- if (!status.isOK()) {
- return status;
- }
- _arbiterOnlySet = true;
- }
-
- if (doc.hasField(kPassiveFieldName)) {
- status = bsonExtractBooleanField(doc, kPassiveFieldName, &_passive);
- if (!status.isOK()) {
- return status;
- }
- _passiveSet = true;
- }
-
- if (doc.hasField(kHiddenFieldName)) {
- status = bsonExtractBooleanField(doc, kHiddenFieldName, &_hidden);
- if (!status.isOK()) {
- return status;
- }
- _hiddenSet = true;
- }
-
- if (doc.hasField(kBuildIndexesFieldName)) {
- status = bsonExtractBooleanField(doc, kBuildIndexesFieldName, &_buildIndexes);
- if (!status.isOK()) {
- return status;
- }
- _buildIndexesSet = true;
- }
-
- if (doc.hasField(kSlaveDelayFieldName)) {
- long long slaveDelaySecs;
- status = bsonExtractIntegerField(doc, kSlaveDelayFieldName, &slaveDelaySecs);
- if (!status.isOK()) {
- return status;
+ for (BSONObjIterator it(passivesElement.Obj()); it.more();) {
+ BSONElement passiveElement = it.next();
+ if (passiveElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kPassivesFieldName
+ << "\" array of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(passiveElement.type()));
}
- _slaveDelaySet = true;
- _slaveDelay = Seconds(slaveDelaySecs);
+ _passives.push_back(HostAndPort(passiveElement.String()));
}
+ _passivesSet = true;
+ }
- if (doc.hasField(kTagsFieldName)) {
- BSONElement tagsElement;
- status = bsonExtractTypedField(doc, kTagsFieldName, Object, &tagsElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(tagsElement.Obj()); it.more();) {
- BSONElement tagElement = it.next();
- if (tagElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kTagsFieldName << "\" obj "
- "of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(tagsElement.type()));
- }
- _tags[tagElement.fieldNameStringData().toString()] = tagElement.String();
- }
- _tagsSet = true;
+ if (doc.hasField(kArbitersFieldName)) {
+ BSONElement arbitersElement;
+ status = bsonExtractTypedField(doc, kArbitersFieldName, Array, &arbitersElement);
+ if (!status.isOK()) {
+ return status;
}
-
- if (doc.hasField(kElectionIdFieldName)) {
- BSONElement electionIdElem;
- status = bsonExtractTypedField(doc, kElectionIdFieldName, jstOID, &electionIdElem);
- if (!status.isOK()) {
- return status;
+ for (BSONObjIterator it(arbitersElement.Obj()); it.more();) {
+ BSONElement arbiterElement = it.next();
+ if (arbiterElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kArbitersFieldName
+ << "\" array of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(arbiterElement.type()));
}
- _electionId = electionIdElem.OID();
+ _arbiters.push_back(HostAndPort(arbiterElement.String()));
}
+ _arbitersSet = true;
+ }
- std::string meString;
- status = bsonExtractStringField(doc, kMeFieldName, &meString);
+ if (doc.hasField(kPrimaryFieldName)) {
+ std::string primaryString;
+ status = bsonExtractStringField(doc, kPrimaryFieldName, &primaryString);
if (!status.isOK()) {
return status;
}
- _me = HostAndPort(meString);
- _meSet = true;
-
- return Status::OK();
- }
-
- void IsMasterResponse::setIsMaster(bool isMaster) {
- _isMasterSet = true;
- _isMaster = isMaster;
- }
-
- void IsMasterResponse::setIsSecondary(bool secondary) {
- _isSecondarySet = true;
- _secondary = secondary;
- }
-
- void IsMasterResponse::setReplSetName(const std::string& setName) {
- _setNameSet = true;
- _setName = setName;
- }
-
- void IsMasterResponse::setReplSetVersion(long long version) {
- _setVersionSet = true;
- _setVersion = version;
- }
-
- void IsMasterResponse::addHost(const HostAndPort& host) {
- _hostsSet = true;
- _hosts.push_back(host);
- }
-
- void IsMasterResponse::addPassive(const HostAndPort& passive) {
- _passivesSet = true;
- _passives.push_back(passive);
- }
-
- void IsMasterResponse::addArbiter(const HostAndPort& arbiter) {
- _arbitersSet = true;
- _arbiters.push_back(arbiter);
- }
-
- void IsMasterResponse::setPrimary(const HostAndPort& primary) {
+ _primary = HostAndPort(primaryString);
_primarySet = true;
- _primary = primary;
}
- void IsMasterResponse::setIsArbiterOnly(bool arbiterOnly) {
+ if (doc.hasField(kArbiterOnlyFieldName)) {
+ status = bsonExtractBooleanField(doc, kArbiterOnlyFieldName, &_arbiterOnly);
+ if (!status.isOK()) {
+ return status;
+ }
_arbiterOnlySet = true;
- _arbiterOnly = arbiterOnly;
}
- void IsMasterResponse::setIsPassive(bool passive) {
+ if (doc.hasField(kPassiveFieldName)) {
+ status = bsonExtractBooleanField(doc, kPassiveFieldName, &_passive);
+ if (!status.isOK()) {
+ return status;
+ }
_passiveSet = true;
- _passive = passive;
}
- void IsMasterResponse::setIsHidden(bool hidden) {
+ if (doc.hasField(kHiddenFieldName)) {
+ status = bsonExtractBooleanField(doc, kHiddenFieldName, &_hidden);
+ if (!status.isOK()) {
+ return status;
+ }
_hiddenSet = true;
- _hidden = hidden;
}
- void IsMasterResponse::setShouldBuildIndexes(bool buildIndexes) {
+ if (doc.hasField(kBuildIndexesFieldName)) {
+ status = bsonExtractBooleanField(doc, kBuildIndexesFieldName, &_buildIndexes);
+ if (!status.isOK()) {
+ return status;
+ }
_buildIndexesSet = true;
- _buildIndexes = buildIndexes;
}
- void IsMasterResponse::setSlaveDelay(Seconds slaveDelay) {
+ if (doc.hasField(kSlaveDelayFieldName)) {
+ long long slaveDelaySecs;
+ status = bsonExtractIntegerField(doc, kSlaveDelayFieldName, &slaveDelaySecs);
+ if (!status.isOK()) {
+ return status;
+ }
_slaveDelaySet = true;
- _slaveDelay = slaveDelay;
+ _slaveDelay = Seconds(slaveDelaySecs);
}
- void IsMasterResponse::addTag(const std::string& tagKey, const std::string& tagValue) {
+ if (doc.hasField(kTagsFieldName)) {
+ BSONElement tagsElement;
+ status = bsonExtractTypedField(doc, kTagsFieldName, Object, &tagsElement);
+ if (!status.isOK()) {
+ return status;
+ }
+ for (BSONObjIterator it(tagsElement.Obj()); it.more();) {
+ BSONElement tagElement = it.next();
+ if (tagElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kTagsFieldName
+ << "\" obj "
+ "of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(tagsElement.type()));
+ }
+ _tags[tagElement.fieldNameStringData().toString()] = tagElement.String();
+ }
_tagsSet = true;
- _tags[tagKey] = tagValue;
}
- void IsMasterResponse::setMe(const HostAndPort& me) {
- _meSet = true;
- _me = me;
+ if (doc.hasField(kElectionIdFieldName)) {
+ BSONElement electionIdElem;
+ status = bsonExtractTypedField(doc, kElectionIdFieldName, jstOID, &electionIdElem);
+ if (!status.isOK()) {
+ return status;
+ }
+ _electionId = electionIdElem.OID();
}
- void IsMasterResponse::setElectionId(const OID& electionId) {
- _electionId = electionId;
+ std::string meString;
+ status = bsonExtractStringField(doc, kMeFieldName, &meString);
+ if (!status.isOK()) {
+ return status;
}
-
- void IsMasterResponse::markAsNoConfig() { _configSet = false; }
-
- void IsMasterResponse::markAsShutdownInProgress() { _shutdownInProgress = true; }
-
-} // namespace repl
-} // namespace mongo
+ _me = HostAndPort(meString);
+ _meSet = true;
+
+ return Status::OK();
+}
+
+void IsMasterResponse::setIsMaster(bool isMaster) {
+ _isMasterSet = true;
+ _isMaster = isMaster;
+}
+
+void IsMasterResponse::setIsSecondary(bool secondary) {
+ _isSecondarySet = true;
+ _secondary = secondary;
+}
+
+void IsMasterResponse::setReplSetName(const std::string& setName) {
+ _setNameSet = true;
+ _setName = setName;
+}
+
+void IsMasterResponse::setReplSetVersion(long long version) {
+ _setVersionSet = true;
+ _setVersion = version;
+}
+
+void IsMasterResponse::addHost(const HostAndPort& host) {
+ _hostsSet = true;
+ _hosts.push_back(host);
+}
+
+void IsMasterResponse::addPassive(const HostAndPort& passive) {
+ _passivesSet = true;
+ _passives.push_back(passive);
+}
+
+void IsMasterResponse::addArbiter(const HostAndPort& arbiter) {
+ _arbitersSet = true;
+ _arbiters.push_back(arbiter);
+}
+
+void IsMasterResponse::setPrimary(const HostAndPort& primary) {
+ _primarySet = true;
+ _primary = primary;
+}
+
+void IsMasterResponse::setIsArbiterOnly(bool arbiterOnly) {
+ _arbiterOnlySet = true;
+ _arbiterOnly = arbiterOnly;
+}
+
+void IsMasterResponse::setIsPassive(bool passive) {
+ _passiveSet = true;
+ _passive = passive;
+}
+
+void IsMasterResponse::setIsHidden(bool hidden) {
+ _hiddenSet = true;
+ _hidden = hidden;
+}
+
+void IsMasterResponse::setShouldBuildIndexes(bool buildIndexes) {
+ _buildIndexesSet = true;
+ _buildIndexes = buildIndexes;
+}
+
+void IsMasterResponse::setSlaveDelay(Seconds slaveDelay) {
+ _slaveDelaySet = true;
+ _slaveDelay = slaveDelay;
+}
+
+void IsMasterResponse::addTag(const std::string& tagKey, const std::string& tagValue) {
+ _tagsSet = true;
+ _tags[tagKey] = tagValue;
+}
+
+void IsMasterResponse::setMe(const HostAndPort& me) {
+ _meSet = true;
+ _me = me;
+}
+
+void IsMasterResponse::setElectionId(const OID& electionId) {
+ _electionId = electionId;
+}
+
+void IsMasterResponse::markAsNoConfig() {
+ _configSet = false;
+}
+
+void IsMasterResponse::markAsShutdownInProgress() {
+ _shutdownInProgress = true;
+}
+
+} // namespace repl
+} // namespace mongo
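
A minimal round-trip sketch of the serialize/parse pair reformatted above; the driver function is hypothetical and assumes the mongo::repl namespace and the usual BSON headers:

    // Build a response the way the replication coordinator would, then
    // round-trip it through BSON. addToBSON() invariants that the set name,
    // set version, ismaster, secondary, and "me" fields were populated first.
    void isMasterRoundTripSketch() {
        IsMasterResponse response;
        response.setReplSetName("rs0");
        response.setReplSetVersion(1);
        response.setIsMaster(true);
        response.setIsSecondary(false);
        response.setMe(HostAndPort("node1:27017"));
        response.addHost(HostAndPort("node1:27017"));
        response.addHost(HostAndPort("node2:27017"));

        BSONObj doc = response.toBSON();

        IsMasterResponse parsed;
        Status status = parsed.initialize(doc);  // reparses the same fields
        invariant(status.isOK());
        invariant(parsed.isMaster() && !parsed.isSecondary());
        invariant(parsed.getHosts().size() == 2U);
    }
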
diff --git a/src/mongo/db/repl/is_master_response.h b/src/mongo/db/repl/is_master_response.h
index dd0eda70e2b..7bfaf1ac0b2 100644
--- a/src/mongo/db/repl/is_master_response.h
+++ b/src/mongo/db/repl/is_master_response.h
@@ -38,179 +38,216 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class Status;
+class BSONObj;
+class BSONObjBuilder;
+class Status;
namespace repl {
+/**
+ * Response structure for the ismaster command. Only handles responses from nodes
+ * that are in replset mode.
+ */
+class IsMasterResponse {
+public:
+ IsMasterResponse();
+
/**
- * Response structure for the ismaster command. Only handles responses from nodes
- * that are in replset mode.
+ * Initializes this IsMasterResponse from the contents of "doc".
*/
- class IsMasterResponse {
- public:
- IsMasterResponse();
-
- /**
- * Initializes this IsMasterResponse from the contents of "doc".
- */
- Status initialize(const BSONObj& doc);
-
- /**
- * Appends all non-default values to "builder".
- * There are two values that are handled specially: if _inShutdown is true or _configSet
- * is false, we will add a standard response to "builder" indicating either that we are
- * in the middle of shutting down or we do not have a valid replica set config, ignoring
- * the values of all other member variables.
- */
- void addToBSON(BSONObjBuilder* builder) const;
+ Status initialize(const BSONObj& doc);
- /**
- * Returns a BSONObj consisting the results of calling addToBSON on an otherwise empty
- * BSONObjBuilder.
- */
- BSONObj toBSON() const;
-
-
- // ===================== Accessors for member variables ================================= //
-
- bool isMaster() const { return _isMaster; }
+ /**
+ * Appends all non-default values to "builder".
+     * There are two values that are handled specially: if _shutdownInProgress is true or _configSet
+ * is false, we will add a standard response to "builder" indicating either that we are
+ * in the middle of shutting down or we do not have a valid replica set config, ignoring
+ * the values of all other member variables.
+ */
+ void addToBSON(BSONObjBuilder* builder) const;
- bool isSecondary() const { return _secondary; }
+ /**
+     * Returns a BSONObj consisting of the results of calling addToBSON on an otherwise empty
+ * BSONObjBuilder.
+ */
+ BSONObj toBSON() const;
- const std::string& getReplSetName() const { return _setName; }
- long long getReplSetVersion() const { return _setVersion; }
+ // ===================== Accessors for member variables ================================= //
- const std::vector<HostAndPort>& getHosts() const { return _hosts; }
+ bool isMaster() const {
+ return _isMaster;
+ }
- const std::vector<HostAndPort>& getPassives() const { return _passives; }
+ bool isSecondary() const {
+ return _secondary;
+ }
- const std::vector<HostAndPort>& getArbiters() const { return _arbiters; }
+ const std::string& getReplSetName() const {
+ return _setName;
+ }
- const HostAndPort& getPrimary() const { return _primary; }
+ long long getReplSetVersion() const {
+ return _setVersion;
+ }
- bool hasPrimary() const { return _primarySet; }
+ const std::vector<HostAndPort>& getHosts() const {
+ return _hosts;
+ }
- bool isArbiterOnly() const { return _arbiterOnly; }
+ const std::vector<HostAndPort>& getPassives() const {
+ return _passives;
+ }
- bool isPassive() const { return _passive; }
+ const std::vector<HostAndPort>& getArbiters() const {
+ return _arbiters;
+ }
- bool isHidden() const { return _hidden; }
+ const HostAndPort& getPrimary() const {
+ return _primary;
+ }
- bool shouldBuildIndexes() const { return _buildIndexes; }
+ bool hasPrimary() const {
+ return _primarySet;
+ }
- Seconds getSlaveDelay() const { return _slaveDelay; }
+ bool isArbiterOnly() const {
+ return _arbiterOnly;
+ }
- const unordered_map<std::string, std::string> getTags() const { return _tags; }
+ bool isPassive() const {
+ return _passive;
+ }
- const HostAndPort& getMe() const { return _me; }
+ bool isHidden() const {
+ return _hidden;
+ }
- const OID& getElectionId() const { return _electionId; }
+ bool shouldBuildIndexes() const {
+ return _buildIndexes;
+ }
- /**
- * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
- * message to indicate that we have no replica set config.
- */
- bool isConfigSet() const { return _configSet; }
+ Seconds getSlaveDelay() const {
+ return _slaveDelay;
+ }
- /**
- * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
- * message to indicate that we are in the middle of shutting down.
- */
- bool isShutdownInProgress() const { return _shutdownInProgress; }
+ const unordered_map<std::string, std::string> getTags() const {
+ return _tags;
+ }
+ const HostAndPort& getMe() const {
+ return _me;
+ }
- // ===================== Mutators for member variables ================================= //
+ const OID& getElectionId() const {
+ return _electionId;
+ }
- void setIsMaster(bool isMaster);
+ /**
+ * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
+ * message to indicate that we have no replica set config.
+ */
+ bool isConfigSet() const {
+ return _configSet;
+ }
- void setIsSecondary(bool secondary);
+ /**
+ * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
+ * message to indicate that we are in the middle of shutting down.
+ */
+ bool isShutdownInProgress() const {
+ return _shutdownInProgress;
+ }
- void setReplSetName(const std::string& setName);
- void setReplSetVersion(long long version);
+ // ===================== Mutators for member variables ================================= //
- void addHost(const HostAndPort& host);
+ void setIsMaster(bool isMaster);
- void addPassive(const HostAndPort& passive);
+ void setIsSecondary(bool secondary);
- void addArbiter(const HostAndPort& arbiter);
+ void setReplSetName(const std::string& setName);
- void setPrimary(const HostAndPort& primary);
+ void setReplSetVersion(long long version);
- void setIsArbiterOnly(bool arbiterOnly);
+ void addHost(const HostAndPort& host);
- void setIsPassive(bool passive);
+ void addPassive(const HostAndPort& passive);
- void setIsHidden(bool hidden);
+ void addArbiter(const HostAndPort& arbiter);
- void setShouldBuildIndexes(bool buildIndexes);
+ void setPrimary(const HostAndPort& primary);
- void setSlaveDelay(Seconds slaveDelay);
+ void setIsArbiterOnly(bool arbiterOnly);
- void addTag(const std::string& tagKey, const std::string& tagValue);
+ void setIsPassive(bool passive);
- void setMe(const HostAndPort& me);
+ void setIsHidden(bool hidden);
- void setElectionId(const OID& electionId);
+ void setShouldBuildIndexes(bool buildIndexes);
- /**
- * Marks _configSet as false, which will cause future calls to toBSON/addToBSON to ignore
- * all other member variables and output a hardcoded response indicating that we have no
- * valid replica set config.
- */
- void markAsNoConfig();
+ void setSlaveDelay(Seconds slaveDelay);
- /**
- * Marks _shutdownInProgress as true, which will cause future calls to toBSON/addToBSON to
- * ignore all other member variables and output a hardcoded response indicating that we are
- * in the middle of shutting down.
- */
- void markAsShutdownInProgress();
+ void addTag(const std::string& tagKey, const std::string& tagValue);
- private:
+ void setMe(const HostAndPort& me);
- bool _isMaster;
- bool _isMasterSet;
- bool _secondary;
- bool _isSecondarySet;
- std::string _setName;
- bool _setNameSet;
- long long _setVersion;
- bool _setVersionSet;
- std::vector<HostAndPort> _hosts;
- bool _hostsSet;
- std::vector<HostAndPort> _passives;
- bool _passivesSet;
- std::vector<HostAndPort> _arbiters;
- bool _arbitersSet;
- HostAndPort _primary;
- bool _primarySet;
- bool _arbiterOnly;
- bool _arbiterOnlySet;
- bool _passive;
- bool _passiveSet;
- bool _hidden;
- bool _hiddenSet;
- bool _buildIndexes;
- bool _buildIndexesSet;
- Seconds _slaveDelay;
- bool _slaveDelaySet;
- unordered_map<std::string, std::string> _tags;
- bool _tagsSet;
- HostAndPort _me;
- bool _meSet;
- OID _electionId;
+ void setElectionId(const OID& electionId);
- // If _configSet is false this means we don't have a valid repl set config, so toBSON
- // will return a set of hardcoded values that indicate this.
- bool _configSet;
- // If _shutdownInProgress is true toBSON will return a set of hardcoded values to indicate
- // that we are mid shutdown
- bool _shutdownInProgress;
- };
+ /**
+ * Marks _configSet as false, which will cause future calls to toBSON/addToBSON to ignore
+ * all other member variables and output a hardcoded response indicating that we have no
+ * valid replica set config.
+ */
+ void markAsNoConfig();
-} // namespace repl
-} // namespace mongo
+ /**
+ * Marks _shutdownInProgress as true, which will cause future calls to toBSON/addToBSON to
+ * ignore all other member variables and output a hardcoded response indicating that we are
+ * in the middle of shutting down.
+ */
+ void markAsShutdownInProgress();
+
+private:
+ bool _isMaster;
+ bool _isMasterSet;
+ bool _secondary;
+ bool _isSecondarySet;
+ std::string _setName;
+ bool _setNameSet;
+ long long _setVersion;
+ bool _setVersionSet;
+ std::vector<HostAndPort> _hosts;
+ bool _hostsSet;
+ std::vector<HostAndPort> _passives;
+ bool _passivesSet;
+ std::vector<HostAndPort> _arbiters;
+ bool _arbitersSet;
+ HostAndPort _primary;
+ bool _primarySet;
+ bool _arbiterOnly;
+ bool _arbiterOnlySet;
+ bool _passive;
+ bool _passiveSet;
+ bool _hidden;
+ bool _hiddenSet;
+ bool _buildIndexes;
+ bool _buildIndexesSet;
+ Seconds _slaveDelay;
+ bool _slaveDelaySet;
+ unordered_map<std::string, std::string> _tags;
+ bool _tagsSet;
+ HostAndPort _me;
+ bool _meSet;
+ OID _electionId;
+
+ // If _configSet is false this means we don't have a valid repl set config, so toBSON
+ // will return a set of hardcoded values that indicate this.
+ bool _configSet;
+ // If _shutdownInProgress is true toBSON will return a set of hardcoded values to indicate
+ // that we are mid shutdown
+ bool _shutdownInProgress;
+};
+
+} // namespace repl
+} // namespace mongo
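
The two short-circuit responses declared above are easiest to see side by side. A sketch of their observable output, not part of the patch itself:

    // markAsNoConfig(): toBSON() ignores every other member and emits the
    // hardcoded document
    //   { ismaster: false, secondary: false,
    //     info: "Does not have a valid replica set config", isreplicaset: true }
    IsMasterResponse noConfig;
    noConfig.markAsNoConfig();
    BSONObj noConfigDoc = noConfig.toBSON();

    // markAsShutdownInProgress(): toBSON() instead emits
    //   { code: ErrorCodes::ShutdownInProgress,
    //     errmsg: "replication shutdown in progress" }
    IsMasterResponse shuttingDown;
    shuttingDown.markAsShutdownInProgress();
    BSONObj shutdownDoc = shuttingDown.toBSON();
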
diff --git a/src/mongo/db/repl/isself.cpp b/src/mongo/db/repl/isself.cpp
index d0b44858913..e34b1cc9660 100644
--- a/src/mongo/db/repl/isself.cpp
+++ b/src/mongo/db/repl/isself.cpp
@@ -47,7 +47,8 @@
#include "mongo/util/scopeguard.h"
#include "mongo/util/log.h"
-#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || defined(__OpenBSD__)
+#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || \
+ defined(__OpenBSD__)
#define FASTPATH_UNIX 1
#endif
@@ -75,281 +76,269 @@
namespace mongo {
namespace repl {
- OID instanceId;
+OID instanceId;
- MONGO_INITIALIZER(GenerateInstanceId)(InitializerContext*) {
- instanceId = OID::gen();
- return Status::OK();
- }
+MONGO_INITIALIZER(GenerateInstanceId)(InitializerContext*) {
+ instanceId = OID::gen();
+ return Status::OK();
+}
namespace {
- /**
- * Helper to convert a message from a networking function to a string.
- * Needed because errnoWithDescription uses strerror on linux, when
- * we need gai_strerror.
- */
- std::string stringifyError(int code) {
+/**
+ * Helper to convert a message from a networking function to a string.
+ * Needed because errnoWithDescription uses strerror on Linux, whereas
+ * we need gai_strerror.
+ */
+std::string stringifyError(int code) {
#if FASTPATH_UNIX
- return gai_strerror(code);
+ return gai_strerror(code);
#elif defined(_WIN32)
- // FormatMessage in errnoWithDescription works here on windows
- return errnoWithDescription(code);
+ // FormatMessage in errnoWithDescription works here on windows
+ return errnoWithDescription(code);
#endif
- }
-
- /**
- * Resolves a host and port to a list of IP addresses. This requires a syscall. If the
- * ipv6enabled parameter is true, both IPv6 and IPv4 addresses will be returned.
- */
- std::vector<std::string> getAddrsForHost(const std::string& iporhost,
- const int port,
- const bool ipv6enabled) {
- addrinfo* addrs = NULL;
- addrinfo hints = {0};
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_family = (ipv6enabled ? AF_UNSPEC : AF_INET);
-
- const std::string portNum = BSONObjBuilder::numStr(port);
-
- std::vector<std::string> out;
-
- int err = getaddrinfo(iporhost.c_str(), portNum.c_str(), &hints, &addrs);
+}
- if (err) {
- warning() << "getaddrinfo(\"" << iporhost << "\") failed: "
- << stringifyError(err) << std::endl;
- return out;
- }
+/**
+ * Resolves a host and port to a list of IP addresses. This requires a syscall. If the
+ * ipv6enabled parameter is true, both IPv6 and IPv4 addresses will be returned.
+ */
+std::vector<std::string> getAddrsForHost(const std::string& iporhost,
+ const int port,
+ const bool ipv6enabled) {
+ addrinfo* addrs = NULL;
+ addrinfo hints = {0};
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_family = (ipv6enabled ? AF_UNSPEC : AF_INET);
+
+ const std::string portNum = BSONObjBuilder::numStr(port);
+
+ std::vector<std::string> out;
+
+ int err = getaddrinfo(iporhost.c_str(), portNum.c_str(), &hints, &addrs);
+
+ if (err) {
+ warning() << "getaddrinfo(\"" << iporhost << "\") failed: " << stringifyError(err)
+ << std::endl;
+ return out;
+ }
- ON_BLOCK_EXIT(freeaddrinfo, addrs);
+ ON_BLOCK_EXIT(freeaddrinfo, addrs);
- for (addrinfo* addr = addrs; addr != NULL; addr = addr->ai_next) {
- int family = addr->ai_family;
- char host[NI_MAXHOST];
+ for (addrinfo* addr = addrs; addr != NULL; addr = addr->ai_next) {
+ int family = addr->ai_family;
+ char host[NI_MAXHOST];
- if (family == AF_INET || family == AF_INET6) {
- err = getnameinfo(addr->ai_addr, addr->ai_addrlen, host,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (err) {
- warning() << "getnameinfo() failed: " << stringifyError(err) << std::endl;
- continue;
- }
- out.push_back(host);
+ if (family == AF_INET || family == AF_INET6) {
+ err = getnameinfo(
+ addr->ai_addr, addr->ai_addrlen, host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+ if (err) {
+ warning() << "getnameinfo() failed: " << stringifyError(err) << std::endl;
+ continue;
}
-
+ out.push_back(host);
}
+ }
- if (shouldLog(logger::LogSeverity::Debug(2))) {
- StringBuilder builder;
- builder << "getAddrsForHost(\"" << iporhost << ":" << port << "\"):";
- for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
- builder << " [ " << *o << "]";
- }
- LOG(2) << builder.str();
+ if (shouldLog(logger::LogSeverity::Debug(2))) {
+ StringBuilder builder;
+ builder << "getAddrsForHost(\"" << iporhost << ":" << port << "\"):";
+ for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
+ builder << " [ " << *o << "]";
}
-
- return out;
+ LOG(2) << builder.str();
}
-} // namespace
+ return out;
+}
- bool isSelf(const HostAndPort& hostAndPort) {
+} // namespace
- // Fastpath: check if the host&port in question is bound to one
- // of the interfaces on this machine.
- // No need for ip match if the ports do not match
- if (hostAndPort.port() == serverGlobalParams.port) {
- std::vector<std::string> myAddrs = serverGlobalParams.bind_ip.empty() ?
- getBoundAddrs(IPv6Enabled()) :
- std::vector<std::string>();
+bool isSelf(const HostAndPort& hostAndPort) {
+ // Fastpath: check if the host&port in question is bound to one
+ // of the interfaces on this machine.
+ // No need for ip match if the ports do not match
+ if (hostAndPort.port() == serverGlobalParams.port) {
+ std::vector<std::string> myAddrs = serverGlobalParams.bind_ip.empty()
+ ? getBoundAddrs(IPv6Enabled())
+ : std::vector<std::string>();
+
+ if (!serverGlobalParams.bind_ip.empty()) {
+ boost::split(myAddrs, serverGlobalParams.bind_ip, boost::is_any_of(", "));
+ }
- if (!serverGlobalParams.bind_ip.empty()) {
- boost::split(myAddrs, serverGlobalParams.bind_ip, boost::is_any_of(", "));
- }
+ const std::vector<std::string> hostAddrs =
+ getAddrsForHost(hostAndPort.host(), hostAndPort.port(), IPv6Enabled());
- const std::vector<std::string> hostAddrs = getAddrsForHost(hostAndPort.host(),
- hostAndPort.port(),
- IPv6Enabled());
-
- for (std::vector<std::string>::const_iterator i = myAddrs.begin();
- i != myAddrs.end(); ++i) {
- for (std::vector<std::string>::const_iterator j = hostAddrs.begin();
- j != hostAddrs.end(); ++j) {
- if (*i == *j) {
- return true;
- }
+ for (std::vector<std::string>::const_iterator i = myAddrs.begin(); i != myAddrs.end();
+ ++i) {
+ for (std::vector<std::string>::const_iterator j = hostAddrs.begin();
+ j != hostAddrs.end();
+ ++j) {
+ if (*i == *j) {
+ return true;
}
}
}
+ }
+
+ // Ensure that the server is up and ready to accept incoming network requests.
+ const Listener* listener = Listener::getTimeTracker();
+ if (!listener) {
+ return false;
+ }
+ listener->waitUntilListening();
- // Ensure that the server is up and ready to accept incoming network requests.
- const Listener* listener = Listener::getTimeTracker();
- if (!listener) {
+ try {
+ DBClientConnection conn;
+ std::string errmsg;
+ conn.setSoTimeout(30); // 30 second timeout
+ if (!conn.connect(hostAndPort, errmsg)) {
return false;
}
- listener->waitUntilListening();
- try {
- DBClientConnection conn;
- std::string errmsg;
- conn.setSoTimeout(30); // 30 second timeout
- if (!conn.connect(hostAndPort, errmsg)) {
+ if (getGlobalAuthorizationManager()->isAuthEnabled() && isInternalAuthSet()) {
+ if (!authenticateInternalUser(&conn)) {
return false;
}
-
- if (getGlobalAuthorizationManager()->isAuthEnabled() && isInternalAuthSet()) {
- if (!authenticateInternalUser(&conn)) {
- return false;
- }
- }
- BSONObj out;
- bool ok = conn.simpleCommand("admin" , &out, "_isSelf");
- bool me = ok && out["id"].type() == jstOID && instanceId == out["id"].OID();
-
- return me;
- }
- catch (const std::exception& e) {
- warning() << "could't check isSelf (" << hostAndPort << ") " << e.what() << std::endl;
}
+ BSONObj out;
+ bool ok = conn.simpleCommand("admin", &out, "_isSelf");
+ bool me = ok && out["id"].type() == jstOID && instanceId == out["id"].OID();
- return false;
+ return me;
+ } catch (const std::exception& e) {
+ warning() << "could't check isSelf (" << hostAndPort << ") " << e.what() << std::endl;
}
- /**
- * Returns all the IP addresses bound to the network interfaces of this machine.
- * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
- * addresses will be returned.
- */
- std::vector<std::string> getBoundAddrs(const bool ipv6enabled) {
- std::vector<std::string> out;
+ return false;
+}
+
+/**
+ * Returns all the IP addresses bound to the network interfaces of this machine.
+ * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
+ * addresses will be returned.
+ */
+std::vector<std::string> getBoundAddrs(const bool ipv6enabled) {
+ std::vector<std::string> out;
#ifdef FASTPATH_UNIX
- ifaddrs* addrs;
+ ifaddrs* addrs;
- int err = getifaddrs(&addrs);
- if (err) {
- warning() << "getifaddrs failure: " << errnoWithDescription(err) << std::endl;
- return out;
- }
- ON_BLOCK_EXIT(freeifaddrs, addrs);
-
- // based on example code from linux getifaddrs manpage
- for (ifaddrs* addr = addrs; addr != NULL; addr = addr->ifa_next) {
- if (addr->ifa_addr == NULL) continue;
- int family = addr->ifa_addr->sa_family;
- char host[NI_MAXHOST];
-
- if (family == AF_INET || (ipv6enabled && (family == AF_INET6))) {
- err = getnameinfo(addr->ifa_addr,
- (family == AF_INET ? sizeof(struct sockaddr_in)
- : sizeof(struct sockaddr_in6)),
- host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (err) {
- warning() << "getnameinfo() failed: " << gai_strerror(err) << std::endl;
- continue;
- }
- out.push_back(host);
+ int err = getifaddrs(&addrs);
+ if (err) {
+ warning() << "getifaddrs failure: " << errnoWithDescription(err) << std::endl;
+ return out;
+ }
+ ON_BLOCK_EXIT(freeifaddrs, addrs);
+
+    // based on example code from the Linux getifaddrs manpage
+ for (ifaddrs* addr = addrs; addr != NULL; addr = addr->ifa_next) {
+ if (addr->ifa_addr == NULL)
+ continue;
+ int family = addr->ifa_addr->sa_family;
+ char host[NI_MAXHOST];
+
+ if (family == AF_INET || (ipv6enabled && (family == AF_INET6))) {
+ err = getnameinfo(
+ addr->ifa_addr,
+ (family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)),
+ host,
+ NI_MAXHOST,
+ NULL,
+ 0,
+ NI_NUMERICHOST);
+ if (err) {
+ warning() << "getnameinfo() failed: " << gai_strerror(err) << std::endl;
+ continue;
}
+ out.push_back(host);
}
+ }
#elif defined(_WIN32)
- // Start with the MS recommended 15KB buffer. Use multiple attempts
- // for the rare case that the adapter config changes between calls
+ // Start with the MS recommended 15KB buffer. Use multiple attempts
+ // for the rare case that the adapter config changes between calls
- ULONG adaptersLen = 15 * 1024;
- std::unique_ptr<char[]> buf(new char[adaptersLen]);
- IP_ADAPTER_ADDRESSES* adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
- DWORD err;
+ ULONG adaptersLen = 15 * 1024;
+ std::unique_ptr<char[]> buf(new char[adaptersLen]);
+ IP_ADAPTER_ADDRESSES* adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
+ DWORD err;
- ULONG family = ipv6enabled ? AF_UNSPEC : AF_INET;
+ ULONG family = ipv6enabled ? AF_UNSPEC : AF_INET;
- for (int tries = 0; tries < 3; ++tries) {
- err = GetAdaptersAddresses(family,
- GAA_FLAG_SKIP_ANYCAST | // only want unicast addrs
+ for (int tries = 0; tries < 3; ++tries) {
+ err = GetAdaptersAddresses(family,
+ GAA_FLAG_SKIP_ANYCAST | // only want unicast addrs
GAA_FLAG_SKIP_MULTICAST |
GAA_FLAG_SKIP_DNS_SERVER,
- NULL,
- adapters,
- &adaptersLen);
-
- if (err == ERROR_BUFFER_OVERFLOW) {
- // in this case, adaptersLen will be set to the size we need to allocate
- buf.reset(new char[adaptersLen]);
- adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
- }
- else {
- break; // only retry for incorrectly sized buffer
- }
+ NULL,
+ adapters,
+ &adaptersLen);
+
+ if (err == ERROR_BUFFER_OVERFLOW) {
+ // in this case, adaptersLen will be set to the size we need to allocate
+ buf.reset(new char[adaptersLen]);
+ adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
+ } else {
+ break; // only retry for incorrectly sized buffer
}
+ }
- if (err != NO_ERROR) {
- warning() << "GetAdaptersAddresses() failed: " << errnoWithDescription(err)
- << std::endl;
- return out;
- }
+ if (err != NO_ERROR) {
+ warning() << "GetAdaptersAddresses() failed: " << errnoWithDescription(err) << std::endl;
+ return out;
+ }
- for (IP_ADAPTER_ADDRESSES* adapter = adapters;
- adapter != NULL; adapter = adapter->Next) {
- for (IP_ADAPTER_UNICAST_ADDRESS* addr = adapter->FirstUnicastAddress;
- addr != NULL; addr = addr->Next) {
-
- short family =
- reinterpret_cast<SOCKADDR_STORAGE*>(addr->Address.lpSockaddr)->ss_family;
-
- if (family == AF_INET) {
- // IPv4
- SOCKADDR_IN* sock = reinterpret_cast<SOCKADDR_IN*>(addr->Address.lpSockaddr);
- char addrstr[INET_ADDRSTRLEN] = {0};
- boost::system::error_code ec;
- // Not all windows versions have inet_ntop
- boost::asio::detail::socket_ops::inet_ntop(AF_INET,
- &(sock->sin_addr),
- addrstr,
- INET_ADDRSTRLEN,
- 0,
- ec);
- if (ec) {
- warning() << "inet_ntop failed during IPv4 address conversion: "
- << ec.message() << std::endl;
- continue;
- }
- out.push_back(addrstr);
+ for (IP_ADAPTER_ADDRESSES* adapter = adapters; adapter != NULL; adapter = adapter->Next) {
+ for (IP_ADAPTER_UNICAST_ADDRESS* addr = adapter->FirstUnicastAddress; addr != NULL;
+ addr = addr->Next) {
+ short family = reinterpret_cast<SOCKADDR_STORAGE*>(addr->Address.lpSockaddr)->ss_family;
+
+ if (family == AF_INET) {
+ // IPv4
+ SOCKADDR_IN* sock = reinterpret_cast<SOCKADDR_IN*>(addr->Address.lpSockaddr);
+ char addrstr[INET_ADDRSTRLEN] = {0};
+ boost::system::error_code ec;
+                // Not all Windows versions have inet_ntop
+ boost::asio::detail::socket_ops::inet_ntop(
+ AF_INET, &(sock->sin_addr), addrstr, INET_ADDRSTRLEN, 0, ec);
+ if (ec) {
+ warning() << "inet_ntop failed during IPv4 address conversion: " << ec.message()
+ << std::endl;
+ continue;
}
- else if (family == AF_INET6) {
- // IPv6
- SOCKADDR_IN6* sock = reinterpret_cast<SOCKADDR_IN6*>(addr->Address.lpSockaddr);
- char addrstr[INET6_ADDRSTRLEN] = {0};
- boost::system::error_code ec;
- boost::asio::detail::socket_ops::inet_ntop(AF_INET6,
- &(sock->sin6_addr),
- addrstr,
- INET6_ADDRSTRLEN,
- 0,
- ec);
- if (ec) {
- warning() << "inet_ntop failed during IPv6 address conversion: "
- << ec.message() << std::endl;
- continue;
- }
- out.push_back(addrstr);
+ out.push_back(addrstr);
+ } else if (family == AF_INET6) {
+ // IPv6
+ SOCKADDR_IN6* sock = reinterpret_cast<SOCKADDR_IN6*>(addr->Address.lpSockaddr);
+ char addrstr[INET6_ADDRSTRLEN] = {0};
+ boost::system::error_code ec;
+ boost::asio::detail::socket_ops::inet_ntop(
+ AF_INET6, &(sock->sin6_addr), addrstr, INET6_ADDRSTRLEN, 0, ec);
+ if (ec) {
+ warning() << "inet_ntop failed during IPv6 address conversion: " << ec.message()
+ << std::endl;
+ continue;
}
+ out.push_back(addrstr);
}
}
+ }
#endif // defined(_WIN32)
- if (shouldLog(logger::LogSeverity::Debug(2))) {
- StringBuilder builder;
- builder << "getBoundAddrs():";
- for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
- builder << " [ " << *o << "]";
- }
- LOG(2) << builder.str();
+ if (shouldLog(logger::LogSeverity::Debug(2))) {
+ StringBuilder builder;
+ builder << "getBoundAddrs():";
+ for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
+ builder << " [ " << *o << "]";
}
- return out;
+ LOG(2) << builder.str();
}
+ return out;
+}
} // namespace repl
} // namespace mongo
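
Condensed, the fastpath in isSelf() above is an address-set intersection. A sketch with range-for loops substituted for the patch's iterator style; getAddrsForHost() is file-local, so this would have to live in the same translation unit:

    // A host can only be "us" on the fastpath if the port matches; if so,
    // resolve the remote name and intersect with our bound/configured addresses.
    bool isSelfFastpathSketch(const HostAndPort& hostAndPort) {
        if (hostAndPort.port() != serverGlobalParams.port)
            return false;  // the real isSelf() falls through to the _isSelf command
        std::vector<std::string> myAddrs;
        if (serverGlobalParams.bind_ip.empty()) {
            myAddrs = getBoundAddrs(IPv6Enabled());
        } else {
            boost::split(myAddrs, serverGlobalParams.bind_ip, boost::is_any_of(", "));
        }
        const std::vector<std::string> hostAddrs =
            getAddrsForHost(hostAndPort.host(), hostAndPort.port(), IPv6Enabled());
        for (const std::string& mine : myAddrs)
            for (const std::string& theirs : hostAddrs)
                if (mine == theirs)
                    return true;
        return false;
    }
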
diff --git a/src/mongo/db/repl/isself.h b/src/mongo/db/repl/isself.h
index cbcbbd9f031..19b61bf47bd 100644
--- a/src/mongo/db/repl/isself.h
+++ b/src/mongo/db/repl/isself.h
@@ -34,30 +34,30 @@
#include "mongo/bson/oid.h"
namespace mongo {
- struct HostAndPort;
+struct HostAndPort;
namespace repl {
- /**
- * An identifier unique to this instance. Used by isSelf to see if we are talking
- * to ourself or someone else.
- */
- extern OID instanceId;
-
- /**
- * Returns true if "hostAndPort" identifies this instance.
- */
- bool isSelf(const HostAndPort& hostAndPort);
-
- /**
- * Returns all the IP addresses bound to the network interfaces of this machine.
- * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
- * addresses will be returned.
- *
- * Note: this only works on Linux and Windows. All calls should be properly ifdef'd,
- * otherwise an invariant will be triggered.
- */
- std::vector<std::string> getBoundAddrs(const bool ipv6enabled);
-
-} // namespace repl
-} // namespace mongo
+/**
+ * An identifier unique to this instance. Used by isSelf to see if we are talking
+ * to ourselves or someone else.
+ */
+extern OID instanceId;
+
+/**
+ * Returns true if "hostAndPort" identifies this instance.
+ */
+bool isSelf(const HostAndPort& hostAndPort);
+
+/**
+ * Returns all the IP addresses bound to the network interfaces of this machine.
+ * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
+ * addresses will be returned.
+ *
+ * Note: this only works on Linux and Windows. All calls should be properly ifdef'd,
+ * otherwise an invariant will be triggered.
+ */
+std::vector<std::string> getBoundAddrs(const bool ipv6enabled);
+
+} // namespace repl
+} // namespace mongo
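
Per the header note that calls must be ifdef'd, callers guard getBoundAddrs() the same way the unit test below does; a minimal sketch:

    // Only compiled on the platforms that implement address enumeration;
    // elsewhere the call is compiled out entirely.
    #if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
        std::vector<std::string> addrs = getBoundAddrs(IPv6Enabled());
    #endif
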
diff --git a/src/mongo/db/repl/isself_test.cpp b/src/mongo/db/repl/isself_test.cpp
index b3af1721e62..b6a3e26f8e6 100644
--- a/src/mongo/db/repl/isself_test.cpp
+++ b/src/mongo/db/repl/isself_test.cpp
@@ -40,45 +40,41 @@ namespace repl {
namespace {
- using std::string;
+using std::string;
- TEST(IsSelf, DetectsSameHostIPv4) {
+TEST(IsSelf, DetectsSameHostIPv4) {
#if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
- bool wasEnabled = IPv6Enabled();
- enableIPv6(false);
- ON_BLOCK_EXIT(enableIPv6, wasEnabled);
- // first we get the addrs bound on this host
- const std::vector<std::string> addrs = getBoundAddrs(false);
- // Fastpath should agree with the result of getBoundAddrs
- // since it uses it...
- for (std::vector<string>::const_iterator it = addrs.begin();
- it != addrs.end(); ++it) {
-
- ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
- }
+ bool wasEnabled = IPv6Enabled();
+ enableIPv6(false);
+ ON_BLOCK_EXIT(enableIPv6, wasEnabled);
+ // first we get the addrs bound on this host
+ const std::vector<std::string> addrs = getBoundAddrs(false);
+ // Fastpath should agree with the result of getBoundAddrs
+ // since it uses it...
+ for (std::vector<string>::const_iterator it = addrs.begin(); it != addrs.end(); ++it) {
+ ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
+ }
#else
- ASSERT(true);
+ ASSERT(true);
#endif
- }
+}
- TEST(IsSelf, DetectsSameHostIPv6) {
+TEST(IsSelf, DetectsSameHostIPv6) {
#if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
- bool wasEnabled = IPv6Enabled();
- enableIPv6(true);
- ON_BLOCK_EXIT(enableIPv6, wasEnabled);
- // first we get the addrs bound on this host
- const std::vector<std::string> addrs = getBoundAddrs(true);
- // Fastpath should agree with the result of getBoundAddrs
- // since it uses it...
- for (std::vector<string>::const_iterator it = addrs.begin();
- it != addrs.end(); ++it) {
-
- ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
- }
+ bool wasEnabled = IPv6Enabled();
+ enableIPv6(true);
+ ON_BLOCK_EXIT(enableIPv6, wasEnabled);
+ // first we get the addrs bound on this host
+ const std::vector<std::string> addrs = getBoundAddrs(true);
+ // Fastpath should agree with the result of getBoundAddrs
+ // since it uses it...
+ for (std::vector<string>::const_iterator it = addrs.begin(); it != addrs.end(); ++it) {
+ ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
+ }
#else
- ASSERT(true);
+ ASSERT(true);
#endif
- }
+}
} // namespace
diff --git a/src/mongo/db/repl/last_vote.cpp b/src/mongo/db/repl/last_vote.cpp
index e7a1b178bf4..94e88231bac 100644
--- a/src/mongo/db/repl/last_vote.cpp
+++ b/src/mongo/db/repl/last_vote.cpp
@@ -36,57 +36,54 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kCandidateIdFieldName = "candidateId";
- const std::string kTermFieldName = "term";
+const std::string kCandidateIdFieldName = "candidateId";
+const std::string kTermFieldName = "term";
- const std::string kLegalFieldNames[] = {
- kCandidateIdFieldName,
- kTermFieldName,
- };
+const std::string kLegalFieldNames[] = {
+ kCandidateIdFieldName, kTermFieldName,
+};
} // namespace
- Status LastVote::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("VotedFar",
- argsObj,
- kLegalFieldNames);
- if (!status.isOK())
- return status;
+Status LastVote::initialize(const BSONObj& argsObj) {
+    Status status = bsonCheckOnlyHasFields("VotedFor", argsObj, kLegalFieldNames);
+ if (!status.isOK())
+ return status;
- status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
- if (!status.isOK())
- return status;
+ status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
+ if (!status.isOK())
+ return status;
- status = bsonExtractIntegerField(argsObj, kCandidateIdFieldName, &_candidateId);
- if (!status.isOK())
- return status;
+ status = bsonExtractIntegerField(argsObj, kCandidateIdFieldName, &_candidateId);
+ if (!status.isOK())
+ return status;
- return Status::OK();
- }
+ return Status::OK();
+}
- void LastVote::setTerm(long long term) {
- _term = term;
- }
+void LastVote::setTerm(long long term) {
+ _term = term;
+}
- void LastVote::setCandidateId(long long candidateId) {
- _candidateId = candidateId;
- }
+void LastVote::setCandidateId(long long candidateId) {
+ _candidateId = candidateId;
+}
- long long LastVote::getTerm() const {
- return _term;
- }
+long long LastVote::getTerm() const {
+ return _term;
+}
- long long LastVote::getCandidateId() const {
- return _candidateId;
- }
+long long LastVote::getCandidateId() const {
+ return _candidateId;
+}
- BSONObj LastVote::toBSON() const {
- BSONObjBuilder builder;
- builder.append(kTermFieldName, _term);
- builder.append(kCandidateIdFieldName, _candidateId);
- return builder.obj();
- }
+BSONObj LastVote::toBSON() const {
+ BSONObjBuilder builder;
+ builder.append(kTermFieldName, _term);
+ builder.append(kCandidateIdFieldName, _candidateId);
+ return builder.obj();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
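
A round-trip sketch for LastVote as reformatted above; illustrative only, assuming the mongo::repl namespace:

    // LastVote serializes exactly two fields, and initialize() rejects any
    // document carrying extra fields (via bsonCheckOnlyHasFields).
    LastVote vote;
    vote.setTerm(5);
    vote.setCandidateId(2);
    BSONObj doc = vote.toBSON();  // { term: 5, candidateId: 2 }

    LastVote parsed;
    Status status = parsed.initialize(doc);
    invariant(status.isOK());
    invariant(parsed.getTerm() == 5 && parsed.getCandidateId() == 2);
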
diff --git a/src/mongo/db/repl/last_vote.h b/src/mongo/db/repl/last_vote.h
index b2314823f00..fe1d67a3fe9 100644
--- a/src/mongo/db/repl/last_vote.h
+++ b/src/mongo/db/repl/last_vote.h
@@ -30,27 +30,27 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class Status;
+class BSONObj;
+class BSONObjBuilder;
+class Status;
namespace repl {
- class LastVote {
- public:
- Status initialize(const BSONObj& argsObj);
+class LastVote {
+public:
+ Status initialize(const BSONObj& argsObj);
- long long getTerm() const;
- long long getCandidateId() const;
+ long long getTerm() const;
+ long long getCandidateId() const;
- void setTerm(long long term);
- void setCandidateId(long long candidateId);
- BSONObj toBSON() const;
+ void setTerm(long long term);
+ void setCandidateId(long long candidateId);
+ BSONObj toBSON() const;
- private:
- long long _candidateId = -1;
- long long _term = -1;
- };
+private:
+ long long _candidateId = -1;
+ long long _term = -1;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/master_slave.cpp b/src/mongo/db/repl/master_slave.cpp
index a1a58527b62..1e1bd428d39 100644
--- a/src/mongo/db/repl/master_slave.cpp
+++ b/src/mongo/db/repl/master_slave.cpp
@@ -82,1374 +82,1352 @@ using std::vector;
namespace mongo {
namespace repl {
- void pretouchOperation(OperationContext* txn, const BSONObj& op);
- void pretouchN(vector<BSONObj>&, unsigned a, unsigned b);
+void pretouchOperation(OperationContext* txn, const BSONObj& op);
+void pretouchN(vector<BSONObj>&, unsigned a, unsigned b);
- /* if 1 sync() is running */
- volatile int syncing = 0;
- volatile int relinquishSyncingSome = 0;
+/* if 1 sync() is running */
+volatile int syncing = 0;
+volatile int relinquishSyncingSome = 0;
- static time_t lastForcedResync = 0;
+static time_t lastForcedResync = 0;
- /* output by the web console */
- const char *replInfo = "";
- struct ReplInfo {
- ReplInfo(const char *msg) {
- replInfo = msg;
- }
- ~ReplInfo() {
- replInfo = "?";
- }
- };
-
-
- ReplSource::ReplSource(OperationContext* txn) {
- nClonedThisPass = 0;
- ensureMe(txn);
- }
-
- ReplSource::ReplSource(OperationContext* txn, BSONObj o) : nClonedThisPass(0) {
- only = o.getStringField("only");
- hostName = o.getStringField("host");
- _sourceName = o.getStringField("source");
- uassert( 10118 , "'host' field not set in sources collection object", !hostName.empty() );
- uassert( 10119 , "only source='main' allowed for now with replication", sourceName() == "main" );
- BSONElement e = o.getField("syncedTo");
- if ( !e.eoo() ) {
- uassert(10120, "bad sources 'syncedTo' field value",
- e.type() == Date || e.type() == bsonTimestamp);
- Timestamp tmp( e.date() );
- syncedTo = tmp;
- }
-
- BSONObj dbsObj = o.getObjectField("dbsNextPass");
- if ( !dbsObj.isEmpty() ) {
- BSONObjIterator i(dbsObj);
- while ( 1 ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- addDbNextPass.insert( e.fieldName() );
- }
- }
-
- dbsObj = o.getObjectField("incompleteCloneDbs");
- if ( !dbsObj.isEmpty() ) {
- BSONObjIterator i(dbsObj);
- while ( 1 ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- incompleteCloneDbs.insert( e.fieldName() );
- }
- }
- ensureMe(txn);
+/* output by the web console */
+const char* replInfo = "";
+struct ReplInfo {
+ ReplInfo(const char* msg) {
+ replInfo = msg;
+ }
+ ~ReplInfo() {
+ replInfo = "?";
+ }
+};
+
+
+ReplSource::ReplSource(OperationContext* txn) {
+ nClonedThisPass = 0;
+ ensureMe(txn);
+}
+
+ReplSource::ReplSource(OperationContext* txn, BSONObj o) : nClonedThisPass(0) {
+ only = o.getStringField("only");
+ hostName = o.getStringField("host");
+ _sourceName = o.getStringField("source");
+ uassert(10118, "'host' field not set in sources collection object", !hostName.empty());
+ uassert(10119, "only source='main' allowed for now with replication", sourceName() == "main");
+ BSONElement e = o.getField("syncedTo");
+ if (!e.eoo()) {
+ uassert(10120,
+ "bad sources 'syncedTo' field value",
+ e.type() == Date || e.type() == bsonTimestamp);
+ Timestamp tmp(e.date());
+ syncedTo = tmp;
}
- /* Turn our C++ Source object into a BSONObj */
- BSONObj ReplSource::jsobj() {
- BSONObjBuilder b;
- b.append("host", hostName);
- b.append("source", sourceName());
- if ( !only.empty() )
- b.append("only", only);
- if ( !syncedTo.isNull() )
- b.append("syncedTo", syncedTo);
-
- BSONObjBuilder dbsNextPassBuilder;
- int n = 0;
- for ( set<string>::iterator i = addDbNextPass.begin(); i != addDbNextPass.end(); i++ ) {
- n++;
- dbsNextPassBuilder.appendBool(*i, 1);
+ BSONObj dbsObj = o.getObjectField("dbsNextPass");
+ if (!dbsObj.isEmpty()) {
+ BSONObjIterator i(dbsObj);
+ while (1) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ addDbNextPass.insert(e.fieldName());
}
- if ( n )
- b.append("dbsNextPass", dbsNextPassBuilder.done());
+ }
- BSONObjBuilder incompleteCloneDbsBuilder;
- n = 0;
- for ( set<string>::iterator i = incompleteCloneDbs.begin(); i != incompleteCloneDbs.end(); i++ ) {
- n++;
- incompleteCloneDbsBuilder.appendBool(*i, 1);
+ dbsObj = o.getObjectField("incompleteCloneDbs");
+ if (!dbsObj.isEmpty()) {
+ BSONObjIterator i(dbsObj);
+ while (1) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ incompleteCloneDbs.insert(e.fieldName());
}
- if ( n )
- b.append("incompleteCloneDbs", incompleteCloneDbsBuilder.done());
-
- return b.obj();
}
+ ensureMe(txn);
+}
+
+/* Turn our C++ Source object into a BSONObj */
+BSONObj ReplSource::jsobj() {
+ BSONObjBuilder b;
+ b.append("host", hostName);
+ b.append("source", sourceName());
+ if (!only.empty())
+ b.append("only", only);
+ if (!syncedTo.isNull())
+ b.append("syncedTo", syncedTo);
+
+ BSONObjBuilder dbsNextPassBuilder;
+ int n = 0;
+ for (set<string>::iterator i = addDbNextPass.begin(); i != addDbNextPass.end(); i++) {
+ n++;
+ dbsNextPassBuilder.appendBool(*i, 1);
+ }
+ if (n)
+ b.append("dbsNextPass", dbsNextPassBuilder.done());
+
+ BSONObjBuilder incompleteCloneDbsBuilder;
+ n = 0;
+ for (set<string>::iterator i = incompleteCloneDbs.begin(); i != incompleteCloneDbs.end(); i++) {
+ n++;
+ incompleteCloneDbsBuilder.appendBool(*i, 1);
+ }
+ if (n)
+ b.append("incompleteCloneDbs", incompleteCloneDbsBuilder.done());
- void ReplSource::ensureMe(OperationContext* txn) {
- string myname = getHostName();
+ return b.obj();
+}
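+
+// Illustrative sketch (added commentary; host/db values are hypothetical): a
+// fully-populated entry produced by jsobj() looks roughly like
+//     { host: "master.example.com", source: "main", only: "mydb",
+//       syncedTo: Timestamp(...), dbsNextPass: { mydb: true },
+//       incompleteCloneDbs: { otherdb: true } }
+// where the optional fields are present only when set, per the branches above.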
- // local.me is an identifier for a server for getLastError w:2+
- bool exists = Helpers::getSingleton(txn, "local.me", _me);
+void ReplSource::ensureMe(OperationContext* txn) {
+ string myname = getHostName();
- if (!exists || !_me.hasField("host") || _me["host"].String() != myname) {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
- WriteUnitOfWork wunit(txn);
- // clean out local.me
- Helpers::emptyCollection(txn, "local.me");
+ // local.me is an identifier for a server for getLastError w:2+
+ bool exists = Helpers::getSingleton(txn, "local.me", _me);
- // repopulate
- BSONObjBuilder b;
- b.appendOID("_id", 0, true);
- b.append("host", myname);
- _me = b.obj();
- Helpers::putSingleton(txn, "local.me", _me);
- wunit.commit();
- }
- _me = _me.getOwned();
- }
+ if (!exists || !_me.hasField("host") || _me["host"].String() != myname) {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
+ WriteUnitOfWork wunit(txn);
+ // clean out local.me
+ Helpers::emptyCollection(txn, "local.me");
- void ReplSource::save(OperationContext* txn) {
+ // repopulate
BSONObjBuilder b;
- verify( !hostName.empty() );
- b.append("host", hostName);
- // todo: finish allowing multiple source configs.
- // this line doesn't work right when source is null, if that is allowed as it is now:
- //b.append("source", _sourceName);
- BSONObj pattern = b.done();
+ b.appendOID("_id", 0, true);
+ b.append("host", myname);
+ _me = b.obj();
+ Helpers::putSingleton(txn, "local.me", _me);
+ wunit.commit();
+ }
+ _me = _me.getOwned();
+}
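+
+// Illustrative sketch (added commentary): after ensureMe() runs, local.me holds
+// a single document of the form { _id: ObjectId(...), host: "<this host>" },
+// which is what getLastError w:2+ uses to identify this server.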
- BSONObj o = jsobj();
- LOG( 1 ) << "Saving repl source: " << o << endl;
+void ReplSource::save(OperationContext* txn) {
+ BSONObjBuilder b;
+ verify(!hostName.empty());
+ b.append("host", hostName);
+ // todo: finish allowing multiple source configs.
+ // this line doesn't work right when source is null, if that is allowed as it is now:
+ // b.append("source", _sourceName);
+ BSONObj pattern = b.done();
- {
- OpDebug debug;
+ BSONObj o = jsobj();
+ LOG(1) << "Saving repl source: " << o << endl;
- OldClientContext ctx(txn, "local.sources");
+ {
+ OpDebug debug;
- const NamespaceString requestNs("local.sources");
- UpdateRequest request(requestNs);
+ OldClientContext ctx(txn, "local.sources");
- request.setQuery(pattern);
- request.setUpdates(o);
- request.setUpsert();
+ const NamespaceString requestNs("local.sources");
+ UpdateRequest request(requestNs);
- UpdateResult res = update(txn, ctx.db(), request, &debug);
+ request.setQuery(pattern);
+ request.setUpdates(o);
+ request.setUpsert();
- verify( ! res.modifiers );
- verify( res.numMatched == 1 );
- }
- }
+ UpdateResult res = update(txn, ctx.db(), request, &debug);
- static void addSourceToList(OperationContext* txn,
- ReplSource::SourceVector &v,
- ReplSource& s,
- ReplSource::SourceVector &old) {
- if ( !s.syncedTo.isNull() ) { // Don't reuse old ReplSource if there was a forced resync.
- for ( ReplSource::SourceVector::iterator i = old.begin(); i != old.end(); ) {
- if ( s == **i ) {
- v.push_back(*i);
- old.erase(i);
- return;
- }
- i++;
+ verify(!res.modifiers);
+ verify(res.numMatched == 1);
+ }
+}
+
+static void addSourceToList(OperationContext* txn,
+ ReplSource::SourceVector& v,
+ ReplSource& s,
+ ReplSource::SourceVector& old) {
+ if (!s.syncedTo.isNull()) { // Don't reuse old ReplSource if there was a forced resync.
+ for (ReplSource::SourceVector::iterator i = old.begin(); i != old.end();) {
+ if (s == **i) {
+ v.push_back(*i);
+ old.erase(i);
+ return;
}
+ i++;
}
-
- v.push_back( std::shared_ptr< ReplSource >( new ReplSource( s ) ) );
}
- /* we reuse our existing objects so that we can keep our existing connection
- and cursor in effect.
- */
- void ReplSource::loadAll(OperationContext* txn, SourceVector &v) {
- const char* localSources = "local.sources";
- OldClientContext ctx(txn, localSources);
- SourceVector old = v;
- v.clear();
-
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if (!replSettings.source.empty()) {
- // --source <host> specified.
- // check that no items are in sources other than that
- // add if missing
- int n = 0;
- unique_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- localSources,
- ctx.db()->getCollection(localSources)));
- BSONObj obj;
- PlanExecutor::ExecState state;
- while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
- n++;
- ReplSource tmp(txn, obj);
- if (tmp.hostName != replSettings.source) {
- log() << "--source " << replSettings.source << " != " << tmp.hostName
- << " from local.sources collection" << endl;
- log() << "for instructions on changing this slave's source, see:" << endl;
- log() << "http://dochub.mongodb.org/core/masterslave" << endl;
- log() << "terminating mongod after 30 seconds" << endl;
- sleepsecs(30);
- dbexit( EXIT_REPLICATION_ERROR );
- }
- if (tmp.only != replSettings.only) {
- log() << "--only " << replSettings.only << " != " << tmp.only
- << " from local.sources collection" << endl;
- log() << "terminating after 30 seconds" << endl;
- sleepsecs(30);
- dbexit( EXIT_REPLICATION_ERROR );
- }
- }
- uassert(17065, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
- uassert( 10002 , "local.sources collection corrupt?", n<2 );
- if ( n == 0 ) {
- // source missing. add.
- ReplSource s(txn);
- s.hostName = replSettings.source;
- s.only = replSettings.only;
- s.save(txn);
- }
- }
- else {
- try {
- massert(10384 , "--only requires use of --source", replSettings.only.empty());
- }
- catch ( ... ) {
- dbexit( EXIT_BADOPTIONS );
- }
- }
+ v.push_back(std::shared_ptr<ReplSource>(new ReplSource(s)));
+}
- unique_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- localSources,
- ctx.db()->getCollection(localSources)));
+/* we reuse our existing objects so that we can keep our existing connection
+ and cursor in effect.
+*/
+void ReplSource::loadAll(OperationContext* txn, SourceVector& v) {
+ const char* localSources = "local.sources";
+ OldClientContext ctx(txn, localSources);
+ SourceVector old = v;
+ v.clear();
+
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (!replSettings.source.empty()) {
+ // --source <host> specified.
+ // check that no items are in sources other than that
+ // add if missing
+ int n = 0;
+ unique_ptr<PlanExecutor> exec(InternalPlanner::collectionScan(
+ txn, localSources, ctx.db()->getCollection(localSources)));
BSONObj obj;
PlanExecutor::ExecState state;
while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+ n++;
ReplSource tmp(txn, obj);
- if ( tmp.syncedTo.isNull() ) {
- DBDirectClient c(txn);
- BSONObj op = c.findOne( "local.oplog.$main", QUERY( "op" << NE << "n" ).sort( BSON( "$natural" << -1 ) ) );
- if ( !op.isEmpty() ) {
- tmp.syncedTo = op[ "ts" ].timestamp();
- }
+ if (tmp.hostName != replSettings.source) {
+ log() << "--source " << replSettings.source << " != " << tmp.hostName
+ << " from local.sources collection" << endl;
+ log() << "for instructions on changing this slave's source, see:" << endl;
+ log() << "http://dochub.mongodb.org/core/masterslave" << endl;
+ log() << "terminating mongod after 30 seconds" << endl;
+ sleepsecs(30);
+ dbexit(EXIT_REPLICATION_ERROR);
}
- addSourceToList(txn, v, tmp, old);
+ if (tmp.only != replSettings.only) {
+ log() << "--only " << replSettings.only << " != " << tmp.only
+ << " from local.sources collection" << endl;
+ log() << "terminating after 30 seconds" << endl;
+ sleepsecs(30);
+ dbexit(EXIT_REPLICATION_ERROR);
+ }
+ }
+ uassert(17065, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
+ uassert(10002, "local.sources collection corrupt?", n < 2);
+ if (n == 0) {
+ // source missing. add.
+ ReplSource s(txn);
+ s.hostName = replSettings.source;
+ s.only = replSettings.only;
+ s.save(txn);
+ }
+ } else {
+ try {
+ massert(10384, "--only requires use of --source", replSettings.only.empty());
+ } catch (...) {
+ dbexit(EXIT_BADOPTIONS);
}
- uassert(17066, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
}
- bool ReplSource::throttledForceResyncDead( OperationContext* txn, const char *requester ) {
- if ( time( 0 ) - lastForcedResync > 600 ) {
- forceResyncDead( txn, requester );
- lastForcedResync = time( 0 );
- return true;
+ unique_ptr<PlanExecutor> exec(
+ InternalPlanner::collectionScan(txn, localSources, ctx.db()->getCollection(localSources)));
+ BSONObj obj;
+ PlanExecutor::ExecState state;
+ while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+ ReplSource tmp(txn, obj);
+ if (tmp.syncedTo.isNull()) {
+ DBDirectClient c(txn);
+ BSONObj op = c.findOne("local.oplog.$main",
+ QUERY("op" << NE << "n").sort(BSON("$natural" << -1)));
+ if (!op.isEmpty()) {
+ tmp.syncedTo = op["ts"].timestamp();
+ }
}
+ addSourceToList(txn, v, tmp, old);
+ }
+ uassert(17066, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
+}
+
+bool ReplSource::throttledForceResyncDead(OperationContext* txn, const char* requester) {
+ if (time(0) - lastForcedResync > 600) {
+ forceResyncDead(txn, requester);
+ lastForcedResync = time(0);
+ return true;
+ }
+ return false;
+}
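+
+// Note (added commentary): the 600-second window above means forceResyncDead()
+// runs at most once every ten minutes, however often a resync is requested.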
+
+void ReplSource::forceResyncDead(OperationContext* txn, const char* requester) {
+ if (!replAllDead)
+ return;
+ SourceVector sources;
+ ReplSource::loadAll(txn, sources);
+ for (SourceVector::iterator i = sources.begin(); i != sources.end(); ++i) {
+ log() << requester << " forcing resync from " << (*i)->hostName << endl;
+ (*i)->forceResync(txn, requester);
+ }
+ replAllDead = 0;
+}
+
+class HandshakeCmd : public Command {
+public:
+ void help(stringstream& h) const {
+ h << "internal";
+ }
+ HandshakeCmd() : Command("handshake") {}
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual bool adminOnly() const {
return false;
}
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::internal);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
- void ReplSource::forceResyncDead( OperationContext* txn, const char *requester ) {
- if ( !replAllDead )
- return;
- SourceVector sources;
- ReplSource::loadAll(txn, sources);
- for( SourceVector::iterator i = sources.begin(); i != sources.end(); ++i ) {
- log() << requester << " forcing resync from " << (*i)->hostName << endl;
- (*i)->forceResync( txn, requester );
- }
- replAllDead = 0;
- }
-
- class HandshakeCmd : public Command {
- public:
- void help(stringstream& h) const { h << "internal"; }
- HandshakeCmd() : Command("handshake") {}
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual bool slaveOk() const { return true; }
- virtual bool adminOnly() const { return false; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ virtual bool run(OperationContext* txn,
+ const string& ns,
+ BSONObj& cmdObj,
+ int options,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ HandshakeArgs handshake;
+ Status status = handshake.initialize(cmdObj);
+ if (!status.isOK()) {
+ return appendCommandStatus(result, status);
}
- virtual bool run(OperationContext* txn,
- const string& ns,
- BSONObj& cmdObj,
- int options,
- string& errmsg,
- BSONObjBuilder& result) {
-
- HandshakeArgs handshake;
- Status status = handshake.initialize(cmdObj);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
+ ReplClientInfo::forClient(txn->getClient()).setRemoteID(handshake.getRid());
- ReplClientInfo::forClient(txn->getClient()).setRemoteID(handshake.getRid());
+ status = getGlobalReplicationCoordinator()->processHandshake(txn, handshake);
+ return appendCommandStatus(result, status);
+ }
- status = getGlobalReplicationCoordinator()->processHandshake(txn, handshake);
- return appendCommandStatus(result, status);
- }
+} handshakeCmd;
- } handshakeCmd;
+bool replHandshake(DBClientConnection* conn, const OID& myRID) {
+ string myname = getHostName();
- bool replHandshake(DBClientConnection *conn, const OID& myRID) {
- string myname = getHostName();
+ BSONObjBuilder cmd;
+ cmd.append("handshake", myRID);
- BSONObjBuilder cmd;
- cmd.append("handshake", myRID);
+ BSONObj res;
+ bool ok = conn->runCommand("admin", cmd.obj(), res);
+ // ignoring for now on purpose for older versions
+ LOG(ok ? 1 : 0) << "replHandshake result: " << res << endl;
+ return true;
+}
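+
+// Illustrative sketch (added commentary): the command sent above is simply
+//     { handshake: <myRID> }
+// run against "admin"; a failed result is tolerated on purpose so a slave can
+// still sync from masters old enough to lack the handshake command.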
- BSONObj res;
- bool ok = conn->runCommand( "admin" , cmd.obj() , res );
- // ignoring for now on purpose for older versions
- LOG( ok ? 1 : 0 ) << "replHandshake result: " << res << endl;
+bool ReplSource::_connect(OplogReader* reader, const HostAndPort& host, const OID& myRID) {
+ if (reader->conn()) {
return true;
}
- bool ReplSource::_connect(OplogReader* reader, const HostAndPort& host, const OID& myRID) {
- if (reader->conn()) {
- return true;
- }
-
- if (!reader->connect(host)) {
- return false;
- }
-
- if (!replHandshake(reader->conn(), myRID)) {
- return false;
- }
+ if (!reader->connect(host)) {
+ return false;
+ }
- return true;
+ if (!replHandshake(reader->conn(), myRID)) {
+ return false;
}
+ return true;
+}
- void ReplSource::forceResync( OperationContext* txn, const char *requester ) {
- BSONObj info;
- {
- // This is always a GlobalWrite lock (so no ns/db used from the context)
- invariant(txn->lockState()->isW());
- Lock::TempRelease tempRelease(txn->lockState());
- if (!_connect(&oplogReader, HostAndPort(hostName),
- getGlobalReplicationCoordinator()->getMyRID())) {
- msgassertedNoTrace( 14051 , "unable to connect to resync");
- }
- /* todo use getDatabaseNames() method here */
- bool ok = oplogReader.conn()->runCommand("admin",
- BSON("listDatabases" << 1),
- info,
- QueryOption_SlaveOk);
- massert( 10385 , "Unable to get database list", ok );
+void ReplSource::forceResync(OperationContext* txn, const char* requester) {
+ BSONObj info;
+ {
+ // This is always a GlobalWrite lock (so no ns/db used from the context)
+ invariant(txn->lockState()->isW());
+ Lock::TempRelease tempRelease(txn->lockState());
+
+ if (!_connect(&oplogReader,
+ HostAndPort(hostName),
+ getGlobalReplicationCoordinator()->getMyRID())) {
+ msgassertedNoTrace(14051, "unable to connect to resync");
}
+ /* todo use getDatabaseNames() method here */
+ bool ok = oplogReader.conn()->runCommand(
+ "admin", BSON("listDatabases" << 1), info, QueryOption_SlaveOk);
+ massert(10385, "Unable to get database list", ok);
+ }
- BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
- while( i.moreWithEOO() ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- string name = e.embeddedObject().getField( "name" ).valuestr();
- if ( !e.embeddedObject().getBoolField( "empty" ) ) {
- if ( name != "local" ) {
- if ( only.empty() || only == name ) {
- resyncDrop( txn, name );
- }
+ BSONObjIterator i(info.getField("databases").embeddedObject());
+ while (i.moreWithEOO()) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ string name = e.embeddedObject().getField("name").valuestr();
+ if (!e.embeddedObject().getBoolField("empty")) {
+ if (name != "local") {
+ if (only.empty() || only == name) {
+ resyncDrop(txn, name);
}
}
}
- syncedTo = Timestamp();
- addDbNextPass.clear();
- save(txn);
- }
-
- void ReplSource::resyncDrop( OperationContext* txn, const string& db ) {
- log() << "resync: dropping database " << db;
- OldClientContext ctx(txn, db);
- dropDatabase(txn, ctx.db());
}
-
- /* grab initial copy of a database from the master */
- void ReplSource::resync(OperationContext* txn, const std::string& dbName) {
- const std::string db(dbName); // need local copy of the name, we're dropping the original
- resyncDrop( txn, db );
-
- {
- log() << "resync: cloning database " << db << " to get an initial copy" << endl;
- ReplInfo r("resync: cloning a database");
-
- CloneOptions cloneOptions;
- cloneOptions.fromDB = db;
- cloneOptions.slaveOk = true;
- cloneOptions.useReplAuth = true;
- cloneOptions.snapshot = true;
- cloneOptions.mayYield = true;
- cloneOptions.mayBeInterrupted = false;
-
- Cloner cloner;
- Status status = cloner.copyDb(txn,
- db,
- hostName.c_str(),
- cloneOptions,
- NULL);
-
- if (!status.isOK()) {
- if (status.code() == ErrorCodes::DatabaseDifferCase) {
- resyncDrop( txn, db );
- log() << "resync: database " << db
- << " not valid on the master due to a name conflict, dropping.";
- return;
- }
- else {
- log() << "resync of " << db << " from " << hostName
- << " failed due to: " << status.toString();
- throw SyncException();
- }
+ syncedTo = Timestamp();
+ addDbNextPass.clear();
+ save(txn);
+}
+
+void ReplSource::resyncDrop(OperationContext* txn, const string& db) {
+ log() << "resync: dropping database " << db;
+ OldClientContext ctx(txn, db);
+ dropDatabase(txn, ctx.db());
+}
+
+/* grab initial copy of a database from the master */
+void ReplSource::resync(OperationContext* txn, const std::string& dbName) {
+ const std::string db(dbName); // need local copy of the name, we're dropping the original
+ resyncDrop(txn, db);
+
+ {
+ log() << "resync: cloning database " << db << " to get an initial copy" << endl;
+ ReplInfo r("resync: cloning a database");
+
+ CloneOptions cloneOptions;
+ cloneOptions.fromDB = db;
+ cloneOptions.slaveOk = true;
+ cloneOptions.useReplAuth = true;
+ cloneOptions.snapshot = true;
+ cloneOptions.mayYield = true;
+ cloneOptions.mayBeInterrupted = false;
+
+ Cloner cloner;
+ Status status = cloner.copyDb(txn, db, hostName.c_str(), cloneOptions, NULL);
+
+ if (!status.isOK()) {
+ if (status.code() == ErrorCodes::DatabaseDifferCase) {
+ resyncDrop(txn, db);
+ log() << "resync: database " << db
+ << " not valid on the master due to a name conflict, dropping.";
+ return;
+ } else {
+ log() << "resync of " << db << " from " << hostName
+ << " failed due to: " << status.toString();
+ throw SyncException();
}
}
-
- log() << "resync: done with initial clone for db: " << db << endl;
}
- static DatabaseIgnorer ___databaseIgnorer;
+ log() << "resync: done with initial clone for db: " << db << endl;
+}
- void DatabaseIgnorer::doIgnoreUntilAfter( const string &db, const Timestamp &futureOplogTime ) {
- if ( futureOplogTime > _ignores[ db ] ) {
- _ignores[ db ] = futureOplogTime;
- }
+static DatabaseIgnorer ___databaseIgnorer;
+
+void DatabaseIgnorer::doIgnoreUntilAfter(const string& db, const Timestamp& futureOplogTime) {
+ if (futureOplogTime > _ignores[db]) {
+ _ignores[db] = futureOplogTime;
}
+}
- bool DatabaseIgnorer::ignoreAt( const string &db, const Timestamp &currentOplogTime ) {
- if ( _ignores[ db ].isNull() ) {
- return false;
- }
- if ( _ignores[ db ] >= currentOplogTime ) {
- return true;
- } else {
- // The ignore state has expired, so clear it.
- _ignores.erase( db );
- return false;
- }
+bool DatabaseIgnorer::ignoreAt(const string& db, const Timestamp& currentOplogTime) {
+ if (_ignores[db].isNull()) {
+ return false;
+ }
+ if (_ignores[db] >= currentOplogTime) {
+ return true;
+ } else {
+ // The ignore state has expired, so clear it.
+ _ignores.erase(db);
+ return false;
+ }
+}
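+
+// Usage sketch (added commentary): after doIgnoreUntilAfter("foo", t), every
+// call ignoreAt("foo", now) returns true while now <= t; the first call with
+// now > t clears the entry and returns false, per the branches above.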
+
+bool ReplSource::handleDuplicateDbName(OperationContext* txn,
+ const BSONObj& op,
+ const char* ns,
+ const char* db) {
+ // We are already locked at this point
+ if (dbHolder().get(txn, ns) != NULL) {
+ // Database is already present.
+ return true;
+ }
+ BSONElement ts = op.getField("ts");
+ if ((ts.type() == Date || ts.type() == bsonTimestamp) &&
+ ___databaseIgnorer.ignoreAt(db, ts.timestamp())) {
+ // Database is ignored due to a previous indication that it is
+ // missing from master after optime "ts".
+ return false;
+ }
+ if (Database::duplicateUncasedName(db).empty()) {
+ // No duplicate database names are present.
+ return true;
}
- bool ReplSource::handleDuplicateDbName( OperationContext* txn,
- const BSONObj &op,
- const char* ns,
- const char* db ) {
- // We are already locked at this point
- if (dbHolder().get(txn, ns) != NULL) {
- // Database is already present.
- return true;
- }
- BSONElement ts = op.getField( "ts" );
- if ( ( ts.type() == Date || ts.type() == bsonTimestamp ) && ___databaseIgnorer.ignoreAt( db, ts.timestamp() ) ) {
- // Database is ignored due to a previous indication that it is
- // missing from master after optime "ts".
- return false;
- }
- if (Database::duplicateUncasedName(db).empty()) {
- // No duplicate database names are present.
- return true;
+ Timestamp lastTime;
+ bool dbOk = false;
+ {
+ // This is always a GlobalWrite lock (so no ns/db used from the context)
+ invariant(txn->lockState()->isW());
+ Lock::TempRelease(txn->lockState());
+
+ // We always log an operation after executing it (never before), so
+ // a database list will always be valid as of an oplog entry generated
+ // before it was retrieved.
+
+ BSONObj last =
+ oplogReader.findOne(this->ns().c_str(), Query().sort(BSON("$natural" << -1)));
+ if (!last.isEmpty()) {
+ BSONElement ts = last.getField("ts");
+ massert(14032,
+ "Invalid 'ts' in remote log",
+ ts.type() == Date || ts.type() == bsonTimestamp);
+ lastTime = Timestamp(ts.date());
}
- Timestamp lastTime;
- bool dbOk = false;
- {
- // This is always a GlobalWrite lock (so no ns/db used from the context)
- invariant(txn->lockState()->isW());
- Lock::TempRelease(txn->lockState());
-
- // We always log an operation after executing it (never before), so
- // a database list will always be valid as of an oplog entry generated
- // before it was retrieved.
-
- BSONObj last = oplogReader.findOne( this->ns().c_str(), Query().sort( BSON( "$natural" << -1 ) ) );
- if ( !last.isEmpty() ) {
- BSONElement ts = last.getField( "ts" );
- massert(14032, "Invalid 'ts' in remote log",
- ts.type() == Date || ts.type() == bsonTimestamp);
- lastTime = Timestamp( ts.date() );
- }
-
- BSONObj info;
- bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info );
- massert( 14033, "Unable to get database list", ok );
- BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
- while( i.more() ) {
- BSONElement e = i.next();
-
- const char * name = e.embeddedObject().getField( "name" ).valuestr();
- if ( strcasecmp( name, db ) != 0 )
- continue;
+ BSONObj info;
+ bool ok = oplogReader.conn()->runCommand("admin", BSON("listDatabases" << 1), info);
+ massert(14033, "Unable to get database list", ok);
+ BSONObjIterator i(info.getField("databases").embeddedObject());
+ while (i.more()) {
+ BSONElement e = i.next();
- if ( strcmp( name, db ) == 0 ) {
- // The db exists on master, still need to check that no conflicts exist there.
- dbOk = true;
- continue;
- }
+ const char* name = e.embeddedObject().getField("name").valuestr();
+ if (strcasecmp(name, db) != 0)
+ continue;
- // The master has a db name that conflicts with the requested name.
- dbOk = false;
- break;
+ if (strcmp(name, db) == 0) {
+ // The db exists on master, still need to check that no conflicts exist there.
+ dbOk = true;
+ continue;
}
- }
- if ( !dbOk ) {
- ___databaseIgnorer.doIgnoreUntilAfter( db, lastTime );
- incompleteCloneDbs.erase(db);
- addDbNextPass.erase(db);
- return false;
+ // The master has a db name that conflicts with the requested name.
+ dbOk = false;
+ break;
}
+ }
- // Check for duplicates again, since we released the lock above.
- set< string > duplicates;
- Database::duplicateUncasedName(db, &duplicates);
+ if (!dbOk) {
+ ___databaseIgnorer.doIgnoreUntilAfter(db, lastTime);
+ incompleteCloneDbs.erase(db);
+ addDbNextPass.erase(db);
+ return false;
+ }
- // The database is present on the master and no conflicting databases
- // are present on the master. Drop any local conflicts.
- for( set< string >::const_iterator i = duplicates.begin(); i != duplicates.end(); ++i ) {
- ___databaseIgnorer.doIgnoreUntilAfter( *i, lastTime );
- incompleteCloneDbs.erase(*i);
- addDbNextPass.erase(*i);
+ // Check for duplicates again, since we released the lock above.
+ set<string> duplicates;
+ Database::duplicateUncasedName(db, &duplicates);
- OldClientContext ctx(txn, *i);
- dropDatabase(txn, ctx.db());
- }
+ // The database is present on the master and no conflicting databases
+ // are present on the master. Drop any local conflicts.
+ for (set<string>::const_iterator i = duplicates.begin(); i != duplicates.end(); ++i) {
+ ___databaseIgnorer.doIgnoreUntilAfter(*i, lastTime);
+ incompleteCloneDbs.erase(*i);
+ addDbNextPass.erase(*i);
- massert(14034, "Duplicate database names present after attempting to delete duplicates",
- Database::duplicateUncasedName(db).empty());
- return true;
+ OldClientContext ctx(txn, *i);
+ dropDatabase(txn, ctx.db());
}
- void ReplSource::applyCommand(OperationContext* txn, const BSONObj& op) {
- try {
- Status status = applyCommand_inlock(txn, op);
- if (!status.isOK()) {
- SyncTail sync(nullptr, SyncTail::MultiSyncApplyFunc());
- sync.setHostname(hostName);
- if (sync.shouldRetry(txn, op)) {
- uassert(28639,
- "Failure retrying initial sync update",
- applyCommand_inlock(txn, op).isOK());
- }
+ massert(14034,
+ "Duplicate database names present after attempting to delete duplicates",
+ Database::duplicateUncasedName(db).empty());
+ return true;
+}
+
+void ReplSource::applyCommand(OperationContext* txn, const BSONObj& op) {
+ try {
+ Status status = applyCommand_inlock(txn, op);
+ if (!status.isOK()) {
+ SyncTail sync(nullptr, SyncTail::MultiSyncApplyFunc());
+ sync.setHostname(hostName);
+ if (sync.shouldRetry(txn, op)) {
+ uassert(28639,
+ "Failure retrying initial sync update",
+ applyCommand_inlock(txn, op).isOK());
}
}
- catch ( UserException& e ) {
- log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;;
- }
- catch ( DBException& e ) {
- log() << "sync: caught db exception " << e << " while applying op: " << op << endl;;
- }
-
+ } catch (UserException& e) {
+ log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;
+ } catch (DBException& e) {
+ log() << "sync: caught db exception " << e << " while applying op: " << op << endl;
}
-
- void ReplSource::applyOperation(OperationContext* txn, Database* db, const BSONObj& op) {
- try {
- Status status = applyOperation_inlock( txn, db, op );
- if (!status.isOK()) {
- SyncTail sync(nullptr, SyncTail::MultiSyncApplyFunc());
- sync.setHostname(hostName);
- if (sync.shouldRetry(txn, op)) {
- uassert(15914,
- "Failure retrying initial sync update",
- applyOperation_inlock(txn, db, op).isOK());
- }
+}
+
+void ReplSource::applyOperation(OperationContext* txn, Database* db, const BSONObj& op) {
+ try {
+ Status status = applyOperation_inlock(txn, db, op);
+ if (!status.isOK()) {
+ SyncTail sync(nullptr, SyncTail::MultiSyncApplyFunc());
+ sync.setHostname(hostName);
+ if (sync.shouldRetry(txn, op)) {
+ uassert(15914,
+ "Failure retrying initial sync update",
+ applyOperation_inlock(txn, db, op).isOK());
}
}
- catch ( UserException& e ) {
- log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;;
- }
- catch ( DBException& e ) {
- log() << "sync: caught db exception " << e << " while applying op: " << op << endl;;
- }
-
+ } catch (UserException& e) {
+ log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;
+ } catch (DBException& e) {
+ log() << "sync: caught db exception " << e << " while applying op: " << op << endl;
}
+}
- /* local.$oplog.main is of the form:
- { ts: ..., op: <optype>, ns: ..., o: <obj> , o2: <extraobj>, b: <boolflag> }
- ...
- see logOp() comments.
+/* local.$oplog.main is of the form:
+ { ts: ..., op: <optype>, ns: ..., o: <obj> , o2: <extraobj>, b: <boolflag> }
+ ...
+ see logOp() comments.
- @param alreadyLocked caller already put us in write lock if true
- */
- void ReplSource::_sync_pullOpLog_applyOperation(OperationContext* txn, BSONObj& op, bool alreadyLocked) {
- LOG(6) << "processing op: " << op << endl;
-
- if( op.getStringField("op")[0] == 'n' )
- return;
-
- char clientName[MaxDatabaseNameLen];
- const char *ns = op.getStringField("ns");
- nsToDatabase(ns, clientName);
-
- if ( *ns == '.' ) {
- log() << "skipping bad op in oplog: " << op.toString() << endl;
- return;
- }
- else if ( *ns == 0 ) {
- /*if( op.getStringField("op")[0] != 'n' )*/ {
- log() << "halting replication, bad op in oplog:\n " << op.toString() << endl;
- replAllDead = "bad object in oplog";
- throw SyncException();
- }
- //ns = "local.system.x";
- //nsToDatabase(ns, clientName);
+ @param alreadyLocked caller already put us in write lock if true
+*/
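+// Illustrative example (added commentary; field values assumed): a typical
+// insert entry would look like
+//     { ts: Timestamp(...), op: "i", ns: "test.foo", o: { _id: 1, x: 1 } }
+// while 'n' entries are no-ops and are skipped immediately below.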
+void ReplSource::_sync_pullOpLog_applyOperation(OperationContext* txn,
+ BSONObj& op,
+ bool alreadyLocked) {
+ LOG(6) << "processing op: " << op << endl;
+
+ if (op.getStringField("op")[0] == 'n')
+ return;
+
+ char clientName[MaxDatabaseNameLen];
+ const char* ns = op.getStringField("ns");
+ nsToDatabase(ns, clientName);
+
+ if (*ns == '.') {
+ log() << "skipping bad op in oplog: " << op.toString() << endl;
+ return;
+ } else if (*ns == 0) {
+ /*if( op.getStringField("op")[0] != 'n' )*/ {
+ log() << "halting replication, bad op in oplog:\n " << op.toString() << endl;
+ replAllDead = "bad object in oplog";
+ throw SyncException();
}
+ // ns = "local.system.x";
+ // nsToDatabase(ns, clientName);
+ }
- if ( !only.empty() && only != clientName )
- return;
-
- // Push the CurOp stack for "txn" so each individual oplog entry application is separately
- // reported.
- CurOp individualOp(txn);
- txn->setReplicatedWrites(false);
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if (replSettings.pretouch &&
- !alreadyLocked/*doesn't make sense if in write lock already*/) {
- if (replSettings.pretouch > 1) {
- /* note: this is bad - should be put in ReplSource. but this is first test... */
- static int countdown;
- verify( countdown >= 0 );
- if( countdown > 0 ) {
- countdown--; // was pretouched on a prev pass
+ if (!only.empty() && only != clientName)
+ return;
+
+ // Push the CurOp stack for "txn" so each individual oplog entry application is separately
+ // reported.
+ CurOp individualOp(txn);
+ txn->setReplicatedWrites(false);
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (replSettings.pretouch && !alreadyLocked /*doesn't make sense if in write lock already*/) {
+ if (replSettings.pretouch > 1) {
+ /* note: this is bad - should be put in ReplSource. but this is first test... */
+ static int countdown;
+ verify(countdown >= 0);
+ if (countdown > 0) {
+ countdown--; // was pretouched on a prev pass
+ } else {
+ const int m = 4;
+ if (tp.get() == 0) {
+ int nthr = min(8, replSettings.pretouch);
+ nthr = max(nthr, 1);
+ tp.reset(new OldThreadPool(nthr));
}
- else {
- const int m = 4;
- if( tp.get() == 0 ) {
- int nthr = min(8, replSettings.pretouch);
- nthr = max(nthr, 1);
- tp.reset( new OldThreadPool(nthr) );
- }
- vector<BSONObj> v;
- oplogReader.peek(v, replSettings.pretouch);
- unsigned a = 0;
- while( 1 ) {
- if( a >= v.size() ) break;
- unsigned b = a + m - 1; // v[a..b]
- if( b >= v.size() ) b = v.size() - 1;
- tp->schedule(pretouchN, v, a, b);
- DEV cout << "pretouch task: " << a << ".." << b << endl;
- a += m;
- }
- // we do one too...
- pretouchOperation(txn, op);
- tp->join();
- countdown = v.size();
+ vector<BSONObj> v;
+ oplogReader.peek(v, replSettings.pretouch);
+ unsigned a = 0;
+ while (1) {
+ if (a >= v.size())
+ break;
+ unsigned b = a + m - 1; // v[a..b]
+ if (b >= v.size())
+ b = v.size() - 1;
+ tp->schedule(pretouchN, v, a, b);
+ DEV cout << "pretouch task: " << a << ".." << b << endl;
+ a += m;
}
- }
- else {
+ // we do one too...
pretouchOperation(txn, op);
+ tp->join();
+ countdown = v.size();
}
+ } else {
+ pretouchOperation(txn, op);
}
+ }
- unique_ptr<Lock::GlobalWrite> lk(alreadyLocked ? 0 : new Lock::GlobalWrite(txn->lockState()));
+ unique_ptr<Lock::GlobalWrite> lk(alreadyLocked ? 0 : new Lock::GlobalWrite(txn->lockState()));
- if ( replAllDead ) {
- // hmmm why is this check here and not at top of this function? does it get set between top and here?
- log() << "replAllDead, throwing SyncException: " << replAllDead << endl;
- throw SyncException();
- }
+ if (replAllDead) {
+ // hmmm: why is this check here and not at the top of this function? does
+ // replAllDead get set between the top and here?
+ log() << "replAllDead, throwing SyncException: " << replAllDead << endl;
+ throw SyncException();
+ }
- if (!handleDuplicateDbName(txn, op, ns, clientName)) {
- return;
- }
+ if (!handleDuplicateDbName(txn, op, ns, clientName)) {
+ return;
+ }
- // special case apply for commands to avoid implicit database creation
- if (*op.getStringField("op") == 'c') {
- applyCommand(txn, op);
- return;
- }
+ // special case apply for commands to avoid implicit database creation
+ if (*op.getStringField("op") == 'c') {
+ applyCommand(txn, op);
+ return;
+ }
- // This code executes on the slaves only, so it doesn't need to be sharding-aware since
- // mongos will not send requests there. That's why the last argument is false (do not do
- // version checking).
- OldClientContext ctx(txn, ns, false);
-
- bool empty = !ctx.db()->getDatabaseCatalogEntry()->hasUserData();
- bool incompleteClone = incompleteCloneDbs.count( clientName ) != 0;
-
- LOG(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty << ", incompleteClone: " << incompleteClone << endl;
-
- if ( ctx.justCreated() || empty || incompleteClone ) {
- // we must add to incomplete list now that setClient has been called
- incompleteCloneDbs.insert( clientName );
- if ( nClonedThisPass ) {
- /* we only clone one database per pass, even if a lot need done. This helps us
- avoid overflowing the master's transaction log by doing too much work before going
- back to read more transactions. (Imagine a scenario of slave startup where we try to
- clone 100 databases in one pass.)
- */
- addDbNextPass.insert( clientName );
- }
- else {
- if ( incompleteClone ) {
- log() << "An earlier initial clone of '" << clientName << "' did not complete, now resyncing." << endl;
- }
- save(txn);
- OldClientContext ctx(txn, ns);
- nClonedThisPass++;
- resync(txn, ctx.db()->name());
- addDbNextPass.erase(clientName);
- incompleteCloneDbs.erase( clientName );
+ // This code executes on the slaves only, so it doesn't need to be sharding-aware since
+ // mongos will not send requests there. That's why the last argument is false (do not do
+ // version checking).
+ OldClientContext ctx(txn, ns, false);
+
+ bool empty = !ctx.db()->getDatabaseCatalogEntry()->hasUserData();
+ bool incompleteClone = incompleteCloneDbs.count(clientName) != 0;
+
+ LOG(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty
+ << ", incompleteClone: " << incompleteClone << endl;
+
+ if (ctx.justCreated() || empty || incompleteClone) {
+ // we must add to incomplete list now that setClient has been called
+ incompleteCloneDbs.insert(clientName);
+ if (nClonedThisPass) {
+ /* we only clone one database per pass, even if a lot need done. This helps us
+ avoid overflowing the master's transaction log by doing too much work before going
+ back to read more transactions. (Imagine a scenario of slave startup where we try to
+ clone 100 databases in one pass.)
+ */
+ addDbNextPass.insert(clientName);
+ } else {
+ if (incompleteClone) {
+ log() << "An earlier initial clone of '" << clientName
+ << "' did not complete, now resyncing." << endl;
}
save(txn);
+ OldClientContext ctx(txn, ns);
+ nClonedThisPass++;
+ resync(txn, ctx.db()->name());
+ addDbNextPass.erase(clientName);
+ incompleteCloneDbs.erase(clientName);
}
- else {
- applyOperation(txn, ctx.db(), op);
- addDbNextPass.erase( clientName );
- }
+ save(txn);
+ } else {
+ applyOperation(txn, ctx.db(), op);
+ addDbNextPass.erase(clientName);
}
+}
- void ReplSource::syncToTailOfRemoteLog() {
- string _ns = ns();
- BSONObjBuilder b;
- if ( !only.empty() ) {
- b.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta( only ));
- }
- BSONObj last = oplogReader.findOne( _ns.c_str(), Query( b.done() ).sort( BSON( "$natural" << -1 ) ) );
- if ( !last.isEmpty() ) {
- BSONElement ts = last.getField( "ts" );
- massert(10386, "non Date ts found: " + last.toString(),
- ts.type() == Date || ts.type() == bsonTimestamp);
- syncedTo = Timestamp( ts.date() );
- }
+void ReplSource::syncToTailOfRemoteLog() {
+ string _ns = ns();
+ BSONObjBuilder b;
+ if (!only.empty()) {
+ b.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta(only));
+ }
+ BSONObj last = oplogReader.findOne(_ns.c_str(), Query(b.done()).sort(BSON("$natural" << -1)));
+ if (!last.isEmpty()) {
+ BSONElement ts = last.getField("ts");
+ massert(10386,
+ "non Date ts found: " + last.toString(),
+ ts.type() == Date || ts.type() == bsonTimestamp);
+ syncedTo = Timestamp(ts.date());
}
+}
- class ReplApplyBatchSize : public ServerParameter {
- public:
- ReplApplyBatchSize()
- : ServerParameter( ServerParameterSet::getGlobal(), "replApplyBatchSize" ),
- _value( 1 ) {
- }
+class ReplApplyBatchSize : public ServerParameter {
+public:
+ ReplApplyBatchSize()
+ : ServerParameter(ServerParameterSet::getGlobal(), "replApplyBatchSize"), _value(1) {}
- int get() const { return _value; }
+ int get() const {
+ return _value;
+ }
+
+ virtual void append(OperationContext* txn, BSONObjBuilder& b, const string& name) {
+ b.append(name, _value);
+ }
- virtual void append(OperationContext* txn, BSONObjBuilder& b, const string& name) {
- b.append( name, _value );
+ virtual Status set(const BSONElement& newValueElement) {
+ return set(newValueElement.numberInt());
+ }
+
+ virtual Status set(int b) {
+ if (b < 1 || b > 1024) {
+ return Status(ErrorCodes::BadValue, "replApplyBatchSize has to be >= 1 and <= 1024");
}
- virtual Status set( const BSONElement& newValuElement ) {
- return set( newValuElement.numberInt() );
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (replSettings.slavedelay != 0 && b > 1) {
+ return Status(ErrorCodes::BadValue, "can't use a batch size > 1 with slavedelay");
+ }
+ if (!replSettings.slave) {
+ return Status(ErrorCodes::BadValue,
+ "can't set replApplyBatchSize on a non-slave machine");
}
- virtual Status set( int b ) {
- if( b < 1 || b > 1024 ) {
- return Status( ErrorCodes::BadValue,
- "replApplyBatchSize has to be >= 1 and < 1024" );
- }
+ _value = b;
+ return Status::OK();
+ }
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if ( replSettings.slavedelay != 0 && b > 1 ) {
- return Status( ErrorCodes::BadValue,
- "can't use a batch size > 1 with slavedelay" );
- }
- if ( ! replSettings.slave ) {
- return Status( ErrorCodes::BadValue,
- "can't set replApplyBatchSize on a non-slave machine" );
- }
+ virtual Status setFromString(const string& str) {
+ return set(atoi(str.c_str()));
+ }
- _value = b;
- return Status::OK();
- }
+ int _value;
- virtual Status setFromString( const string& str ) {
- return set( atoi( str.c_str() ) );
- }
+} replApplyBatchSize;
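+
+// Usage sketch (added commentary; standard setParameter plumbing assumed): on a
+// slave this can be tuned at runtime with
+//     db.adminCommand({ setParameter: 1, replApplyBatchSize: 8 })
+// subject to the 1..1024 bound and the slavedelay/slave checks enforced in set().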
- int _value;
-
- } replApplyBatchSize;
-
- /* slave: pull some data from the master's oplog
- note: not yet in db mutex at this point.
- @return -1 error
- 0 ok, don't sleep
- 1 ok, sleep
- */
- int ReplSource::_sync_pullOpLog(OperationContext* txn, int& nApplied) {
- int okResultCode = 1;
- string ns = string("local.oplog.$") + sourceName();
- LOG(2) << "sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n';
-
- bool tailing = true;
- oplogReader.tailCheck();
-
- bool initial = syncedTo.isNull();
-
- if ( !oplogReader.haveCursor() || initial ) {
- if ( initial ) {
- // Important to grab last oplog timestamp before listing databases.
- syncToTailOfRemoteLog();
- BSONObj info;
- bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info );
- massert( 10389 , "Unable to get database list", ok );
- BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
- while( i.moreWithEOO() ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- string name = e.embeddedObject().getField( "name" ).valuestr();
- if ( !e.embeddedObject().getBoolField( "empty" ) ) {
- if ( name != "local" ) {
- if ( only.empty() || only == name ) {
- LOG( 2 ) << "adding to 'addDbNextPass': " << name << endl;
- addDbNextPass.insert( name );
- }
+/* slave: pull some data from the master's oplog
+ note: not yet in db mutex at this point.
+ @return -1 error
+ 0 ok, don't sleep
+ 1 ok, sleep
+*/
+int ReplSource::_sync_pullOpLog(OperationContext* txn, int& nApplied) {
+ int okResultCode = 1;
+ string ns = string("local.oplog.$") + sourceName();
+ LOG(2) << "sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n';
+
+ bool tailing = true;
+ oplogReader.tailCheck();
+
+ bool initial = syncedTo.isNull();
+
+ if (!oplogReader.haveCursor() || initial) {
+ if (initial) {
+ // Important to grab last oplog timestamp before listing databases.
+ syncToTailOfRemoteLog();
+ BSONObj info;
+ bool ok = oplogReader.conn()->runCommand("admin", BSON("listDatabases" << 1), info);
+ massert(10389, "Unable to get database list", ok);
+ BSONObjIterator i(info.getField("databases").embeddedObject());
+ while (i.moreWithEOO()) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ string name = e.embeddedObject().getField("name").valuestr();
+ if (!e.embeddedObject().getBoolField("empty")) {
+ if (name != "local") {
+ if (only.empty() || only == name) {
+ LOG(2) << "adding to 'addDbNextPass': " << name << endl;
+ addDbNextPass.insert(name);
}
}
}
- // obviously global isn't ideal, but non-repl set is old so
- // keeping it simple
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- save(txn);
}
+ // obviously global isn't ideal, but non-repl set is old so
+ // keeping it simple
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ save(txn);
+ }
- BSONObjBuilder gte;
- gte.append("$gte", syncedTo);
- BSONObjBuilder query;
- query.append("ts", gte.done());
- if ( !only.empty() ) {
- // note we may here skip a LOT of data table scanning, a lot of work for the master.
- // maybe append "\\." here?
- query.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta( only ));
- }
- BSONObj queryObj = query.done();
- // e.g. queryObj = { ts: { $gte: syncedTo } }
+ BSONObjBuilder gte;
+ gte.append("$gte", syncedTo);
+ BSONObjBuilder query;
+ query.append("ts", gte.done());
+ if (!only.empty()) {
+ // note we may here skip a LOT of data table scanning, a lot of work for the master.
+ // maybe append "\\." here?
+ query.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta(only));
+ }
+ BSONObj queryObj = query.done();
+ // e.g. queryObj = { ts: { $gte: syncedTo } }
- oplogReader.tailingQuery(ns.c_str(), queryObj);
- tailing = false;
+ oplogReader.tailingQuery(ns.c_str(), queryObj);
+ tailing = false;
+ } else {
+ LOG(2) << "tailing=true\n";
+ }
+
+ if (!oplogReader.haveCursor()) {
+ log() << "dbclient::query returns null (conn closed?)" << endl;
+ oplogReader.resetConnection();
+ return -1;
+ }
+
+ // show any deferred database creates from a previous pass
+ {
+ set<string>::iterator i = addDbNextPass.begin();
+ if (i != addDbNextPass.end()) {
+ BSONObjBuilder b;
+ b.append("ns", *i + '.');
+ b.append("op", "db");
+ BSONObj op = b.done();
+ _sync_pullOpLog_applyOperation(txn, op, false);
+ }
+ }
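+
+ // Note (added commentary): the synthetic op built above has the shape
+ // { ns: "<dbname>.", op: "db" }; it carries no data and exists only to drive
+ // _sync_pullOpLog_applyOperation() into its clone-this-database path.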
+
+ if (!oplogReader.more()) {
+ if (tailing) {
+ LOG(2) << "tailing & no new activity\n";
+ okResultCode = 0; // don't sleep
+
+ } else {
+ log() << ns << " oplog is empty" << endl;
}
- else {
- LOG(2) << "tailing=true\n";
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ save(txn);
}
+ return okResultCode;
+ }
- if( !oplogReader.haveCursor() ) {
- log() << "dbclient::query returns null (conn closed?)" << endl;
- oplogReader.resetConnection();
- return -1;
+ Timestamp nextOpTime;
+ {
+ BSONObj op = oplogReader.next();
+ BSONElement ts = op.getField("ts");
+ if (ts.type() != Date && ts.type() != bsonTimestamp) {
+ string err = op.getStringField("$err");
+ if (!err.empty()) {
+ // 13051 is "tailable cursor requested on non capped collection"
+ if (op.getIntField("code") == 13051) {
+ log() << "trying to slave off of a non-master" << '\n';
+ massert(13344, "trying to slave off of a non-master", false);
+ } else {
+ error() << "$err reading remote oplog: " + err << '\n';
+ massert(10390, "got $err reading remote oplog", false);
+ }
+ } else {
+ error() << "bad object read from remote oplog: " << op.toString() << '\n';
+ massert(10391, "bad object read from remote oplog", false);
+ }
}
- // show any deferred database creates from a previous pass
- {
- set<string>::iterator i = addDbNextPass.begin();
- if ( i != addDbNextPass.end() ) {
- BSONObjBuilder b;
- b.append("ns", *i + '.');
- b.append("op", "db");
- BSONObj op = b.done();
- _sync_pullOpLog_applyOperation(txn, op, false);
+ nextOpTime = Timestamp(ts.date());
+ LOG(2) << "first op time received: " << nextOpTime.toString() << '\n';
+ if (initial) {
+ LOG(1) << "initial run\n";
+ }
+ if (tailing) {
+ if (!(syncedTo < nextOpTime)) {
+ warning() << "ASSERTION failed: syncedTo < nextOpTime" << endl;
+ log() << "syncedTo: " << syncedTo.toStringLong() << endl;
+ log() << "nextOpTime: " << nextOpTime.toStringLong() << endl;
+ verify(false);
}
+ oplogReader.putBack(op); // op will be processed in the loop below
+ nextOpTime = Timestamp(); // will reread the op below
+ } else if (nextOpTime != syncedTo) { // didn't get what we queried for - error
+ log() << "nextOpTime " << nextOpTime.toStringLong() << ' '
+ << ((nextOpTime < syncedTo) ? "<??" : ">") << " syncedTo "
+ << syncedTo.toStringLong() << '\n'
+ << "time diff: " << (nextOpTime.getSecs() - syncedTo.getSecs()) << "sec\n"
+ << "tailing: " << tailing << '\n' << "data too stale, halting replication"
+ << endl;
+ replInfo = replAllDead = "data too stale halted replication";
+ verify(syncedTo < nextOpTime);
+ throw SyncException();
+ } else {
+ /* t == syncedTo, so the first op was applied previously or it is the first op
+ of the initial query and need not be applied. */
}
+ }
- if ( !oplogReader.more() ) {
- if ( tailing ) {
- LOG(2) << "tailing & no new activity\n";
- okResultCode = 0; // don't sleep
+ // apply operations
+ {
+ int n = 0;
+ time_t saveLast = time(0);
+ while (1) {
+ // we need "&& n" to assure we actually process at least one op to get a sync
+ // point recorded in the first place.
+ const bool moreInitialSyncsPending = !addDbNextPass.empty() && n;
- }
- else {
- log() << ns << " oplog is empty" << endl;
- }
- {
+ if (moreInitialSyncsPending || !oplogReader.more()) {
ScopedTransaction transaction(txn, MODE_X);
Lock::GlobalWrite lk(txn->lockState());
- save(txn);
- }
- return okResultCode;
- }
- Timestamp nextOpTime;
- {
- BSONObj op = oplogReader.next();
- BSONElement ts = op.getField("ts");
- if ( ts.type() != Date && ts.type() != bsonTimestamp ) {
- string err = op.getStringField("$err");
- if ( !err.empty() ) {
- // 13051 is "tailable cursor requested on non capped collection"
- if (op.getIntField("code") == 13051) {
- log() << "trying to slave off of a non-master" << '\n';
- massert( 13344 , "trying to slave off of a non-master", false );
- }
- else {
- error() << "$err reading remote oplog: " + err << '\n';
- massert( 10390 , "got $err reading remote oplog", false );
- }
+ if (tailing) {
+ okResultCode = 0; // don't sleep
}
- else {
- error() << "bad object read from remote oplog: " << op.toString() << '\n';
- massert( 10391 , "bad object read from remote oplog", false);
- }
- }
- nextOpTime = Timestamp( ts.date() );
- LOG(2) << "first op time received: " << nextOpTime.toString() << '\n';
- if ( initial ) {
- LOG(1) << "initial run\n";
- }
- if( tailing ) {
- if( !( syncedTo < nextOpTime ) ) {
- warning() << "ASSERTION failed : syncedTo < nextOpTime" << endl;
- log() << "syncTo: " << syncedTo.toStringLong() << endl;
- log() << "nextOpTime: " << nextOpTime.toStringLong() << endl;
- verify(false);
- }
- oplogReader.putBack( op ); // op will be processed in the loop below
- nextOpTime = Timestamp(); // will reread the op below
- }
- else if ( nextOpTime != syncedTo ) { // didn't get what we queried for - error
- log()
- << "nextOpTime " << nextOpTime.toStringLong() << ' '
- << ((nextOpTime < syncedTo) ? "<??" : ">")
- << " syncedTo " << syncedTo.toStringLong() << '\n'
- << "time diff: " << (nextOpTime.getSecs() - syncedTo.getSecs())
- << "sec\n"
- << "tailing: " << tailing << '\n'
- << "data too stale, halting replication" << endl;
- replInfo = replAllDead = "data too stale halted replication";
- verify( syncedTo < nextOpTime );
- throw SyncException();
- }
- else {
- /* t == syncedTo, so the first op was applied previously or it is the first op of initial query and need not be applied. */
+ syncedTo = nextOpTime;
+ save(txn); // note how far we are synced up to now
+ nApplied = n;
+ break;
}
- }
- // apply operations
- {
- int n = 0;
- time_t saveLast = time(0);
- while ( 1 ) {
- // we need "&& n" to assure we actually process at least one op to get a sync
- // point recorded in the first place.
- const bool moreInitialSyncsPending = !addDbNextPass.empty() && n;
-
- if ( moreInitialSyncsPending || !oplogReader.more() ) {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
+ OCCASIONALLY if (n > 0 && (n > 100000 || time(0) - saveLast > 60)) {
+ // periodically note our progress, in case we are doing a lot of work and crash
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ syncedTo = nextOpTime;
+ // can't update local log ts since there are pending operations from our peer
+ save(txn);
+ log() << "checkpoint applied " << n << " operations" << endl;
+ log() << "syncedTo: " << syncedTo.toStringLong() << endl;
+ saveLast = time(0);
+ n = 0;
+ }
- if (tailing) {
- okResultCode = 0; // don't sleep
- }
+ BSONObj op = oplogReader.next();
- syncedTo = nextOpTime;
- save(txn); // note how far we are synced up to now
- nApplied = n;
- break;
+ int b = replApplyBatchSize.get();
+ bool justOne = b == 1;
+ unique_ptr<Lock::GlobalWrite> lk(justOne ? 0 : new Lock::GlobalWrite(txn->lockState()));
+ while (1) {
+ BSONElement ts = op.getField("ts");
+ if (!(ts.type() == Date || ts.type() == bsonTimestamp)) {
+ log() << "sync error: problem querying remote oplog record" << endl;
+ log() << "op: " << op.toString() << endl;
+ log() << "halting replication" << endl;
+ replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
+ throw SyncException();
}
-
- OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) {
- // periodically note our progress, in case we are doing a lot of work and crash
+ Timestamp last = nextOpTime;
+ nextOpTime = Timestamp(ts.date());
+ if (!(last < nextOpTime)) {
+ log() << "sync error: last applied optime at slave >= nextOpTime from master"
+ << endl;
+ log() << " last: " << last.toStringLong() << endl;
+ log() << " nextOpTime: " << nextOpTime.toStringLong() << endl;
+ log() << " halting replication" << endl;
+ replInfo = replAllDead = "sync error last >= nextOpTime";
+ uassert(
+ 10123,
+ "replication error last applied optime at slave >= nextOpTime from master",
+ false);
+ }
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (replSettings.slavedelay &&
+ (unsigned(time(0)) < nextOpTime.getSecs() + replSettings.slavedelay)) {
+ verify(justOne);
+ oplogReader.putBack(op);
+ _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1;
ScopedTransaction transaction(txn, MODE_X);
Lock::GlobalWrite lk(txn->lockState());
- syncedTo = nextOpTime;
- // can't update local log ts since there are pending operations from our peer
- save(txn);
- log() << "checkpoint applied " << n << " operations" << endl;
+ if (n > 0) {
+ syncedTo = last;
+ save(txn);
+ }
+ log() << "applied " << n << " operations" << endl;
log() << "syncedTo: " << syncedTo.toStringLong() << endl;
- saveLast = time(0);
- n = 0;
+ log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl;
+ return okResultCode;
}
- BSONObj op = oplogReader.next();
-
- int b = replApplyBatchSize.get();
- bool justOne = b == 1;
- unique_ptr<Lock::GlobalWrite> lk(justOne ? 0 : new Lock::GlobalWrite(txn->lockState()));
- while( 1 ) {
-
- BSONElement ts = op.getField("ts");
- if( !( ts.type() == Date || ts.type() == bsonTimestamp ) ) {
- log() << "sync error: problem querying remote oplog record" << endl;
- log() << "op: " << op.toString() << endl;
- log() << "halting replication" << endl;
- replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
- throw SyncException();
- }
- Timestamp last = nextOpTime;
- nextOpTime = Timestamp( ts.date() );
- if ( !( last < nextOpTime ) ) {
- log() << "sync error: last applied optime at slave >= nextOpTime from master" << endl;
- log() << " last: " << last.toStringLong() << endl;
- log() << " nextOpTime: " << nextOpTime.toStringLong() << endl;
- log() << " halting replication" << endl;
- replInfo = replAllDead = "sync error last >= nextOpTime";
- uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false);
- }
- const ReplSettings& replSettings =
- getGlobalReplicationCoordinator()->getSettings();
- if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) {
- verify( justOne );
- oplogReader.putBack( op );
- _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1;
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- if ( n > 0 ) {
- syncedTo = last;
- save(txn);
- }
- log() << "applied " << n << " operations" << endl;
- log() << "syncedTo: " << syncedTo.toStringLong() << endl;
- log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl;
- return okResultCode;
- }
-
- _sync_pullOpLog_applyOperation(txn, op, !justOne);
- n++;
+ _sync_pullOpLog_applyOperation(txn, op, !justOne);
+ n++;
- if( --b == 0 )
- break;
- // if to here, we are doing mulpile applications in a singel write lock acquisition
- if( !oplogReader.moreInCurrentBatch() ) {
- // break if no more in batch so we release lock while reading from the master
- break;
- }
- op = oplogReader.next();
+ if (--b == 0)
+ break;
+        // if we get here, we are doing multiple applications in a single write lock acquisition
+ if (!oplogReader.moreInCurrentBatch()) {
+ // break if no more in batch so we release lock while reading from the master
+ break;
}
+ op = oplogReader.next();
}
}
-
- return okResultCode;
}
+ return okResultCode;
+}
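
For orientation, the batching policy in the loop above can be restated in isolation. This is a simplified sketch with stub types (GlobalWriteLock, Reader, and applyOp are stand-ins, not the real MongoDB classes): a batch size of 1 applies each op without a long-lived global lock, while a larger batch holds one lock across up to b ops but stops early whenever the current network batch runs dry, so the lock is released while refetching from the master.

    #include <cstdio>
    #include <memory>
    #include <vector>

    struct GlobalWriteLock {};  // stand-in for Lock::GlobalWrite

    struct Reader {  // stand-in for OplogReader; ops arrive in network batches of 4
        std::vector<int> ops;
        size_t pos = 0;
        int next() { return ops[pos++]; }
        bool moreInCurrentBatch() const { return pos < ops.size() && pos % 4 != 0; }
    };

    void applyOp(int op) { std::printf("applied op %d\n", op); }

    void applyUpTo(Reader& reader, int b) {
        const bool justOne = (b == 1);
        // One lock acquisition spans the whole batch unless b == 1.
        std::unique_ptr<GlobalWriteLock> lk(justOne ? nullptr : new GlobalWriteLock());
        int op = reader.next();
        while (true) {
            applyOp(op);
            if (--b == 0)
                break;  // applied a full batch under this lock
            if (!reader.moreInCurrentBatch())
                break;  // release the lock while reading more from the master
            op = reader.next();
        }
    }

    int main() {
        Reader r{{1, 2, 3, 4, 5, 6}};
        applyUpTo(r, 4);  // applies ops 1..4, then the caller would loop again
    }
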
- /* note: not yet in mutex at this point.
- returns >= 0 if ok. return -1 if you want to reconnect.
- return value of zero indicates no sleep necessary before next call
- */
- int ReplSource::sync(OperationContext* txn, int& nApplied) {
- _sleepAdviceTime = 0;
- ReplInfo r("sync");
- if (!serverGlobalParams.quiet) {
- LogstreamBuilder l = log();
- l << "syncing from ";
- if( sourceName() != "main" ) {
- l << "source:" << sourceName() << ' ';
- }
- l << "host:" << hostName << endl;
- }
- nClonedThisPass = 0;
-
- // FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName.
- if ((string("localhost") == hostName || string("127.0.0.1") == hostName) &&
- serverGlobalParams.port == ServerGlobalParams::DefaultDBPort) {
- log() << "can't sync from self (localhost). sources configuration may be wrong." << endl;
- sleepsecs(5);
- return -1;
- }
- if ( !_connect(&oplogReader,
- HostAndPort(hostName),
- getGlobalReplicationCoordinator()->getMyRID()) ) {
- LOG(4) << "can't connect to sync source" << endl;
- return -1;
- }
+/* note: not yet in mutex at this point.
+ returns >= 0 if ok. return -1 if you want to reconnect.
+ return value of zero indicates no sleep necessary before next call
+*/
+int ReplSource::sync(OperationContext* txn, int& nApplied) {
+ _sleepAdviceTime = 0;
+ ReplInfo r("sync");
+ if (!serverGlobalParams.quiet) {
+ LogstreamBuilder l = log();
+ l << "syncing from ";
+ if (sourceName() != "main") {
+ l << "source:" << sourceName() << ' ';
+ }
+ l << "host:" << hostName << endl;
+ }
+ nClonedThisPass = 0;
+
+ // FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName.
+ if ((string("localhost") == hostName || string("127.0.0.1") == hostName) &&
+ serverGlobalParams.port == ServerGlobalParams::DefaultDBPort) {
+ log() << "can't sync from self (localhost). sources configuration may be wrong." << endl;
+ sleepsecs(5);
+ return -1;
+ }
- return _sync_pullOpLog(txn, nApplied);
+ if (!_connect(
+ &oplogReader, HostAndPort(hostName), getGlobalReplicationCoordinator()->getMyRID())) {
+ LOG(4) << "can't connect to sync source" << endl;
+ return -1;
}
- /* --------------------------------------------------------------*/
+ return _sync_pullOpLog(txn, nApplied);
+}
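
As a compact restatement of the contract in the comment above (illustrative, not code from the tree), callers decode sync()'s return value as follows:

    enum class SyncOutcome { Reconnect, NoSleep, SleepSeconds };

    SyncOutcome decodeSyncReturn(int ret) {
        if (ret < 0)
            return SyncOutcome::Reconnect;  // e.g. connection failure: reset and retry
        if (ret == 0)
            return SyncOutcome::NoSleep;    // more work is available immediately
        return SyncOutcome::SleepSeconds;   // sleep `ret` seconds before the next call
    }
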
- static bool _replMainStarted = false;
+/* --------------------------------------------------------------*/
- /*
- TODO:
- _ source has autoptr to the cursor
- _ reuse that cursor when we can
- */
+static bool _replMainStarted = false;
- /* returns: # of seconds to sleep before next pass
- 0 = no sleep recommended
- 1 = special sentinel indicating adaptive sleep recommended
- */
- int _replMain(OperationContext* txn, ReplSource::SourceVector& sources, int& nApplied) {
- {
- ReplInfo r("replMain load sources");
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- ReplSource::loadAll(txn, sources);
+/*
+TODO:
+_ source has autoptr to the cursor
+_ reuse that cursor when we can
+*/
- // only need this param for initial reset
- _replMainStarted = true;
- }
+/* returns: # of seconds to sleep before next pass
+ 0 = no sleep recommended
+ 1 = special sentinel indicating adaptive sleep recommended
+*/
+int _replMain(OperationContext* txn, ReplSource::SourceVector& sources, int& nApplied) {
+ {
+ ReplInfo r("replMain load sources");
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ ReplSource::loadAll(txn, sources);
- if ( sources.empty() ) {
- /* replication is not configured yet (for --slave) in local.sources. Poll for config it
- every 20 seconds.
- */
- log() << "no source given, add a master to local.sources to start replication" << endl;
- return 20;
- }
+ // only need this param for initial reset
+ _replMainStarted = true;
+ }
- int sleepAdvice = 1;
- for ( ReplSource::SourceVector::iterator i = sources.begin(); i != sources.end(); i++ ) {
- ReplSource *s = i->get();
- int res = -1;
- try {
- res = s->sync(txn, nApplied);
- bool moreToSync = s->haveMoreDbsToSync();
- if( res < 0 ) {
- sleepAdvice = 3;
- }
- else if( moreToSync ) {
- sleepAdvice = 0;
- }
- else if ( s->sleepAdvice() ) {
- sleepAdvice = s->sleepAdvice();
- }
- else
- sleepAdvice = res;
- }
- catch ( const SyncException& ) {
- log() << "caught SyncException" << endl;
- return 10;
- }
- catch ( AssertionException& e ) {
- if ( e.severe() ) {
- log() << "replMain AssertionException " << e.what() << endl;
- return 60;
- }
- else {
- log() << "AssertionException " << e.what() << endl;
- }
- replInfo = "replMain caught AssertionException";
- }
- catch ( const DBException& e ) {
- log() << "DBException " << e.what() << endl;
- replInfo = "replMain caught DBException";
- }
- catch ( const std::exception &e ) {
- log() << "std::exception " << e.what() << endl;
- replInfo = "replMain caught std::exception";
- }
- catch ( ... ) {
- log() << "unexpected exception during replication. replication will halt" << endl;
- replAllDead = "caught unexpected exception during replication";
- }
- if ( res < 0 )
- s->oplogReader.resetConnection();
- }
- return sleepAdvice;
+ if (sources.empty()) {
+        /* replication is not configured yet (for --slave) in local.sources. Poll for it
+ every 20 seconds.
+ */
+ log() << "no source given, add a master to local.sources to start replication" << endl;
+ return 20;
}
- static void replMain(OperationContext* txn) {
- ReplSource::SourceVector sources;
- while ( 1 ) {
- int s = 0;
- {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- if ( replAllDead ) {
- // throttledForceResyncDead can throw
- if ( !getGlobalReplicationCoordinator()->getSettings().autoresync ||
- !ReplSource::throttledForceResyncDead( txn, "auto" ) ) {
- log() << "all sources dead: " << replAllDead << ", sleeping for 5 seconds" << endl;
- break;
- }
- }
- verify( syncing == 0 ); // i.e., there is only one sync thread running. we will want to change/fix this.
- syncing++;
+ int sleepAdvice = 1;
+ for (ReplSource::SourceVector::iterator i = sources.begin(); i != sources.end(); i++) {
+ ReplSource* s = i->get();
+ int res = -1;
+ try {
+ res = s->sync(txn, nApplied);
+ bool moreToSync = s->haveMoreDbsToSync();
+ if (res < 0) {
+ sleepAdvice = 3;
+ } else if (moreToSync) {
+ sleepAdvice = 0;
+ } else if (s->sleepAdvice()) {
+ sleepAdvice = s->sleepAdvice();
+ } else
+ sleepAdvice = res;
+ } catch (const SyncException&) {
+ log() << "caught SyncException" << endl;
+ return 10;
+ } catch (AssertionException& e) {
+ if (e.severe()) {
+ log() << "replMain AssertionException " << e.what() << endl;
+ return 60;
+ } else {
+ log() << "AssertionException " << e.what() << endl;
}
+ replInfo = "replMain caught AssertionException";
+ } catch (const DBException& e) {
+ log() << "DBException " << e.what() << endl;
+ replInfo = "replMain caught DBException";
+ } catch (const std::exception& e) {
+ log() << "std::exception " << e.what() << endl;
+ replInfo = "replMain caught std::exception";
+ } catch (...) {
+ log() << "unexpected exception during replication. replication will halt" << endl;
+ replAllDead = "caught unexpected exception during replication";
+ }
+ if (res < 0)
+ s->oplogReader.resetConnection();
+ }
+ return sleepAdvice;
+}
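
The per-source advice that this loop folds into sleepAdvice reduces to a small pure function; an illustrative summary, not code from the tree:

    int adviceForSource(int res, bool moreToSync, int sourceAdvice) {
        if (res < 0)
            return 3;             // sync failed: retry after a short pause
        if (moreToSync)
            return 0;             // more databases to clone: no sleep
        if (sourceAdvice)
            return sourceAdvice;  // the slaveDelay path recorded a wakeup time
        return res;               // otherwise use sync()'s own advice
    }
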
- try {
- int nApplied = 0;
- s = _replMain(txn, sources, nApplied);
- if( s == 1 ) {
- if( nApplied == 0 ) s = 2;
- else if( nApplied > 100 ) {
- // sleep very little - just enough that we aren't truly hammering master
- sleepmillis(75);
- s = 0;
- }
+static void replMain(OperationContext* txn) {
+ ReplSource::SourceVector sources;
+ while (1) {
+ int s = 0;
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ if (replAllDead) {
+ // throttledForceResyncDead can throw
+ if (!getGlobalReplicationCoordinator()->getSettings().autoresync ||
+ !ReplSource::throttledForceResyncDead(txn, "auto")) {
+ log() << "all sources dead: " << replAllDead << ", sleeping for 5 seconds"
+ << endl;
+ break;
}
}
- catch (...) {
- log() << "caught exception in _replMain" << endl;
- s = 4;
- }
-
- {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- verify( syncing == 1 );
- syncing--;
- }
-
- if( relinquishSyncingSome ) {
- relinquishSyncingSome = 0;
- s = 1; // sleep before going back in to syncing=1
- }
+ verify(
+ syncing ==
+ 0); // i.e., there is only one sync thread running. we will want to change/fix this.
+ syncing++;
+ }
- if ( s ) {
- stringstream ss;
- ss << "sleep " << s << " sec before next pass";
- string msg = ss.str();
- if (!serverGlobalParams.quiet)
- log() << msg << endl;
- ReplInfo r(msg.c_str());
- sleepsecs(s);
+ try {
+ int nApplied = 0;
+ s = _replMain(txn, sources, nApplied);
+ if (s == 1) {
+ if (nApplied == 0)
+ s = 2;
+ else if (nApplied > 100) {
+ // sleep very little - just enough that we aren't truly hammering master
+ sleepmillis(75);
+ s = 0;
+ }
}
+ } catch (...) {
+ log() << "caught exception in _replMain" << endl;
+ s = 4;
}
- }
-
- static void replMasterThread() {
- sleepsecs(4);
- Client::initThread("replmaster");
- int toSleep = 10;
- while( 1 ) {
- sleepsecs(toSleep);
- // Write a keep-alive like entry to the log. This will make things like
- // printReplicationStatus() and printSlaveReplicationStatus() stay up-to-date even
- // when things are idle.
- OperationContextImpl txn;
- AuthorizationSession::get(txn.getClient())->grantInternalAuthorization();
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ verify(syncing == 1);
+ syncing--;
+ }
- Lock::GlobalWrite globalWrite(txn.lockState(), 1);
- if (globalWrite.isLocked()) {
- toSleep = 10;
+ if (relinquishSyncingSome) {
+ relinquishSyncingSome = 0;
+ s = 1; // sleep before going back in to syncing=1
+ }
- try {
- WriteUnitOfWork wuow(&txn);
- getGlobalServiceContext()->getOpObserver()->onOpMessage(&txn, BSONObj());
- wuow.commit();
- }
- catch (...) {
- log() << "caught exception in replMasterThread()" << endl;
- }
- }
- else {
- LOG(5) << "couldn't logKeepalive" << endl;
- toSleep = 1;
- }
+ if (s) {
+ stringstream ss;
+ ss << "sleep " << s << " sec before next pass";
+ string msg = ss.str();
+ if (!serverGlobalParams.quiet)
+ log() << msg << endl;
+ ReplInfo r(msg.c_str());
+ sleepsecs(s);
}
}
-
- static void replSlaveThread() {
- sleepsecs(1);
- Client::initThread("replslave");
-
+}
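
The sentinel handling above (s == 1 means "choose adaptively") also reduces to a pure function; note that the 75 ms pause for a busy pass happens inline rather than through the returned value (illustrative restatement):

    int adaptiveSleepSeconds(int advice, int nApplied) {
        if (advice != 1)
            return advice;  // explicit advice from _replMain wins
        if (nApplied == 0)
            return 2;       // idle pass: back off slightly
        if (nApplied > 100)
            return 0;       // busy pass: only the inline 75 ms pause applies
        return 1;           // moderate load: keep the 1 second default
    }
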
+
+static void replMasterThread() {
+ sleepsecs(4);
+ Client::initThread("replmaster");
+ int toSleep = 10;
+ while (1) {
+ sleepsecs(toSleep);
+
+ // Write a keep-alive like entry to the log. This will make things like
+ // printReplicationStatus() and printSlaveReplicationStatus() stay up-to-date even
+ // when things are idle.
OperationContextImpl txn;
AuthorizationSession::get(txn.getClient())->grantInternalAuthorization();
- DisableDocumentValidation validationDisabler(&txn);
- while ( 1 ) {
+ Lock::GlobalWrite globalWrite(txn.lockState(), 1);
+ if (globalWrite.isLocked()) {
+ toSleep = 10;
+
try {
- replMain(&txn);
- sleepsecs(5);
- }
- catch ( AssertionException& ) {
- ReplInfo r("Assertion in replSlaveThread(): sleeping 5 minutes before retry");
- log() << "Assertion in replSlaveThread(): sleeping 5 minutes before retry" << endl;
- sleepsecs(300);
- }
- catch ( DBException& e ) {
- log() << "exception in replSlaveThread(): " << e.what()
- << ", sleeping 5 minutes before retry" << endl;
- sleepsecs(300);
- }
- catch ( ... ) {
- log() << "error in replSlaveThread(): sleeping 5 minutes before retry" << endl;
- sleepsecs(300);
+ WriteUnitOfWork wuow(&txn);
+ getGlobalServiceContext()->getOpObserver()->onOpMessage(&txn, BSONObj());
+ wuow.commit();
+ } catch (...) {
+ log() << "caught exception in replMasterThread()" << endl;
}
+ } else {
+ LOG(5) << "couldn't logKeepalive" << endl;
+ toSleep = 1;
}
}
+}
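
The shape of replMasterThread() — try the global write lock with a tiny timeout, write a no-op keepalive if it succeeds, and poll faster when contended — looks like this in isolation (std::timed_mutex and logKeepalive() are stand-ins for the real global lock and the OpObserver call):

    #include <chrono>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    std::timed_mutex globalLock;  // stand-in for the global write lock

    void logKeepalive() { std::puts("keepalive written"); }  // stand-in for onOpMessage()

    void masterKeepaliveLoop() {
        int toSleep = 10;
        while (true) {
            std::this_thread::sleep_for(std::chrono::seconds(toSleep));
            // Mirrors the short-timeout acquisition above: give up almost immediately.
            if (globalLock.try_lock_for(std::chrono::milliseconds(1))) {
                logKeepalive();  // keeps replication-status output fresh while idle
                globalLock.unlock();
                toSleep = 10;    // lock was free: resume the slow cadence
            } else {
                toSleep = 1;     // contended: retry sooner
            }
        }
    }
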
- void startMasterSlave(OperationContext* txn) {
-
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if( !replSettings.slave && !replSettings.master )
- return;
+static void replSlaveThread() {
+ sleepsecs(1);
+ Client::initThread("replslave");
- AuthorizationSession::get(txn->getClient())->grantInternalAuthorization();
+ OperationContextImpl txn;
+ AuthorizationSession::get(txn.getClient())->grantInternalAuthorization();
+ DisableDocumentValidation validationDisabler(&txn);
- {
- ReplSource temp(txn); // Ensures local.me is populated
+ while (1) {
+ try {
+ replMain(&txn);
+ sleepsecs(5);
+ } catch (AssertionException&) {
+ ReplInfo r("Assertion in replSlaveThread(): sleeping 5 minutes before retry");
+ log() << "Assertion in replSlaveThread(): sleeping 5 minutes before retry" << endl;
+ sleepsecs(300);
+ } catch (DBException& e) {
+ log() << "exception in replSlaveThread(): " << e.what()
+ << ", sleeping 5 minutes before retry" << endl;
+ sleepsecs(300);
+ } catch (...) {
+ log() << "error in replSlaveThread(): sleeping 5 minutes before retry" << endl;
+ sleepsecs(300);
}
+ }
+}
- if ( replSettings.slave ) {
- verify( replSettings.slave == SimpleSlave );
- LOG(1) << "slave=true" << endl;
- stdx::thread repl_thread(replSlaveThread);
- }
+void startMasterSlave(OperationContext* txn) {
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (!replSettings.slave && !replSettings.master)
+ return;
- if ( replSettings.master ) {
- LOG(1) << "master=true" << endl;
- createOplog(txn);
- stdx::thread t(replMasterThread);
- }
+ AuthorizationSession::get(txn->getClient())->grantInternalAuthorization();
- if (replSettings.fastsync) {
- while(!_replMainStarted) // don't allow writes until we've set up from log
- sleepmillis( 50 );
- }
+ {
+ ReplSource temp(txn); // Ensures local.me is populated
}
- int _dummy_z;
-
- void pretouchN(vector<BSONObj>& v, unsigned a, unsigned b) {
- Client::initThreadIfNotAlready("pretouchN");
- OperationContextImpl txn; // XXX
- ScopedTransaction transaction(&txn, MODE_S);
- Lock::GlobalRead lk(txn.lockState());
+ if (replSettings.slave) {
+ verify(replSettings.slave == SimpleSlave);
+ LOG(1) << "slave=true" << endl;
+ stdx::thread repl_thread(replSlaveThread);
+ }
- for( unsigned i = a; i <= b; i++ ) {
- const BSONObj& op = v[i];
- const char *which = "o";
- const char *opType = op.getStringField("op");
- if ( *opType == 'i' )
- ;
- else if( *opType == 'u' )
- which = "o2";
- else
- continue;
- /* todo : other operations */
+ if (replSettings.master) {
+ LOG(1) << "master=true" << endl;
+ createOplog(txn);
+ stdx::thread t(replMasterThread);
+ }
- try {
- BSONObj o = op.getObjectField(which);
- BSONElement _id;
- if( o.getObjectID(_id) ) {
- const char *ns = op.getStringField("ns");
- BSONObjBuilder b;
- b.append(_id);
- BSONObj result;
- OldClientContext ctx(&txn, ns);
- if( Helpers::findById(&txn, ctx.db(), ns, b.done(), result) )
- _dummy_z += result.objsize(); // touch
- }
- }
- catch( DBException& e ) {
- log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' ' << e.toString() << endl;
- }
- }
+ if (replSettings.fastsync) {
+ while (!_replMainStarted) // don't allow writes until we've set up from log
+ sleepmillis(50);
}
+}
+int _dummy_z;
- void pretouchOperation(OperationContext* txn, const BSONObj& op) {
+void pretouchN(vector<BSONObj>& v, unsigned a, unsigned b) {
+ Client::initThreadIfNotAlready("pretouchN");
- if (txn->lockState()->isWriteLocked()) {
- return; // no point pretouching if write locked. not sure if this will ever fire, but just in case.
- }
+ OperationContextImpl txn; // XXX
+ ScopedTransaction transaction(&txn, MODE_S);
+ Lock::GlobalRead lk(txn.lockState());
- const char *which = "o";
- const char *opType = op.getStringField("op");
- if ( *opType == 'i' )
+ for (unsigned i = a; i <= b; i++) {
+ const BSONObj& op = v[i];
+ const char* which = "o";
+ const char* opType = op.getStringField("op");
+ if (*opType == 'i')
;
- else if( *opType == 'u' )
+ else if (*opType == 'u')
which = "o2";
else
- return;
+ continue;
/* todo : other operations */
try {
BSONObj o = op.getObjectField(which);
BSONElement _id;
- if( o.getObjectID(_id) ) {
- const char *ns = op.getStringField("ns");
+ if (o.getObjectID(_id)) {
+ const char* ns = op.getStringField("ns");
BSONObjBuilder b;
b.append(_id);
BSONObj result;
- AutoGetCollectionForRead ctx(txn, ns );
- if (Helpers::findById(txn, ctx.getDb(), ns, b.done(), result)) {
- _dummy_z += result.objsize(); // touch
- }
+ OldClientContext ctx(&txn, ns);
+ if (Helpers::findById(&txn, ctx.db(), ns, b.done(), result))
+ _dummy_z += result.objsize(); // touch
}
+ } catch (DBException& e) {
+ log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' '
+ << e.toString() << endl;
}
- catch( DBException& ) {
- log() << "ignoring assertion in pretouchOperation()" << endl;
+ }
+}
+
+void pretouchOperation(OperationContext* txn, const BSONObj& op) {
+ if (txn->lockState()->isWriteLocked()) {
+ return; // no point pretouching if write locked. not sure if this will ever fire, but just in case.
+ }
+
+ const char* which = "o";
+ const char* opType = op.getStringField("op");
+ if (*opType == 'i')
+ ;
+ else if (*opType == 'u')
+ which = "o2";
+ else
+ return;
+ /* todo : other operations */
+
+ try {
+ BSONObj o = op.getObjectField(which);
+ BSONElement _id;
+ if (o.getObjectID(_id)) {
+ const char* ns = op.getStringField("ns");
+ BSONObjBuilder b;
+ b.append(_id);
+ BSONObj result;
+ AutoGetCollectionForRead ctx(txn, ns);
+ if (Helpers::findById(txn, ctx.getDb(), ns, b.done(), result)) {
+ _dummy_z += result.objsize(); // touch
+ }
}
+ } catch (DBException&) {
+ log() << "ignoring assertion in pretouchOperation()" << endl;
}
+}
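
Both pretouch functions above exist to fault in the target document of an upcoming op before the write lock is taken, so the later apply hits warm pages; the `_dummy_z += result.objsize()` is just a side effect that keeps the read from being optimized away. In miniature (a map lookup standing in for Helpers::findById):

    #include <string>
    #include <unordered_map>

    std::unordered_map<std::string, std::string> collection;  // stand-in storage
    long long bytesTouched = 0;                               // mirrors _dummy_z

    // Read-only lookup by _id; the point is purely to pull the document into
    // memory ("touch" it) ahead of the write-locked apply.
    void pretouchById(const std::string& id) {
        auto it = collection.find(id);
        if (it != collection.end())
            bytesTouched += static_cast<long long>(it->second.size());
    }
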
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/master_slave.h b/src/mongo/db/repl/master_slave.h
index 89a73ddfb90..d290be23de3 100644
--- a/src/mongo/db/repl/master_slave.h
+++ b/src/mongo/db/repl/master_slave.h
@@ -42,152 +42,163 @@
namespace mongo {
- class Database;
- class OldThreadPool;
- class OperationContext;
+class Database;
+class OldThreadPool;
+class OperationContext;
namespace repl {
- // Main entry point for master/slave at startup time.
- void startMasterSlave(OperationContext* txn);
+// Main entry point for master/slave at startup time.
+void startMasterSlave(OperationContext* txn);
- // externed for use with resync.cpp
- extern volatile int relinquishSyncingSome;
- extern volatile int syncing;
+// externed for use with resync.cpp
+extern volatile int relinquishSyncingSome;
+extern volatile int syncing;
- extern const char *replInfo;
+extern const char* replInfo;
- /* A replication exception */
- class SyncException : public DBException {
- public:
- SyncException() : DBException( "sync exception" , 10001 ) {}
- };
+/* A replication exception */
+class SyncException : public DBException {
+public:
+ SyncException() : DBException("sync exception", 10001) {}
+};
- /* A Source is a source from which we can pull (replicate) data.
- stored in collection local.sources.
+/* A Source is a source from which we can pull (replicate) data.
+ stored in collection local.sources.
- Can be a group of things to replicate for several databases.
+ Can be a group of things to replicate for several databases.
- { host: ..., source: ..., only: ..., syncedTo: ..., dbsNextPass: { ... }, incompleteCloneDbs: { ... } }
+ { host: ..., source: ..., only: ..., syncedTo: ..., dbsNextPass: { ... }, incompleteCloneDbs: { ... } }
- 'source' defaults to 'main'; support for multiple source names is
- not done (always use main for now).
+ 'source' defaults to 'main'; support for multiple source names is
+ not done (always use main for now).
+*/
+class ReplSource {
+ std::shared_ptr<OldThreadPool> tp;
+
+ void resync(OperationContext* txn, const std::string& dbName);
+
+ /** @param alreadyLocked caller already put us in write lock if true */
+ void _sync_pullOpLog_applyOperation(OperationContext* txn, BSONObj& op, bool alreadyLocked);
+
+ /* pull some operations from the master's oplog, and apply them.
+ calls sync_pullOpLog_applyOperation
*/
- class ReplSource {
- std::shared_ptr<OldThreadPool> tp;
-
- void resync(OperationContext* txn, const std::string& dbName);
-
- /** @param alreadyLocked caller already put us in write lock if true */
- void _sync_pullOpLog_applyOperation(OperationContext* txn, BSONObj& op, bool alreadyLocked);
-
- /* pull some operations from the master's oplog, and apply them.
- calls sync_pullOpLog_applyOperation
- */
- int _sync_pullOpLog(OperationContext* txn, int& nApplied);
-
- /* we only clone one database per pass, even if a lot need done. This helps us
- avoid overflowing the master's transaction log by doing too much work before going
- back to read more transactions. (Imagine a scenario of slave startup where we try to
- clone 100 databases in one pass.)
- */
- std::set<std::string> addDbNextPass;
-
- std::set<std::string> incompleteCloneDbs;
-
- /// TODO(spencer): Remove this once the LegacyReplicationCoordinator is gone.
- BSONObj _me;
-
- void resyncDrop( OperationContext* txn, const std::string& db );
- // call without the db mutex
- void syncToTailOfRemoteLog();
- std::string ns() const { return std::string( "local.oplog.$" ) + sourceName(); }
- unsigned _sleepAdviceTime;
-
- /**
- * If 'db' is a new database and its name would conflict with that of
- * an existing database, synchronize these database names with the
- * master.
- * @return true iff an op with the specified ns may be applied.
- */
- bool handleDuplicateDbName( OperationContext* txn,
- const BSONObj &op,
- const char* ns,
- const char* db );
-
- // populates _me so that it can be passed to oplogreader for handshakes
- /// TODO(spencer): Remove this function once the LegacyReplicationCoordinator is gone.
- void ensureMe(OperationContext* txn);
-
- void forceResync(OperationContext* txn, const char *requester);
-
- bool _connect(OplogReader* reader, const HostAndPort& host, const OID& myRID);
- public:
- OplogReader oplogReader;
-
- void applyCommand(OperationContext* txn, const BSONObj& op);
- void applyOperation(OperationContext* txn, Database* db, const BSONObj& op);
- std::string hostName; // ip addr or hostname plus optionally, ":<port>"
- std::string _sourceName; // a logical source name.
- std::string sourceName() const { return _sourceName.empty() ? "main" : _sourceName; }
- std::string only; // only a certain db. note that in the sources collection, this may not be changed once you start replicating.
-
- /* the last time point we have already synced up to (in the remote/master's oplog). */
- Timestamp syncedTo;
-
- int nClonedThisPass;
-
- typedef std::vector< std::shared_ptr< ReplSource > > SourceVector;
- static void loadAll(OperationContext* txn, SourceVector&);
-
- explicit ReplSource(OperationContext* txn, BSONObj);
- // This is not the constructor you are looking for. Always prefer the version that takes
- // a BSONObj. This is public only as a hack so that the ReplicationCoordinator can find
- // out the process's RID in master/slave setups.
- ReplSource(OperationContext* txn);
-
- /* -1 = error */
- int sync(OperationContext* txn, int& nApplied);
-
- void save(OperationContext* txn); // write ourself to local.sources
-
- // make a jsobj from our member fields of the form
- // { host: ..., source: ..., syncedTo: ... }
- BSONObj jsobj();
-
- bool operator==(const ReplSource&r) const {
- return hostName == r.hostName && sourceName() == r.sourceName();
- }
- std::string toString() const { return sourceName() + "@" + hostName; }
-
- bool haveMoreDbsToSync() const { return !addDbNextPass.empty(); }
- int sleepAdvice() const {
- if ( !_sleepAdviceTime )
- return 0;
- int wait = _sleepAdviceTime - unsigned( time( 0 ) );
- return wait > 0 ? wait : 0;
- }
-
- static bool throttledForceResyncDead( OperationContext* txn, const char *requester );
- static void forceResyncDead( OperationContext* txn, const char *requester );
- };
+ int _sync_pullOpLog(OperationContext* txn, int& nApplied);
+
+    /* we only clone one database per pass, even if many need to be done. This helps us
+ avoid overflowing the master's transaction log by doing too much work before going
+ back to read more transactions. (Imagine a scenario of slave startup where we try to
+ clone 100 databases in one pass.)
+ */
+ std::set<std::string> addDbNextPass;
+
+ std::set<std::string> incompleteCloneDbs;
+
+ /// TODO(spencer): Remove this once the LegacyReplicationCoordinator is gone.
+ BSONObj _me;
+
+ void resyncDrop(OperationContext* txn, const std::string& db);
+ // call without the db mutex
+ void syncToTailOfRemoteLog();
+ std::string ns() const {
+ return std::string("local.oplog.$") + sourceName();
+ }
+ unsigned _sleepAdviceTime;
/**
- * Helper class used to set and query an ignore state for a named database.
- * The ignore state will expire after a specified Timestamp.
+ * If 'db' is a new database and its name would conflict with that of
+ * an existing database, synchronize these database names with the
+ * master.
+ * @return true iff an op with the specified ns may be applied.
*/
- class DatabaseIgnorer {
- public:
- /** Indicate that operations for 'db' should be ignored until after 'futureOplogTime' */
- void doIgnoreUntilAfter( const std::string &db, const Timestamp &futureOplogTime );
- /**
- * Query ignore state of 'db'; if 'currentOplogTime' is after the ignore
- * limit, the ignore state will be cleared.
- */
- bool ignoreAt( const std::string &db, const Timestamp &currentOplogTime );
- private:
- std::map< std::string, Timestamp > _ignores;
- };
-
-} // namespace repl
-} // namespace mongo
+ bool handleDuplicateDbName(OperationContext* txn,
+ const BSONObj& op,
+ const char* ns,
+ const char* db);
+
+ // populates _me so that it can be passed to oplogreader for handshakes
+ /// TODO(spencer): Remove this function once the LegacyReplicationCoordinator is gone.
+ void ensureMe(OperationContext* txn);
+
+ void forceResync(OperationContext* txn, const char* requester);
+
+ bool _connect(OplogReader* reader, const HostAndPort& host, const OID& myRID);
+
+public:
+ OplogReader oplogReader;
+
+ void applyCommand(OperationContext* txn, const BSONObj& op);
+ void applyOperation(OperationContext* txn, Database* db, const BSONObj& op);
+ std::string hostName; // ip addr or hostname plus optionally, ":<port>"
+ std::string _sourceName; // a logical source name.
+ std::string sourceName() const {
+ return _sourceName.empty() ? "main" : _sourceName;
+ }
+ std::string
+ only; // only a certain db. note that in the sources collection, this may not be changed once you start replicating.
+
+ /* the last time point we have already synced up to (in the remote/master's oplog). */
+ Timestamp syncedTo;
+
+ int nClonedThisPass;
+
+ typedef std::vector<std::shared_ptr<ReplSource>> SourceVector;
+ static void loadAll(OperationContext* txn, SourceVector&);
+
+ explicit ReplSource(OperationContext* txn, BSONObj);
+ // This is not the constructor you are looking for. Always prefer the version that takes
+ // a BSONObj. This is public only as a hack so that the ReplicationCoordinator can find
+ // out the process's RID in master/slave setups.
+ ReplSource(OperationContext* txn);
+
+ /* -1 = error */
+ int sync(OperationContext* txn, int& nApplied);
+
+ void save(OperationContext* txn); // write ourself to local.sources
+
+ // make a jsobj from our member fields of the form
+ // { host: ..., source: ..., syncedTo: ... }
+ BSONObj jsobj();
+
+ bool operator==(const ReplSource& r) const {
+ return hostName == r.hostName && sourceName() == r.sourceName();
+ }
+ std::string toString() const {
+ return sourceName() + "@" + hostName;
+ }
+
+ bool haveMoreDbsToSync() const {
+ return !addDbNextPass.empty();
+ }
+ int sleepAdvice() const {
+ if (!_sleepAdviceTime)
+ return 0;
+ int wait = _sleepAdviceTime - unsigned(time(0));
+ return wait > 0 ? wait : 0;
+ }
+
+ static bool throttledForceResyncDead(OperationContext* txn, const char* requester);
+ static void forceResyncDead(OperationContext* txn, const char* requester);
+};
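
sleepAdvice() above converts the absolute wakeup time stored in _sleepAdviceTime (epoch seconds, set by the slaveDelay path in _sync_pullOpLog) into a relative, non-negative wait; as a standalone restatement:

    #include <ctime>

    int sleepAdviceFor(unsigned sleepAdviceTime) {
        if (!sleepAdviceTime)
            return 0;  // no advice recorded
        int wait = static_cast<int>(sleepAdviceTime - unsigned(time(0)));
        return wait > 0 ? wait : 0;  // clamp to non-negative
    }
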
+
+/**
+ * Helper class used to set and query an ignore state for a named database.
+ * The ignore state will expire after a specified Timestamp.
+ */
+class DatabaseIgnorer {
+public:
+ /** Indicate that operations for 'db' should be ignored until after 'futureOplogTime' */
+ void doIgnoreUntilAfter(const std::string& db, const Timestamp& futureOplogTime);
+ /**
+ * Query ignore state of 'db'; if 'currentOplogTime' is after the ignore
+ * limit, the ignore state will be cleared.
+ */
+ bool ignoreAt(const std::string& db, const Timestamp& currentOplogTime);
+
+private:
+ std::map<std::string, Timestamp> _ignores;
+};
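
A self-contained sketch of the DatabaseIgnorer contract documented above, with a plain integer standing in for Timestamp: ignoreAt() keeps returning true until the current optime passes the recorded limit, at which point the state clears itself.

    #include <cassert>
    #include <map>
    #include <string>

    class DatabaseIgnorerSketch {
    public:
        void doIgnoreUntilAfter(const std::string& db, long long futureOplogTime) {
            long long& limit = _ignores[db];
            if (limit < futureOplogTime)
                limit = futureOplogTime;  // only ever extend the ignore window
        }
        bool ignoreAt(const std::string& db, long long currentOplogTime) {
            std::map<std::string, long long>::iterator it = _ignores.find(db);
            if (it == _ignores.end())
                return false;
            if (currentOplogTime > it->second) {
                _ignores.erase(it);  // window passed: stop ignoring
                return false;
            }
            return true;
        }

    private:
        std::map<std::string, long long> _ignores;
    };

    int main() {
        DatabaseIgnorerSketch ig;
        ig.doIgnoreUntilAfter("test", 100);
        assert(ig.ignoreAt("test", 50));    // inside the window: ignore the op
        assert(!ig.ignoreAt("test", 101));  // past the window: state cleared
        assert(!ig.ignoreAt("test", 50));   // cleared for good
    }
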
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/member_config.cpp b/src/mongo/db/repl/member_config.cpp
index 02711adedd3..a6f8e311928 100644
--- a/src/mongo/db/repl/member_config.cpp
+++ b/src/mongo/db/repl/member_config.cpp
@@ -40,280 +40,268 @@
namespace mongo {
namespace repl {
- const std::string MemberConfig::kIdFieldName = "_id";
- const std::string MemberConfig::kVotesFieldName = "votes";
- const std::string MemberConfig::kPriorityFieldName = "priority";
- const std::string MemberConfig::kHostFieldName = "host";
- const std::string MemberConfig::kHiddenFieldName = "hidden";
- const std::string MemberConfig::kSlaveDelayFieldName = "slaveDelay";
- const std::string MemberConfig::kArbiterOnlyFieldName = "arbiterOnly";
- const std::string MemberConfig::kBuildIndexesFieldName = "buildIndexes";
- const std::string MemberConfig::kTagsFieldName = "tags";
- const std::string MemberConfig::kInternalVoterTagName = "$voter";
- const std::string MemberConfig::kInternalElectableTagName = "$electable";
- const std::string MemberConfig::kInternalAllTagName = "$all";
+const std::string MemberConfig::kIdFieldName = "_id";
+const std::string MemberConfig::kVotesFieldName = "votes";
+const std::string MemberConfig::kPriorityFieldName = "priority";
+const std::string MemberConfig::kHostFieldName = "host";
+const std::string MemberConfig::kHiddenFieldName = "hidden";
+const std::string MemberConfig::kSlaveDelayFieldName = "slaveDelay";
+const std::string MemberConfig::kArbiterOnlyFieldName = "arbiterOnly";
+const std::string MemberConfig::kBuildIndexesFieldName = "buildIndexes";
+const std::string MemberConfig::kTagsFieldName = "tags";
+const std::string MemberConfig::kInternalVoterTagName = "$voter";
+const std::string MemberConfig::kInternalElectableTagName = "$electable";
+const std::string MemberConfig::kInternalAllTagName = "$all";
namespace {
- const std::string kLegalMemberConfigFieldNames[] = {
- MemberConfig::kIdFieldName,
- MemberConfig::kVotesFieldName,
- MemberConfig::kPriorityFieldName,
- MemberConfig::kHostFieldName,
- MemberConfig::kHiddenFieldName,
- MemberConfig::kSlaveDelayFieldName,
- MemberConfig::kArbiterOnlyFieldName,
- MemberConfig::kBuildIndexesFieldName,
- MemberConfig::kTagsFieldName
- };
-
- const int kVotesFieldDefault = 1;
- const double kPriorityFieldDefault = 1.0;
- const Seconds kSlaveDelayFieldDefault(0);
- const bool kArbiterOnlyFieldDefault = false;
- const bool kHiddenFieldDefault = false;
- const bool kBuildIndexesFieldDefault = true;
-
- const Seconds kMaxSlaveDelay(3600 * 24 * 366);
+const std::string kLegalMemberConfigFieldNames[] = {MemberConfig::kIdFieldName,
+ MemberConfig::kVotesFieldName,
+ MemberConfig::kPriorityFieldName,
+ MemberConfig::kHostFieldName,
+ MemberConfig::kHiddenFieldName,
+ MemberConfig::kSlaveDelayFieldName,
+ MemberConfig::kArbiterOnlyFieldName,
+ MemberConfig::kBuildIndexesFieldName,
+ MemberConfig::kTagsFieldName};
+
+const int kVotesFieldDefault = 1;
+const double kPriorityFieldDefault = 1.0;
+const Seconds kSlaveDelayFieldDefault(0);
+const bool kArbiterOnlyFieldDefault = false;
+const bool kHiddenFieldDefault = false;
+const bool kBuildIndexesFieldDefault = true;
+
+const Seconds kMaxSlaveDelay(3600 * 24 * 366);
} // namespace
- Status MemberConfig::initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig) {
- Status status = bsonCheckOnlyHasFields(
- "replica set member configuration", mcfg, kLegalMemberConfigFieldNames);
- if (!status.isOK())
- return status;
-
- //
- // Parse _id field.
- //
- BSONElement idElement = mcfg[kIdFieldName];
- if (idElement.eoo()) {
- return Status(ErrorCodes::NoSuchKey, str::stream() << kIdFieldName <<
- " field is missing");
- }
- if (!idElement.isNumber()) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kIdFieldName <<
- " field has non-numeric type " << typeName(idElement.type()));
- }
- _id = idElement.numberInt();
-
- //
- // Parse h field.
- //
- std::string hostAndPortString;
- status = bsonExtractStringField(mcfg, kHostFieldName, &hostAndPortString);
- if (!status.isOK())
- return status;
- boost::trim(hostAndPortString);
- status = _host.initialize(hostAndPortString);
- if (!status.isOK())
- return status;
- if (!_host.hasPort()) {
- // make port explicit even if default.
- _host = HostAndPort(_host.host(), _host.port());
- }
+Status MemberConfig::initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig) {
+ Status status = bsonCheckOnlyHasFields(
+ "replica set member configuration", mcfg, kLegalMemberConfigFieldNames);
+ if (!status.isOK())
+ return status;
- //
- // Parse votes field.
- //
- BSONElement votesElement = mcfg[kVotesFieldName];
- if (votesElement.eoo()) {
- _votes = kVotesFieldDefault;
- }
- else if (votesElement.isNumber()) {
- _votes = votesElement.numberInt();
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kVotesFieldName <<
- " field value has non-numeric type " <<
- typeName(votesElement.type()));
- }
+ //
+ // Parse _id field.
+ //
+ BSONElement idElement = mcfg[kIdFieldName];
+ if (idElement.eoo()) {
+ return Status(ErrorCodes::NoSuchKey, str::stream() << kIdFieldName << " field is missing");
+ }
+ if (!idElement.isNumber()) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kIdFieldName << " field has non-numeric type "
+ << typeName(idElement.type()));
+ }
+ _id = idElement.numberInt();
- //
- // Parse priority field.
- //
- BSONElement priorityElement = mcfg[kPriorityFieldName];
- if (priorityElement.eoo()) {
- _priority = kPriorityFieldDefault;
- }
- else if (priorityElement.isNumber()) {
- _priority = priorityElement.numberDouble();
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kPriorityFieldName <<
- " field has non-numeric type " << typeName(priorityElement.type()));
- }
+ //
+ // Parse h field.
+ //
+ std::string hostAndPortString;
+ status = bsonExtractStringField(mcfg, kHostFieldName, &hostAndPortString);
+ if (!status.isOK())
+ return status;
+ boost::trim(hostAndPortString);
+ status = _host.initialize(hostAndPortString);
+ if (!status.isOK())
+ return status;
+ if (!_host.hasPort()) {
+ // make port explicit even if default.
+ _host = HostAndPort(_host.host(), _host.port());
+ }
- //
- // Parse arbiterOnly field.
- //
- status = bsonExtractBooleanFieldWithDefault(mcfg,
- kArbiterOnlyFieldName,
- kArbiterOnlyFieldDefault,
- &_arbiterOnly);
- if (!status.isOK())
- return status;
-
- //
- // Parse slaveDelay field.
- //
- BSONElement slaveDelayElement = mcfg[kSlaveDelayFieldName];
- if (slaveDelayElement.eoo()) {
- _slaveDelay = kSlaveDelayFieldDefault;
- }
- else if (slaveDelayElement.isNumber()) {
- _slaveDelay = Seconds(slaveDelayElement.numberInt());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kSlaveDelayFieldName <<
- " field value has non-numeric type " <<
- typeName(slaveDelayElement.type()));
- }
+ //
+ // Parse votes field.
+ //
+ BSONElement votesElement = mcfg[kVotesFieldName];
+ if (votesElement.eoo()) {
+ _votes = kVotesFieldDefault;
+ } else if (votesElement.isNumber()) {
+ _votes = votesElement.numberInt();
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kVotesFieldName << " field value has non-numeric type "
+ << typeName(votesElement.type()));
+ }
+
+ //
+ // Parse priority field.
+ //
+ BSONElement priorityElement = mcfg[kPriorityFieldName];
+ if (priorityElement.eoo()) {
+ _priority = kPriorityFieldDefault;
+ } else if (priorityElement.isNumber()) {
+ _priority = priorityElement.numberDouble();
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kPriorityFieldName << " field has non-numeric type "
+ << typeName(priorityElement.type()));
+ }
+
+ //
+ // Parse arbiterOnly field.
+ //
+ status = bsonExtractBooleanFieldWithDefault(
+ mcfg, kArbiterOnlyFieldName, kArbiterOnlyFieldDefault, &_arbiterOnly);
+ if (!status.isOK())
+ return status;
- //
- // Parse hidden field.
- //
- status = bsonExtractBooleanFieldWithDefault(mcfg,
- kHiddenFieldName,
- kHiddenFieldDefault,
- &_hidden);
- if (!status.isOK())
- return status;
-
- //
- // Parse buildIndexes field.
- //
- status = bsonExtractBooleanFieldWithDefault(mcfg,
- kBuildIndexesFieldName,
- kBuildIndexesFieldDefault,
- &_buildIndexes);
- if (!status.isOK())
- return status;
-
- //
- // Parse "tags" field.
- //
- _tags.clear();
- BSONElement tagsElement;
- status = bsonExtractTypedField(mcfg, kTagsFieldName, Object, &tagsElement);
- if (status.isOK()) {
- for (BSONObj::iterator tagIter(tagsElement.Obj()); tagIter.more();) {
- const BSONElement& tag = tagIter.next();
- if (tag.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "tags." <<
- tag.fieldName() << " field has non-string value of type " <<
- typeName(tag.type()));
- }
- _tags.push_back(tagConfig->makeTag(tag.fieldNameStringData(),
- tag.valueStringData()));
+ //
+ // Parse slaveDelay field.
+ //
+ BSONElement slaveDelayElement = mcfg[kSlaveDelayFieldName];
+ if (slaveDelayElement.eoo()) {
+ _slaveDelay = kSlaveDelayFieldDefault;
+ } else if (slaveDelayElement.isNumber()) {
+ _slaveDelay = Seconds(slaveDelayElement.numberInt());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kSlaveDelayFieldName << " field value has non-numeric type "
+ << typeName(slaveDelayElement.type()));
+ }
+
+ //
+ // Parse hidden field.
+ //
+ status =
+ bsonExtractBooleanFieldWithDefault(mcfg, kHiddenFieldName, kHiddenFieldDefault, &_hidden);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse buildIndexes field.
+ //
+ status = bsonExtractBooleanFieldWithDefault(
+ mcfg, kBuildIndexesFieldName, kBuildIndexesFieldDefault, &_buildIndexes);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse "tags" field.
+ //
+ _tags.clear();
+ BSONElement tagsElement;
+ status = bsonExtractTypedField(mcfg, kTagsFieldName, Object, &tagsElement);
+ if (status.isOK()) {
+ for (BSONObj::iterator tagIter(tagsElement.Obj()); tagIter.more();) {
+ const BSONElement& tag = tagIter.next();
+ if (tag.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "tags." << tag.fieldName()
+ << " field has non-string value of type "
+ << typeName(tag.type()));
}
+ _tags.push_back(tagConfig->makeTag(tag.fieldNameStringData(), tag.valueStringData()));
}
- else if (ErrorCodes::NoSuchKey != status) {
- return status;
- }
+ } else if (ErrorCodes::NoSuchKey != status) {
+ return status;
+ }
- //
- // Add internal tags based on other member properties.
- //
-
- // Add a voter tag if this non-arbiter member votes; use _id for uniquity.
- const std::string id = str::stream() << _id;
- if (isVoter() && !_arbiterOnly) {
- _tags.push_back(tagConfig->makeTag(kInternalVoterTagName, id));
- }
+ //
+ // Add internal tags based on other member properties.
+ //
- // Add an electable tag if this member is electable.
- if (isElectable()) {
- _tags.push_back(tagConfig->makeTag(kInternalElectableTagName, id));
- }
+    // Add a voter tag if this non-arbiter member votes; use _id for uniqueness.
+ const std::string id = str::stream() << _id;
+ if (isVoter() && !_arbiterOnly) {
+ _tags.push_back(tagConfig->makeTag(kInternalVoterTagName, id));
+ }
- // Add a tag for generic counting of this node.
- if (!_arbiterOnly) {
- _tags.push_back(tagConfig->makeTag(kInternalAllTagName, id));
- }
+ // Add an electable tag if this member is electable.
+ if (isElectable()) {
+ _tags.push_back(tagConfig->makeTag(kInternalElectableTagName, id));
+ }
- return Status::OK();
+ // Add a tag for generic counting of this node.
+ if (!_arbiterOnly) {
+ _tags.push_back(tagConfig->makeTag(kInternalAllTagName, id));
}
- Status MemberConfig::validate() const {
- if (_id < 0 || _id > 255) {
- return Status(ErrorCodes::BadValue, str::stream() << kIdFieldName <<
- " field value of " << _id << " is out of range.");
- }
+ return Status::OK();
+}
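
Concretely, a minimal member ({ _id: 0, host: "h" } with all defaults: voting, priority 1, non-arbiter) leaves initialize() with exactly three internal tags, which is the count the minimal-config unit test later in this diff expects from getNumTags():

    // { "$voter": "0" }      the member votes and is not an arbiter
    // { "$electable": "0" }  priority > 0 and not an arbiter
    // { "$all": "0" }        generic counting tag for non-arbiters
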
- if (_priority < 0 || _priority > 1000) {
- return Status(ErrorCodes::BadValue, str::stream() << kPriorityFieldName <<
- " field value of " << _priority << " is out of range");
- }
- if (_votes != 0 && _votes != 1) {
- return Status(ErrorCodes::BadValue, str::stream() << kVotesFieldName <<
- " field value is " << _votes << " but must be 0 or 1");
- }
- if (_arbiterOnly) {
- if (!_tags.empty()) {
- return Status(ErrorCodes::BadValue, "Cannot set tags on arbiters.");
- }
- if (!isVoter()) {
- return Status(ErrorCodes::BadValue, "Arbiter must vote (cannot have 0 votes)");
- }
- }
- if (_slaveDelay < Seconds(0) || _slaveDelay > kMaxSlaveDelay) {
- return Status(ErrorCodes::BadValue, str::stream() << kSlaveDelayFieldName <<
- " field value of " << durationCount<Seconds>(_slaveDelay) <<
- " seconds is out of range");
- }
- if (_slaveDelay > Seconds(0) && _priority != 0) {
- return Status(ErrorCodes::BadValue, "slaveDelay requires priority be zero");
- }
- if (_hidden && _priority != 0) {
- return Status(ErrorCodes::BadValue, "priority must be 0 when hidden=true");
+Status MemberConfig::validate() const {
+ if (_id < 0 || _id > 255) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kIdFieldName << " field value of " << _id
+ << " is out of range.");
+ }
+
+ if (_priority < 0 || _priority > 1000) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kPriorityFieldName << " field value of " << _priority
+ << " is out of range");
+ }
+ if (_votes != 0 && _votes != 1) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kVotesFieldName << " field value is " << _votes
+ << " but must be 0 or 1");
+ }
+ if (_arbiterOnly) {
+ if (!_tags.empty()) {
+ return Status(ErrorCodes::BadValue, "Cannot set tags on arbiters.");
}
- if (!_buildIndexes && _priority != 0) {
- return Status(ErrorCodes::BadValue, "priority must be 0 when buildIndexes=false");
+ if (!isVoter()) {
+ return Status(ErrorCodes::BadValue, "Arbiter must vote (cannot have 0 votes)");
}
- return Status::OK();
}
+ if (_slaveDelay < Seconds(0) || _slaveDelay > kMaxSlaveDelay) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kSlaveDelayFieldName << " field value of "
+ << durationCount<Seconds>(_slaveDelay)
+ << " seconds is out of range");
+ }
+ if (_slaveDelay > Seconds(0) && _priority != 0) {
+ return Status(ErrorCodes::BadValue, "slaveDelay requires priority be zero");
+ }
+ if (_hidden && _priority != 0) {
+ return Status(ErrorCodes::BadValue, "priority must be 0 when hidden=true");
+ }
+ if (!_buildIndexes && _priority != 0) {
+ return Status(ErrorCodes::BadValue, "priority must be 0 when buildIndexes=false");
+ }
+ return Status::OK();
+}
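
Two worked examples against these rules, written in the style of the unit tests later in this diff (mc and tagConfig declared as there); a hidden, delayed member must carry priority 0, while slaveDelay combined with a nonzero priority fails validation:

    ASSERT_OK(mc.initialize(BSON("_id" << 1 << "host" << "h:12345" << "priority" << 0
                                       << "hidden" << true << "slaveDelay" << 3600),
                            &tagConfig));
    ASSERT_OK(mc.validate());

    ASSERT_OK(mc.initialize(BSON("_id" << 2 << "host" << "h:12345" << "priority" << 1
                                       << "slaveDelay" << 3600),
                            &tagConfig));
    ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
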
- bool MemberConfig::hasTags(const ReplicaSetTagConfig& tagConfig) const {
- for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin();
- tag != _tags.end();
- tag++) {
- std::string tagKey = tagConfig.getTagKey(*tag);
- if (tagKey[0] == '$') {
- // Filter out internal tags
- continue;
- }
- return true;
+bool MemberConfig::hasTags(const ReplicaSetTagConfig& tagConfig) const {
+ for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin(); tag != _tags.end();
+ tag++) {
+ std::string tagKey = tagConfig.getTagKey(*tag);
+ if (tagKey[0] == '$') {
+ // Filter out internal tags
+ continue;
}
- return false;
+ return true;
}
+ return false;
+}
- BSONObj MemberConfig::toBSON(const ReplicaSetTagConfig& tagConfig) const {
- BSONObjBuilder configBuilder;
- configBuilder.append("_id", _id);
- configBuilder.append("host", _host.toString());
- configBuilder.append("arbiterOnly", _arbiterOnly);
- configBuilder.append("buildIndexes", _buildIndexes);
- configBuilder.append("hidden", _hidden);
- configBuilder.append("priority", _priority);
-
- BSONObjBuilder tags(configBuilder.subobjStart("tags"));
- for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin();
- tag != _tags.end();
- tag++) {
- std::string tagKey = tagConfig.getTagKey(*tag);
- if (tagKey[0] == '$') {
- // Filter out internal tags
- continue;
- }
- tags.append(tagKey, tagConfig.getTagValue(*tag));
- }
- tags.done();
+BSONObj MemberConfig::toBSON(const ReplicaSetTagConfig& tagConfig) const {
+ BSONObjBuilder configBuilder;
+ configBuilder.append("_id", _id);
+ configBuilder.append("host", _host.toString());
+ configBuilder.append("arbiterOnly", _arbiterOnly);
+ configBuilder.append("buildIndexes", _buildIndexes);
+ configBuilder.append("hidden", _hidden);
+ configBuilder.append("priority", _priority);
- configBuilder.append("slaveDelay", durationCount<Seconds>(_slaveDelay));
- configBuilder.append("votes", getNumVotes());
- return configBuilder.obj();
+ BSONObjBuilder tags(configBuilder.subobjStart("tags"));
+ for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin(); tag != _tags.end();
+ tag++) {
+ std::string tagKey = tagConfig.getTagKey(*tag);
+ if (tagKey[0] == '$') {
+ // Filter out internal tags
+ continue;
+ }
+ tags.append(tagKey, tagConfig.getTagValue(*tag));
}
+ tags.done();
+
+ configBuilder.append("slaveDelay", durationCount<Seconds>(_slaveDelay));
+ configBuilder.append("votes", getNumVotes());
+ return configBuilder.obj();
+}
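
For reference, a default member with no user tags renders through the append sequence above as (illustrative):

    // { _id: 0, host: "h:27017", arbiterOnly: false, buildIndexes: true,
    //   hidden: false, priority: 1.0, tags: {}, slaveDelay: 0, votes: 1 }

Internal '$'-prefixed tags are filtered out, so the tags subdocument stays empty unless the config supplied its own.
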
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/member_config.h b/src/mongo/db/repl/member_config.h
index f980a8e2bc8..694a8941f8e 100644
--- a/src/mongo/db/repl/member_config.h
+++ b/src/mongo/db/repl/member_config.h
@@ -38,143 +38,168 @@
namespace mongo {
- class BSONObj;
+class BSONObj;
namespace repl {
+/**
+ * Representation of the configuration information about a particular member of a replica set.
+ */
+class MemberConfig {
+public:
+ typedef std::vector<ReplicaSetTag>::const_iterator TagIterator;
+
+ static const std::string kIdFieldName;
+ static const std::string kVotesFieldName;
+ static const std::string kPriorityFieldName;
+ static const std::string kHostFieldName;
+ static const std::string kHiddenFieldName;
+ static const std::string kSlaveDelayFieldName;
+ static const std::string kArbiterOnlyFieldName;
+ static const std::string kBuildIndexesFieldName;
+ static const std::string kTagsFieldName;
+ static const std::string kInternalVoterTagName;
+ static const std::string kInternalElectableTagName;
+ static const std::string kInternalAllTagName;
+
+ /**
+ * Default constructor, produces a MemberConfig in an undefined state.
+     * Must successfully call initialize() before calling validate() or the
+ * accessors.
+ */
+ MemberConfig() : _slaveDelay(0) {}
+
+ /**
+ * Initializes this MemberConfig from the contents of "mcfg".
+ *
+ * If "mcfg" describes any tags, builds ReplicaSetTags for this
+ * configuration using "tagConfig" as the tag's namespace. This may
+ * have the effect of altering "tagConfig" when "mcfg" describes a
+ * tag not previously added to "tagConfig".
+ */
+ Status initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig);
+
+ /**
+ * Performs basic consistency checks on the member configuration.
+ */
+ Status validate() const;
+
+ /**
+ * Gets the identifier for this member, unique within a ReplicaSetConfig.
+ */
+ int getId() const {
+ return _id;
+ }
+
+ /**
+ * Gets the canonical name of this member, by which other members and clients
+ * will contact it.
+ */
+ const HostAndPort& getHostAndPort() const {
+ return _host;
+ }
+
+ /**
+ * Gets this member's priority. Higher means more likely to be elected
+ * primary.
+ */
+ double getPriority() const {
+ return _priority;
+ }
+
/**
- * Representation of the configuration information about a particular member of a replica set.
- */
- class MemberConfig {
- public:
- typedef std::vector<ReplicaSetTag>::const_iterator TagIterator;
-
- static const std::string kIdFieldName;
- static const std::string kVotesFieldName;
- static const std::string kPriorityFieldName;
- static const std::string kHostFieldName;
- static const std::string kHiddenFieldName;
- static const std::string kSlaveDelayFieldName;
- static const std::string kArbiterOnlyFieldName;
- static const std::string kBuildIndexesFieldName;
- static const std::string kTagsFieldName;
- static const std::string kInternalVoterTagName;
- static const std::string kInternalElectableTagName;
- static const std::string kInternalAllTagName;
-
- /**
- * Default constructor, produces a MemberConfig in an undefined state.
- * Must successfully call initialze() before calling validate() or the
- * accessors.
- */
- MemberConfig() : _slaveDelay(0) {}
-
- /**
- * Initializes this MemberConfig from the contents of "mcfg".
- *
- * If "mcfg" describes any tags, builds ReplicaSetTags for this
- * configuration using "tagConfig" as the tag's namespace. This may
- * have the effect of altering "tagConfig" when "mcfg" describes a
- * tag not previously added to "tagConfig".
- */
- Status initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig);
-
- /**
- * Performs basic consistency checks on the member configuration.
- */
- Status validate() const;
-
- /**
- * Gets the identifier for this member, unique within a ReplicaSetConfig.
- */
- int getId() const { return _id; }
-
- /**
- * Gets the canonical name of this member, by which other members and clients
- * will contact it.
- */
- const HostAndPort& getHostAndPort() const { return _host; }
-
- /**
- * Gets this member's priority. Higher means more likely to be elected
- * primary.
- */
- double getPriority() const { return _priority; }
-
- /**
- * Gets the amount of time behind the primary that this member will atempt to
- * remain. Zero seconds means stay as caught up as possible.
- */
- Seconds getSlaveDelay() const { return _slaveDelay; }
-
- /**
- * Returns true if this member may vote in elections.
- */
- bool isVoter() const { return _votes != 0; }
-
- /**
- * Returns the number of votes that this member gets.
- */
- int getNumVotes() const { return isVoter() ? 1 : 0; }
-
- /**
- * Returns true if this member is an arbiter (is not data-bearing).
- */
- bool isArbiter() const { return _arbiterOnly; }
-
- /**
- * Returns true if this member is hidden (not reported by isMaster, not electable).
- */
- bool isHidden() const { return _hidden; }
-
- /**
- * Returns true if this member should build secondary indexes.
- */
- bool shouldBuildIndexes() const { return _buildIndexes; }
-
- /**
- * Gets the number of replica set tags, including internal '$' tags, for this member.
- */
- size_t getNumTags() const { return _tags.size(); }
-
- /**
- * Returns true if this MemberConfig has any non-internal tags, using "tagConfig" to
- * determine the internal property of the tags.
- */
- bool hasTags(const ReplicaSetTagConfig& tagConfig) const;
-
- /**
- * Gets a begin iterator over the tags for this member.
- */
- TagIterator tagsBegin() const { return _tags.begin(); }
-
- /**
- * Gets an end iterator over the tags for this member.
- */
- TagIterator tagsEnd() const { return _tags.end(); }
-
- /**
- * Returns true if this represents the configuration of an electable member.
- */
- bool isElectable() const { return !isArbiter() && getPriority() > 0; }
-
- /**
- * Returns the member config as a BSONObj, using "tagConfig" to generate the tag subdoc.
- */
- BSONObj toBSON(const ReplicaSetTagConfig& tagConfig) const;
-
- private:
-
- int _id;
- HostAndPort _host;
- double _priority; // 0 means can never be primary
- int _votes; // Can this member vote? Only 0 and 1 are valid. Default 1.
- bool _arbiterOnly;
- Seconds _slaveDelay;
- bool _hidden; // if set, don't advertise to drivers in isMaster.
- bool _buildIndexes; // if false, do not create any non-_id indexes
- std::vector<ReplicaSetTag> _tags; // tagging for data center, rack, etc.
- };
+     * Gets the amount of time behind the primary that this member will attempt to
+ * remain. Zero seconds means stay as caught up as possible.
+ */
+ Seconds getSlaveDelay() const {
+ return _slaveDelay;
+ }
+
+ /**
+ * Returns true if this member may vote in elections.
+ */
+ bool isVoter() const {
+ return _votes != 0;
+ }
+
+ /**
+ * Returns the number of votes that this member gets.
+ */
+ int getNumVotes() const {
+ return isVoter() ? 1 : 0;
+ }
+
+ /**
+ * Returns true if this member is an arbiter (is not data-bearing).
+ */
+ bool isArbiter() const {
+ return _arbiterOnly;
+ }
+
+ /**
+ * Returns true if this member is hidden (not reported by isMaster, not electable).
+ */
+ bool isHidden() const {
+ return _hidden;
+ }
+
+ /**
+ * Returns true if this member should build secondary indexes.
+ */
+ bool shouldBuildIndexes() const {
+ return _buildIndexes;
+ }
+
+ /**
+ * Gets the number of replica set tags, including internal '$' tags, for this member.
+ */
+ size_t getNumTags() const {
+ return _tags.size();
+ }
+
+ /**
+ * Returns true if this MemberConfig has any non-internal tags, using "tagConfig" to
+ * determine the internal property of the tags.
+ */
+ bool hasTags(const ReplicaSetTagConfig& tagConfig) const;
+
+ /**
+ * Gets a begin iterator over the tags for this member.
+ */
+ TagIterator tagsBegin() const {
+ return _tags.begin();
+ }
+
+ /**
+ * Gets an end iterator over the tags for this member.
+ */
+ TagIterator tagsEnd() const {
+ return _tags.end();
+ }
+
+ /**
+ * Returns true if this represents the configuration of an electable member.
+ */
+ bool isElectable() const {
+ return !isArbiter() && getPriority() > 0;
+ }
+
+ /**
+ * Returns the member config as a BSONObj, using "tagConfig" to generate the tag subdoc.
+ */
+ BSONObj toBSON(const ReplicaSetTagConfig& tagConfig) const;
+
+private:
+ int _id;
+ HostAndPort _host;
+ double _priority; // 0 means can never be primary
+ int _votes; // Can this member vote? Only 0 and 1 are valid. Default 1.
+ bool _arbiterOnly;
+ Seconds _slaveDelay;
+ bool _hidden; // if set, don't advertise to drivers in isMaster.
+ bool _buildIndexes; // if false, do not create any non-_id indexes
+ std::vector<ReplicaSetTag> _tags; // tagging for data center, rack, etc.
+};
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/member_config_test.cpp b/src/mongo/db/repl/member_config_test.cpp
index 6411156f4f8..c53556f411d 100644
--- a/src/mongo/db/repl/member_config_test.cpp
+++ b/src/mongo/db/repl/member_config_test.cpp
@@ -38,328 +38,417 @@ namespace mongo {
namespace repl {
namespace {
- TEST(MemberConfig, ParseMinimalMemberConfigAndCheckDefaults) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(0, mc.getId());
- ASSERT_EQUALS(HostAndPort("localhost", 12345), mc.getHostAndPort());
- ASSERT_EQUALS(1.0, mc.getPriority());
- ASSERT_EQUALS(Seconds(0), mc.getSlaveDelay());
- ASSERT_TRUE(mc.isVoter());
- ASSERT_FALSE(mc.isHidden());
- ASSERT_FALSE(mc.isArbiter());
- ASSERT_TRUE(mc.shouldBuildIndexes());
- ASSERT_EQUALS(3U, mc.getNumTags());
- ASSERT_OK(mc.validate());
- }
-
- TEST(MemberConfig, ParseFailsWithIllegalFieldName) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::BadValue,
- mc.initialize(BSON("_id" << 0 << "host" << "localhost" << "frim" << 1),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseFailsWithMissingIdField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("host" << "localhost:12345"),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseFailsWithBadIdField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << "0" << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << Date_t() << "host" << "localhost:12345"),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseFailsWithMissingHostField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("_id" << 0), &tagConfig));
- }
-
-
- TEST(MemberConfig, ParseFailsWithBadHostField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, mc.initialize(BSON("_id" << 0 << "host" << 0),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::FailedToParse, mc.initialize(BSON("_id" << 0 << "host" << ""),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::FailedToParse,
- mc.initialize(BSON("_id" << 0 << "host" << "myhost:zabc"), &tagConfig));
- }
-
- TEST(MemberConfig, ParseArbiterOnly) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "arbiterOnly" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.isArbiter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "arbiterOnly" << false),
- &tagConfig));
- ASSERT_TRUE(!mc.isArbiter());
- }
-
- TEST(MemberConfig, ParseHidden) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "hidden" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.isHidden());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "hidden" << false),
- &tagConfig));
- ASSERT_TRUE(!mc.isHidden());
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << 0 << "host" << "h" << "hidden" << "1.0"),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseBuildIndexes) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "buildIndexes" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.shouldBuildIndexes());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "buildIndexes" << false),
- &tagConfig));
- ASSERT_TRUE(!mc.shouldBuildIndexes());
- }
-
- TEST(MemberConfig, ParseVotes) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0),
- &tagConfig));
- ASSERT_FALSE(mc.isVoter());
-
- // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.5),
- &tagConfig));
- ASSERT_TRUE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0.5),
- &tagConfig));
- ASSERT_FALSE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << -0.5),
- &tagConfig));
- ASSERT_FALSE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 2),
- &tagConfig));
-
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << 0 <<
- "host" << "h" <<
- "votes" << Date_t::fromMillisSinceEpoch(2)),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParsePriority) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1),
- &tagConfig));
- ASSERT_EQUALS(1.0, mc.getPriority());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0),
- &tagConfig));
- ASSERT_EQUALS(0.0, mc.getPriority());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 100.8),
- &tagConfig));
- ASSERT_EQUALS(100.8, mc.getPriority());
-
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << 0 <<
- "host" << "h" <<
- "priority" << Date_t::fromMillisSinceEpoch(2)),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseSlaveDelay) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "slaveDelay" << 100),
- &tagConfig));
- ASSERT_EQUALS(Seconds(100), mc.getSlaveDelay());
- }
-
- TEST(MemberConfig, ParseTags) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "tags" << BSON("k1" << "v1" << "k2" << "v2")),
- &tagConfig));
- ASSERT_EQUALS(5U, mc.getNumTags());
- ASSERT_EQUALS(5, std::distance(mc.tagsBegin(), mc.tagsEnd()));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k1", "v1")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k2", "v2")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$voter",
- "0")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$electable",
- "0")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$all",
- "0")));
- }
-
- TEST(MemberConfig, ValidateFailsWithIdOutOfRange) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << -1 << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 256 << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidateVotes) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_TRUE(mc.isVoter());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_FALSE(mc.isVoter());
-
- // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.5),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_TRUE(mc.isVoter());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0.5),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_FALSE(mc.isVoter());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << -0.5),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_FALSE(mc.isVoter());
-
- // Invalid values
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 2),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << -1),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityRanges) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1000),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << -1),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1001),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidateSlaveDelays) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << 0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << 3600 * 10),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << -1),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << 3600 * 24 * 400),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityAndSlaveDelayRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "slaveDelay" << 60),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityAndHiddenRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "hidden" << true),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "hidden" << false),
- &tagConfig));
- ASSERT_OK(mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityAndBuildIndexesRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "buildIndexes" << false),
- &tagConfig));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "buildIndexes" << true),
- &tagConfig));
- ASSERT_OK(mc.validate());
- }
-
- TEST(MemberConfig, ValidateArbiterVotesRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 1 << "arbiterOnly" << true),
- &tagConfig));
- ASSERT_OK(mc.validate());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 0 << "arbiterOnly" << false),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 1 << "arbiterOnly" << false),
- &tagConfig));
- ASSERT_OK(mc.validate());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 0 << "arbiterOnly" << true),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
+TEST(MemberConfig, ParseMinimalMemberConfigAndCheckDefaults) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(0, mc.getId());
+ ASSERT_EQUALS(HostAndPort("localhost", 12345), mc.getHostAndPort());
+ ASSERT_EQUALS(1.0, mc.getPriority());
+ ASSERT_EQUALS(Seconds(0), mc.getSlaveDelay());
+ ASSERT_TRUE(mc.isVoter());
+ ASSERT_FALSE(mc.isHidden());
+ ASSERT_FALSE(mc.isArbiter());
+ ASSERT_TRUE(mc.shouldBuildIndexes());
+ ASSERT_EQUALS(3U, mc.getNumTags());
+ ASSERT_OK(mc.validate());
+}
+
+TEST(MemberConfig, ParseFailsWithIllegalFieldName) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "localhost"
+ << "frim" << 1),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseFailsWithMissingIdField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey,
+ mc.initialize(BSON("host"
+ << "localhost:12345"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseFailsWithBadIdField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey,
+ mc.initialize(BSON("host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id"
+ << "0"
+ << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << Date_t() << "host"
+ << "localhost:12345"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseFailsWithMissingHostField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("_id" << 0), &tagConfig));
+}
+
+
+TEST(MemberConfig, ParseFailsWithBadHostField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host" << 0), &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << ""),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "myhost:zabc"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseArbiterOnly) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "arbiterOnly" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.isArbiter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "arbiterOnly" << false),
+ &tagConfig));
+ ASSERT_TRUE(!mc.isArbiter());
+}
+
+TEST(MemberConfig, ParseHidden) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "hidden" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.isHidden());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "hidden" << false),
+ &tagConfig));
+ ASSERT_TRUE(!mc.isHidden());
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "hidden"
+ << "1.0"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseBuildIndexes) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "buildIndexes" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.shouldBuildIndexes());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "buildIndexes" << false),
+ &tagConfig));
+ ASSERT_TRUE(!mc.shouldBuildIndexes());
+}
+
+TEST(MemberConfig, ParseVotes) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0),
+ &tagConfig));
+ ASSERT_FALSE(mc.isVoter());
+
+ // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.5),
+ &tagConfig));
+ ASSERT_TRUE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0.5),
+ &tagConfig));
+ ASSERT_FALSE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << -0.5),
+ &tagConfig));
+ ASSERT_FALSE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 2),
+ &tagConfig));
+
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << Date_t::fromMillisSinceEpoch(2)),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParsePriority) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1),
+ &tagConfig));
+ ASSERT_EQUALS(1.0, mc.getPriority());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0),
+ &tagConfig));
+ ASSERT_EQUALS(0.0, mc.getPriority());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 100.8),
+ &tagConfig));
+ ASSERT_EQUALS(100.8, mc.getPriority());
+
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << Date_t::fromMillisSinceEpoch(2)),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseSlaveDelay) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "slaveDelay" << 100),
+ &tagConfig));
+ ASSERT_EQUALS(Seconds(100), mc.getSlaveDelay());
+}
+
+TEST(MemberConfig, ParseTags) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "tags" << BSON("k1"
+ << "v1"
+ << "k2"
+ << "v2")),
+ &tagConfig));
+ ASSERT_EQUALS(5U, mc.getNumTags());
+ ASSERT_EQUALS(5, std::distance(mc.tagsBegin(), mc.tagsEnd()));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k1", "v1")));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k2", "v2")));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$voter", "0")));
+ ASSERT_EQUALS(1,
+ std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$electable", "0")));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$all", "0")));
+}
+
+TEST(MemberConfig, ValidateFailsWithIdOutOfRange) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << -1 << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 256 << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidateVotes) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_TRUE(mc.isVoter());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_FALSE(mc.isVoter());
+
+ // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.5),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_TRUE(mc.isVoter());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0.5),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_FALSE(mc.isVoter());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << -0.5),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_FALSE(mc.isVoter());
+
+ // Invalid values
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 2),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << -1),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityRanges) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1000),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << -1),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1001),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidateSlaveDelays) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << 0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << 3600 * 10),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << -1),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << 3600 * 24 * 400),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityAndSlaveDelayRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "slaveDelay" << 60),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityAndHiddenRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "hidden" << true),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "hidden" << false),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityAndBuildIndexesRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "buildIndexes" << false),
+ &tagConfig));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "buildIndexes" << true),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+}
+
+TEST(MemberConfig, ValidateArbiterVotesRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1 << "arbiterOnly" << true),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0 << "arbiterOnly" << false),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1 << "arbiterOnly" << false),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0 << "arbiterOnly" << true),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
} // namespace
} // namespace repl
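
A condensed sketch of the truncation rule the tests above exercise (illustrative values,
same fixtures as member_config_test.cpp):

    ReplicaSetTagConfig tagConfig;
    MemberConfig mc;
    ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0.9), &tagConfig));
    ASSERT_FALSE(mc.isVoter());  // 0.9 truncates to 0
    ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.9), &tagConfig));
    ASSERT_TRUE(mc.isVoter());   // 1.9 truncates to 1; whole values > 1 fail validate()
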
diff --git a/src/mongo/db/repl/member_heartbeat_data.cpp b/src/mongo/db/repl/member_heartbeat_data.cpp
index edbc40de393..357c41f1cf2 100644
--- a/src/mongo/db/repl/member_heartbeat_data.cpp
+++ b/src/mongo/db/repl/member_heartbeat_data.cpp
@@ -39,71 +39,67 @@
namespace mongo {
namespace repl {
- MemberHeartbeatData::MemberHeartbeatData() :
- _health(-1),
- _authIssue(false) {
-
- _lastResponse.setState(MemberState::RS_UNKNOWN);
- _lastResponse.setElectionTime(Timestamp());
- _lastResponse.setOpTime(OpTime());
+MemberHeartbeatData::MemberHeartbeatData() : _health(-1), _authIssue(false) {
+ _lastResponse.setState(MemberState::RS_UNKNOWN);
+ _lastResponse.setElectionTime(Timestamp());
+ _lastResponse.setOpTime(OpTime());
+}
+
+void MemberHeartbeatData::setUpValues(Date_t now,
+ const HostAndPort& host,
+ ReplSetHeartbeatResponse hbResponse) {
+ _health = 1;
+ if (_upSince == Date_t()) {
+ _upSince = now;
}
-
- void MemberHeartbeatData::setUpValues(Date_t now,
- const HostAndPort& host,
- ReplSetHeartbeatResponse hbResponse) {
- _health = 1;
- if (_upSince == Date_t()) {
- _upSince = now;
- }
- _authIssue = false;
- _lastHeartbeat = now;
- if (!hbResponse.hasState()) {
- hbResponse.setState(MemberState::RS_UNKNOWN);
- }
- if (!hbResponse.hasElectionTime()) {
- hbResponse.setElectionTime(_lastResponse.getElectionTime());
- }
- if (!hbResponse.hasOpTime()) {
- hbResponse.setOpTime(_lastResponse.getOpTime());
- }
-
- // Log if the state changes
- if (_lastResponse.getState() != hbResponse.getState()){
- log() << "Member " << host.toString() << " is now in state "
- << hbResponse.getState().toString() << rsLog;
- }
-
- _lastResponse = hbResponse;
+ _authIssue = false;
+ _lastHeartbeat = now;
+ if (!hbResponse.hasState()) {
+ hbResponse.setState(MemberState::RS_UNKNOWN);
}
-
- void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeatMessage) {
-
- _health = 0;
- _upSince = Date_t();
- _lastHeartbeat = now;
- _authIssue = false;
-
- _lastResponse = ReplSetHeartbeatResponse();
- _lastResponse.setState(MemberState::RS_DOWN);
- _lastResponse.setElectionTime(Timestamp());
- _lastResponse.setOpTime(OpTime());
- _lastResponse.setHbMsg(heartbeatMessage);
- _lastResponse.setSyncingTo(HostAndPort());
+ if (!hbResponse.hasElectionTime()) {
+ hbResponse.setElectionTime(_lastResponse.getElectionTime());
+ }
+ if (!hbResponse.hasOpTime()) {
+ hbResponse.setOpTime(_lastResponse.getOpTime());
}
- void MemberHeartbeatData::setAuthIssue(Date_t now) {
- _health = 0; // set health to 0 so that this doesn't count towards majority.
- _upSince = Date_t();
- _lastHeartbeat = now;
- _authIssue = true;
-
- _lastResponse = ReplSetHeartbeatResponse();
- _lastResponse.setState(MemberState::RS_UNKNOWN);
- _lastResponse.setElectionTime(Timestamp());
- _lastResponse.setOpTime(OpTime());
- _lastResponse.setHbMsg("");
- _lastResponse.setSyncingTo(HostAndPort());
+ // Log if the state changes
+ if (_lastResponse.getState() != hbResponse.getState()) {
+ log() << "Member " << host.toString() << " is now in state "
+ << hbResponse.getState().toString() << rsLog;
}
-} // namespace repl
-} // namespace mongo
+ _lastResponse = hbResponse;
+}
+
+void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeatMessage) {
+ _health = 0;
+ _upSince = Date_t();
+ _lastHeartbeat = now;
+ _authIssue = false;
+
+ _lastResponse = ReplSetHeartbeatResponse();
+ _lastResponse.setState(MemberState::RS_DOWN);
+ _lastResponse.setElectionTime(Timestamp());
+ _lastResponse.setOpTime(OpTime());
+ _lastResponse.setHbMsg(heartbeatMessage);
+ _lastResponse.setSyncingTo(HostAndPort());
+}
+
+void MemberHeartbeatData::setAuthIssue(Date_t now) {
+ _health = 0; // set health to 0 so that this doesn't count towards majority.
+ _upSince = Date_t();
+ _lastHeartbeat = now;
+ _authIssue = true;
+
+ _lastResponse = ReplSetHeartbeatResponse();
+ _lastResponse.setState(MemberState::RS_UNKNOWN);
+ _lastResponse.setElectionTime(Timestamp());
+ _lastResponse.setOpTime(OpTime());
+ _lastResponse.setHbMsg("");
+ _lastResponse.setSyncingTo(HostAndPort());
+}
+
+} // namespace repl
+} // namespace mongo
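
A sketch of how the three setters above drive the health state (illustrative only;
assumes a parsed ReplSetHeartbeatResponse):

    MemberHeartbeatData hb;            // starts at _health == -1 (not yet checked)
    ReplSetHeartbeatResponse resp;     // assume a successfully parsed response
    hb.setUpValues(Date_t::now(), HostAndPort("node1:27017"), resp);  // _health == 1
    hb.setDownValues(Date_t::now(), "connection refused");           // _health == 0
    hb.setAuthIssue(Date_t::now());    // _health == 0 and hasAuthIssue() == true
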
diff --git a/src/mongo/db/repl/member_heartbeat_data.h b/src/mongo/db/repl/member_heartbeat_data.h
index 8996cfb7d96..6a3b4f62880 100644
--- a/src/mongo/db/repl/member_heartbeat_data.h
+++ b/src/mongo/db/repl/member_heartbeat_data.h
@@ -36,77 +36,103 @@
namespace mongo {
namespace repl {
+/**
+ * This class contains the data returned from a heartbeat command for one member
+ * of a replica set.
+ **/
+class MemberHeartbeatData {
+public:
+ MemberHeartbeatData();
+
+ MemberState getState() const {
+ return _lastResponse.getState();
+ }
+ int getHealth() const {
+ return _health;
+ }
+ Date_t getUpSince() const {
+ return _upSince;
+ }
+ Date_t getLastHeartbeat() const {
+ return _lastHeartbeat;
+ }
+ Date_t getLastHeartbeatRecv() const {
+ return _lastHeartbeatRecv;
+ }
+ void setLastHeartbeatRecv(Date_t newHeartbeatRecvTime) {
+ _lastHeartbeatRecv = newHeartbeatRecvTime;
+ }
+ const std::string& getLastHeartbeatMsg() const {
+ return _lastResponse.getHbMsg();
+ }
+ const HostAndPort& getSyncSource() const {
+ return _lastResponse.getSyncingTo();
+ }
+ OpTime getOpTime() const {
+ return _lastResponse.getOpTime();
+ }
+ int getConfigVersion() const {
+ return _lastResponse.getConfigVersion();
+ }
+ bool hasAuthIssue() const {
+ return _authIssue;
+ }
+
+ Timestamp getElectionTime() const {
+ return _lastResponse.getElectionTime();
+ }
+
+ // Returns true if the last heartbeat data explicitly stated that the node
+ // is not electable.
+ bool isUnelectable() const {
+ return _lastResponse.hasIsElectable() && !_lastResponse.isElectable();
+ }
+
+ // Was this member up for the last heartbeat?
+ bool up() const {
+ return _health > 0;
+ }
+ // Was this member up for the last heartbeat
+ // (or have we not received the first heartbeat yet)?
+ bool maybeUp() const {
+ return _health != 0;
+ }
+
+ /**
+ * Sets values in this object from the results of a successful heartbeat command.
+ */
+ void setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse hbResponse);
+
+ /**
+ * Sets values in this object from the results of an erroring/failed heartbeat command.
+ * _authIssue is set to false, _health is set to 0, the state is set to RS_DOWN, and
+ * other values are set as specified.
+ */
+ void setDownValues(Date_t now, const std::string& heartbeatMessage);
+
/**
- * This class contains the data returned from a heartbeat command for one member
- * of a replica set.
- **/
- class MemberHeartbeatData {
- public:
- MemberHeartbeatData();
-
- MemberState getState() const { return _lastResponse.getState(); }
- int getHealth() const { return _health; }
- Date_t getUpSince() const { return _upSince; }
- Date_t getLastHeartbeat() const { return _lastHeartbeat; }
- Date_t getLastHeartbeatRecv() const { return _lastHeartbeatRecv; }
- void setLastHeartbeatRecv(Date_t newHeartbeatRecvTime) {
- _lastHeartbeatRecv = newHeartbeatRecvTime;
- }
- const std::string& getLastHeartbeatMsg() const { return _lastResponse.getHbMsg(); }
- const HostAndPort& getSyncSource() const { return _lastResponse.getSyncingTo(); }
- OpTime getOpTime() const { return _lastResponse.getOpTime(); }
- int getConfigVersion() const { return _lastResponse.getConfigVersion(); }
- bool hasAuthIssue() const { return _authIssue; }
-
- Timestamp getElectionTime() const { return _lastResponse.getElectionTime(); }
-
- // Returns true if the last heartbeat data explicilty stated that the node
- // is not electable.
- bool isUnelectable() const {
- return _lastResponse.hasIsElectable() && !_lastResponse.isElectable();
- }
-
- // Was this member up for the last heartbeat?
- bool up() const { return _health > 0; }
- // Was this member up for the last hearbeeat
- // (or we haven't received the first heartbeat yet)
- bool maybeUp() const { return _health != 0; }
-
- /**
- * Sets values in this object from the results of a successful heartbeat command.
- */
- void setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse hbResponse);
-
- /**
- * Sets values in this object from the results of a erroring/failed heartbeat command.
- * _authIssues is set to false, _health is set to 0, _state is set to RS_DOWN, and
- * other values are set as specified.
- */
- void setDownValues(Date_t now, const std::string& heartbeatMessage);
-
- /**
- * Sets values in this object that indicate there was an auth issue on the last heartbeat
- * command.
- */
- void setAuthIssue(Date_t now);
-
- private:
- // -1 = not checked yet, 0 = member is down/unreachable, 1 = member is up
- int _health;
-
- // Time of first successful heartbeat, if currently still up
- Date_t _upSince;
- // This is the last time we got a response from a heartbeat request to a given member.
- Date_t _lastHeartbeat;
- // This is the last time we got a heartbeat request from a given member.
- Date_t _lastHeartbeatRecv;
-
- // Did the last heartbeat show a failure to authenticate?
- bool _authIssue;
-
- // The last heartbeat response we received.
- ReplSetHeartbeatResponse _lastResponse;
- };
-
-} // namespace repl
-} // namespace mongo
+ * Sets values in this object that indicate there was an auth issue on the last heartbeat
+ * command.
+ */
+ void setAuthIssue(Date_t now);
+
+private:
+ // -1 = not checked yet, 0 = member is down/unreachable, 1 = member is up
+ int _health;
+
+ // Time of first successful heartbeat, if currently still up
+ Date_t _upSince;
+ // This is the last time we got a response from a heartbeat request to a given member.
+ Date_t _lastHeartbeat;
+ // This is the last time we got a heartbeat request from a given member.
+ Date_t _lastHeartbeatRecv;
+
+ // Did the last heartbeat show a failure to authenticate?
+ bool _authIssue;
+
+ // The last heartbeat response we received.
+ ReplSetHeartbeatResponse _lastResponse;
+};
+
+} // namespace repl
+} // namespace mongo
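
The tri-state _health encoding behind up() and maybeUp() can be summarized as:

    // _health == -1  ->  up() == false, maybeUp() == true   (no heartbeat attempted yet)
    // _health ==  0  ->  up() == false, maybeUp() == false  (down, unreachable, or auth issue)
    // _health ==  1  ->  up() == true,  maybeUp() == true   (last heartbeat succeeded)
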
diff --git a/src/mongo/db/repl/member_state.h b/src/mongo/db/repl/member_state.h
index c3e3ffd292b..4adf7516845 100644
--- a/src/mongo/db/repl/member_state.h
+++ b/src/mongo/db/repl/member_state.h
@@ -36,65 +36,97 @@ namespace mongo {
namespace repl {
- /*
- RS_STARTUP serving still starting up, or still trying to initiate the set
- RS_PRIMARY this server thinks it is primary
- RS_SECONDARY this server thinks it is a secondary (slave mode)
- RS_RECOVERING recovering/resyncing; after recovery usually auto-transitions to secondary
- RS_STARTUP2 loaded config, still determining who is primary
+/*
+ RS_STARTUP server still starting up, or still trying to initiate the set
+ RS_PRIMARY this server thinks it is primary
+ RS_SECONDARY this server thinks it is a secondary (slave mode)
+ RS_RECOVERING recovering/resyncing; after recovery usually auto-transitions to secondary
+ RS_STARTUP2 loaded config, still determining who is primary
- State -> integer mappings are reserved forever. Do not change them or delete them, except
- to update RS_MAX when introducing new states.
- */
- struct MemberState {
- enum MS {
- RS_STARTUP = 0,
- RS_PRIMARY = 1,
- RS_SECONDARY = 2,
- RS_RECOVERING = 3,
- RS_STARTUP2 = 5,
- RS_UNKNOWN = 6, /* remote node not yet reached */
- RS_ARBITER = 7,
- RS_DOWN = 8, /* node not reachable for a report */
- RS_ROLLBACK = 9,
- RS_REMOVED = 10, /* node removed from replica set */
- RS_MAX = 10
- } s;
+ State -> integer mappings are reserved forever. Do not change them or delete them, except
+ to update RS_MAX when introducing new states.
+*/
+struct MemberState {
+ enum MS {
+ RS_STARTUP = 0,
+ RS_PRIMARY = 1,
+ RS_SECONDARY = 2,
+ RS_RECOVERING = 3,
+ RS_STARTUP2 = 5,
+ RS_UNKNOWN = 6, /* remote node not yet reached */
+ RS_ARBITER = 7,
+ RS_DOWN = 8, /* node not reachable for a report */
+ RS_ROLLBACK = 9,
+ RS_REMOVED = 10, /* node removed from replica set */
+ RS_MAX = 10
+ } s;
- MemberState(MS ms = RS_UNKNOWN) : s(ms) { }
- explicit MemberState(int ms) : s((MS) ms) { }
+ MemberState(MS ms = RS_UNKNOWN) : s(ms) {}
+ explicit MemberState(int ms) : s((MS)ms) {}
- bool startup() const { return s == RS_STARTUP; }
- bool primary() const { return s == RS_PRIMARY; }
- bool secondary() const { return s == RS_SECONDARY; }
- bool recovering() const { return s == RS_RECOVERING; }
- bool startup2() const { return s == RS_STARTUP2; }
- bool rollback() const { return s == RS_ROLLBACK; }
- bool readable() const { return s == RS_PRIMARY || s == RS_SECONDARY; }
- bool removed() const { return s == RS_REMOVED; }
- bool arbiter() const { return s == RS_ARBITER; }
+ bool startup() const {
+ return s == RS_STARTUP;
+ }
+ bool primary() const {
+ return s == RS_PRIMARY;
+ }
+ bool secondary() const {
+ return s == RS_SECONDARY;
+ }
+ bool recovering() const {
+ return s == RS_RECOVERING;
+ }
+ bool startup2() const {
+ return s == RS_STARTUP2;
+ }
+ bool rollback() const {
+ return s == RS_ROLLBACK;
+ }
+ bool readable() const {
+ return s == RS_PRIMARY || s == RS_SECONDARY;
+ }
+ bool removed() const {
+ return s == RS_REMOVED;
+ }
+ bool arbiter() const {
+ return s == RS_ARBITER;
+ }
- std::string toString() const;
+ std::string toString() const;
- bool operator==(const MemberState& r) const { return s == r.s; }
- bool operator!=(const MemberState& r) const { return s != r.s; }
- };
+ bool operator==(const MemberState& r) const {
+ return s == r.s;
+ }
+ bool operator!=(const MemberState& r) const {
+ return s != r.s;
+ }
+};
- inline std::string MemberState::toString() const {
- switch ( s ) {
- case RS_STARTUP: return "STARTUP";
- case RS_PRIMARY: return "PRIMARY";
- case RS_SECONDARY: return "SECONDARY";
- case RS_RECOVERING: return "RECOVERING";
- case RS_STARTUP2: return "STARTUP2";
- case RS_ARBITER: return "ARBITER";
- case RS_DOWN: return "DOWN";
- case RS_ROLLBACK: return "ROLLBACK";
- case RS_UNKNOWN: return "UNKNOWN";
- case RS_REMOVED: return "REMOVED";
- }
- return "";
+inline std::string MemberState::toString() const {
+ switch (s) {
+ case RS_STARTUP:
+ return "STARTUP";
+ case RS_PRIMARY:
+ return "PRIMARY";
+ case RS_SECONDARY:
+ return "SECONDARY";
+ case RS_RECOVERING:
+ return "RECOVERING";
+ case RS_STARTUP2:
+ return "STARTUP2";
+ case RS_ARBITER:
+ return "ARBITER";
+ case RS_DOWN:
+ return "DOWN";
+ case RS_ROLLBACK:
+ return "ROLLBACK";
+ case RS_UNKNOWN:
+ return "UNKNOWN";
+ case RS_REMOVED:
+ return "REMOVED";
}
+ return "";
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
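
A small sketch of the reserved integer mappings in use (illustrative only):

    MemberState st(MemberState::RS_SECONDARY);
    invariant(st.secondary() && st.readable());
    MemberState fromWire(2);  // explicit int constructor; 2 is reserved for SECONDARY
    invariant(fromWire == st && fromWire.toString() == "SECONDARY");
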
diff --git a/src/mongo/db/repl/minvalid.cpp b/src/mongo/db/repl/minvalid.cpp
index 5cd3acd64e7..b14966486fe 100644
--- a/src/mongo/db/repl/minvalid.cpp
+++ b/src/mongo/db/repl/minvalid.cpp
@@ -46,74 +46,76 @@ namespace mongo {
namespace repl {
namespace {
- const char* initialSyncFlagString = "doingInitialSync";
- const BSONObj initialSyncFlag(BSON(initialSyncFlagString << true));
- const char* minvalidNS = "local.replset.minvalid";
-} // namespace
-
- // Writes
- void clearInitialSyncFlag(OperationContext* txn) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- // TODO: Investigate correctness of taking MODE_IX for DB/Collection locks
- Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
- Helpers::putSingleton(txn, minvalidNS, BSON("$unset" << initialSyncFlag));
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "clearInitialSyncFlags", minvalidNS);
-
+const char* initialSyncFlagString = "doingInitialSync";
+const BSONObj initialSyncFlag(BSON(initialSyncFlagString << true));
+const char* minvalidNS = "local.replset.minvalid";
+} // namespace
+
+// Writes
+void clearInitialSyncFlag(OperationContext* txn) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ // TODO: Investigate correctness of taking MODE_IX for DB/Collection locks
+ Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
+ Helpers::putSingleton(txn, minvalidNS, BSON("$unset" << initialSyncFlag));
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "clearInitialSyncFlags", minvalidNS);
+}
- void setInitialSyncFlag(OperationContext* txn) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
- Helpers::putSingleton(txn, minvalidNS, BSON("$set" << initialSyncFlag));
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "setInitialSyncFlags", minvalidNS);
+void setInitialSyncFlag(OperationContext* txn) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
+ Helpers::putSingleton(txn, minvalidNS, BSON("$set" << initialSyncFlag));
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "setInitialSyncFlags", minvalidNS);
+}
- void setMinValid(OperationContext* ctx, const OpTime& opTime) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(ctx, MODE_IX);
- Lock::DBLock dblk(ctx->lockState(), "local", MODE_X);
- Helpers::putSingleton(ctx,
- minvalidNS,
- BSON("$set" << BSON("ts" << opTime.getTimestamp() <<
- "t" << opTime.getTerm())));
-
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(ctx, "setMinValid", minvalidNS);
+void setMinValid(OperationContext* ctx, const OpTime& opTime) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(ctx, MODE_IX);
+ Lock::DBLock dblk(ctx->lockState(), "local", MODE_X);
+ Helpers::putSingleton(
+ ctx,
+ minvalidNS,
+ BSON("$set" << BSON("ts" << opTime.getTimestamp() << "t" << opTime.getTerm())));
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(ctx, "setMinValid", minvalidNS);
+}
- // Reads
- bool getInitialSyncFlag() {
- OperationContextImpl txn;
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(&txn, MODE_IS);
- Lock::DBLock dblk(txn.lockState(), "local", MODE_IS);
- Lock::CollectionLock lk(txn.lockState(), minvalidNS, MODE_IS);
- BSONObj mv;
- bool found = Helpers::getSingleton( &txn, minvalidNS, mv);
-
- if (found) {
- return mv[initialSyncFlagString].trueValue();
- }
- return false;
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(&txn, "getInitialSyncFlags", minvalidNS);
-
- MONGO_UNREACHABLE;
+// Reads
+bool getInitialSyncFlag() {
+ OperationContextImpl txn;
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(&txn, MODE_IS);
+ Lock::DBLock dblk(txn.lockState(), "local", MODE_IS);
+ Lock::CollectionLock lk(txn.lockState(), minvalidNS, MODE_IS);
+ BSONObj mv;
+ bool found = Helpers::getSingleton(&txn, minvalidNS, mv);
+
+ if (found) {
+ return mv[initialSyncFlagString].trueValue();
+ }
+ return false;
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(&txn, "getInitialSyncFlags", minvalidNS);
- OpTime getMinValid(OperationContext* txn) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IS);
- Lock::DBLock dblk(txn->lockState(), "local", MODE_IS);
- Lock::CollectionLock lk(txn->lockState(), minvalidNS, MODE_IS);
- BSONObj mv;
- bool found = Helpers::getSingleton(txn, minvalidNS, mv);
- if (found) {
- return extractOpTime(mv);
- }
- return OpTime();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "getMinValid", minvalidNS);
- }
+ MONGO_UNREACHABLE;
+}
+OpTime getMinValid(OperationContext* txn) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IS);
+ Lock::DBLock dblk(txn->lockState(), "local", MODE_IS);
+ Lock::CollectionLock lk(txn->lockState(), minvalidNS, MODE_IS);
+ BSONObj mv;
+ bool found = Helpers::getSingleton(txn, minvalidNS, mv);
+ if (found) {
+ return extractOpTime(mv);
+ }
+ return OpTime();
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "getMinValid", minvalidNS);
+}
}
}
diff --git a/src/mongo/db/repl/minvalid.h b/src/mongo/db/repl/minvalid.h
index 2118809c424..8d0ceb79613 100644
--- a/src/mongo/db/repl/minvalid.h
+++ b/src/mongo/db/repl/minvalid.h
@@ -29,39 +29,39 @@
#pragma once
namespace mongo {
- class BSONObj;
- class OperationContext;
+class BSONObj;
+class OperationContext;
namespace repl {
- class OpTime;
+class OpTime;
- /**
- * Helper functions for maintaining local.replset.minvalid collection contents.
- *
- * When a member reaches its minValid optime it is in a consistent state. Thus, minValid is
- * set as the last step in initial sync. At the beginning of initial sync, _initialSyncFlag
- * is appended onto minValid to indicate that initial sync was started but has not yet
- * completed.
- * minValid is also used during "normal" sync: the last op in each batch is used to set
- * minValid, to indicate that we are in a consistent state when the batch has been fully
- * applied.
- */
+/**
+ * Helper functions for maintaining local.replset.minvalid collection contents.
+ *
+ * When a member reaches its minValid optime it is in a consistent state. Thus, minValid is
+ * set as the last step in initial sync. At the beginning of initial sync, _initialSyncFlag
+ * is appended onto minValid to indicate that initial sync was started but has not yet
+ * completed.
+ * minValid is also used during "normal" sync: the last op in each batch is used to set
+ * minValid, to indicate that we are in a consistent state when the batch has been fully
+ * applied.
+ */
- /**
- * The initial sync flag is used to durably record the state of an initial sync; its boolean
- * value is true when an initial sync is in progress and hasn't yet completed. The flag
- * is stored as part of the local.replset.minvalid collection.
- */
- void clearInitialSyncFlag(OperationContext* txn);
- void setInitialSyncFlag(OperationContext* txn);
- bool getInitialSyncFlag();
+/**
+ * The initial sync flag is used to durably record the state of an initial sync; its boolean
+ * value is true when an initial sync is in progress and hasn't yet completed. The flag
+ * is stored as part of the local.replset.minvalid collection.
+ */
+void clearInitialSyncFlag(OperationContext* txn);
+void setInitialSyncFlag(OperationContext* txn);
+bool getInitialSyncFlag();
- /**
- * The minValid value is the earliest (minimum) Timestamp that must be applied in order to
- * consider the dataset consistent. Do not allow client reads if our last applied operation is
- * before the minValid time.
- */
- void setMinValid(OperationContext* ctx, const OpTime& opTime);
- OpTime getMinValid(OperationContext* txn);
+/**
+ * The minValid value is the earliest (minimum) Timestamp that must be applied in order to
+ * consider the dataset consistent. Do not allow client reads if our last applied operation is
+ * before the minValid time.
+ */
+void setMinValid(OperationContext* ctx, const OpTime& opTime);
+OpTime getMinValid(OperationContext* txn);
}
}
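
A sketch of the initial-sync protocol these helpers implement (illustrative only;
assumes an OperationContext* txn and an OpTime lastOpApplied in scope):

    setInitialSyncFlag(txn);           // durably records: sync started, data not consistent
    // ... clone databases and apply oplog entries until reaching minValid ...
    setMinValid(txn, lastOpApplied);   // reads are unsafe before this optime is applied
    clearInitialSyncFlag(txn);         // final step: the node is consistent again
    invariant(!getInitialSyncFlag());
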
diff --git a/src/mongo/db/repl/operation_context_repl_mock.cpp b/src/mongo/db/repl/operation_context_repl_mock.cpp
index 78587bda2f3..8b66c6b800f 100644
--- a/src/mongo/db/repl/operation_context_repl_mock.cpp
+++ b/src/mongo/db/repl/operation_context_repl_mock.cpp
@@ -36,52 +36,50 @@
namespace mongo {
namespace repl {
- OperationContextReplMock::OperationContextReplMock() : OperationContextReplMock(0) {}
+OperationContextReplMock::OperationContextReplMock() : OperationContextReplMock(0) {}
- OperationContextReplMock::OperationContextReplMock(unsigned int opNum) :
- OperationContextReplMock(nullptr, opNum) {
- }
+OperationContextReplMock::OperationContextReplMock(unsigned int opNum)
+ : OperationContextReplMock(nullptr, opNum) {}
- OperationContextReplMock::OperationContextReplMock(Client* client, unsigned int opNum) :
- OperationContextNoop(client, opNum, new MMAPV1LockerImpl()),
- _checkForInterruptStatus(Status::OK()),
- _maxTimeMicrosRemaining(0),
- _writesAreReplicated(true) {
- }
+OperationContextReplMock::OperationContextReplMock(Client* client, unsigned int opNum)
+ : OperationContextNoop(client, opNum, new MMAPV1LockerImpl()),
+ _checkForInterruptStatus(Status::OK()),
+ _maxTimeMicrosRemaining(0),
+ _writesAreReplicated(true) {}
- OperationContextReplMock::~OperationContextReplMock() = default;
+OperationContextReplMock::~OperationContextReplMock() = default;
- void OperationContextReplMock::checkForInterrupt() {
- uassertStatusOK(checkForInterruptNoAssert());
- }
+void OperationContextReplMock::checkForInterrupt() {
+ uassertStatusOK(checkForInterruptNoAssert());
+}
- Status OperationContextReplMock::checkForInterruptNoAssert() {
- if (!_checkForInterruptStatus.isOK()) {
- return _checkForInterruptStatus;
- }
-
- return Status::OK();
+Status OperationContextReplMock::checkForInterruptNoAssert() {
+ if (!_checkForInterruptStatus.isOK()) {
+ return _checkForInterruptStatus;
}
- void OperationContextReplMock::setCheckForInterruptStatus(Status status) {
- _checkForInterruptStatus = std::move(status);
- }
+ return Status::OK();
+}
- uint64_t OperationContextReplMock::getRemainingMaxTimeMicros() const {
- return _maxTimeMicrosRemaining;
- }
+void OperationContextReplMock::setCheckForInterruptStatus(Status status) {
+ _checkForInterruptStatus = std::move(status);
+}
- void OperationContextReplMock::setRemainingMaxTimeMicros(uint64_t micros) {
- _maxTimeMicrosRemaining = micros;
- }
+uint64_t OperationContextReplMock::getRemainingMaxTimeMicros() const {
+ return _maxTimeMicrosRemaining;
+}
- void OperationContextReplMock::setReplicatedWrites(bool writesAreReplicated) {
- _writesAreReplicated = writesAreReplicated;
- }
+void OperationContextReplMock::setRemainingMaxTimeMicros(uint64_t micros) {
+ _maxTimeMicrosRemaining = micros;
+}
- bool OperationContextReplMock::writesAreReplicated() const {
- return _writesAreReplicated;
- }
+void OperationContextReplMock::setReplicatedWrites(bool writesAreReplicated) {
+ _writesAreReplicated = writesAreReplicated;
+}
+
+bool OperationContextReplMock::writesAreReplicated() const {
+ return _writesAreReplicated;
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/operation_context_repl_mock.h b/src/mongo/db/repl/operation_context_repl_mock.h
index 592a331fbb4..c16b55a19bf 100644
--- a/src/mongo/db/repl/operation_context_repl_mock.h
+++ b/src/mongo/db/repl/operation_context_repl_mock.h
@@ -33,41 +33,41 @@
namespace mongo {
- class Locker;
+class Locker;
namespace repl {
- /**
- * Mock implementation of OperationContext that can be used with real instances of LockManager.
- * Note this is not thread safe and the setter methods should only be called in the context
- * where access to this object is guaranteed to be serialized.
- */
- class OperationContextReplMock : public OperationContextNoop {
- public:
- OperationContextReplMock();
- explicit OperationContextReplMock(unsigned int opNum);
- OperationContextReplMock(Client* client, unsigned int opNum);
- virtual ~OperationContextReplMock();
+/**
+ * Mock implementation of OperationContext that can be used with real instances of LockManager.
+ * Note this is not thread safe and the setter methods should only be called in the context
+ * where access to this object is guaranteed to be serialized.
+ */
+class OperationContextReplMock : public OperationContextNoop {
+public:
+ OperationContextReplMock();
+ explicit OperationContextReplMock(unsigned int opNum);
+ OperationContextReplMock(Client* client, unsigned int opNum);
+ virtual ~OperationContextReplMock();
- virtual void checkForInterrupt() override;
+ virtual void checkForInterrupt() override;
- virtual Status checkForInterruptNoAssert() override;
+ virtual Status checkForInterruptNoAssert() override;
- void setCheckForInterruptStatus(Status status);
+ void setCheckForInterruptStatus(Status status);
- virtual uint64_t getRemainingMaxTimeMicros() const override;
+ virtual uint64_t getRemainingMaxTimeMicros() const override;
- void setRemainingMaxTimeMicros(uint64_t micros);
+ void setRemainingMaxTimeMicros(uint64_t micros);
- void setReplicatedWrites(bool writesAreReplicated = true) override;
+ void setReplicatedWrites(bool writesAreReplicated = true) override;
- bool writesAreReplicated() const override;
+ bool writesAreReplicated() const override;
- private:
- Status _checkForInterruptStatus;
- uint64_t _maxTimeMicrosRemaining;
- bool _writesAreReplicated;
- };
+private:
+ Status _checkForInterruptStatus;
+ uint64_t _maxTimeMicrosRemaining;
+ bool _writesAreReplicated;
+};
} // namespace repl
} // namespace mongo
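
An illustrative test sketch using the mock above to simulate an interrupt:

    OperationContextReplMock txn;
    txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "killed"));
    ASSERT_EQUALS(ErrorCodes::Interrupted, txn.checkForInterruptNoAssert());
    txn.setCheckForInterruptStatus(Status::OK());
    ASSERT_OK(txn.checkForInterruptNoAssert());
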
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 26708ee8de7..2afa1b53c52 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -87,742 +87,679 @@
namespace mongo {
- using std::endl;
- using std::string;
- using std::stringstream;
+using std::endl;
+using std::string;
+using std::stringstream;
namespace repl {
- std::string rsOplogName = "local.oplog.rs";
- std::string masterSlaveOplogName = "local.oplog.$main";
- int OPLOG_VERSION = 2;
+std::string rsOplogName = "local.oplog.rs";
+std::string masterSlaveOplogName = "local.oplog.$main";
+int OPLOG_VERSION = 2;
namespace {
- // cached copies of these...so don't rename them, drop them, etc.!!!
- Database* _localDB = nullptr;
- Collection* _localOplogCollection = nullptr;
-
- // Synchronizes the section where a new Timestamp is generated and when it actually
- // appears in the oplog.
- stdx::mutex newOpMutex;
- stdx::condition_variable newTimestampNotifier;
-
- static std::string _oplogCollectionName;
-
- // so we can fail the same way
- void checkOplogInsert( StatusWith<RecordId> result ) {
- massert( 17322,
- str::stream() << "write to oplog failed: " << result.getStatus().toString(),
- result.isOK() );
- }
+// cached copies of these...so don't rename them, drop them, etc.!!!
+Database* _localDB = nullptr;
+Collection* _localOplogCollection = nullptr;
- /**
- * Allocates an optime for a new entry in the oplog, and updates the replication coordinator to
- * reflect that new optime. Returns the new optime and the correct value of the "h" field for
- * the new oplog entry.
- *
- * NOTE: From the time this function returns to the time that the new oplog entry is written
- * to the storage system, all errors must be considered fatal. This is because the this
- * function registers the new optime with the storage system and the replication coordinator,
- * and provides no facility to revert those registrations on rollback.
- */
- std::pair<OpTime, long long> getNextOpTime(OperationContext* txn,
- Collection* oplog,
- const char* ns,
- ReplicationCoordinator* replCoord,
- const char* opstr) {
- stdx::lock_guard<stdx::mutex> lk(newOpMutex);
- Timestamp ts = getNextGlobalTimestamp();
- newTimestampNotifier.notify_all();
-
- fassert(28560, oplog->getRecordStore()->oplogDiskLocRegister(txn, ts));
-
- long long hashNew = 0;
- long long term = 0;
-
- // Set hash and term if we're in replset mode, otherwise they remain 0 in master/slave.
- if (replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet) {
- // Current term. If we're not a replset of pv=1, it could be the default value (0) or
- // the last valid term before downgrade.
- term = ReplClientInfo::forClient(txn->getClient()).getTerm();
-
- hashNew = BackgroundSync::get()->getLastAppliedHash();
-
- // Check to make sure logOp() is legal at this point.
- if (*opstr == 'n') {
- // 'n' operations are always logged
- invariant(*ns == '\0');
- // 'n' operations do not advance the hash, since they are not rolled back
- }
- else {
- // Advance the hash
- hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
+// Synchronizes the section where a new Timestamp is generated and when it actually
+// appears in the oplog.
+stdx::mutex newOpMutex;
+stdx::condition_variable newTimestampNotifier;
- BackgroundSync::get()->setLastAppliedHash(hashNew);
- }
- }
+static std::string _oplogCollectionName;
- OpTime opTime(ts, term);
- replCoord->setMyLastOptime(opTime);
- return std::pair<OpTime,long long>(opTime, hashNew);
- }
+// so we can fail the same way
+void checkOplogInsert(StatusWith<RecordId> result) {
+ massert(17322,
+ str::stream() << "write to oplog failed: " << result.getStatus().toString(),
+ result.isOK());
+}
- /**
- * This allows us to stream the oplog entry directly into data region
- * main goal is to avoid copying the o portion
- * which can be very large
- * TODO: can have this build the entire doc
- */
- class OplogDocWriter : public DocWriter {
- public:
- OplogDocWriter( const BSONObj& frame, const BSONObj& oField )
- : _frame( frame ), _oField( oField ) {
+/**
+ * Allocates an optime for a new entry in the oplog, and updates the replication coordinator to
+ * reflect that new optime. Returns the new optime and the correct value of the "h" field for
+ * the new oplog entry.
+ *
+ * NOTE: From the time this function returns to the time that the new oplog entry is written
+ * to the storage system, all errors must be considered fatal. This is because this
+ * function registers the new optime with the storage system and the replication coordinator,
+ * and provides no facility to revert those registrations on rollback.
+ */
+std::pair<OpTime, long long> getNextOpTime(OperationContext* txn,
+ Collection* oplog,
+ const char* ns,
+ ReplicationCoordinator* replCoord,
+ const char* opstr) {
+ stdx::lock_guard<stdx::mutex> lk(newOpMutex);
+ Timestamp ts = getNextGlobalTimestamp();
+ newTimestampNotifier.notify_all();
+
+ fassert(28560, oplog->getRecordStore()->oplogDiskLocRegister(txn, ts));
+
+ long long hashNew = 0;
+ long long term = 0;
+
+ // Set hash and term if we're in replset mode, otherwise they remain 0 in master/slave.
+ if (replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet) {
+ // Current term. If we're not in a replset with pv=1, it could be the default value (0)
+ // or the last valid term before downgrade.
+ term = ReplClientInfo::forClient(txn->getClient()).getTerm();
+
+ hashNew = BackgroundSync::get()->getLastAppliedHash();
+
+ // Check to make sure logOp() is legal at this point.
+ if (*opstr == 'n') {
+ // 'n' operations are always logged
+ invariant(*ns == '\0');
+ // 'n' operations do not advance the hash, since they are not rolled back
+ } else {
+ // Advance the hash
+ hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
+
+ BackgroundSync::get()->setLastAppliedHash(hashNew);
}
+ }
- ~OplogDocWriter(){}
-
- void writeDocument( char* start ) const {
- char* buf = start;
-
- memcpy( buf, _frame.objdata(), _frame.objsize() - 1 ); // don't copy final EOO
+ OpTime opTime(ts, term);
+ replCoord->setMyLastOptime(opTime);
+ return std::pair<OpTime, long long>(opTime, hashNew);
+}
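    // Worked example of the hash recurrence above (illustrative values only):
    // with previous hash 5, ts.asLL() == 100, and member id 2,
    //   hashNew = (5 * 131 + 100) * 17 + 2 = 755 * 17 + 2 = 12837.
    // 'n' (no-op) entries skip this step, so they never perturb the rollback hash.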
- reinterpret_cast<int*>( buf )[0] = documentSize();
+/**
+ * This allows us to stream the oplog entry directly into the data region;
+ * the main goal is to avoid copying the "o" portion of the document,
+ * which can be very large.
+ * TODO: can have this build the entire doc
+ */
+class OplogDocWriter : public DocWriter {
+public:
+ OplogDocWriter(const BSONObj& frame, const BSONObj& oField) : _frame(frame), _oField(oField) {}
- buf += ( _frame.objsize() - 1 );
- buf[0] = (char)Object;
- buf[1] = 'o';
- buf[2] = 0;
- memcpy( buf+3, _oField.objdata(), _oField.objsize() );
- buf += 3 + _oField.objsize();
- buf[0] = EOO;
+ ~OplogDocWriter() {}
- verify( static_cast<size_t>( ( buf + 1 ) - start ) == documentSize() ); // DEV?
- }
+ void writeDocument(char* start) const {
+ char* buf = start;
- size_t documentSize() const {
- return _frame.objsize() + _oField.objsize() + 1 /* type */ + 2 /* "o" */;
- }
+ memcpy(buf, _frame.objdata(), _frame.objsize() - 1); // don't copy final EOO
- private:
- BSONObj _frame;
- BSONObj _oField;
- };
+ reinterpret_cast<int*>(buf)[0] = documentSize();
-} // namespace
+ buf += (_frame.objsize() - 1);
+ buf[0] = (char)Object;
+ buf[1] = 'o';
+ buf[2] = 0;
+ memcpy(buf + 3, _oField.objdata(), _oField.objsize());
+ buf += 3 + _oField.objsize();
+ buf[0] = EOO;
- void setOplogCollectionName() {
- if (getGlobalReplicationCoordinator()->getReplicationMode() ==
- ReplicationCoordinator::modeReplSet) {
- _oplogCollectionName = rsOplogName;
- }
- else {
- _oplogCollectionName = masterSlaveOplogName;
- }
+ verify(static_cast<size_t>((buf + 1) - start) == documentSize()); // DEV?
}
- /* we write to local.oplog.rs:
- { ts : ..., h: ..., v: ..., op: ..., etc }
- ts: an OpTime timestamp
- h: hash
- v: version
- op:
- "i" insert
- "u" update
- "d" delete
- "c" db cmd
- "db" declares presence of a database (ns is set to the db name + '.')
- "n" no op
-
- bb param:
- if not null, specifies a boolean to pass along to the other side as b: param.
- used for "justOne" or "upsert" flags on 'd', 'u'
-
- */
+ size_t documentSize() const {
+ return _frame.objsize() + _oField.objsize() + 1 /* type */ + 2 /* "o" */;
+ }
- void _logOp(OperationContext* txn,
- const char *opstr,
- const char *ns,
- const BSONObj& obj,
- BSONObj *o2,
- bool fromMigrate) {
- NamespaceString nss(ns);
- if (nss.db() == "local") {
- return;
- }
+private:
+ BSONObj _frame;
+ BSONObj _oField;
+};
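
The byte layout that writeDocument() produces, reconstructed from the code above as an annotation (not part of the source):

    // [int32 totalSize]   written over the frame's own length prefix
    // [frame fields]      _frame's bytes minus its trailing EOO
    // [0x03 'o' 0x00]     type byte (Object) plus the field name "o"
    // [oField bytes]      the embedded object, copied exactly once
    // [EOO]               closing byte of the combined document
    //
    // totalSize == _frame.objsize() + _oField.objsize() + 1 /* type */ + 2 /* "o" */
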
- if (nss.isSystemDotProfile()) {
- return;
- }
+} // namespace
- if (!getGlobalReplicationCoordinator()->isReplEnabled()) {
- return;
- }
+void setOplogCollectionName() {
+ if (getGlobalReplicationCoordinator()->getReplicationMode() ==
+ ReplicationCoordinator::modeReplSet) {
+ _oplogCollectionName = rsOplogName;
+ } else {
+ _oplogCollectionName = masterSlaveOplogName;
+ }
+}
+
+/* we write to local.oplog.rs:
+ { ts : ..., h: ..., v: ..., op: ..., etc }
+ ts: an OpTime timestamp
+ h: hash
+ v: version
+ op:
+ "i" insert
+ "u" update
+ "d" delete
+ "c" db cmd
+ "db" declares presence of a database (ns is set to the db name + '.')
+ "n" no op
+
+ bb param:
+ if not null, specifies a boolean to pass along to the other side as b: param.
+ used for "justOne" or "upsert" flags on 'd', 'u'
- if (!txn->writesAreReplicated()) {
- return;
- }
+*/
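
To make the format above concrete, a hypothetical entry for an update (all field values invented for illustration):

    // { ts: Timestamp(1431700000, 1), t: 2, h: NumberLong(8067115485000663558),
    //   v: 2, op: "u", ns: "test.users",
    //   b: true, o2: { _id: 1 }, o: { $set: { score: 5 } } }
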
- fassert(28626, txn->recoveryUnit());
+void _logOp(OperationContext* txn,
+ const char* opstr,
+ const char* ns,
+ const BSONObj& obj,
+ BSONObj* o2,
+ bool fromMigrate) {
+ NamespaceString nss(ns);
+ if (nss.db() == "local") {
+ return;
+ }
- Lock::DBLock lk(txn->lockState(), "local", MODE_IX);
+ if (nss.isSystemDotProfile()) {
+ return;
+ }
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (!getGlobalReplicationCoordinator()->isReplEnabled()) {
+ return;
+ }
- if (ns[0] && replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet &&
- !replCoord->canAcceptWritesFor(nss)) {
- severe() << "logOp() but can't accept write to collection " << ns;
- fassertFailed(17405);
- }
- Lock::CollectionLock lk2(txn->lockState(), _oplogCollectionName, MODE_IX);
+ if (!txn->writesAreReplicated()) {
+ return;
+ }
+ fassert(28626, txn->recoveryUnit());
- if (_localOplogCollection == nullptr) {
- OldClientContext ctx(txn, _oplogCollectionName);
- _localDB = ctx.db();
- invariant(_localDB);
- _localOplogCollection = _localDB->getCollection(_oplogCollectionName);
- massert(13347,
- "the oplog collection " + _oplogCollectionName +
- " missing. did you drop it? if so, restart the server",
- _localOplogCollection);
- }
+ Lock::DBLock lk(txn->lockState(), "local", MODE_IX);
- std::pair<OpTime, long long> slot = getNextOpTime(txn,
- _localOplogCollection,
- ns,
- replCoord,
- opstr);
-
- /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
- instead we do a single copy to the destination position in the memory mapped file.
- */
-
- BSONObjBuilder b(256);
- b.append("ts", slot.first.getTimestamp());
- b.append("t", slot.first.getTerm());
- b.append("h", slot.second);
- b.append("v", OPLOG_VERSION);
- b.append("op", opstr);
- b.append("ns", ns);
- if (fromMigrate) {
- b.appendBool("fromMigrate", true);
- }
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if ( o2 ) {
- b.append("o2", *o2);
- }
- BSONObj partial = b.done();
+ if (ns[0] && replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet &&
+ !replCoord->canAcceptWritesFor(nss)) {
+ severe() << "logOp() but can't accept write to collection " << ns;
+ fassertFailed(17405);
+ }
+ Lock::CollectionLock lk2(txn->lockState(), _oplogCollectionName, MODE_IX);
- OplogDocWriter writer( partial, obj );
- checkOplogInsert( _localOplogCollection->insertDocument( txn, &writer, false ) );
- ReplClientInfo::forClient(txn->getClient()).setLastOp( slot.first );
+ if (_localOplogCollection == nullptr) {
+ OldClientContext ctx(txn, _oplogCollectionName);
+ _localDB = ctx.db();
+ invariant(_localDB);
+ _localOplogCollection = _localDB->getCollection(_oplogCollectionName);
+ massert(13347,
+ "the oplog collection " + _oplogCollectionName +
+ " missing. did you drop it? if so, restart the server",
+ _localOplogCollection);
}
- OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
-
- OpTime lastOptime;
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- lastOptime = replCoord->getMyLastOptime();
- invariant(!ops.empty());
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lk(txn->lockState(), "local", MODE_X);
-
- if ( _localOplogCollection == 0 ) {
- OldClientContext ctx(txn, rsOplogName);
-
- _localDB = ctx.db();
- verify( _localDB );
- _localOplogCollection = _localDB->getCollection(rsOplogName);
- massert(13389,
- "local.oplog.rs missing. did you drop it? if so restart server",
- _localOplogCollection);
- }
+ std::pair<OpTime, long long> slot =
+ getNextOpTime(txn, _localOplogCollection, ns, replCoord, opstr);
- OldClientContext ctx(txn, rsOplogName, _localDB);
- WriteUnitOfWork wunit(txn);
+ /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
+ instead we do a single copy to the destination position in the memory mapped file.
+ */
- for (std::deque<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- const BSONObj& op = *it;
- const OpTime optime = extractOpTime(op);
+ BSONObjBuilder b(256);
+ b.append("ts", slot.first.getTimestamp());
+ b.append("t", slot.first.getTerm());
+ b.append("h", slot.second);
+ b.append("v", OPLOG_VERSION);
+ b.append("op", opstr);
+ b.append("ns", ns);
+ if (fromMigrate) {
+ b.appendBool("fromMigrate", true);
+ }
- checkOplogInsert(_localOplogCollection->insertDocument(txn, op, false));
+ if (o2) {
+ b.append("o2", *o2);
+ }
+ BSONObj partial = b.done();
- if (!(lastOptime < optime)) {
- severe() << "replication oplog stream went back in time. "
- "previous timestamp: " << lastOptime << " newest timestamp: " << optime
- << ". Op being applied: " << op;
- fassertFailedNoTrace(18905);
- }
- lastOptime = optime;
- }
- wunit.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "writeOps", _localOplogCollection->ns().ns());
+ OplogDocWriter writer(partial, obj);
+ checkOplogInsert(_localOplogCollection->insertDocument(txn, &writer, false));
- BackgroundSync* bgsync = BackgroundSync::get();
- // Keep this up-to-date, in case we step up to primary.
- long long hash = ops.back()["h"].numberLong();
- bgsync->setLastAppliedHash(hash);
+ ReplClientInfo::forClient(txn->getClient()).setLastOp(slot.first);
+}
- return lastOptime;
- }
+OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- void createOplog(OperationContext* txn) {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
+ OpTime lastOptime;
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ lastOptime = replCoord->getMyLastOptime();
+ invariant(!ops.empty());
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_X);
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- bool rs = !replSettings.replSet.empty();
+ if (_localOplogCollection == 0) {
+ OldClientContext ctx(txn, rsOplogName);
- OldClientContext ctx(txn, _oplogCollectionName);
- Collection* collection = ctx.db()->getCollection( _oplogCollectionName );
+ _localDB = ctx.db();
+ verify(_localDB);
+ _localOplogCollection = _localDB->getCollection(rsOplogName);
+ massert(13389,
+ "local.oplog.rs missing. did you drop it? if so restart server",
+ _localOplogCollection);
+ }
- if ( collection ) {
+ OldClientContext ctx(txn, rsOplogName, _localDB);
+ WriteUnitOfWork wunit(txn);
- if (replSettings.oplogSize != 0) {
- const CollectionOptions oplogOpts =
- collection->getCatalogEntry()->getCollectionOptions(txn);
+ for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ const BSONObj& op = *it;
+ const OpTime optime = extractOpTime(op);
- int o = (int)(oplogOpts.cappedSize / ( 1024 * 1024 ) );
- int n = (int)(replSettings.oplogSize / (1024 * 1024));
- if ( n != o ) {
- stringstream ss;
- ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog";
- log() << ss.str() << endl;
- throw UserException( 13257 , ss.str() );
- }
- }
+ checkOplogInsert(_localOplogCollection->insertDocument(txn, op, false));
- if ( !rs )
- initTimestampFromOplog(txn, _oplogCollectionName);
- return;
+ if (!(lastOptime < optime)) {
+ severe() << "replication oplog stream went back in time. "
+ "previous timestamp: " << lastOptime << " newest timestamp: " << optime
+ << ". Op being applied: " << op;
+ fassertFailedNoTrace(18905);
+ }
+ lastOptime = optime;
}
-
- /* create an oplog collection, if it doesn't yet exist. */
- long long sz = 0;
- if ( replSettings.oplogSize != 0 ) {
- sz = replSettings.oplogSize;
+ wunit.commit();
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "writeOps", _localOplogCollection->ns().ns());
+
+ BackgroundSync* bgsync = BackgroundSync::get();
+ // Keep this up-to-date, in case we step up to primary.
+ long long hash = ops.back()["h"].numberLong();
+ bgsync->setLastAppliedHash(hash);
+
+ return lastOptime;
+}
+
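A minimal caller-side sketch of writeOpsToOplog (the batch contents are hypothetical; note the function fasserts if timestamps run backwards):

    // Hypothetical usage (fetchedOps stands in for ops pulled from the sync source):
    std::deque<BSONObj> batch = fetchedOps;  // must be non-empty; see invariant above
    OpTime lastWritten = writeOpsToOplog(txn, batch);
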
+void createOplog(OperationContext* txn) {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ bool rs = !replSettings.replSet.empty();
+
+ OldClientContext ctx(txn, _oplogCollectionName);
+ Collection* collection = ctx.db()->getCollection(_oplogCollectionName);
+
+ if (collection) {
+ if (replSettings.oplogSize != 0) {
+ const CollectionOptions oplogOpts =
+ collection->getCatalogEntry()->getCollectionOptions(txn);
+
+ int o = (int)(oplogOpts.cappedSize / (1024 * 1024));
+ int n = (int)(replSettings.oplogSize / (1024 * 1024));
+ if (n != o) {
+ stringstream ss;
+ ss << "cmdline oplogsize (" << n << ") different than existing (" << o
+ << ") see: http://dochub.mongodb.org/core/increase-oplog";
+ log() << ss.str() << endl;
+ throw UserException(13257, ss.str());
+ }
}
- else {
- /* not specified. pick a default size */
- sz = 50LL * 1024LL * 1024LL;
- if ( sizeof(int *) >= 8 ) {
+
+ if (!rs)
+ initTimestampFromOplog(txn, _oplogCollectionName);
+ return;
+ }
+
+ /* create an oplog collection, if it doesn't yet exist. */
+ long long sz = 0;
+ if (replSettings.oplogSize != 0) {
+ sz = replSettings.oplogSize;
+ } else {
+ /* not specified. pick a default size */
+ sz = 50LL * 1024LL * 1024LL;
+ if (sizeof(int*) >= 8) {
#if defined(__APPLE__)
- // typically these are desktops (dev machines), so keep it smallish
- sz = (256-64) * 1024 * 1024;
+ // typically these are desktops (dev machines), so keep it smallish
+ sz = (256 - 64) * 1024 * 1024;
#else
- sz = 990LL * 1024 * 1024;
- double free =
- File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
- long long fivePct = static_cast<long long>( free * 0.05 );
- if ( fivePct > sz )
- sz = fivePct;
- // we use 5% of free space up to 50GB (1TB free)
- static long long upperBound = 50LL * 1024 * 1024 * 1024;
- if (fivePct > upperBound)
- sz = upperBound;
+ sz = 990LL * 1024 * 1024;
+ double free = File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
+ long long fivePct = static_cast<long long>(free * 0.05);
+ if (fivePct > sz)
+ sz = fivePct;
+ // we use 5% of free space up to 50GB (1TB free)
+ static long long upperBound = 50LL * 1024 * 1024 * 1024;
+ if (fivePct > upperBound)
+ sz = upperBound;
#endif
- }
}
-
- log() << "******" << endl;
- log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." << endl;
-
- CollectionOptions options;
- options.capped = true;
- options.cappedSize = sz;
- options.autoIndexId = CollectionOptions::NO;
-
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- WriteUnitOfWork uow( txn );
- invariant(ctx.db()->createCollection(txn, _oplogCollectionName, options));
- if( !rs )
- getGlobalServiceContext()->getOpObserver()->onOpMessage(txn, BSONObj());
- uow.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createCollection", _oplogCollectionName);
-
- /* sync here so we don't get any surprising lag later when we try to sync */
- StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
- storageEngine->flushAllFiles(true);
- log() << "******" << endl;
}
- // -------------------------------------
+ log() << "******" << endl;
+ log() << "creating replication oplog of size: " << (int)(sz / (1024 * 1024)) << "MB..." << endl;
-namespace {
- NamespaceString parseNs(const string& ns, const BSONObj& cmdObj) {
- BSONElement first = cmdObj.firstElement();
- uassert(28635,
- "no collection name specified",
- first.canonicalType() == canonicalizeBSONType(mongo::String)
- && first.valuestrsize() > 0);
- std::string coll = first.valuestr();
- return NamespaceString(NamespaceString(ns).db().toString(), coll);
+ CollectionOptions options;
+ options.capped = true;
+ options.cappedSize = sz;
+ options.autoIndexId = CollectionOptions::NO;
+
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork uow(txn);
+ invariant(ctx.db()->createCollection(txn, _oplogCollectionName, options));
+ if (!rs)
+ getGlobalServiceContext()->getOpObserver()->onOpMessage(txn, BSONObj());
+ uow.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createCollection", _oplogCollectionName);
- using OpApplyFn = stdx::function<Status (OperationContext*, const char*, BSONObj&)>;
+ /* sync here so we don't get any surprising lag later when we try to sync */
+ StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
+ storageEngine->flushAllFiles(true);
+ log() << "******" << endl;
+}
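
To make the default-sizing branch concrete (numbers are a worked illustration of the code above): on a 64-bit non-macOS host with 1 TB free under dbpath, fivePct is about 51.2 GB, which beats the 990 MB floor but exceeds upperBound, so sz clamps to 50 GB; with 10 GB free, fivePct is about 0.5 GB, below the floor, so sz stays at 990 MB.
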
- struct ApplyOpMetadata {
- OpApplyFn applyFunc;
- std::set<ErrorCodes::Error> acceptableErrors;
+// -------------------------------------
- ApplyOpMetadata(OpApplyFn fun) {
- applyFunc = fun;
- }
+namespace {
+NamespaceString parseNs(const string& ns, const BSONObj& cmdObj) {
+ BSONElement first = cmdObj.firstElement();
+ uassert(28635,
+ "no collection name specified",
+ first.canonicalType() == canonicalizeBSONType(mongo::String) &&
+ first.valuestrsize() > 0);
+ std::string coll = first.valuestr();
+ return NamespaceString(NamespaceString(ns).db().toString(), coll);
+}
+
+using OpApplyFn = stdx::function<Status(OperationContext*, const char*, BSONObj&)>;
+
+struct ApplyOpMetadata {
+ OpApplyFn applyFunc;
+ std::set<ErrorCodes::Error> acceptableErrors;
+
+ ApplyOpMetadata(OpApplyFn fun) {
+ applyFunc = fun;
+ }
- ApplyOpMetadata(OpApplyFn fun, std::set<ErrorCodes::Error> theAcceptableErrors) {
- applyFunc = fun;
- acceptableErrors = theAcceptableErrors;
+ ApplyOpMetadata(OpApplyFn fun, std::set<ErrorCodes::Error> theAcceptableErrors) {
+ applyFunc = fun;
+ acceptableErrors = theAcceptableErrors;
+ }
+};
+
+std::map<std::string, ApplyOpMetadata> opsMap = {
+ {"create",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd)
+ -> Status { return createCollection(txn, NamespaceString(ns).db().toString(), cmd); },
+ {ErrorCodes::NamespaceExists}}},
+ {"collMod",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return collMod(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
+ }}},
+ {"dropDatabase",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd)
+ -> Status { return dropDatabase(txn, NamespaceString(ns).db().toString()); },
+ {ErrorCodes::DatabaseNotFound}}},
+ {"drop",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return dropCollection(txn, parseNs(ns, cmd), resultWeDontCareAbout);
+ },
+ // IllegalOperation is necessary because in 3.0 we replicate drops of system.profile
+ // TODO(dannenberg) remove IllegalOperation once we no longer need 3.0 compatibility
+ {ErrorCodes::NamespaceNotFound, ErrorCodes::IllegalOperation}}},
+ // deleteIndex(es) is deprecated but still works as of April 10, 2015
+ {"deleteIndex",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
+ },
+ {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}}},
+ {"deleteIndexes",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
+ },
+ {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}}},
+ {"dropIndex",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
+ },
+ {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}}},
+ {"dropIndexes",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
+ },
+ {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}}},
+ {"renameCollection",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ return renameCollection(txn,
+ NamespaceString(cmd.firstElement().valuestrsafe()),
+ NamespaceString(cmd["to"].valuestrsafe()),
+ cmd["stayTemp"].trueValue(),
+ cmd["dropTarget"].trueValue());
+ },
+ {ErrorCodes::NamespaceNotFound, ErrorCodes::NamespaceExists}}},
+ {"applyOps",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
+ BSONObjBuilder resultWeDontCareAbout;
+ return applyOps(txn, nsToDatabase(ns), cmd, &resultWeDontCareAbout);
+ },
+ {ErrorCodes::UnknownError}}},
+ {"convertToCapped",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd)
+ -> Status { return convertToCapped(txn, parseNs(ns, cmd), cmd["size"].number()); }}},
+ {"emptycapped",
+ {[](OperationContext* txn, const char* ns, BSONObj& cmd)
+ -> Status { return emptyCapped(txn, parseNs(ns, cmd)); }}},
+};
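
For reviewers tracing the table's shape: each entry pairs an apply function with the error codes that applyCommand_inlock (below) treats as benign. A hypothetical additional entry would follow the same pattern (the command name and error set here are invented):

    {"someNewCommand",
     {[](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
          BSONObjBuilder resultWeDontCareAbout;
          return Status::OK();  // stand-in for the real command body
      },
      {ErrorCodes::NamespaceNotFound}}},
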
+
+} // namespace
+
+// @return failure status if an update should have happened and the document does not exist.
+// See replset initial sync code.
+Status applyOperation_inlock(OperationContext* txn,
+ Database* db,
+ const BSONObj& op,
+ bool convertUpdateToUpsert) {
+ LOG(3) << "applying op: " << op << endl;
+
+ OpCounters* opCounters = txn->writesAreReplicated() ? &globalOpCounters : &replOpCounters;
+
+ const char* names[] = {"o", "ns", "op", "b", "o2"};
+ BSONElement fields[5];
+ op.getFields(5, names, fields);
+ BSONElement& fieldO = fields[0];
+ BSONElement& fieldNs = fields[1];
+ BSONElement& fieldOp = fields[2];
+ BSONElement& fieldB = fields[3];
+ BSONElement& fieldO2 = fields[4];
+
+ BSONObj o;
+ if (fieldO.isABSONObj())
+ o = fieldO.embeddedObject();
+
+ const char* ns = fieldNs.valuestrsafe();
+
+ BSONObj o2;
+ if (fieldO2.isABSONObj())
+ o2 = fieldO2.Obj();
+
+ bool valueB = fieldB.booleanSafe();
+
+ if (nsIsFull(ns)) {
+ if (supportsDocLocking()) {
+ // WiredTiger and other engines that support document-level locking require
+ // MODE_IX, since the applier threads driving this allow writes to the same
+ // collection from any thread.
+ invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_IX));
+ } else {
+ // mmapV1 ensures that all operations to the same collection are executed from
+ // the same worker thread, so it takes an exclusive lock (MODE_X)
+ invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_X));
}
- };
-
- std::map<std::string, ApplyOpMetadata> opsMap = {
- {"create",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- return createCollection(txn, NamespaceString(ns).db().toString(), cmd);
- },
- {ErrorCodes::NamespaceExists}
- }
- },
- {"collMod",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return collMod(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
- }
- }
- },
- {"dropDatabase",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- return dropDatabase(txn, NamespaceString(ns).db().toString());
- },
- {ErrorCodes::DatabaseNotFound}
- }
- },
- {"drop",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return dropCollection(txn, parseNs(ns, cmd), resultWeDontCareAbout);
- },
- // IllegalOperation is necessary because in 3.0 we replicate drops of system.profile
- // TODO(dannenberg) remove IllegalOperation once we no longer need 3.0 compatibility
- {ErrorCodes::NamespaceNotFound, ErrorCodes::IllegalOperation}
- }
- },
- // deleteIndex(es) is deprecated but still works as of April 10, 2015
- {"deleteIndex",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
- },
- {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}
- }
- },
- {"deleteIndexes",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
- },
- {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}
- }
- },
- {"dropIndex",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
- },
- {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}
- }
- },
- {"dropIndexes",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return dropIndexes(txn, parseNs(ns, cmd), cmd, &resultWeDontCareAbout);
- },
- {ErrorCodes::NamespaceNotFound, ErrorCodes::IndexNotFound}
- }
- },
- {"renameCollection",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- return renameCollection(txn,
- NamespaceString(cmd.firstElement().valuestrsafe()),
- NamespaceString(cmd["to"].valuestrsafe()),
- cmd["stayTemp"].trueValue(),
- cmd["dropTarget"].trueValue());
- },
- {ErrorCodes::NamespaceNotFound, ErrorCodes::NamespaceExists}
- }
- },
- {"applyOps",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- BSONObjBuilder resultWeDontCareAbout;
- return applyOps(txn, nsToDatabase(ns), cmd, &resultWeDontCareAbout);
- },
- {ErrorCodes::UnknownError}
- }
- },
- {"convertToCapped",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- return convertToCapped(txn,
- parseNs(ns, cmd),
- cmd["size"].number());
- }
- }
- },
- {"emptycapped",
- {
- [](OperationContext* txn, const char* ns, BSONObj& cmd) -> Status {
- return emptyCapped(txn, parseNs(ns, cmd));
- }
- }
- },
- };
-
-} // namespace
-
- // @return failure status if an update should have happened and the document DNE.
- // See replset initial sync code.
- Status applyOperation_inlock(OperationContext* txn,
- Database* db,
- const BSONObj& op,
- bool convertUpdateToUpsert) {
- LOG(3) << "applying op: " << op << endl;
-
- OpCounters * opCounters = txn->writesAreReplicated() ? &globalOpCounters : &replOpCounters;
-
- const char *names[] = { "o", "ns", "op", "b", "o2" };
- BSONElement fields[5];
- op.getFields(5, names, fields);
- BSONElement& fieldO = fields[0];
- BSONElement& fieldNs = fields[1];
- BSONElement& fieldOp = fields[2];
- BSONElement& fieldB = fields[3];
- BSONElement& fieldO2 = fields[4];
-
- BSONObj o;
- if( fieldO.isABSONObj() )
- o = fieldO.embeddedObject();
-
- const char *ns = fieldNs.valuestrsafe();
-
- BSONObj o2;
- if (fieldO2.isABSONObj())
- o2 = fieldO2.Obj();
-
- bool valueB = fieldB.booleanSafe();
-
- if (nsIsFull(ns)) {
- if (supportsDocLocking()) {
- // WiredTiger, and others requires MODE_IX since the applier threads driving
- // this allow writes to the same collection on any thread.
- invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_IX));
+ }
+ Collection* collection = db->getCollection(ns);
+ IndexCatalog* indexCatalog = collection == nullptr ? nullptr : collection->getIndexCatalog();
+
+ // operation type -- see logOp() comments for types
+ const char* opType = fieldOp.valuestrsafe();
+ invariant(*opType != 'c'); // commands are processed in applyCommand_inlock()
+
+ if (*opType == 'i') {
+ opCounters->gotInsert();
+
+ const char* p = strchr(ns, '.');
+ if (p && nsToCollectionSubstring(p) == "system.indexes") {
+ if (o["background"].trueValue()) {
+ IndexBuilder* builder = new IndexBuilder(o);
+ // This spawns a new thread and returns immediately.
+ builder->go();
+ // Wait for thread to start and register itself
+ Lock::TempRelease release(txn->lockState());
+ IndexBuilder::waitForBgIndexStarting();
} else {
- // mmapV1 ensures that all operations to the same collection are executed from
- // the same worker thread, so it takes an exclusive lock (MODE_X)
- invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_X));
+ IndexBuilder builder(o);
+ Status status = builder.buildInForeground(txn, db);
+ uassertStatusOK(status);
}
- }
- Collection* collection = db->getCollection( ns );
- IndexCatalog* indexCatalog = collection == nullptr ? nullptr : collection->getIndexCatalog();
-
- // operation type -- see logOp() comments for types
- const char *opType = fieldOp.valuestrsafe();
- invariant(*opType != 'c'); // commands are processed in applyCommand_inlock()
-
- if ( *opType == 'i' ) {
- opCounters->gotInsert();
-
- const char *p = strchr(ns, '.');
- if ( p && nsToCollectionSubstring( p ) == "system.indexes" ) {
- if (o["background"].trueValue()) {
- IndexBuilder* builder = new IndexBuilder(o);
- // This spawns a new thread and returns immediately.
- builder->go();
- // Wait for thread to start and register itself
- Lock::TempRelease release(txn->lockState());
- IndexBuilder::waitForBgIndexStarting();
- }
- else {
- IndexBuilder builder(o);
- Status status = builder.buildInForeground(txn, db);
- uassertStatusOK(status);
- }
- }
- else {
- // do upserts for inserts as we might get replayed more than once
- OpDebug debug;
-
- uassert(ErrorCodes::NamespaceNotFound, str::stream() <<
- "Failed to apply insert due to missing collection: " << op.toString(),
- collection);
-
- // No _id.
- // This indicates an issue with the upstream server:
- // The oplog entry is corrupted; or
- // The version of the upstream server is obsolete.
- uassert(ErrorCodes::NoSuchKey, str::stream() <<
- "Failed to apply insert due to missing _id: " << op.toString(),
- o.hasField("_id"));
-
- // TODO: It may be better to do an insert here, and then catch the duplicate
- // key exception and do update then. Very few upserts will not be inserts...
- BSONObjBuilder b;
- b.append(o.getField("_id"));
-
- const NamespaceString requestNs(ns);
- UpdateRequest request(requestNs);
-
- request.setQuery(b.done());
- request.setUpdates(o);
- request.setUpsert();
- UpdateLifecycleImpl updateLifecycle(true, requestNs);
- request.setLifecycle(&updateLifecycle);
-
- update(txn, db, request, &debug);
- }
- }
- else if ( *opType == 'u' ) {
- opCounters->gotUpdate();
-
+ } else {
+ // do upserts for inserts as we might get replayed more than once
OpDebug debug;
- BSONObj updateCriteria = o2;
- const bool upsert = valueB || convertUpdateToUpsert;
- uassert(ErrorCodes::NoSuchKey, str::stream() <<
- "Failed to apply update due to missing _id: " << op.toString(),
- updateCriteria.hasField("_id"));
+ uassert(ErrorCodes::NamespaceNotFound,
+ str::stream() << "Failed to apply insert due to missing collection: "
+ << op.toString(),
+ collection);
+
+ // No _id.
+ // This indicates an issue with the upstream server:
+ // The oplog entry is corrupted; or
+ // The version of the upstream server is obsolete.
+ uassert(ErrorCodes::NoSuchKey,
+ str::stream() << "Failed to apply insert due to missing _id: " << op.toString(),
+ o.hasField("_id"));
+
+ // TODO: It may be better to do an insert here, and then catch the duplicate
+ // key exception and do update then. Very few upserts will not be inserts...
+ BSONObjBuilder b;
+ b.append(o.getField("_id"));
const NamespaceString requestNs(ns);
UpdateRequest request(requestNs);
- request.setQuery(updateCriteria);
+ request.setQuery(b.done());
request.setUpdates(o);
- request.setUpsert(upsert);
+ request.setUpsert();
UpdateLifecycleImpl updateLifecycle(true, requestNs);
request.setLifecycle(&updateLifecycle);
- UpdateResult ur = update(txn, db, request, &debug);
-
- if( ur.numMatched == 0 ) {
- if( ur.modifiers ) {
- if( updateCriteria.nFields() == 1 ) {
- // was a simple { _id : ... } update criteria
- string msg = str::stream() << "failed to apply update: " << op.toString();
- error() << msg;
- return Status(ErrorCodes::OperationFailed, msg);
- }
- // Need to check to see if it isn't present so we can exit early with a
- // failure. Note that adds some overhead for this extra check in some cases,
- // such as an updateCriteria
- // of the form
- // { _id:..., { x : {$size:...} }
- // thus this is not ideal.
- if (collection == NULL ||
- (indexCatalog->haveIdIndex(txn) &&
- Helpers::findById(txn, collection, updateCriteria).isNull()) ||
- // capped collections won't have an _id index
- (!indexCatalog->haveIdIndex(txn) &&
- Helpers::findOne(txn, collection, updateCriteria, false).isNull())) {
- string msg = str::stream() << "couldn't find doc: " << op.toString();
- error() << msg;
- return Status(ErrorCodes::OperationFailed, msg);
- }
-
- // Otherwise, it's present; zero objects were updated because of additional specifiers
- // in the query for idempotence
+ update(txn, db, request, &debug);
+ }
+ } else if (*opType == 'u') {
+ opCounters->gotUpdate();
+
+ OpDebug debug;
+ BSONObj updateCriteria = o2;
+ const bool upsert = valueB || convertUpdateToUpsert;
+
+ uassert(ErrorCodes::NoSuchKey,
+ str::stream() << "Failed to apply update due to missing _id: " << op.toString(),
+ updateCriteria.hasField("_id"));
+
+ const NamespaceString requestNs(ns);
+ UpdateRequest request(requestNs);
+
+ request.setQuery(updateCriteria);
+ request.setUpdates(o);
+ request.setUpsert(upsert);
+ UpdateLifecycleImpl updateLifecycle(true, requestNs);
+ request.setLifecycle(&updateLifecycle);
+
+ UpdateResult ur = update(txn, db, request, &debug);
+
+ if (ur.numMatched == 0) {
+ if (ur.modifiers) {
+ if (updateCriteria.nFields() == 1) {
+ // was a simple { _id : ... } update criteria
+ string msg = str::stream() << "failed to apply update: " << op.toString();
+ error() << msg;
+ return Status(ErrorCodes::OperationFailed, msg);
}
- else {
- // this could happen benignly on an oplog duplicate replay of an upsert
- // (because we are idempotent),
- // if an regular non-mod update fails the item is (presumably) missing.
- if( !upsert ) {
- string msg = str::stream() << "update of non-mod failed: " << op.toString();
- error() << msg;
- return Status(ErrorCodes::OperationFailed, msg);
- }
+ // Need to check whether the document is absent so we can exit early with a
+ // failure. Note that this adds some overhead for the extra check in some
+ // cases, such as an updateCriteria of the form
+ // { _id: ..., x: { $size: ... } }
+ // thus this is not ideal.
+ if (collection == NULL ||
+ (indexCatalog->haveIdIndex(txn) &&
+ Helpers::findById(txn, collection, updateCriteria).isNull()) ||
+ // capped collections won't have an _id index
+ (!indexCatalog->haveIdIndex(txn) &&
+ Helpers::findOne(txn, collection, updateCriteria, false).isNull())) {
+ string msg = str::stream() << "couldn't find doc: " << op.toString();
+ error() << msg;
+ return Status(ErrorCodes::OperationFailed, msg);
}
- }
- }
- else if ( *opType == 'd' ) {
- opCounters->gotDelete();
-
- uassert(ErrorCodes::NoSuchKey, str::stream() <<
- "Failed to apply delete due to missing _id: " << op.toString(),
- o.hasField("_id"));
- if (opType[1] == 0) {
- deleteObjects(txn, db, ns, o, PlanExecutor::YIELD_MANUAL, /*justOne*/ valueB);
+ // Otherwise, it's present; zero objects were updated because of additional specifiers
+ // in the query for idempotence
+ } else {
+ // This could happen benignly on a duplicate oplog replay of an upsert
+ // (because we are idempotent); if a regular non-mod update fails, the item
+ // is (presumably) missing.
+ if (!upsert) {
+ string msg = str::stream() << "update of non-mod failed: " << op.toString();
+ error() << msg;
+ return Status(ErrorCodes::OperationFailed, msg);
+ }
}
- else
- verify( opType[1] == 'b' ); // "db" advertisement
- }
- else if ( *opType == 'n' ) {
- // no op
}
- else {
- throw MsgAssertionException( 14825 , ErrorMsg("error in applyOperation : unknown opType ", *opType) );
- }
-
- // AuthorizationManager's logOp method registers a RecoveryUnit::Change
- // and to do so we need to have begun a UnitOfWork
- WriteUnitOfWork wuow(txn);
- getGlobalAuthorizationManager()->logOp(
- txn,
- opType,
- ns,
- o,
- fieldO2.isABSONObj() ? &o2 : NULL);
- wuow.commit();
-
- return Status::OK();
+ } else if (*opType == 'd') {
+ opCounters->gotDelete();
+
+ uassert(ErrorCodes::NoSuchKey,
+ str::stream() << "Failed to apply delete due to missing _id: " << op.toString(),
+ o.hasField("_id"));
+
+ if (opType[1] == 0) {
+ deleteObjects(txn, db, ns, o, PlanExecutor::YIELD_MANUAL, /*justOne*/ valueB);
+ } else
+ verify(opType[1] == 'b'); // "db" advertisement
+ } else if (*opType == 'n') {
+ // no op
+ } else {
+ throw MsgAssertionException(14825,
+ ErrorMsg("error in applyOperation : unknown opType ", *opType));
}
- Status applyCommand_inlock(OperationContext* txn, const BSONObj& op) {
- const char *names[] = { "o", "ns", "op" };
- BSONElement fields[3];
- op.getFields(3, names, fields);
- BSONElement& fieldO = fields[0];
- BSONElement& fieldNs = fields[1];
- BSONElement& fieldOp = fields[2];
-
- const char* opType = fieldOp.valuestrsafe();
- invariant(*opType == 'c'); // only commands are processed here
+ // AuthorizationManager's logOp method registers a RecoveryUnit::Change
+ // and to do so we need to have begun a UnitOfWork
+ WriteUnitOfWork wuow(txn);
+ getGlobalAuthorizationManager()->logOp(txn, opType, ns, o, fieldO2.isABSONObj() ? &o2 : NULL);
+ wuow.commit();
+
+ return Status::OK();
+}
+
+Status applyCommand_inlock(OperationContext* txn, const BSONObj& op) {
+ const char* names[] = {"o", "ns", "op"};
+ BSONElement fields[3];
+ op.getFields(3, names, fields);
+ BSONElement& fieldO = fields[0];
+ BSONElement& fieldNs = fields[1];
+ BSONElement& fieldOp = fields[2];
+
+ const char* opType = fieldOp.valuestrsafe();
+ invariant(*opType == 'c'); // only commands are processed here
+
+ BSONObj o;
+ if (fieldO.isABSONObj()) {
+ o = fieldO.embeddedObject();
+ }
- BSONObj o;
- if (fieldO.isABSONObj()) {
- o = fieldO.embeddedObject();
- }
+ const char* ns = fieldNs.valuestrsafe();
- const char* ns = fieldNs.valuestrsafe();
+ // Applying commands in repl is done under Global W-lock, so it is safe to not
+ // perform the current DB checks after reacquiring the lock.
+ invariant(txn->lockState()->isW());
- // Applying commands in repl is done under Global W-lock, so it is safe to not
- // perform the current DB checks after reacquiring the lock.
- invariant(txn->lockState()->isW());
-
- bool done = false;
+ bool done = false;
- while (!done) {
- ApplyOpMetadata curOpToApply = opsMap.find(o.firstElementFieldName())->second;
- Status status = Status::OK();
- try {
- status = curOpToApply.applyFunc(txn, ns, o);
- }
- catch (...) {
- status = exceptionToStatus();
- }
- switch (status.code()) {
+ while (!done) {
+ ApplyOpMetadata curOpToApply = opsMap.find(o.firstElementFieldName())->second;
+ Status status = Status::OK();
+ try {
+ status = curOpToApply.applyFunc(txn, ns, o);
+ } catch (...) {
+ status = exceptionToStatus();
+ }
+ switch (status.code()) {
case ErrorCodes::WriteConflict: {
// Need to throw this up to a higher level where it will be caught and the
// operation retried.
@@ -848,69 +785,65 @@ namespace {
if (_oplogCollectionName == masterSlaveOplogName) {
error() << "Failed command " << o << " on " << nsToDatabaseSubstring(ns)
<< " with status " << status << " during oplog application";
- }
- else if (curOpToApply.acceptableErrors.find(status.code())
- == curOpToApply.acceptableErrors.end()) {
+ } else if (curOpToApply.acceptableErrors.find(status.code()) ==
+ curOpToApply.acceptableErrors.end()) {
error() << "Failed command " << o << " on " << nsToDatabaseSubstring(ns)
<< " with status " << status << " during oplog application";
return status;
}
- // fallthrough
+ // fallthrough
case ErrorCodes::OK:
done = true;
break;
- }
}
-
- // AuthorizationManager's logOp method registers a RecoveryUnit::Change
- // and to do so we need to have begun a UnitOfWork
- WriteUnitOfWork wuow(txn);
- getGlobalAuthorizationManager()->logOp(txn, opType, ns, o, nullptr);
- wuow.commit();
-
- return Status::OK();
}
- void waitUpToOneSecondForTimestampChange(const Timestamp& referenceTime) {
- stdx::unique_lock<stdx::mutex> lk(newOpMutex);
+ // AuthorizationManager's logOp method registers a RecoveryUnit::Change
+ // and to do so we need to have begun a UnitOfWork
+ WriteUnitOfWork wuow(txn);
+ getGlobalAuthorizationManager()->logOp(txn, opType, ns, o, nullptr);
+ wuow.commit();
- while (referenceTime == getLastSetTimestamp()) {
- if (!newTimestampNotifier.timed_wait(lk, boost::posix_time::seconds(1)))
- return;
- }
- }
+ return Status::OK();
+}
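
As a concrete illustration of the dispatch above (the entry shown is hypothetical): a replicated collection drop arrives as a 'c' op, and the first field name of its "o" object ("drop" in this case) keys the opsMap lookup.

    // Hypothetical command entry routed through applyCommand_inlock:
    // { ts: ..., h: ..., v: 2, op: "c", ns: "test.$cmd", o: { drop: "users" } }
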
- void setNewTimestamp(const Timestamp& newTime) {
- stdx::lock_guard<stdx::mutex> lk(newOpMutex);
- setGlobalTimestamp(newTime);
- newTimestampNotifier.notify_all();
- }
+void waitUpToOneSecondForTimestampChange(const Timestamp& referenceTime) {
+ stdx::unique_lock<stdx::mutex> lk(newOpMutex);
- OpTime extractOpTime(const BSONObj& op) {
- const Timestamp ts = op["ts"].timestamp();
- const long long term = op["t"].numberLong(); // Default to 0 if it's absent
- return OpTime(ts, term);
+ while (referenceTime == getLastSetTimestamp()) {
+ if (!newTimestampNotifier.timed_wait(lk, boost::posix_time::seconds(1)))
+ return;
}
-
- void initTimestampFromOplog(OperationContext* txn, const std::string& oplogNS) {
- DBDirectClient c(txn);
- BSONObj lastOp = c.findOne(oplogNS,
- Query().sort(reverseNaturalObj),
- NULL,
- QueryOption_SlaveOk);
-
- if (!lastOp.isEmpty()) {
- LOG(1) << "replSet setting last Timestamp";
- setNewTimestamp(lastOp[ "ts" ].timestamp());
- }
+}
+
+void setNewTimestamp(const Timestamp& newTime) {
+ stdx::lock_guard<stdx::mutex> lk(newOpMutex);
+ setGlobalTimestamp(newTime);
+ newTimestampNotifier.notify_all();
+}
+
+OpTime extractOpTime(const BSONObj& op) {
+ const Timestamp ts = op["ts"].timestamp();
+ const long long term = op["t"].numberLong(); // Default to 0 if it's absent
+ return OpTime(ts, term);
+}
+
+void initTimestampFromOplog(OperationContext* txn, const std::string& oplogNS) {
+ DBDirectClient c(txn);
+ BSONObj lastOp = c.findOne(oplogNS, Query().sort(reverseNaturalObj), NULL, QueryOption_SlaveOk);
+
+ if (!lastOp.isEmpty()) {
+ LOG(1) << "replSet setting last Timestamp";
+ setNewTimestamp(lastOp["ts"].timestamp());
}
+}
- void oplogCheckCloseDatabase(OperationContext* txn, Database* db) {
- invariant(txn->lockState()->isW());
+void oplogCheckCloseDatabase(OperationContext* txn, Database* db) {
+ invariant(txn->lockState()->isW());
- _localDB = nullptr;
- _localOplogCollection = nullptr;
- }
+ _localDB = nullptr;
+ _localOplogCollection = nullptr;
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h
index b3a1f19f634..16e0944ccf6 100644
--- a/src/mongo/db/repl/oplog.h
+++ b/src/mongo/db/repl/oplog.h
@@ -39,100 +39,99 @@
#include "mongo/util/time_support.h"
namespace mongo {
- class BSONObj;
- class Collection;
- struct CollectionOptions;
- class Database;
- class NamespaceString;
- class OperationContext;
- class Timestamp;
- class RecordId;
+class BSONObj;
+class Collection;
+struct CollectionOptions;
+class Database;
+class NamespaceString;
+class OperationContext;
+class Timestamp;
+class RecordId;
namespace repl {
- class ReplicationCoordinator;
-
- // Create a new capped collection for the oplog if it doesn't yet exist.
- // This will be either local.oplog.rs (replica sets) or local.oplog.$main (master/slave)
- // If the collection already exists, set the 'last' OpTime if master/slave (side effect!)
- void createOplog(OperationContext* txn);
-
- // This function writes ops into the replica-set oplog;
- // used internally by replication secondaries after they have applied ops. Updates the global
- // optime.
- // Returns the optime for the last op inserted.
- OpTime writeOpsToOplog(OperationContext* txn,
- const std::deque<BSONObj>& ops);
-
- extern std::string rsOplogName;
- extern std::string masterSlaveOplogName;
-
- extern int OPLOG_VERSION;
-
- /** Log an operation to the local oplog
- *
- * @param opstr
- * "i" insert
- * "u" update
- * "d" delete
- * "c" db cmd
- * "n" no-op
- * "db" declares presence of a database (ns is set to the db name + '.')
- *
- * For 'u' records, 'obj' captures the mutation made to the object but not
- * the object itself. 'o2' captures the the criteria for the object that will be modified.
- */
- void _logOp(OperationContext* txn,
- const char *opstr,
- const char *ns,
- const BSONObj& obj,
- BSONObj *o2,
- bool fromMigrate);
-
- // Flush out the cached pointers to the local database and oplog.
- // Used by the closeDatabase command to ensure we don't cache closed things.
- void oplogCheckCloseDatabase(OperationContext* txn, Database * db);
-
- /**
- * Take a non-command op and apply it locally
- * Used for applying from an oplog
- * @param convertUpdateToUpsert convert some updates to upserts for idempotency reasons
- * Returns failure status if the op was an update that could not be applied.
- */
- Status applyOperation_inlock(OperationContext* txn,
- Database* db,
- const BSONObj& op,
- bool convertUpdateToUpsert = false);
-
- /**
- * Take a command op and apply it locally
- * Used for applying from an oplog
- * Returns failure status if the op that could not be applied.
- */
- Status applyCommand_inlock(OperationContext* txn, const BSONObj& op);
-
- /**
- * Waits up to one second for the Timestamp from the oplog to change.
- */
- void waitUpToOneSecondForTimestampChange(const Timestamp& referenceTime);
-
- /**
- * Initializes the global Timestamp with the value from the timestamp of the last oplog entry.
- */
- void initTimestampFromOplog(OperationContext* txn, const std::string& oplogNS);
-
- /**
- * Sets the global Timestamp to be 'newTime'.
- */
- void setNewTimestamp(const Timestamp& newTime);
-
- /*
- * Extract the OpTime from log entry.
- */
- OpTime extractOpTime(const BSONObj& op);
-
- /**
- * Detects the current replication mode and sets the "_oplogCollectionName" accordingly.
- */
- void setOplogCollectionName();
-} // namespace repl
-} // namespace mongo
+class ReplicationCoordinator;
+
+// Create a new capped collection for the oplog if it doesn't yet exist.
+// This will be either local.oplog.rs (replica sets) or local.oplog.$main (master/slave)
+// If the collection already exists, set the 'last' OpTime if master/slave (side effect!)
+void createOplog(OperationContext* txn);
+
+// This function writes ops into the replica-set oplog;
+// used internally by replication secondaries after they have applied ops. Updates the global
+// optime.
+// Returns the optime for the last op inserted.
+OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops);
+
+extern std::string rsOplogName;
+extern std::string masterSlaveOplogName;
+
+extern int OPLOG_VERSION;
+
+/** Log an operation to the local oplog
+ *
+ * @param opstr
+ * "i" insert
+ * "u" update
+ * "d" delete
+ * "c" db cmd
+ * "n" no-op
+ * "db" declares presence of a database (ns is set to the db name + '.')
+ *
+ * For 'u' records, 'obj' captures the mutation made to the object but not
+ * the object itself. 'o2' captures the criteria for the object that will be modified.
+ */
+void _logOp(OperationContext* txn,
+ const char* opstr,
+ const char* ns,
+ const BSONObj& obj,
+ BSONObj* o2,
+ bool fromMigrate);
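
A minimal call sketch for the 'u' case documented above (namespace, criteria, and mutation are illustrative):

    // Log an update of { _id: 1 } in test.users; 'o2' carries the match criteria.
    BSONObj mutation = BSON("$set" << BSON("score" << 5));
    BSONObj criteria = BSON("_id" << 1);
    _logOp(txn, "u", "test.users", mutation, &criteria, false /* fromMigrate */);
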
+
+// Flush out the cached pointers to the local database and oplog.
+// Used by the closeDatabase command to ensure we don't cache closed things.
+void oplogCheckCloseDatabase(OperationContext* txn, Database* db);
+
+/**
+ * Take a non-command op and apply it locally
+ * Used for applying from an oplog
+ * @param convertUpdateToUpsert convert some updates to upserts for idempotency reasons
+ * Returns failure status if the op was an update that could not be applied.
+ */
+Status applyOperation_inlock(OperationContext* txn,
+ Database* db,
+ const BSONObj& op,
+ bool convertUpdateToUpsert = false);
+
+/**
+ * Take a command op and apply it locally
+ * Used for applying from an oplog
+ * Returns failure status if the op could not be applied.
+ */
+Status applyCommand_inlock(OperationContext* txn, const BSONObj& op);
+
+/**
+ * Waits up to one second for the Timestamp from the oplog to change.
+ */
+void waitUpToOneSecondForTimestampChange(const Timestamp& referenceTime);
+
+/**
+ * Initializes the global Timestamp with the value from the timestamp of the last oplog entry.
+ */
+void initTimestampFromOplog(OperationContext* txn, const std::string& oplogNS);
+
+/**
+ * Sets the global Timestamp to be 'newTime'.
+ */
+void setNewTimestamp(const Timestamp& newTime);
+
+/*
+ * Extract the OpTime from log entry.
+ */
+OpTime extractOpTime(const BSONObj& op);
+
+/**
+ * Detects the current replication mode and sets the "_oplogCollectionName" accordingly.
+ */
+void setOplogCollectionName();
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog_interface.h b/src/mongo/db/repl/oplog_interface.h
index 4ca10947ddd..b03a9ac1a43 100644
--- a/src/mongo/db/repl/oplog_interface.h
+++ b/src/mongo/db/repl/oplog_interface.h
@@ -40,40 +40,40 @@
namespace mongo {
namespace repl {
- class OplogInterface {
- MONGO_DISALLOW_COPYING(OplogInterface);
- public:
+class OplogInterface {
+ MONGO_DISALLOW_COPYING(OplogInterface);
- class Iterator;
+public:
+ class Iterator;
- OplogInterface() = default;
- virtual ~OplogInterface() = default;
+ OplogInterface() = default;
+ virtual ~OplogInterface() = default;
- /**
- * Diagnostic information.
- */
- virtual std::string toString() const = 0;
+ /**
+ * Diagnostic information.
+ */
+ virtual std::string toString() const = 0;
- /**
- * Produces an iterator over oplog collection in reverse natural order.
- */
- virtual std::unique_ptr<Iterator> makeIterator() const = 0;
- };
+ /**
+ * Produces an iterator over oplog collection in reverse natural order.
+ */
+ virtual std::unique_ptr<Iterator> makeIterator() const = 0;
+};
- class OplogInterface::Iterator {
- MONGO_DISALLOW_COPYING(Iterator);
- public:
+class OplogInterface::Iterator {
+ MONGO_DISALLOW_COPYING(Iterator);
- using Value = std::pair<BSONObj, RecordId>;
+public:
+ using Value = std::pair<BSONObj, RecordId>;
- Iterator() = default;
- virtual ~Iterator() = default;
+ Iterator() = default;
+ virtual ~Iterator() = default;
- /**
- * Returns next operation and record id (if applicable) in the oplog.
- */
- virtual StatusWith<Value> next() = 0;
- };
+ /**
+ * Returns next operation and record id (if applicable) in the oplog.
+ */
+ virtual StatusWith<Value> next() = 0;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog_interface_local.cpp b/src/mongo/db/repl/oplog_interface_local.cpp
index c005865e9b0..30c9a7ab9e5 100644
--- a/src/mongo/db/repl/oplog_interface_local.cpp
+++ b/src/mongo/db/repl/oplog_interface_local.cpp
@@ -41,67 +41,57 @@ namespace repl {
namespace {
- class OplogIteratorLocal : public OplogInterface::Iterator {
- public:
-
- OplogIteratorLocal(OperationContext* txn,
- const std::string& collectionName);
-
- StatusWith<Value> next() override;
-
- private:
-
- ScopedTransaction _transaction;
- Lock::DBLock _dbLock;
- Lock::CollectionLock _collectionLock;
- OldClientContext _ctx;
- std::unique_ptr<PlanExecutor> _exec;
-
- };
-
- OplogIteratorLocal::OplogIteratorLocal(OperationContext* txn,
- const std::string& collectionName)
- : _transaction(txn, MODE_IS),
- _dbLock(txn->lockState(), nsToDatabase(collectionName), MODE_IS),
- _collectionLock(txn->lockState(), collectionName, MODE_S),
- _ctx(txn, collectionName),
- _exec(InternalPlanner::collectionScan(txn,
- collectionName,
- _ctx.db()->getCollection(collectionName),
- InternalPlanner::BACKWARD)) { }
-
- StatusWith<OplogInterface::Iterator::Value> OplogIteratorLocal::next() {
- BSONObj obj;
- RecordId recordId;
-
- if (PlanExecutor::ADVANCED != _exec->getNext(&obj, &recordId)) {
- return StatusWith<Value>(ErrorCodes::NoSuchKey, "no more operations in local oplog");
- }
- return StatusWith<Value>(std::make_pair(obj, recordId));
+class OplogIteratorLocal : public OplogInterface::Iterator {
+public:
+ OplogIteratorLocal(OperationContext* txn, const std::string& collectionName);
+
+ StatusWith<Value> next() override;
+
+private:
+ ScopedTransaction _transaction;
+ Lock::DBLock _dbLock;
+ Lock::CollectionLock _collectionLock;
+ OldClientContext _ctx;
+ std::unique_ptr<PlanExecutor> _exec;
+};
+
+OplogIteratorLocal::OplogIteratorLocal(OperationContext* txn, const std::string& collectionName)
+ : _transaction(txn, MODE_IS),
+ _dbLock(txn->lockState(), nsToDatabase(collectionName), MODE_IS),
+ _collectionLock(txn->lockState(), collectionName, MODE_S),
+ _ctx(txn, collectionName),
+ _exec(InternalPlanner::collectionScan(txn,
+ collectionName,
+ _ctx.db()->getCollection(collectionName),
+ InternalPlanner::BACKWARD)) {}
+
+StatusWith<OplogInterface::Iterator::Value> OplogIteratorLocal::next() {
+ BSONObj obj;
+ RecordId recordId;
+
+ if (PlanExecutor::ADVANCED != _exec->getNext(&obj, &recordId)) {
+ return StatusWith<Value>(ErrorCodes::NoSuchKey, "no more operations in local oplog");
}
+ return StatusWith<Value>(std::make_pair(obj, recordId));
+}
-} // namespace
+} // namespace
- OplogInterfaceLocal::OplogInterfaceLocal(OperationContext* txn,
- const std::string& collectionName)
- : _txn(txn),
- _collectionName(collectionName) {
+OplogInterfaceLocal::OplogInterfaceLocal(OperationContext* txn, const std::string& collectionName)
+ : _txn(txn), _collectionName(collectionName) {
+ invariant(txn);
+ invariant(!collectionName.empty());
+}
- invariant(txn);
- invariant(!collectionName.empty());
- }
-
- std::string OplogInterfaceLocal::toString() const {
- return str::stream() <<
- "LocalOplogInterface: "
- "operation context: " << _txn->getNS() << "/" << _txn->getOpID() <<
- "; collection: " << _collectionName;
- }
+std::string OplogInterfaceLocal::toString() const {
+ return str::stream() << "LocalOplogInterface: "
+ "operation context: " << _txn->getNS() << "/" << _txn->getOpID()
+ << "; collection: " << _collectionName;
+}
- std::unique_ptr<OplogInterface::Iterator> OplogInterfaceLocal::makeIterator() const {
- return std::unique_ptr<OplogInterface::Iterator>(
- new OplogIteratorLocal(_txn, _collectionName));
- }
+std::unique_ptr<OplogInterface::Iterator> OplogInterfaceLocal::makeIterator() const {
+ return std::unique_ptr<OplogInterface::Iterator>(new OplogIteratorLocal(_txn, _collectionName));
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog_interface_local.h b/src/mongo/db/repl/oplog_interface_local.h
index cd61a81a239..32c9adc4377 100644
--- a/src/mongo/db/repl/oplog_interface_local.h
+++ b/src/mongo/db/repl/oplog_interface_local.h
@@ -32,27 +32,24 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- /**
- * Scans local oplog collection in reverse natural order.
- */
-
- class OplogInterfaceLocal : public OplogInterface {
- public:
-
- OplogInterfaceLocal(OperationContext* txn, const std::string& collectionName);
- std::string toString() const override;
- std::unique_ptr<OplogInterface::Iterator> makeIterator() const override;
-
- private:
+/**
+ * Scans local oplog collection in reverse natural order.
+ */
- OperationContext* _txn;
- std::string _collectionName;
+class OplogInterfaceLocal : public OplogInterface {
+public:
+ OplogInterfaceLocal(OperationContext* txn, const std::string& collectionName);
+ std::string toString() const override;
+ std::unique_ptr<OplogInterface::Iterator> makeIterator() const override;
- };
+private:
+ OperationContext* _txn;
+ std::string _collectionName;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog_interface_mock.cpp b/src/mongo/db/repl/oplog_interface_mock.cpp
index 97cea831fd7..4936d5aa533 100644
--- a/src/mongo/db/repl/oplog_interface_mock.cpp
+++ b/src/mongo/db/repl/oplog_interface_mock.cpp
@@ -35,49 +35,43 @@ namespace repl {
namespace {
- class OplogIteratorMock : public OplogInterface::Iterator {
- public:
-
- OplogIteratorMock(OplogInterfaceMock::Operations::const_iterator iterator,
- OplogInterfaceMock::Operations::const_iterator iteratorEnd);
- StatusWith<Value> next() override;
-
- private:
-
- OplogInterfaceMock::Operations::const_iterator _iterator;
- OplogInterfaceMock::Operations::const_iterator _iteratorEnd;
-
- };
-
- OplogIteratorMock::OplogIteratorMock(OplogInterfaceMock::Operations::const_iterator iter,
- OplogInterfaceMock::Operations::const_iterator iterEnd)
- : _iterator(iter),
- _iteratorEnd(iterEnd) {}
-
- StatusWith<OplogInterface::Iterator::Value> OplogIteratorMock::next() {
- if (_iterator == _iteratorEnd) {
- return StatusWith<OplogInterface::Iterator::Value>(ErrorCodes::NoSuchKey,
- "no more ops");
- }
- return *(_iterator++);
+class OplogIteratorMock : public OplogInterface::Iterator {
+public:
+ OplogIteratorMock(OplogInterfaceMock::Operations::const_iterator iterator,
+ OplogInterfaceMock::Operations::const_iterator iteratorEnd);
+ StatusWith<Value> next() override;
+
+private:
+ OplogInterfaceMock::Operations::const_iterator _iterator;
+ OplogInterfaceMock::Operations::const_iterator _iteratorEnd;
+};
+
+OplogIteratorMock::OplogIteratorMock(OplogInterfaceMock::Operations::const_iterator iter,
+ OplogInterfaceMock::Operations::const_iterator iterEnd)
+ : _iterator(iter), _iteratorEnd(iterEnd) {}
+
+StatusWith<OplogInterface::Iterator::Value> OplogIteratorMock::next() {
+ if (_iterator == _iteratorEnd) {
+ return StatusWith<OplogInterface::Iterator::Value>(ErrorCodes::NoSuchKey, "no more ops");
}
+ return *(_iterator++);
+}
-} // namespace
+} // namespace
- OplogInterfaceMock::OplogInterfaceMock(std::initializer_list<Operation> operations)
- : _operations(operations) {}
+OplogInterfaceMock::OplogInterfaceMock(std::initializer_list<Operation> operations)
+ : _operations(operations) {}
- OplogInterfaceMock::OplogInterfaceMock(const Operations& operations)
- : _operations(operations) {}
+OplogInterfaceMock::OplogInterfaceMock(const Operations& operations) : _operations(operations) {}
- std::string OplogInterfaceMock::toString() const {
- return "OplogInterfaceMock";
- }
+std::string OplogInterfaceMock::toString() const {
+ return "OplogInterfaceMock";
+}
- std::unique_ptr<OplogInterface::Iterator> OplogInterfaceMock::makeIterator() const {
- return std::unique_ptr<OplogInterface::Iterator>(
- new OplogIteratorMock(_operations.begin(), _operations.end()));
- }
+std::unique_ptr<OplogInterface::Iterator> OplogInterfaceMock::makeIterator() const {
+ return std::unique_ptr<OplogInterface::Iterator>(
+ new OplogIteratorMock(_operations.begin(), _operations.end()));
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog_interface_mock.h b/src/mongo/db/repl/oplog_interface_mock.h
index 4c2049a5688..524ab3c8d2f 100644
--- a/src/mongo/db/repl/oplog_interface_mock.h
+++ b/src/mongo/db/repl/oplog_interface_mock.h
@@ -34,21 +34,23 @@
namespace mongo {
namespace repl {
- /**
- * Simulates oplog for testing rollback functionality.
- */
- class OplogInterfaceMock : public OplogInterface {
- MONGO_DISALLOW_COPYING(OplogInterfaceMock);
- public:
- using Operation = std::pair<BSONObj,RecordId>;
- using Operations = std::list<Operation>;
- explicit OplogInterfaceMock(std::initializer_list<Operation> operations);
- explicit OplogInterfaceMock(const Operations& operations);
- std::string toString() const override;
- std::unique_ptr<OplogInterface::Iterator> makeIterator() const override;
- private:
- Operations _operations;
- };
+/**
+ * Simulates oplog for testing rollback functionality.
+ */
+class OplogInterfaceMock : public OplogInterface {
+ MONGO_DISALLOW_COPYING(OplogInterfaceMock);
+
+public:
+ using Operation = std::pair<BSONObj, RecordId>;
+ using Operations = std::list<Operation>;
+ explicit OplogInterfaceMock(std::initializer_list<Operation> operations);
+ explicit OplogInterfaceMock(const Operations& operations);
+ std::string toString() const override;
+ std::unique_ptr<OplogInterface::Iterator> makeIterator() const override;
+
+private:
+ Operations _operations;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
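
A minimal usage sketch for the mock above, in the spirit of the rollback tests; the sample documents and RecordIds are invented for illustration:

    // Hypothetical test snippet: seed a mock oplog (newest entry first) and
    // drain its iterator.
    OplogInterfaceMock::Operations ops;
    ops.push_back(std::make_pair(BSON("ts" << Timestamp(2, 1) << "h" << 2LL), RecordId(2)));
    ops.push_back(std::make_pair(BSON("ts" << Timestamp(1, 1) << "h" << 1LL), RecordId(1)));
    OplogInterfaceMock oplog(ops);

    auto iter = oplog.makeIterator();
    for (auto entry = iter->next(); entry.isOK(); entry = iter->next()) {
        const BSONObj& doc = entry.getValue().first;    // the oplog document
        const RecordId& loc = entry.getValue().second;  // its storage location
        // ... inspect doc and loc ...
    }
    // Once drained, next() yields ErrorCodes::NoSuchKey ("no more ops").
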
diff --git a/src/mongo/db/repl/oplog_interface_remote.cpp b/src/mongo/db/repl/oplog_interface_remote.cpp
index da78924fc55..445d9776d0b 100644
--- a/src/mongo/db/repl/oplog_interface_remote.cpp
+++ b/src/mongo/db/repl/oplog_interface_remote.cpp
@@ -39,51 +39,46 @@ namespace repl {
namespace {
- class OplogIteratorRemote : public OplogInterface::Iterator {
- public:
+class OplogIteratorRemote : public OplogInterface::Iterator {
+public:
+ OplogIteratorRemote(std::unique_ptr<DBClientCursor> cursor);
+ StatusWith<Value> next() override;
- OplogIteratorRemote(std::unique_ptr<DBClientCursor> cursor);
- StatusWith<Value> next() override;
+private:
+ std::unique_ptr<DBClientCursor> _cursor;
+};
- private:
+OplogIteratorRemote::OplogIteratorRemote(std::unique_ptr<DBClientCursor> cursor)
+ : _cursor(std::move(cursor)) {}
- std::unique_ptr<DBClientCursor> _cursor;
-
- };
-
- OplogIteratorRemote::OplogIteratorRemote(std::unique_ptr<DBClientCursor> cursor)
- : _cursor(std::move(cursor)) { }
-
- StatusWith<OplogInterface::Iterator::Value> OplogIteratorRemote::next() {
- if (!_cursor.get()) {
- return StatusWith<Value>(ErrorCodes::NamespaceNotFound, "no cursor for remote oplog");
- }
- if (!_cursor->more()) {
- return StatusWith<Value>(ErrorCodes::NoSuchKey, "no more operations in remote oplog");
- }
- return StatusWith<Value>(std::make_pair(_cursor->nextSafe(), RecordId()));
+StatusWith<OplogInterface::Iterator::Value> OplogIteratorRemote::next() {
+ if (!_cursor.get()) {
+ return StatusWith<Value>(ErrorCodes::NamespaceNotFound, "no cursor for remote oplog");
}
-
-} // namespace
-
- OplogInterfaceRemote::OplogInterfaceRemote(DBClientConnection* conn,
- const std::string& collectionName)
- : _conn(conn),
- _collectionName(collectionName) {
-
- invariant(conn);
+ if (!_cursor->more()) {
+ return StatusWith<Value>(ErrorCodes::NoSuchKey, "no more operations in remote oplog");
}
-
- std::string OplogInterfaceRemote::toString() const {
- return _conn->toString();
- }
-
- std::unique_ptr<OplogInterface::Iterator> OplogInterfaceRemote::makeIterator() const {
- const Query query = Query().sort(BSON("$natural" << -1));
- const BSONObj fields = BSON("ts" << 1 << "h" << 1);
- return std::unique_ptr<OplogInterface::Iterator>(
- new OplogIteratorRemote(_conn->query(_collectionName, query, 0, 0, &fields, 0, 0)));
- }
-
-} // namespace repl
-} // namespace mongo
+ return StatusWith<Value>(std::make_pair(_cursor->nextSafe(), RecordId()));
+}
+
+} // namespace
+
+OplogInterfaceRemote::OplogInterfaceRemote(DBClientConnection* conn,
+ const std::string& collectionName)
+ : _conn(conn), _collectionName(collectionName) {
+ invariant(conn);
+}
+
+std::string OplogInterfaceRemote::toString() const {
+ return _conn->toString();
+}
+
+std::unique_ptr<OplogInterface::Iterator> OplogInterfaceRemote::makeIterator() const {
+ const Query query = Query().sort(BSON("$natural" << -1));
+ const BSONObj fields = BSON("ts" << 1 << "h" << 1);
+ return std::unique_ptr<OplogInterface::Iterator>(
+ new OplogIteratorRemote(_conn->query(_collectionName, query, 0, 0, &fields, 0, 0)));
+}
+
+} // namespace repl
+} // namespace mongo
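
Callers of the remote iterator above can tell a missing cursor apart from an exhausted one by the error code; a brief sketch, assuming remoteOplog is an already-constructed OplogInterfaceRemote:

    // Hypothetical caller distinguishing the two failure modes of next().
    auto iter = remoteOplog.makeIterator();
    auto entry = iter->next();
    if (entry.getStatus().code() == ErrorCodes::NamespaceNotFound) {
        // The query never produced a cursor for the remote oplog.
    } else if (entry.getStatus().code() == ErrorCodes::NoSuchKey) {
        // Connected, but the remote oplog has no more operations.
    }
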
diff --git a/src/mongo/db/repl/oplog_interface_remote.h b/src/mongo/db/repl/oplog_interface_remote.h
index ee91d9197d2..300e755f105 100644
--- a/src/mongo/db/repl/oplog_interface_remote.h
+++ b/src/mongo/db/repl/oplog_interface_remote.h
@@ -32,27 +32,24 @@
namespace mongo {
- class DBClientConnection;
+class DBClientConnection;
namespace repl {
- /**
- * Reads oplog on remote server.
- */
-
- class OplogInterfaceRemote : public OplogInterface {
- public:
-
- explicit OplogInterfaceRemote(DBClientConnection* conn, const std::string& collectionName);
- std::string toString() const override;
- std::unique_ptr<OplogInterface::Iterator> makeIterator() const override;
-
- private:
+/**
+ * Reads oplog on remote server.
+ */
- DBClientConnection* _conn;
- std::string _collectionName;
+class OplogInterfaceRemote : public OplogInterface {
+public:
+ explicit OplogInterfaceRemote(DBClientConnection* conn, const std::string& collectionName);
+ std::string toString() const override;
+ std::unique_ptr<OplogInterface::Iterator> makeIterator() const override;
- };
+private:
+ DBClientConnection* _conn;
+ std::string _collectionName;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplogreader.cpp b/src/mongo/db/repl/oplogreader.cpp
index d982eae975e..012d7d2458c 100644
--- a/src/mongo/db/repl/oplogreader.cpp
+++ b/src/mongo/db/repl/oplogreader.cpp
@@ -52,162 +52,152 @@
namespace mongo {
- using std::shared_ptr;
- using std::endl;
- using std::string;
+using std::shared_ptr;
+using std::endl;
+using std::string;
namespace repl {
- const BSONObj reverseNaturalObj = BSON( "$natural" << -1 );
+const BSONObj reverseNaturalObj = BSON("$natural" << -1);
- //number of readers created;
-    // this happens when the sync source changes, on a reconfig/network error, or when the cursor dies
- static Counter64 readersCreatedStats;
- static ServerStatusMetricField<Counter64> displayReadersCreated(
- "repl.network.readersCreated",
- &readersCreatedStats );
+// number of readers created;
+// this happens when the sync source changes, on a reconfig/network error, or when the cursor dies
+static Counter64 readersCreatedStats;
+static ServerStatusMetricField<Counter64> displayReadersCreated("repl.network.readersCreated",
+ &readersCreatedStats);
- bool replAuthenticate(DBClientBase *conn) {
- if (!getGlobalAuthorizationManager()->isAuthEnabled())
- return true;
+bool replAuthenticate(DBClientBase* conn) {
+ if (!getGlobalAuthorizationManager()->isAuthEnabled())
+ return true;
- if (!isInternalAuthSet())
- return false;
- return authenticateInternalUser(conn);
- }
+ if (!isInternalAuthSet())
+ return false;
+ return authenticateInternalUser(conn);
+}
- OplogReader::OplogReader() {
- _tailingQueryOptions = QueryOption_SlaveOk;
- _tailingQueryOptions |= QueryOption_CursorTailable | QueryOption_OplogReplay;
-
-        /* TODO: maybe we shouldn't use slaveOk? */
- _tailingQueryOptions |= QueryOption_AwaitData;
+OplogReader::OplogReader() {
+ _tailingQueryOptions = QueryOption_SlaveOk;
+ _tailingQueryOptions |= QueryOption_CursorTailable | QueryOption_OplogReplay;
- readersCreatedStats.increment();
- }
+    /* TODO: maybe we shouldn't use slaveOk? */
+ _tailingQueryOptions |= QueryOption_AwaitData;
- bool OplogReader::connect(const HostAndPort& host) {
- if (conn() == NULL || _host != host) {
- resetConnection();
- _conn = shared_ptr<DBClientConnection>(new DBClientConnection(false,
- tcp_timeout));
- string errmsg;
- if ( !_conn->connect(host, errmsg) ||
- (getGlobalAuthorizationManager()->isAuthEnabled() &&
- !replAuthenticate(_conn.get())) ) {
-
- resetConnection();
- error() << errmsg << endl;
- return false;
- }
- _conn->port().tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
- _host = host;
- }
- return true;
- }
+ readersCreatedStats.increment();
+}
- void OplogReader::tailCheck() {
- if( cursor.get() && cursor->isDead() ) {
- log() << "old cursor isDead, will initiate a new one" << std::endl;
- resetCursor();
+bool OplogReader::connect(const HostAndPort& host) {
+ if (conn() == NULL || _host != host) {
+ resetConnection();
+ _conn = shared_ptr<DBClientConnection>(new DBClientConnection(false, tcp_timeout));
+ string errmsg;
+ if (!_conn->connect(host, errmsg) ||
+ (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate(_conn.get()))) {
+ resetConnection();
+ error() << errmsg << endl;
+ return false;
}
+ _conn->port().tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
+ _host = host;
}
+ return true;
+}
- void OplogReader::query(const char *ns,
- Query query,
- int nToReturn,
- int nToSkip,
- const BSONObj* fields) {
- cursor.reset(
- _conn->query(ns, query, nToReturn, nToSkip, fields, QueryOption_SlaveOk).release()
- );
- }
-
- void OplogReader::tailingQuery(const char *ns, const BSONObj& query) {
- verify( !haveCursor() );
- LOG(2) << ns << ".find(" << query.toString() << ')' << endl;
- cursor.reset( _conn->query( ns, query, 0, 0, nullptr, _tailingQueryOptions ).release() );
- }
-
- void OplogReader::tailingQueryGTE(const char *ns, Timestamp optime) {
- BSONObjBuilder gte;
- gte.append("$gte", optime);
- BSONObjBuilder query;
- query.append("ts", gte.done());
- tailingQuery(ns, query.done());
- }
-
- HostAndPort OplogReader::getHost() const {
- return _host;
+void OplogReader::tailCheck() {
+ if (cursor.get() && cursor->isDead()) {
+ log() << "old cursor isDead, will initiate a new one" << std::endl;
+ resetCursor();
}
-
- void OplogReader::connectToSyncSource(OperationContext* txn,
- const OpTime& lastOpTimeFetched,
- ReplicationCoordinator* replCoord) {
- const Timestamp sentinelTimestamp(duration_cast<Seconds>(Milliseconds(curTimeMillis64())), 0);
- const OpTime sentinel(sentinelTimestamp, std::numeric_limits<long long>::max());
- OpTime oldestOpTimeSeen = sentinel;
-
- invariant(conn() == NULL);
-
- while (true) {
- HostAndPort candidate = replCoord->chooseNewSyncSource();
-
- if (candidate.empty()) {
- if (oldestOpTimeSeen == sentinel) {
- // If, in this invocation of connectToSyncSource(), we did not successfully
- // connect to any node ahead of us,
- // we apparently have no sync sources to connect to.
- // This situation is common; e.g. if there are no writes to the primary at
- // the moment.
- return;
- }
-
- // Connected to at least one member, but in all cases we were too stale to use them
- // as a sync source.
- error() << "too stale to catch up";
- log() << "our last optime : " << lastOpTimeFetched;
- log() << "oldest available is " << oldestOpTimeSeen;
- log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
- setMinValid(txn, oldestOpTimeSeen);
- bool worked = replCoord->setFollowerMode(MemberState::RS_RECOVERING);
- if (!worked) {
- warning() << "Failed to transition into "
- << MemberState(MemberState::RS_RECOVERING)
- << ". Current state: " << replCoord->getMemberState();
- }
+}
+
+void OplogReader::query(
+ const char* ns, Query query, int nToReturn, int nToSkip, const BSONObj* fields) {
+ cursor.reset(
+ _conn->query(ns, query, nToReturn, nToSkip, fields, QueryOption_SlaveOk).release());
+}
+
+void OplogReader::tailingQuery(const char* ns, const BSONObj& query) {
+ verify(!haveCursor());
+ LOG(2) << ns << ".find(" << query.toString() << ')' << endl;
+ cursor.reset(_conn->query(ns, query, 0, 0, nullptr, _tailingQueryOptions).release());
+}
+
+void OplogReader::tailingQueryGTE(const char* ns, Timestamp optime) {
+ BSONObjBuilder gte;
+ gte.append("$gte", optime);
+ BSONObjBuilder query;
+ query.append("ts", gte.done());
+ tailingQuery(ns, query.done());
+}
+
+HostAndPort OplogReader::getHost() const {
+ return _host;
+}
+
+void OplogReader::connectToSyncSource(OperationContext* txn,
+ const OpTime& lastOpTimeFetched,
+ ReplicationCoordinator* replCoord) {
+ const Timestamp sentinelTimestamp(duration_cast<Seconds>(Milliseconds(curTimeMillis64())), 0);
+ const OpTime sentinel(sentinelTimestamp, std::numeric_limits<long long>::max());
+ OpTime oldestOpTimeSeen = sentinel;
+
+ invariant(conn() == NULL);
+
+ while (true) {
+ HostAndPort candidate = replCoord->chooseNewSyncSource();
+
+ if (candidate.empty()) {
+ if (oldestOpTimeSeen == sentinel) {
+ // If, in this invocation of connectToSyncSource(), we did not successfully
+ // connect to any node ahead of us,
+ // we apparently have no sync sources to connect to.
+ // This situation is common; e.g. if there are no writes to the primary at
+ // the moment.
return;
}
- if (!connect(candidate)) {
- LOG(2) << "can't connect to " << candidate.toString() <<
- " to read operations";
- resetConnection();
- replCoord->blacklistSyncSource(candidate, Date_t::now() + Seconds(10));
- continue;
+ // Connected to at least one member, but in all cases we were too stale to use them
+ // as a sync source.
+ error() << "too stale to catch up";
+ log() << "our last optime : " << lastOpTimeFetched;
+ log() << "oldest available is " << oldestOpTimeSeen;
+ log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
+ setMinValid(txn, oldestOpTimeSeen);
+ bool worked = replCoord->setFollowerMode(MemberState::RS_RECOVERING);
+ if (!worked) {
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
+ << ". Current state: " << replCoord->getMemberState();
}
- // Read the first (oldest) op and confirm that it's not newer than our last
- // fetched op. Otherwise, we have fallen off the back of that source's oplog.
- BSONObj remoteOldestOp(findOne(rsOplogName.c_str(), Query()));
- OpTime remoteOldOpTime = extractOpTime(remoteOldestOp);
-
- if (lastOpTimeFetched < remoteOldOpTime) {
- // We're too stale to use this sync source.
- resetConnection();
- replCoord->blacklistSyncSource(candidate, Date_t::now() + Minutes(10));
- if (oldestOpTimeSeen > remoteOldOpTime) {
- warning() << "we are too stale to use " << candidate.toString() <<
- " as a sync source";
- oldestOpTimeSeen = remoteOldOpTime;
- }
- continue;
+ return;
+ }
+
+ if (!connect(candidate)) {
+ LOG(2) << "can't connect to " << candidate.toString() << " to read operations";
+ resetConnection();
+ replCoord->blacklistSyncSource(candidate, Date_t::now() + Seconds(10));
+ continue;
+ }
+ // Read the first (oldest) op and confirm that it's not newer than our last
+ // fetched op. Otherwise, we have fallen off the back of that source's oplog.
+ BSONObj remoteOldestOp(findOne(rsOplogName.c_str(), Query()));
+ OpTime remoteOldOpTime = extractOpTime(remoteOldestOp);
+
+ if (lastOpTimeFetched < remoteOldOpTime) {
+ // We're too stale to use this sync source.
+ resetConnection();
+ replCoord->blacklistSyncSource(candidate, Date_t::now() + Minutes(10));
+ if (oldestOpTimeSeen > remoteOldOpTime) {
+ warning() << "we are too stale to use " << candidate.toString()
+ << " as a sync source";
+ oldestOpTimeSeen = remoteOldOpTime;
}
+ continue;
+ }
- // Got a valid sync source.
- return;
- } // while (true)
- }
+ // Got a valid sync source.
+ return;
+ } // while (true)
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplogreader.h b/src/mongo/db/repl/oplogreader.h
index 63dcaaeaa20..718fa162d88 100644
--- a/src/mongo/db/repl/oplogreader.h
+++ b/src/mongo/db/repl/oplogreader.h
@@ -38,117 +38,126 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class ReplicationCoordinator;
- class OpTime;
+class ReplicationCoordinator;
+class OpTime;
- // {"$natural": -1 }
- extern const BSONObj reverseNaturalObj;
+// {"$natural": -1 }
+extern const BSONObj reverseNaturalObj;
+
+/**
+ * Authenticates conn using the server's cluster-membership credentials.
+ *
+ * Returns true on successful authentication.
+ */
+bool replAuthenticate(DBClientBase* conn);
+
+/* started abstracting out the querying of the primary/master's oplog
+ still fairly awkward but a start.
+*/
+
+class OplogReader {
+private:
+ std::shared_ptr<DBClientConnection> _conn;
+ std::shared_ptr<DBClientCursor> cursor;
+ int _tailingQueryOptions;
+
+ // If _conn was actively connected, _host represents the current HostAndPort of the
+ // connection.
+ HostAndPort _host;
+
+public:
+ OplogReader();
+ ~OplogReader() {}
+ void resetCursor() {
+ cursor.reset();
+ }
+ void resetConnection() {
+ cursor.reset();
+ _conn.reset();
+ _host = HostAndPort();
+ }
+ DBClientConnection* conn() {
+ return _conn.get();
+ }
+ BSONObj findOne(const char* ns, const Query& q) {
+ return conn()->findOne(ns, q, 0, QueryOption_SlaveOk);
+ }
+ BSONObj getLastOp(const std::string& ns) {
+ return findOne(ns.c_str(), Query().sort(reverseNaturalObj));
+ }
+
+ /* SO_TIMEOUT (send/recv time out) for our DBClientConnections */
+ static const int tcp_timeout = 30;
+
+ /* ok to call if already connected */
+ bool connect(const HostAndPort& host);
+
+ void tailCheck();
+
+ bool haveCursor() {
+ return cursor.get() != 0;
+ }
+
+ void query(const char* ns, Query query, int nToReturn, int nToSkip, const BSONObj* fields = 0);
+
+ void tailingQuery(const char* ns, const BSONObj& query);
+
+ void tailingQueryGTE(const char* ns, Timestamp t);
+
+ bool more() {
+ uassert(15910, "Doesn't have cursor for reading oplog", cursor.get());
+ return cursor->more();
+ }
+
+ bool moreInCurrentBatch() {
+ uassert(15911, "Doesn't have cursor for reading oplog", cursor.get());
+ return cursor->moreInCurrentBatch();
+ }
+
+ int currentBatchMessageSize() {
+ if (NULL == cursor->getMessage())
+ return 0;
+ return cursor->getMessage()->size();
+ }
+
+ BSONObj nextSafe() {
+ return cursor->nextSafe();
+ }
+ BSONObj next() {
+ return cursor->next();
+ }
+
+
+ // master/slave only
+ void peek(std::vector<BSONObj>& v, int n) {
+ if (cursor.get())
+ cursor->peek(v, n);
+ }
+
+ // master/slave only
+ void putBack(BSONObj op) {
+ cursor->putBack(op);
+ }
+
+ HostAndPort getHost() const;
/**
- * Authenticates conn using the server's cluster-membership credentials.
- *
- * Returns true on successful authentication.
+ * Connects this OplogReader to a valid sync source, using the provided lastOpTimeFetched
+ * and ReplicationCoordinator objects.
+ * If this function fails to connect to a sync source that is viable, this OplogReader
+ * is left unconnected, where this->conn() equals NULL.
+ * In the process of connecting, this function may add items to the repl coordinator's
+ * sync source blacklist.
+ * This function may throw DB exceptions.
*/
- bool replAuthenticate(DBClientBase* conn);
-
- /* started abstracting out the querying of the primary/master's oplog
- still fairly awkward but a start.
- */
-
- class OplogReader {
- private:
- std::shared_ptr<DBClientConnection> _conn;
- std::shared_ptr<DBClientCursor> cursor;
- int _tailingQueryOptions;
-
- // If _conn was actively connected, _host represents the current HostAndPort of the
- // connection.
- HostAndPort _host;
- public:
- OplogReader();
- ~OplogReader() { }
- void resetCursor() { cursor.reset(); }
- void resetConnection() {
- cursor.reset();
- _conn.reset();
- _host = HostAndPort();
- }
- DBClientConnection* conn() { return _conn.get(); }
- BSONObj findOne(const char *ns, const Query& q) {
- return conn()->findOne(ns, q, 0, QueryOption_SlaveOk);
- }
- BSONObj getLastOp(const std::string& ns) {
- return findOne(ns.c_str(), Query().sort(reverseNaturalObj));
- }
-
- /* SO_TIMEOUT (send/recv time out) for our DBClientConnections */
- static const int tcp_timeout = 30;
-
- /* ok to call if already connected */
- bool connect(const HostAndPort& host);
-
- void tailCheck();
-
- bool haveCursor() { return cursor.get() != 0; }
-
- void query(const char *ns,
- Query query,
- int nToReturn,
- int nToSkip,
- const BSONObj* fields=0);
-
- void tailingQuery(const char *ns, const BSONObj& query);
-
- void tailingQueryGTE(const char *ns, Timestamp t);
-
- bool more() {
- uassert( 15910, "Doesn't have cursor for reading oplog", cursor.get() );
- return cursor->more();
- }
-
- bool moreInCurrentBatch() {
- uassert( 15911, "Doesn't have cursor for reading oplog", cursor.get() );
- return cursor->moreInCurrentBatch();
- }
-
- int currentBatchMessageSize() {
- if( NULL == cursor->getMessage() )
- return 0;
- return cursor->getMessage()->size();
- }
-
- BSONObj nextSafe() { return cursor->nextSafe(); }
- BSONObj next() { return cursor->next(); }
-
-
- // master/slave only
- void peek(std::vector<BSONObj>& v, int n) {
- if( cursor.get() )
- cursor->peek(v,n);
- }
-
- // master/slave only
- void putBack(BSONObj op) { cursor->putBack(op); }
-
- HostAndPort getHost() const;
-
- /**
- * Connects this OplogReader to a valid sync source, using the provided lastOpTimeFetched
- * and ReplicationCoordinator objects.
- * If this function fails to connect to a sync source that is viable, this OplogReader
- * is left unconnected, where this->conn() equals NULL.
- * In the process of connecting, this function may add items to the repl coordinator's
- * sync source blacklist.
- * This function may throw DB exceptions.
- */
- void connectToSyncSource(OperationContext* txn,
- const OpTime& lastOpTimeFetched,
- ReplicationCoordinator* replCoord);
- };
-
-} // namespace repl
-} // namespace mongo
+ void connectToSyncSource(OperationContext* txn,
+ const OpTime& lastOpTimeFetched,
+ ReplicationCoordinator* replCoord);
+};
+
+} // namespace repl
+} // namespace mongo
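
A condensed sketch of the tailing loop this API is built for; the host name, the "local.oplog.rs" literal, and lastOpTime are assumptions for illustration, and error handling is elided:

    // Hypothetical single fetch pass over a sync source's oplog.
    void tailOplogOnce(const OpTime& lastOpTime) {
        OplogReader reader;
        if (!reader.connect(HostAndPort("syncsource.example.net:27017"))) {
            return;  // connect() logs the failure and leaves the reader unconnected
        }
        reader.tailingQueryGTE("local.oplog.rs", lastOpTime.getTimestamp());
        while (reader.haveCursor()) {
            while (reader.more()) {
                BSONObj op = reader.nextSafe();
                // ... buffer or apply op ...
            }
            reader.tailCheck();  // drops a dead cursor so the loop can terminate
        }
    }
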
diff --git a/src/mongo/db/repl/optime.cpp b/src/mongo/db/repl/optime.cpp
index 73907ac9146..87cf966c1ef 100644
--- a/src/mongo/db/repl/optime.cpp
+++ b/src/mongo/db/repl/optime.cpp
@@ -36,33 +36,33 @@
namespace mongo {
namespace repl {
- OpTime::OpTime(Timestamp ts, long long term) : _timestamp(std::move(ts)), _term(term) {}
+OpTime::OpTime(Timestamp ts, long long term) : _timestamp(std::move(ts)), _term(term) {}
- Timestamp OpTime::getTimestamp() const {
- return _timestamp;
- }
+Timestamp OpTime::getTimestamp() const {
+ return _timestamp;
+}
- long long OpTime::getSecs() const {
- return _timestamp.getSecs();
- }
+long long OpTime::getSecs() const {
+ return _timestamp.getSecs();
+}
- long long OpTime::getTerm() const {
- return _term;
- }
+long long OpTime::getTerm() const {
+ return _term;
+}
- bool OpTime::isNull() const {
- return _timestamp.isNull();
- }
+bool OpTime::isNull() const {
+ return _timestamp.isNull();
+}
- std::string OpTime::toString() const {
- std::stringstream ss;
- ss << "(term: " << _term << ", timestamp: " << _timestamp.toStringPretty() << ")";
- return ss.str();
- }
+std::string OpTime::toString() const {
+ std::stringstream ss;
+ ss << "(term: " << _term << ", timestamp: " << _timestamp.toStringPretty() << ")";
+ return ss.str();
+}
- std::ostream& operator<<(std::ostream& out, const OpTime& opTime) {
- return out << opTime.toString();
- }
+std::ostream& operator<<(std::ostream& out, const OpTime& opTime) {
+ return out << opTime.toString();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/optime.h b/src/mongo/db/repl/optime.h
index 76b21eaca2e..5fb4faea093 100644
--- a/src/mongo/db/repl/optime.h
+++ b/src/mongo/db/repl/optime.h
@@ -35,65 +35,65 @@
namespace mongo {
namespace repl {
- /**
- * OpTime encompasses a Timestamp (which itself is composed of two 32-bit integers, which can
- * represent a time_t and a counter), and a 64-bit Term number. OpTime can be used to
- * label every op in an oplog with a unique identifier.
- */
+/**
+ * OpTime encompasses a Timestamp (which itself is composed of two 32-bit integers, which can
+ * represent a time_t and a counter), and a 64-bit Term number. OpTime can be used to
+ * label every op in an oplog with a unique identifier.
+ */
- class OpTime {
- public:
- // The default term after the first time upgrading from protocol version 0.
- //
- // This is also the first term for nodes that were recently started up but have not
- // yet joined the cluster, all in protocol version 1.
- static const long long kDefaultTerm = 0;
+class OpTime {
+public:
+ // The default term after the first time upgrading from protocol version 0.
+ //
+ // This is also the first term for nodes that were recently started up but have not
+ // yet joined the cluster, all in protocol version 1.
+ static const long long kDefaultTerm = 0;
- OpTime() = default;
- OpTime(Timestamp ts, long long term);
+ OpTime() = default;
+ OpTime(Timestamp ts, long long term);
- Timestamp getTimestamp() const;
+ Timestamp getTimestamp() const;
- long long getSecs() const;
+ long long getSecs() const;
- long long getTerm() const;
+ long long getTerm() const;
- std::string toString() const;
+ std::string toString() const;
- // Returns true when this OpTime is not yet initialized.
- bool isNull() const;
+ // Returns true when this OpTime is not yet initialized.
+ bool isNull() const;
- inline bool operator==(const OpTime& rhs) const {
- return std::tie(_term, _timestamp) == std::tie(rhs._term, rhs._timestamp);
- }
+ inline bool operator==(const OpTime& rhs) const {
+ return std::tie(_term, _timestamp) == std::tie(rhs._term, rhs._timestamp);
+ }
- inline bool operator<(const OpTime& rhs) const {
- // Compare term first, then the opTimes.
- return std::tie(_term, _timestamp) < std::tie(rhs._term, rhs._timestamp);
- }
+ inline bool operator<(const OpTime& rhs) const {
+ // Compare term first, then the opTimes.
+ return std::tie(_term, _timestamp) < std::tie(rhs._term, rhs._timestamp);
+ }
- inline bool operator!=(const OpTime& rhs) const {
- return !(*this == rhs);
- }
+ inline bool operator!=(const OpTime& rhs) const {
+ return !(*this == rhs);
+ }
- inline bool operator<=(const OpTime& rhs) const {
- return *this < rhs || *this == rhs;
- }
+ inline bool operator<=(const OpTime& rhs) const {
+ return *this < rhs || *this == rhs;
+ }
- inline bool operator>(const OpTime& rhs) const {
- return !(*this <= rhs);
- }
+ inline bool operator>(const OpTime& rhs) const {
+ return !(*this <= rhs);
+ }
- inline bool operator>=(const OpTime& rhs) const {
- return !(*this < rhs);
- }
+ inline bool operator>=(const OpTime& rhs) const {
+ return !(*this < rhs);
+ }
- friend std::ostream& operator<<(std::ostream& out, const OpTime& opTime);
+ friend std::ostream& operator<<(std::ostream& out, const OpTime& opTime);
- private:
- Timestamp _timestamp;
- long long _term = kDefaultTerm;
- };
+private:
+ Timestamp _timestamp;
+ long long _term = kDefaultTerm;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
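
The comparison operators above order by term first and timestamp second, so a later term dominates even with an earlier wall-clock time; a small illustration with invented values:

    const OpTime a(Timestamp(100, 0), 1);  // term 1, later timestamp
    const OpTime b(Timestamp(50, 0), 2);   // term 2, earlier timestamp
    invariant(a < b);                      // the term comparison wins
    invariant(OpTime().isNull());          // default OpTime is uninitialized
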
diff --git a/src/mongo/db/repl/read_after_optime_args.cpp b/src/mongo/db/repl/read_after_optime_args.cpp
index 2a7c7817662..e3dcd87274b 100644
--- a/src/mongo/db/repl/read_after_optime_args.cpp
+++ b/src/mongo/db/repl/read_after_optime_args.cpp
@@ -41,70 +41,62 @@ using std::string;
namespace mongo {
namespace repl {
- const string ReadAfterOpTimeArgs::kRootFieldName("$readConcern");
- const string ReadAfterOpTimeArgs::kOpTimeFieldName("afterOpTime");
- const string ReadAfterOpTimeArgs::kOpTimestampFieldName("ts");
- const string ReadAfterOpTimeArgs::kOpTermFieldName("term");
+const string ReadAfterOpTimeArgs::kRootFieldName("$readConcern");
+const string ReadAfterOpTimeArgs::kOpTimeFieldName("afterOpTime");
+const string ReadAfterOpTimeArgs::kOpTimestampFieldName("ts");
+const string ReadAfterOpTimeArgs::kOpTermFieldName("term");
- ReadAfterOpTimeArgs::ReadAfterOpTimeArgs(): ReadAfterOpTimeArgs(OpTime()) {
- }
+ReadAfterOpTimeArgs::ReadAfterOpTimeArgs() : ReadAfterOpTimeArgs(OpTime()) {}
- ReadAfterOpTimeArgs::ReadAfterOpTimeArgs(OpTime opTime):
- _opTime(std::move(opTime)) {
- }
+ReadAfterOpTimeArgs::ReadAfterOpTimeArgs(OpTime opTime) : _opTime(std::move(opTime)) {}
- const OpTime& ReadAfterOpTimeArgs::getOpTime() const {
- return _opTime;
- }
+const OpTime& ReadAfterOpTimeArgs::getOpTime() const {
+ return _opTime;
+}
- Status ReadAfterOpTimeArgs::initialize(const BSONObj& cmdObj) {
- auto afterElem = cmdObj[ReadAfterOpTimeArgs::kRootFieldName];
+Status ReadAfterOpTimeArgs::initialize(const BSONObj& cmdObj) {
+ auto afterElem = cmdObj[ReadAfterOpTimeArgs::kRootFieldName];
- if (afterElem.eoo()) {
- return Status::OK();
- }
+ if (afterElem.eoo()) {
+ return Status::OK();
+ }
- if (!afterElem.isABSONObj()) {
- return Status(ErrorCodes::FailedToParse, "'after' field should be an object");
- }
+ if (!afterElem.isABSONObj()) {
+ return Status(ErrorCodes::FailedToParse, "'after' field should be an object");
+ }
- BSONObj readAfterObj = afterElem.Obj();
- BSONElement opTimeElem;
- auto opTimeStatus = bsonExtractTypedField(readAfterObj,
- ReadAfterOpTimeArgs::kOpTimeFieldName,
- Object,
- &opTimeElem);
+ BSONObj readAfterObj = afterElem.Obj();
+ BSONElement opTimeElem;
+ auto opTimeStatus = bsonExtractTypedField(
+ readAfterObj, ReadAfterOpTimeArgs::kOpTimeFieldName, Object, &opTimeElem);
- if (!opTimeStatus.isOK()) {
- return opTimeStatus;
- }
+ if (!opTimeStatus.isOK()) {
+ return opTimeStatus;
+ }
- BSONObj opTimeObj = opTimeElem.Obj();
- BSONElement timestampElem;
+ BSONObj opTimeObj = opTimeElem.Obj();
+ BSONElement timestampElem;
- Timestamp timestamp;
- auto timestampStatus =
- bsonExtractTimestampField(opTimeObj,
- ReadAfterOpTimeArgs::kOpTimestampFieldName,
- &timestamp);
+ Timestamp timestamp;
+ auto timestampStatus = bsonExtractTimestampField(
+ opTimeObj, ReadAfterOpTimeArgs::kOpTimestampFieldName, &timestamp);
- if (!timestampStatus.isOK()) {
- return timestampStatus;
- }
+ if (!timestampStatus.isOK()) {
+ return timestampStatus;
+ }
- long long termNumber;
- auto termStatus = bsonExtractIntegerField(opTimeObj,
- ReadAfterOpTimeArgs::kOpTermFieldName,
- &termNumber);
+ long long termNumber;
+ auto termStatus =
+ bsonExtractIntegerField(opTimeObj, ReadAfterOpTimeArgs::kOpTermFieldName, &termNumber);
- if (!termStatus.isOK()) {
- return termStatus;
- }
+ if (!termStatus.isOK()) {
+ return termStatus;
+ }
- _opTime = OpTime(timestamp, termNumber);
+ _opTime = OpTime(timestamp, termNumber);
- return Status::OK();
- }
+ return Status::OK();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/read_after_optime_args.h b/src/mongo/db/repl/read_after_optime_args.h
index ee58dd3b9b2..04536ce8702 100644
--- a/src/mongo/db/repl/read_after_optime_args.h
+++ b/src/mongo/db/repl/read_after_optime_args.h
@@ -36,40 +36,38 @@
namespace mongo {
- class BSONObj;
+class BSONObj;
namespace repl {
- class ReadAfterOpTimeArgs {
- public:
+class ReadAfterOpTimeArgs {
+public:
+ static const std::string kRootFieldName;
+ static const std::string kOpTimeFieldName;
+ static const std::string kOpTimestampFieldName;
+ static const std::string kOpTermFieldName;
- static const std::string kRootFieldName;
- static const std::string kOpTimeFieldName;
- static const std::string kOpTimestampFieldName;
- static const std::string kOpTermFieldName;
+ ReadAfterOpTimeArgs();
+ explicit ReadAfterOpTimeArgs(OpTime opTime);
- ReadAfterOpTimeArgs();
- explicit ReadAfterOpTimeArgs(OpTime opTime);
+ /**
+ * Format:
+ * {
+     *     find: "coll",
+ * filter: <Query Object>,
+ * $readConcern: { // optional
+ * afterOpTime: { ts: <timestamp>, term: <NumberLong> },
+ * }
+ * }
+ */
+ Status initialize(const BSONObj& cmdObj);
- /**
- * Format:
- * {
-         *     find: "coll",
- * filter: <Query Object>,
- * $readConcern: { // optional
- * afterOpTime: { ts: <timestamp>, term: <NumberLong> },
- * }
- * }
- */
- Status initialize(const BSONObj& cmdObj);
+ const OpTime& getOpTime() const;
+ const Milliseconds& getTimeout() const;
- const OpTime& getOpTime() const;
- const Milliseconds& getTimeout() const;
+private:
+ OpTime _opTime;
+};
- private:
-
- OpTime _opTime;
- };
-
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
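
A parse sketch matching the Format comment above; the literal field names mirror the kRootFieldName/kOpTimeFieldName/kOpTimestampFieldName/kOpTermFieldName constants, and the command object is invented:

    const BSONObj cmdObj =
        BSON("find"
             << "coll"
             << "$readConcern"
             << BSON("afterOpTime" << BSON("ts" << Timestamp(20, 30) << "term" << 2LL)));
    ReadAfterOpTimeArgs args;
    uassertStatusOK(args.initialize(cmdObj));  // uasserts on a malformed $readConcern
    invariant(args.getOpTime() == OpTime(Timestamp(20, 30), 2));
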
diff --git a/src/mongo/db/repl/read_after_optime_args_test.cpp b/src/mongo/db/repl/read_after_optime_args_test.cpp
index b37ca9f16ac..e79a9ff5dbb 100644
--- a/src/mongo/db/repl/read_after_optime_args_test.cpp
+++ b/src/mongo/db/repl/read_after_optime_args_test.cpp
@@ -34,86 +34,87 @@ namespace mongo {
namespace repl {
namespace {
- TEST(ReadAfterParse, BasicFullSpecification) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimestampFieldName << Timestamp(20, 30)
- << ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
+TEST(ReadAfterParse, BasicFullSpecification) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_OK(readAfterOpTime.initialize(BSON(
+ "find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimestampFieldName
+ << Timestamp(20, 30) << ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
- ASSERT_EQ(Timestamp(20, 30), readAfterOpTime.getOpTime().getTimestamp());
- ASSERT_EQ(2, readAfterOpTime.getOpTime().getTerm());
- }
+ ASSERT_EQ(Timestamp(20, 30), readAfterOpTime.getOpTime().getTimestamp());
+ ASSERT_EQ(2, readAfterOpTime.getOpTime().getTerm());
+}
- TEST(ReadAfterParse, Empty) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_OK(readAfterOpTime.initialize(BSON("find" << "test")));
+TEST(ReadAfterParse, Empty) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_OK(readAfterOpTime.initialize(BSON("find"
+ << "test")));
- ASSERT(readAfterOpTime.getOpTime().getTimestamp().isNull());
- }
+ ASSERT(readAfterOpTime.getOpTime().getTimestamp().isNull());
+}
- TEST(ReadAfterParse, BadRootType) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName << "x")));
- }
+TEST(ReadAfterParse, BadRootType) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(
+ readAfterOpTime.initialize(BSON("find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName << "x")));
+}
- TEST(ReadAfterParse, BadOpTimeType) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName << 2))));
- }
+TEST(ReadAfterParse, BadOpTimeType) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(
+ readAfterOpTime.initialize(BSON("find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName << 2))));
+}
- TEST(ReadAfterParse, OpTimeRequiredIfRootPresent) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName << BSONObj())));
- }
+TEST(ReadAfterParse, OpTimeRequiredIfRootPresent) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(readAfterOpTime.initialize(BSON("find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSONObj())));
+}
- TEST(ReadAfterParse, NoOpTimeTS) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
- }
+TEST(ReadAfterParse, NoOpTimeTS) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(
+ readAfterOpTime.initialize(BSON("find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName << BSON(
+ ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
+}
- TEST(ReadAfterParse, NoOpTimeTerm) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
- }
+TEST(ReadAfterParse, NoOpTimeTerm) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(
+ readAfterOpTime.initialize(BSON("find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName << BSON(
+ ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
+}
- TEST(ReadAfterParse, BadOpTimeTSType) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimestampFieldName << BSON("x" << 1)
- << ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
- }
+TEST(ReadAfterParse, BadOpTimeTSType) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(readAfterOpTime.initialize(
+ BSON("find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimestampFieldName
+ << BSON("x" << 1) << ReadAfterOpTimeArgs::kOpTermFieldName << 2)))));
+}
- TEST(ReadAfterParse, BadOpTimeTermType) {
- ReadAfterOpTimeArgs readAfterOpTime;
- ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
- "find" << "test"
- << ReadAfterOpTimeArgs::kRootFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
- << BSON(ReadAfterOpTimeArgs::kOpTimestampFieldName << Timestamp(1, 0)
- << ReadAfterOpTimeArgs::kOpTermFieldName << "y")))));
- }
+TEST(ReadAfterParse, BadOpTimeTermType) {
+ ReadAfterOpTimeArgs readAfterOpTime;
+ ASSERT_NOT_OK(readAfterOpTime.initialize(BSON(
+ "find"
+ << "test" << ReadAfterOpTimeArgs::kRootFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimeFieldName
+ << BSON(ReadAfterOpTimeArgs::kOpTimestampFieldName
+ << Timestamp(1, 0) << ReadAfterOpTimeArgs::kOpTermFieldName << "y")))));
+}
-} // unnamed namespace
-} // namespace repl
-} // namespace mongo
+} // unnamed namespace
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/read_after_optime_response.cpp b/src/mongo/db/repl/read_after_optime_response.cpp
index 3a6d5fc9962..332508c9e3c 100644
--- a/src/mongo/db/repl/read_after_optime_response.cpp
+++ b/src/mongo/db/repl/read_after_optime_response.cpp
@@ -39,48 +39,40 @@ using std::string;
namespace mongo {
namespace repl {
- const string ReadAfterOpTimeResponse::kWaitedMSFieldName("waitedMS");
+const string ReadAfterOpTimeResponse::kWaitedMSFieldName("waitedMS");
- ReadAfterOpTimeResponse::ReadAfterOpTimeResponse(Status status):
- ReadAfterOpTimeResponse(status, stdx::chrono::milliseconds(0), false) {
- }
+ReadAfterOpTimeResponse::ReadAfterOpTimeResponse(Status status)
+ : ReadAfterOpTimeResponse(status, stdx::chrono::milliseconds(0), false) {}
- ReadAfterOpTimeResponse::ReadAfterOpTimeResponse():
- ReadAfterOpTimeResponse(Status::OK()) {
- }
+ReadAfterOpTimeResponse::ReadAfterOpTimeResponse() : ReadAfterOpTimeResponse(Status::OK()) {}
- ReadAfterOpTimeResponse::ReadAfterOpTimeResponse(Status status,
- stdx::chrono::milliseconds duration):
- ReadAfterOpTimeResponse(status, duration, true) {
- }
-
- ReadAfterOpTimeResponse::ReadAfterOpTimeResponse(Status status,
- stdx::chrono::milliseconds duration,
- bool waited):
- _waited(waited),
- _duration(duration),
- _status(status) {
- }
+ReadAfterOpTimeResponse::ReadAfterOpTimeResponse(Status status, stdx::chrono::milliseconds duration)
+ : ReadAfterOpTimeResponse(status, duration, true) {}
- void ReadAfterOpTimeResponse::appendInfo(BSONObjBuilder* builder) {
- if (!_waited) {
- return;
- }
+ReadAfterOpTimeResponse::ReadAfterOpTimeResponse(Status status,
+ stdx::chrono::milliseconds duration,
+ bool waited)
+ : _waited(waited), _duration(duration), _status(status) {}
- builder->append(kWaitedMSFieldName, durationCount<Milliseconds>(_duration));
+void ReadAfterOpTimeResponse::appendInfo(BSONObjBuilder* builder) {
+ if (!_waited) {
+ return;
}
- bool ReadAfterOpTimeResponse::didWait() const {
- return _waited;
- }
+ builder->append(kWaitedMSFieldName, durationCount<Milliseconds>(_duration));
+}
- stdx::chrono::milliseconds ReadAfterOpTimeResponse::getDuration() const {
- return _duration;
- }
+bool ReadAfterOpTimeResponse::didWait() const {
+ return _waited;
+}
- Status ReadAfterOpTimeResponse::getStatus() const {
- return _status;
- }
+stdx::chrono::milliseconds ReadAfterOpTimeResponse::getDuration() const {
+ return _duration;
+}
+
+Status ReadAfterOpTimeResponse::getStatus() const {
+ return _status;
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/read_after_optime_response.h b/src/mongo/db/repl/read_after_optime_response.h
index 33163131363..7bd5788dd0f 100644
--- a/src/mongo/db/repl/read_after_optime_response.h
+++ b/src/mongo/db/repl/read_after_optime_response.h
@@ -35,57 +35,55 @@
namespace mongo {
- class BSONObjBuilder;
+class BSONObjBuilder;
namespace repl {
- class ReadAfterOpTimeResponse {
- public:
- static const std::string kWaitedMSFieldName;
-
- /**
-         * Constructs a default response with OK status and wait set to false.
- */
- ReadAfterOpTimeResponse();
-
- /**
-         * Constructs a response with the given status and wait set to false.
- */
- explicit ReadAfterOpTimeResponse(Status status);
-
- /**
- * Constructs a response with wait set to true along with the given parameters.
- */
- ReadAfterOpTimeResponse(Status status, stdx::chrono::milliseconds duration);
-
- /**
- * Appends to the builder the timeout and duration info if didWait() is true.
- * Note: does not include status.
- */
- void appendInfo(BSONObjBuilder* builder);
-
- bool didWait() const;
-
- /**
-         * Returns the amount of time spent waiting for the opTime to pass.
- * Valid only if didWait is true.
- */
- stdx::chrono::milliseconds getDuration() const;
-
- /**
- * Returns more details about an error if it occurred.
- */
- Status getStatus() const;
-
- private:
- ReadAfterOpTimeResponse(Status status,
- stdx::chrono::milliseconds duration,
- bool waited);
-
- bool _waited;
- stdx::chrono::milliseconds _duration;
- Status _status;
- };
-
-} // namespace repl
-} // namespace mongo
+class ReadAfterOpTimeResponse {
+public:
+ static const std::string kWaitedMSFieldName;
+
+ /**
+     * Constructs a default response with OK status and wait set to false.
+ */
+ ReadAfterOpTimeResponse();
+
+ /**
+ * Constructs a response with the given status with wait equals to false.
+ */
+ explicit ReadAfterOpTimeResponse(Status status);
+
+ /**
+ * Constructs a response with wait set to true along with the given parameters.
+ */
+ ReadAfterOpTimeResponse(Status status, stdx::chrono::milliseconds duration);
+
+ /**
+ * Appends to the builder the timeout and duration info if didWait() is true.
+ * Note: does not include status.
+ */
+ void appendInfo(BSONObjBuilder* builder);
+
+ bool didWait() const;
+
+ /**
+     * Returns the amount of time spent waiting for the opTime to pass.
+ * Valid only if didWait is true.
+ */
+ stdx::chrono::milliseconds getDuration() const;
+
+ /**
+ * Returns more details about an error if it occurred.
+ */
+ Status getStatus() const;
+
+private:
+ ReadAfterOpTimeResponse(Status status, stdx::chrono::milliseconds duration, bool waited);
+
+ bool _waited;
+ stdx::chrono::milliseconds _duration;
+ Status _status;
+};
+
+} // namespace repl
+} // namespace mongo
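
How the pieces above combine, as a sketch with invented values: only a response constructed with a duration serializes waitedMS.

    ReadAfterOpTimeResponse waited(Status::OK(), stdx::chrono::milliseconds(250));
    BSONObjBuilder waitedBuilder;
    waited.appendInfo(&waitedBuilder);  // adds {waitedMS: 250}

    ReadAfterOpTimeResponse immediate;  // didWait() == false
    BSONObjBuilder emptyBuilder;
    immediate.appendInfo(&emptyBuilder);  // appends nothing
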
diff --git a/src/mongo/db/repl/read_after_optime_response_test.cpp b/src/mongo/db/repl/read_after_optime_response_test.cpp
index 09d70204255..7104fca99b7 100644
--- a/src/mongo/db/repl/read_after_optime_response_test.cpp
+++ b/src/mongo/db/repl/read_after_optime_response_test.cpp
@@ -34,49 +34,49 @@ namespace mongo {
namespace repl {
namespace {
- TEST(ReadAfterResponse, Default) {
- ReadAfterOpTimeResponse response;
+TEST(ReadAfterResponse, Default) {
+ ReadAfterOpTimeResponse response;
- ASSERT_FALSE(response.didWait());
+ ASSERT_FALSE(response.didWait());
- BSONObjBuilder builder;
- response.appendInfo(&builder);
+ BSONObjBuilder builder;
+ response.appendInfo(&builder);
- BSONObj obj(builder.done());
- ASSERT_TRUE(obj.isEmpty());
- }
+ BSONObj obj(builder.done());
+ ASSERT_TRUE(obj.isEmpty());
+}
- TEST(ReadAfterResponse, WithStatus) {
- ReadAfterOpTimeResponse response(Status(ErrorCodes::InternalError, "test"));
+TEST(ReadAfterResponse, WithStatus) {
+ ReadAfterOpTimeResponse response(Status(ErrorCodes::InternalError, "test"));
- ASSERT_FALSE(response.didWait());
+ ASSERT_FALSE(response.didWait());
- ASSERT_EQ(ErrorCodes::InternalError, response.getStatus().code());
+ ASSERT_EQ(ErrorCodes::InternalError, response.getStatus().code());
- BSONObjBuilder builder;
- response.appendInfo(&builder);
+ BSONObjBuilder builder;
+ response.appendInfo(&builder);
- BSONObj obj(builder.done());
- ASSERT_TRUE(obj.isEmpty());
- }
+ BSONObj obj(builder.done());
+ ASSERT_TRUE(obj.isEmpty());
+}
- TEST(ReadAfterResponse, WaitedWithDuration) {
- ReadAfterOpTimeResponse response(Status(ErrorCodes::InternalError, "test"),
- stdx::chrono::milliseconds(7));
+TEST(ReadAfterResponse, WaitedWithDuration) {
+ ReadAfterOpTimeResponse response(Status(ErrorCodes::InternalError, "test"),
+ stdx::chrono::milliseconds(7));
- ASSERT_TRUE(response.didWait());
- ASSERT_EQUALS(Milliseconds(7), response.getDuration());
- ASSERT_EQ(ErrorCodes::InternalError, response.getStatus().code());
+ ASSERT_TRUE(response.didWait());
+ ASSERT_EQUALS(Milliseconds(7), response.getDuration());
+ ASSERT_EQ(ErrorCodes::InternalError, response.getStatus().code());
- BSONObjBuilder builder;
- response.appendInfo(&builder);
+ BSONObjBuilder builder;
+ response.appendInfo(&builder);
- BSONObj obj(builder.done());
- auto waitedMSElem = obj[ReadAfterOpTimeResponse::kWaitedMSFieldName];
- ASSERT_TRUE(waitedMSElem.isNumber());
- ASSERT_EQ(7, waitedMSElem.numberLong());
- }
+ BSONObj obj(builder.done());
+ auto waitedMSElem = obj[ReadAfterOpTimeResponse::kWaitedMSFieldName];
+ ASSERT_TRUE(waitedMSElem.isNumber());
+ ASSERT_EQ(7, waitedMSElem.numberLong());
+}
-} // unnamed namespace
-} // namespace repl
-} // namespace mongo
+} // unnamed namespace
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/repl_client_info.cpp b/src/mongo/db/repl/repl_client_info.cpp
index 631c121c223..18191bfaffc 100644
--- a/src/mongo/db/repl/repl_client_info.cpp
+++ b/src/mongo/db/repl/repl_client_info.cpp
@@ -39,15 +39,15 @@
namespace mongo {
namespace repl {
- const Client::Decoration<ReplClientInfo> ReplClientInfo::forClient =
- Client::declareDecoration<ReplClientInfo>();
+const Client::Decoration<ReplClientInfo> ReplClientInfo::forClient =
+ Client::declareDecoration<ReplClientInfo>();
- long long ReplClientInfo::getTerm() {
- if (_cachedTerm == kUninitializedTerm) {
- _cachedTerm = getGlobalReplicationCoordinator()->getTerm();
- }
- return _cachedTerm;
+long long ReplClientInfo::getTerm() {
+ if (_cachedTerm == kUninitializedTerm) {
+ _cachedTerm = getGlobalReplicationCoordinator()->getTerm();
}
+ return _cachedTerm;
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/repl_client_info.h b/src/mongo/db/repl/repl_client_info.h
index 69694f2795d..576142fd9cd 100644
--- a/src/mongo/db/repl/repl_client_info.h
+++ b/src/mongo/db/repl/repl_client_info.h
@@ -34,39 +34,47 @@
namespace mongo {
- class BSONObjBuilder;
- class Client;
+class BSONObjBuilder;
+class Client;
namespace repl {
- class ReplClientInfo {
- public:
- static const Client::Decoration<ReplClientInfo> forClient;
+class ReplClientInfo {
+public:
+ static const Client::Decoration<ReplClientInfo> forClient;
- void setLastOp(const OpTime& op) { _lastOp = op; }
- OpTime getLastOp() const { return _lastOp; }
+ void setLastOp(const OpTime& op) {
+ _lastOp = op;
+ }
+ OpTime getLastOp() const {
+ return _lastOp;
+ }
- // Only used for master/slave
- void setRemoteID(OID rid) { _remoteId = rid; }
- OID getRemoteID() const { return _remoteId; }
+ // Only used for master/slave
+ void setRemoteID(OID rid) {
+ _remoteId = rid;
+ }
+ OID getRemoteID() const {
+ return _remoteId;
+ }
- // If we haven't cached a term from replication coordinator, get the current term
- // and cache it during the life cycle of this client.
- //
-        // Used by logOp() to attach the current term to each log entry. We assume the term does
-        // not change after we cache it. This is true for write commands, since we acquire the
- // global lock (IX) for write commands and stepping down also needs that lock (S).
- // Stepping down will kill all user operations, so there is no write after stepping down
- // in the case of yielding.
- long long getTerm();
+ // If we haven't cached a term from replication coordinator, get the current term
+ // and cache it during the life cycle of this client.
+ //
+    // Used by logOp() to attach the current term to each log entry. We assume the term does
+    // not change after we cache it. This is true for write commands, since we acquire the
+ // global lock (IX) for write commands and stepping down also needs that lock (S).
+ // Stepping down will kill all user operations, so there is no write after stepping down
+ // in the case of yielding.
+ long long getTerm();
- private:
- static const long long kUninitializedTerm = -1;
+private:
+ static const long long kUninitializedTerm = -1;
- OpTime _lastOp = OpTime();
- OID _remoteId = OID();
- long long _cachedTerm = kUninitializedTerm;
- };
+ OpTime _lastOp = OpTime();
+ OID _remoteId = OID();
+ long long _cachedTerm = kUninitializedTerm;
+};
} // namespace repl
} // namespace mongo
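
A usage sketch for the decoration above; txn and newOpTime stand in for the caller's operation context and a freshly logged OpTime:

    // Hypothetical: fetch the per-client replication info and record the write.
    ReplClientInfo& info = ReplClientInfo::forClient(txn->getClient());
    info.setLastOp(newOpTime);
    const long long term = info.getTerm();  // term is cached for this client's lifetime
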
diff --git a/src/mongo/db/repl/repl_set_command.cpp b/src/mongo/db/repl/repl_set_command.cpp
index 5ff784352cd..2dd2178cdba 100644
--- a/src/mongo/db/repl/repl_set_command.cpp
+++ b/src/mongo/db/repl/repl_set_command.cpp
@@ -35,15 +35,15 @@
namespace mongo {
namespace repl {
- Status ReplSetCommand::checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), ActionType::internal)) {
- return {ErrorCodes::Unauthorized, "Unauthorized"};
- }
- return Status::OK();
+Status ReplSetCommand::checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), ActionType::internal)) {
+ return {ErrorCodes::Unauthorized, "Unauthorized"};
}
+ return Status::OK();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/repl_set_command.h b/src/mongo/db/repl/repl_set_command.h
index 0694eb28157..0b8c74c44b8 100644
--- a/src/mongo/db/repl/repl_set_command.h
+++ b/src/mongo/db/repl/repl_set_command.h
@@ -34,29 +34,35 @@
namespace mongo {
- class Status;
- class ClientBasic;
- class BSONObj;
+class Status;
+class ClientBasic;
+class BSONObj;
namespace repl {
- /**
- * Base class for repl set commands.
- */
- class ReplSetCommand : public Command {
- protected:
- ReplSetCommand(const char * s, bool show=false) : Command(s, show) { }
+/**
+ * Base class for repl set commands.
+ */
+class ReplSetCommand : public Command {
+protected:
+ ReplSetCommand(const char* s, bool show = false) : Command(s, show) {}
- bool slaveOk() const override { return true; }
+ bool slaveOk() const override {
+ return true;
+ }
- bool adminOnly() const override { return true; }
+ bool adminOnly() const override {
+ return true;
+ }
- bool isWriteCommandForConfigServer() const override { return false; }
+ bool isWriteCommandForConfigServer() const override {
+ return false;
+ }
- Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) override;
- };
+ Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) override;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/repl_set_declare_election_winner.cpp b/src/mongo/db/repl/repl_set_declare_election_winner.cpp
index 688428ca221..776d20a7a18 100644
--- a/src/mongo/db/repl/repl_set_declare_election_winner.cpp
+++ b/src/mongo/db/repl/repl_set_declare_election_winner.cpp
@@ -36,36 +36,35 @@
namespace mongo {
namespace repl {
- class CmdReplSetDeclareElectionWinner : public ReplSetCommand {
- public:
- CmdReplSetDeclareElectionWinner() : ReplSetCommand("replSetDeclareElectionWinner") { }
- private:
- bool run(OperationContext* txn,
- const std::string&,
- BSONObj& cmdObj,
- int,
- std::string& errmsg,
- BSONObjBuilder& result) final {
+class CmdReplSetDeclareElectionWinner : public ReplSetCommand {
+public:
+ CmdReplSetDeclareElectionWinner() : ReplSetCommand("replSetDeclareElectionWinner") {}
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
-
- ReplSetDeclareElectionWinnerArgs parsedArgs;
- status = parsedArgs.initialize(cmdObj);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
+private:
+ bool run(OperationContext* txn,
+ const std::string&,
+ BSONObj& cmdObj,
+ int,
+ std::string& errmsg,
+ BSONObjBuilder& result) final {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK()) {
+ return appendCommandStatus(result, status);
+ }
- long long responseTerm = -1;
- status = getGlobalReplicationCoordinator()->processReplSetDeclareElectionWinner(
- parsedArgs,
- &responseTerm);
- result.append("term", responseTerm);
+ ReplSetDeclareElectionWinnerArgs parsedArgs;
+ status = parsedArgs.initialize(cmdObj);
+ if (!status.isOK()) {
return appendCommandStatus(result, status);
}
- } cmdReplSetDeclareElectionWinner;
-} // namespace repl
-} // namespace mongo
+ long long responseTerm = -1;
+ status = getGlobalReplicationCoordinator()->processReplSetDeclareElectionWinner(
+ parsedArgs, &responseTerm);
+ result.append("term", responseTerm);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetDeclareElectionWinner;
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/repl_set_declare_election_winner_args.cpp b/src/mongo/db/repl/repl_set_declare_election_winner_args.cpp
index f206c8e528a..0961dc901de 100644
--- a/src/mongo/db/repl/repl_set_declare_election_winner_args.cpp
+++ b/src/mongo/db/repl/repl_set_declare_election_winner_args.cpp
@@ -36,133 +36,120 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kCommandName = "replSetDeclareElectionWinner";
- const std::string kErrorMessageFieldName = "errmsg";
- const std::string kErrorCodeFieldName = "code";
- const std::string kOkFieldName = "ok";
- const std::string kSetNameFieldName = "setName";
- const std::string kTermFieldName = "term";
- const std::string kWinnerIdFieldName = "winnerId";
-
- const std::string kLegalArgsFieldNames[] = {
- kCommandName,
- kSetNameFieldName,
- kTermFieldName,
- kWinnerIdFieldName,
- };
-
- const std::string kLegalResponseFieldNames[] = {
- kErrorMessageFieldName,
- kErrorCodeFieldName,
- kOkFieldName,
- kTermFieldName,
- };
+const std::string kCommandName = "replSetDeclareElectionWinner";
+const std::string kErrorMessageFieldName = "errmsg";
+const std::string kErrorCodeFieldName = "code";
+const std::string kOkFieldName = "ok";
+const std::string kSetNameFieldName = "setName";
+const std::string kTermFieldName = "term";
+const std::string kWinnerIdFieldName = "winnerId";
+
+const std::string kLegalArgsFieldNames[] = {
+ kCommandName, kSetNameFieldName, kTermFieldName, kWinnerIdFieldName,
+};
+
+const std::string kLegalResponseFieldNames[] = {
+ kErrorMessageFieldName, kErrorCodeFieldName, kOkFieldName, kTermFieldName,
+};
} // namespace
- Status ReplSetDeclareElectionWinnerArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetDeclareElectionWinner",
- argsObj,
- kLegalArgsFieldNames);
- if (!status.isOK())
- return status;
-
- status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kWinnerIdFieldName, &_winnerId);
- if (!status.isOK())
- return status;
-
- return Status::OK();
- }
-
- std::string ReplSetDeclareElectionWinnerArgs::getReplSetName() const {
- return _setName;
- }
-
- long long ReplSetDeclareElectionWinnerArgs::getTerm() const {
- return _term;
- }
-
- long long ReplSetDeclareElectionWinnerArgs::getWinnerId() const {
- return _winnerId;
- }
-
- void ReplSetDeclareElectionWinnerArgs::addToBSON(BSONObjBuilder* builder) const {
- builder->append("replSetDeclareElectionWinner", 1);
- builder->append(kSetNameFieldName, _setName);
- builder->append(kTermFieldName, _term);
- builder->appendIntOrLL(kWinnerIdFieldName, _winnerId);
- }
-
- BSONObj ReplSetDeclareElectionWinnerArgs::toBSON() const {
- BSONObjBuilder builder;
- addToBSON(&builder);
- return builder.obj();
- }
-
- Status ReplSetDeclareElectionWinnerResponse::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetDeclareElectionWinner",
- argsObj,
- kLegalResponseFieldNames);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerFieldWithDefault(argsObj,
- kErrorCodeFieldName,
- ErrorCodes::OK,
- &_code);
- if (!status.isOK())
- return status;
-
- status = bsonExtractStringFieldWithDefault(argsObj,
- kErrorMessageFieldName,
- "",
- &_errmsg);
- if (!status.isOK())
- return status;
-
- status = bsonExtractBooleanField(argsObj, kOkFieldName, &_ok);
- if (!status.isOK())
- return status;
-
- return Status::OK();
- }
-
- bool ReplSetDeclareElectionWinnerResponse::getOk() const {
- return _ok;
- }
-
- long long ReplSetDeclareElectionWinnerResponse::getTerm() const {
- return _term;
- }
-
- long long ReplSetDeclareElectionWinnerResponse::getErrorCode() const {
- return _code;
- }
-
- const std::string& ReplSetDeclareElectionWinnerResponse::getErrorMsg() const {
- return _errmsg;
- }
-
- void ReplSetDeclareElectionWinnerResponse::addToBSON(BSONObjBuilder* builder) const {
- builder->append(kOkFieldName, _ok);
- builder->append(kTermFieldName, _term);
- if (_code != ErrorCodes::OK) {
- builder->append(kErrorCodeFieldName, _code);
- builder->append(kErrorMessageFieldName, _errmsg);
- }
+Status ReplSetDeclareElectionWinnerArgs::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("ReplSetDeclareElectionWinner", argsObj, kLegalArgsFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kWinnerIdFieldName, &_winnerId);
+ if (!status.isOK())
+ return status;
+
+ return Status::OK();
+}
+
+std::string ReplSetDeclareElectionWinnerArgs::getReplSetName() const {
+ return _setName;
+}
+
+long long ReplSetDeclareElectionWinnerArgs::getTerm() const {
+ return _term;
+}
+
+long long ReplSetDeclareElectionWinnerArgs::getWinnerId() const {
+ return _winnerId;
+}
+
+void ReplSetDeclareElectionWinnerArgs::addToBSON(BSONObjBuilder* builder) const {
+ builder->append("replSetDeclareElectionWinner", 1);
+ builder->append(kSetNameFieldName, _setName);
+ builder->append(kTermFieldName, _term);
+ builder->appendIntOrLL(kWinnerIdFieldName, _winnerId);
+}
+
+BSONObj ReplSetDeclareElectionWinnerArgs::toBSON() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.obj();
+}
+
+Status ReplSetDeclareElectionWinnerResponse::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("ReplSetDeclareElectionWinner", argsObj, kLegalResponseFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
+ if (!status.isOK())
+ return status;
+
+ status =
+ bsonExtractIntegerFieldWithDefault(argsObj, kErrorCodeFieldName, ErrorCodes::OK, &_code);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractStringFieldWithDefault(argsObj, kErrorMessageFieldName, "", &_errmsg);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanField(argsObj, kOkFieldName, &_ok);
+ if (!status.isOK())
+ return status;
+
+ return Status::OK();
+}
+
+bool ReplSetDeclareElectionWinnerResponse::getOk() const {
+ return _ok;
+}
+
+long long ReplSetDeclareElectionWinnerResponse::getTerm() const {
+ return _term;
+}
+
+long long ReplSetDeclareElectionWinnerResponse::getErrorCode() const {
+ return _code;
+}
+
+const std::string& ReplSetDeclareElectionWinnerResponse::getErrorMsg() const {
+ return _errmsg;
+}
+
+void ReplSetDeclareElectionWinnerResponse::addToBSON(BSONObjBuilder* builder) const {
+ builder->append(kOkFieldName, _ok);
+ builder->append(kTermFieldName, _term);
+ if (_code != ErrorCodes::OK) {
+ builder->append(kErrorCodeFieldName, _code);
+ builder->append(kErrorMessageFieldName, _errmsg);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
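
The response class mirrors the args class: initialize() rejects any document with fields outside kLegalResponseFieldNames, requires "ok" (a real boolean, via bsonExtractBooleanField) and "term", and defaults "code" to ErrorCodes::OK and "errmsg" to the empty string. A hedged sketch with an illustrative document; it only builds inside the mongo tree:

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/repl_set_declare_election_winner_args.h"

    using namespace mongo;
    using namespace mongo::repl;

    Status parseWinnerResponse() {
        BSONObj doc = BSON("ok" << true << "term" << 2LL);  // "code"/"errmsg" optional
        ReplSetDeclareElectionWinnerResponse response;
        Status status = response.initialize(doc);
        if (!status.isOK())
            return status;
        // Now: response.getOk() == true, response.getTerm() == 2, and
        // response.getErrorCode() == ErrorCodes::OK (the default).
        return Status::OK();
    }
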
diff --git a/src/mongo/db/repl/repl_set_declare_election_winner_args.h b/src/mongo/db/repl/repl_set_declare_election_winner_args.h
index fb4d8f30a15..dd96fd94854 100644
--- a/src/mongo/db/repl/repl_set_declare_election_winner_args.h
+++ b/src/mongo/db/repl/repl_set_declare_election_winner_args.h
@@ -34,45 +34,45 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
+class BSONObj;
+class BSONObjBuilder;
namespace repl {
- class ReplSetDeclareElectionWinnerArgs {
- public:
- Status initialize(const BSONObj& argsObj);
+class ReplSetDeclareElectionWinnerArgs {
+public:
+ Status initialize(const BSONObj& argsObj);
- std::string getReplSetName() const;
- long long getTerm() const;
- long long getWinnerId() const;
+ std::string getReplSetName() const;
+ long long getTerm() const;
+ long long getWinnerId() const;
- void addToBSON(BSONObjBuilder* builder) const;
- BSONObj toBSON() const;
+ void addToBSON(BSONObjBuilder* builder) const;
+ BSONObj toBSON() const;
- private:
- std::string _setName;
- long long _term = -1; // The term for which the winner is being declared.
- long long _winnerId = -1; // replSet id of the member who was the winner.
- };
+private:
+ std::string _setName;
+ long long _term = -1; // The term for which the winner is being declared.
+ long long _winnerId = -1; // replSet id of the member who was the winner.
+};
- class ReplSetDeclareElectionWinnerResponse {
- public:
- Status initialize(const BSONObj& argsObj);
-
- bool getOk() const;
- long long getTerm() const;
- long long getErrorCode() const;
- const std::string& getErrorMsg() const;
+class ReplSetDeclareElectionWinnerResponse {
+public:
+ Status initialize(const BSONObj& argsObj);
- void addToBSON(BSONObjBuilder* builder) const;
+ bool getOk() const;
+ long long getTerm() const;
+ long long getErrorCode() const;
+ const std::string& getErrorMsg() const;
- private:
- bool _ok = false;
- long long _term = -1;
- long long _code = ErrorCodes::OK;
- std::string _errmsg;
- };
+ void addToBSON(BSONObjBuilder* builder) const;
-} // namespace repl
-} // namespace mongo
+private:
+ bool _ok = false;
+ long long _term = -1;
+ long long _code = ErrorCodes::OK;
+ std::string _errmsg;
+};
+
+} // namespace repl
+} // namespace mongo
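
With the declarations above in hand, a round trip shows the wire format end to end. A sketch assuming the in-tree headers; the set name and numbers are arbitrary:

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/repl_set_declare_election_winner_args.h"

    using namespace mongo;
    using namespace mongo::repl;

    void roundTripArgs() {
        ReplSetDeclareElectionWinnerArgs args;
        Status status = args.initialize(
            BSON("replSetDeclareElectionWinner" << 1 << "setName"
                                                << "rs0"
                                                << "term" << 3LL << "winnerId" << 1LL));
        if (!status.isOK())
            return;
        // getReplSetName() == "rs0", getTerm() == 3, getWinnerId() == 1;
        // toBSON() rebuilds an equivalent document through addToBSON().
        BSONObj roundTripped = args.toBSON();
    }
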
diff --git a/src/mongo/db/repl/repl_set_heartbeat_args.cpp b/src/mongo/db/repl/repl_set_heartbeat_args.cpp
index 75eee68348f..babca5a0dfa 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_args.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_args.cpp
@@ -39,142 +39,133 @@ namespace repl {
namespace {
- const std::string kCheckEmptyFieldName = "checkEmpty";
- const std::string kProtocolVersionFieldName = "pv";
- const std::string kConfigVersionFieldName = "v";
- const std::string kSenderIdFieldName = "fromId";
- const std::string kSetNameFieldName = "replSetHeartbeat";
- const std::string kSenderHostFieldName = "from";
-
- const std::string kLegalHeartbeatFieldNames[] = {
- kCheckEmptyFieldName,
- kProtocolVersionFieldName,
- kConfigVersionFieldName,
- kSenderIdFieldName,
- kSetNameFieldName,
- kSenderHostFieldName
- };
-
-} // namespace
-
- ReplSetHeartbeatArgs::ReplSetHeartbeatArgs() :
- _hasCheckEmpty(false),
- _hasProtocolVersion(false),
- _hasConfigVersion(false),
- _hasSenderId(false),
- _hasSetName(false),
- _hasSenderHost(false),
- _checkEmpty(false),
- _protocolVersion(-1),
- _configVersion(-1),
- _senderId(-1),
- _setName(""),
- _senderHost(HostAndPort()) {}
-
- Status ReplSetHeartbeatArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetHeartbeatArgs",
- argsObj,
- kLegalHeartbeatFieldNames);
+const std::string kCheckEmptyFieldName = "checkEmpty";
+const std::string kProtocolVersionFieldName = "pv";
+const std::string kConfigVersionFieldName = "v";
+const std::string kSenderIdFieldName = "fromId";
+const std::string kSetNameFieldName = "replSetHeartbeat";
+const std::string kSenderHostFieldName = "from";
+
+const std::string kLegalHeartbeatFieldNames[] = {kCheckEmptyFieldName,
+ kProtocolVersionFieldName,
+ kConfigVersionFieldName,
+ kSenderIdFieldName,
+ kSetNameFieldName,
+ kSenderHostFieldName};
+
+} // namespace
+
+ReplSetHeartbeatArgs::ReplSetHeartbeatArgs()
+ : _hasCheckEmpty(false),
+ _hasProtocolVersion(false),
+ _hasConfigVersion(false),
+ _hasSenderId(false),
+ _hasSetName(false),
+ _hasSenderHost(false),
+ _checkEmpty(false),
+ _protocolVersion(-1),
+ _configVersion(-1),
+ _senderId(-1),
+ _setName(""),
+ _senderHost(HostAndPort()) {}
+
+Status ReplSetHeartbeatArgs::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("ReplSetHeartbeatArgs", argsObj, kLegalHeartbeatFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanFieldWithDefault(argsObj, kCheckEmptyFieldName, false, &_checkEmpty);
+ if (!status.isOK())
+ return status;
+ _hasCheckEmpty = true;
+
+ status = bsonExtractIntegerField(argsObj, kProtocolVersionFieldName, &_protocolVersion);
+ if (!status.isOK())
+ return status;
+ _hasProtocolVersion = true;
+
+ status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_configVersion);
+ if (!status.isOK())
+ return status;
+ _hasConfigVersion = true;
+
+ status = bsonExtractIntegerFieldWithDefault(argsObj, kSenderIdFieldName, -1, &_senderId);
+ if (!status.isOK())
+ return status;
+ _hasSenderId = true;
+
+ status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
+ if (!status.isOK())
+ return status;
+ _hasSetName = true;
+
+ std::string hostAndPortString;
+ status =
+ bsonExtractStringFieldWithDefault(argsObj, kSenderHostFieldName, "", &hostAndPortString);
+ if (!status.isOK())
+ return status;
+
+ if (!hostAndPortString.empty()) {
+ status = _senderHost.initialize(hostAndPortString);
if (!status.isOK())
return status;
-
- status = bsonExtractBooleanFieldWithDefault(argsObj,
- kCheckEmptyFieldName,
- false,
- &_checkEmpty);
- if (!status.isOK())
- return status;
- _hasCheckEmpty = true;
-
- status = bsonExtractIntegerField(argsObj, kProtocolVersionFieldName, &_protocolVersion);
- if (!status.isOK())
- return status;
- _hasProtocolVersion = true;
-
- status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_configVersion);
- if (!status.isOK())
- return status;
- _hasConfigVersion = true;
-
- status = bsonExtractIntegerFieldWithDefault(argsObj, kSenderIdFieldName, -1, &_senderId);
- if (!status.isOK())
- return status;
- _hasSenderId = true;
-
- status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
- if (!status.isOK())
- return status;
- _hasSetName = true;
-
- std::string hostAndPortString;
- status = bsonExtractStringFieldWithDefault(
- argsObj,
- kSenderHostFieldName,
- "",
- &hostAndPortString);
- if (!status.isOK())
- return status;
-
- if (!hostAndPortString.empty()) {
- status = _senderHost.initialize(hostAndPortString);
- if (!status.isOK())
- return status;
- _hasSenderHost = true;
- }
-
- return Status::OK();
- }
-
- bool ReplSetHeartbeatArgs::isInitialized() const {
- return _hasProtocolVersion && _hasConfigVersion && _hasSetName;
- }
-
- BSONObj ReplSetHeartbeatArgs::toBSON() const {
- invariant(isInitialized());
- BSONObjBuilder builder;
- builder.append("replSetHeartbeat", _setName);
- builder.appendIntOrLL("pv", _protocolVersion);
- builder.appendIntOrLL("v", _configVersion);
- builder.append("from", _hasSenderHost ? _senderHost.toString() : "");
-
- if (_hasSenderId) {
- builder.appendIntOrLL("fromId", _senderId);
- }
- if (_hasCheckEmpty) {
- builder.append("checkEmpty", _checkEmpty);
- }
- return builder.obj();
- }
-
- void ReplSetHeartbeatArgs::setCheckEmpty(bool newVal) {
- _checkEmpty = newVal;
- _hasCheckEmpty = true;
+ _hasSenderHost = true;
}
- void ReplSetHeartbeatArgs::setProtocolVersion(long long newVal) {
- _protocolVersion = newVal;
- _hasProtocolVersion = true;
- }
+ return Status::OK();
+}
- void ReplSetHeartbeatArgs::setConfigVersion(long long newVal) {
- _configVersion = newVal;
- _hasConfigVersion = true;
- }
+bool ReplSetHeartbeatArgs::isInitialized() const {
+ return _hasProtocolVersion && _hasConfigVersion && _hasSetName;
+}
- void ReplSetHeartbeatArgs::setSenderId(long long newVal) {
- _senderId = newVal;
- _hasSenderId = true;
- }
+BSONObj ReplSetHeartbeatArgs::toBSON() const {
+ invariant(isInitialized());
+ BSONObjBuilder builder;
+ builder.append("replSetHeartbeat", _setName);
+ builder.appendIntOrLL("pv", _protocolVersion);
+ builder.appendIntOrLL("v", _configVersion);
+ builder.append("from", _hasSenderHost ? _senderHost.toString() : "");
- void ReplSetHeartbeatArgs::setSetName(std::string newVal) {
- _setName = newVal;
- _hasSetName = true;
+ if (_hasSenderId) {
+ builder.appendIntOrLL("fromId", _senderId);
}
-
- void ReplSetHeartbeatArgs::setSenderHost(HostAndPort newVal) {
- _senderHost = newVal;
- _hasSenderHost = true;
+ if (_hasCheckEmpty) {
+ builder.append("checkEmpty", _checkEmpty);
}
+ return builder.obj();
+}
+
+void ReplSetHeartbeatArgs::setCheckEmpty(bool newVal) {
+ _checkEmpty = newVal;
+ _hasCheckEmpty = true;
+}
+
+void ReplSetHeartbeatArgs::setProtocolVersion(long long newVal) {
+ _protocolVersion = newVal;
+ _hasProtocolVersion = true;
+}
+
+void ReplSetHeartbeatArgs::setConfigVersion(long long newVal) {
+ _configVersion = newVal;
+ _hasConfigVersion = true;
+}
+
+void ReplSetHeartbeatArgs::setSenderId(long long newVal) {
+ _senderId = newVal;
+ _hasSenderId = true;
+}
+
+void ReplSetHeartbeatArgs::setSetName(std::string newVal) {
+ _setName = newVal;
+ _hasSetName = true;
+}
+
+void ReplSetHeartbeatArgs::setSenderHost(HostAndPort newVal) {
+ _senderHost = newVal;
+ _hasSenderHost = true;
+}
} // namespace repl
} // namespace mongo
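
The setters let the sender assemble a heartbeat without starting from BSON; once the three mandatory fields are in place, isInitialized() returns true and toBSON() (which invariants on it) is safe to call. A sketch under the same in-tree-headers assumption; the host and version numbers are made up:

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/repl_set_heartbeat_args.h"
    #include "mongo/util/net/hostandport.h"

    using namespace mongo;
    using namespace mongo::repl;

    BSONObj buildHeartbeatCommand() {
        ReplSetHeartbeatArgs hbArgs;
        hbArgs.setSetName("rs0");      // mandatory, serialized as "replSetHeartbeat"
        hbArgs.setProtocolVersion(1);  // mandatory, serialized as "pv"
        hbArgs.setConfigVersion(5);    // mandatory, serialized as "v"
        hbArgs.setSenderId(0);         // optional, "fromId"
        hbArgs.setSenderHost(HostAndPort("node0.example.net:27017"));  // optional, "from"
        // isInitialized() is now true (pv, v, and set name are all set).
        return hbArgs.toBSON();
    }
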
diff --git a/src/mongo/db/repl/repl_set_heartbeat_args.h b/src/mongo/db/repl/repl_set_heartbeat_args.h
index 487be758524..f03e3260a04 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_args.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_args.h
@@ -34,101 +34,125 @@
namespace mongo {
- class BSONObj;
- class Status;
+class BSONObj;
+class Status;
namespace repl {
+/**
+ * Arguments to the replSetHeartbeat command.
+ */
+class ReplSetHeartbeatArgs {
+public:
+ ReplSetHeartbeatArgs();
+
+ /**
+ * Initializes this ReplSetHeartbeatArgs from the contents of args.
+ */
+ Status initialize(const BSONObj& argsObj);
+
+ /**
+ * Returns true if all required fields have been initialized.
+ */
+ bool isInitialized() const;
+
+ /**
+     * Returns whether the sender wants to know if this node is empty.
+ */
+ bool getCheckEmpty() const {
+ return _checkEmpty;
+ }
+
+ /**
+ * Gets the version of the Heartbeat protocol being used by the sender.
+ */
+ long long getProtocolVersion() const {
+ return _protocolVersion;
+ }
+
+ /**
+ * Gets the ReplSetConfig version number of the sender.
+ */
+ long long getConfigVersion() const {
+ return _configVersion;
+ }
+
+ /**
+ * Gets the _id of the sender in their ReplSetConfig.
+ */
+ long long getSenderId() const {
+ return _senderId;
+ }
+
+ /**
+ * Gets the replSet name of the sender's replica set.
+ */
+ std::string getSetName() const {
+ return _setName;
+ }
+
+ /**
+ * Gets the HostAndPort of the sender.
+ */
+ HostAndPort getSenderHost() const {
+ return _senderHost;
+ }
+
+ /**
+     * The below methods check whether or not the value in the method name has been set.
+ */
+ bool hasCheckEmpty() {
+ return _hasCheckEmpty;
+ }
+ bool hasProtocolVersion() {
+ return _hasProtocolVersion;
+ }
+ bool hasConfigVersion() {
+ return _hasConfigVersion;
+ }
+ bool hasSenderId() {
+ return _hasSenderId;
+ }
+ bool hasSetName() {
+ return _hasSetName;
+ }
+ bool hasSenderHost() {
+ return _hasSenderHost;
+ }
+
+ /**
+ * The below methods set the value in the method name to 'newVal'.
+ */
+ void setCheckEmpty(bool newVal);
+ void setProtocolVersion(long long newVal);
+ void setConfigVersion(long long newVal);
+ void setSenderId(long long newVal);
+ void setSetName(std::string newVal);
+ void setSenderHost(HostAndPort newVal);
+
/**
- * Arguments to the replSetHeartbeat command.
+ * Returns a BSONified version of the object.
+ * Should only be called if the mandatory fields have been set.
+ * Optional fields are only included if they have been set.
*/
- class ReplSetHeartbeatArgs {
- public:
- ReplSetHeartbeatArgs();
-
- /**
- * Initializes this ReplSetHeartbeatArgs from the contents of args.
- */
- Status initialize(const BSONObj& argsObj);
-
- /**
- * Returns true if all required fields have been initialized.
- */
- bool isInitialized() const;
-
- /**
- * Returns whether the sender would like to know whether the node is empty or not.
- */
- bool getCheckEmpty() const { return _checkEmpty; }
-
- /**
- * Gets the version of the Heartbeat protocol being used by the sender.
- */
- long long getProtocolVersion() const { return _protocolVersion; }
-
- /**
- * Gets the ReplSetConfig version number of the sender.
- */
- long long getConfigVersion() const { return _configVersion; }
-
- /**
- * Gets the _id of the sender in their ReplSetConfig.
- */
- long long getSenderId() const { return _senderId; }
-
- /**
- * Gets the replSet name of the sender's replica set.
- */
- std::string getSetName() const { return _setName; }
-
- /**
- * Gets the HostAndPort of the sender.
- */
- HostAndPort getSenderHost() const { return _senderHost; }
-
- /**
- * The below methods check whether or not value in the method name has been set.
- */
- bool hasCheckEmpty() { return _hasCheckEmpty; }
- bool hasProtocolVersion() { return _hasProtocolVersion; }
- bool hasConfigVersion() { return _hasConfigVersion; }
- bool hasSenderId() { return _hasSenderId; }
- bool hasSetName() { return _hasSetName; }
- bool hasSenderHost() { return _hasSenderHost; }
-
- /**
- * The below methods set the value in the method name to 'newVal'.
- */
- void setCheckEmpty(bool newVal);
- void setProtocolVersion(long long newVal);
- void setConfigVersion(long long newVal);
- void setSenderId(long long newVal);
- void setSetName(std::string newVal);
- void setSenderHost(HostAndPort newVal);
-
- /**
- * Returns a BSONified version of the object.
- * Should only be called if the mandatory fields have been set.
- * Optional fields are only included if they have been set.
- */
- BSONObj toBSON() const;
-
- private:
- bool _hasCheckEmpty;
- bool _hasProtocolVersion;
- bool _hasConfigVersion;
- bool _hasSenderId;
- bool _hasSetName;
- bool _hasSenderHost;
-
- // look at the body of the isInitialized() function to see which fields are mandatory
- bool _checkEmpty;
- long long _protocolVersion;
- long long _configVersion;
- long long _senderId;
- std::string _setName;
- HostAndPort _senderHost;
- };
-
-} // namespace repl
-} // namespace mongo
+ BSONObj toBSON() const;
+
+private:
+ bool _hasCheckEmpty;
+ bool _hasProtocolVersion;
+ bool _hasConfigVersion;
+ bool _hasSenderId;
+ bool _hasSetName;
+ bool _hasSenderHost;
+
+    // Look at the body of the isInitialized() function to see which fields are mandatory.
+ bool _checkEmpty;
+ long long _protocolVersion;
+ long long _configVersion;
+ long long _senderId;
+ std::string _setName;
+ HostAndPort _senderHost;
+};
+
+} // namespace repl
+} // namespace mongo
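
On the receiving side, initialize() accepts exactly the fields in kLegalHeartbeatFieldNames and records which optional ones arrived, queryable through the has*() methods. A sketch; the example document in the comment is illustrative:

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/repl_set_heartbeat_args.h"

    using namespace mongo;
    using namespace mongo::repl;

    Status parseHeartbeat(const BSONObj& cmdObj) {
        // e.g. cmdObj = BSON("replSetHeartbeat" << "rs0" << "pv" << 1 << "v" << 5
        //                    << "from" << "node1.example.net:27017" << "fromId" << 1)
        ReplSetHeartbeatArgs args;
        Status status = args.initialize(cmdObj);
        if (!status.isOK())
            return status;  // unknown field, or missing "pv"/"v"/"replSetHeartbeat"
        if (args.hasSenderHost()) {
            // "from" was present and non-empty.
            HostAndPort sender = args.getSenderHost();
        }
        return Status::OK();
    }
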
diff --git a/src/mongo/db/repl/repl_set_heartbeat_args_v1.cpp b/src/mongo/db/repl/repl_set_heartbeat_args_v1.cpp
index b65b9e99521..c79249815f9 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_args_v1.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_args_v1.cpp
@@ -40,113 +40,107 @@ namespace repl {
namespace {
- const std::string kCheckEmptyFieldName = "checkEmpty";
- const std::string kConfigVersionFieldName = "configVersion";
- const std::string kSenderHostFieldName = "from";
- const std::string kSenderIdFieldName = "fromId";
- const std::string kSetNameFieldName = "replSetHeartbeat";
- const std::string kTermFieldName = "term";
-
- const std::string kLegalHeartbeatFieldNames[] = {
- kCheckEmptyFieldName,
- kConfigVersionFieldName,
- kSenderHostFieldName,
- kSenderIdFieldName,
- kSetNameFieldName,
- kTermFieldName
- };
-
-} // namespace
-
- Status ReplSetHeartbeatArgsV1::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetHeartbeatArgs",
- argsObj,
- kLegalHeartbeatFieldNames);
+const std::string kCheckEmptyFieldName = "checkEmpty";
+const std::string kConfigVersionFieldName = "configVersion";
+const std::string kSenderHostFieldName = "from";
+const std::string kSenderIdFieldName = "fromId";
+const std::string kSetNameFieldName = "replSetHeartbeat";
+const std::string kTermFieldName = "term";
+
+const std::string kLegalHeartbeatFieldNames[] = {kCheckEmptyFieldName,
+ kConfigVersionFieldName,
+ kSenderHostFieldName,
+ kSenderIdFieldName,
+ kSetNameFieldName,
+ kTermFieldName};
+
+} // namespace
+
+Status ReplSetHeartbeatArgsV1::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("ReplSetHeartbeatArgs", argsObj, kLegalHeartbeatFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanFieldWithDefault(argsObj, kCheckEmptyFieldName, false, &_checkEmpty);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_configVersion);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerFieldWithDefault(argsObj, kSenderIdFieldName, -1, &_senderId);
+ if (!status.isOK())
+ return status;
+
+ std::string hostAndPortString;
+ status = bsonExtractStringField(argsObj, kSenderHostFieldName, &hostAndPortString);
+ if (!status.isOK())
+ return status;
+ if (!hostAndPortString.empty()) {
+ status = _senderHost.initialize(hostAndPortString);
if (!status.isOK())
return status;
+ _hasSender = true;
+ }
- status = bsonExtractBooleanFieldWithDefault(argsObj,
- kCheckEmptyFieldName,
- false,
- &_checkEmpty);
- if (!status.isOK())
- return status;
+ status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
+ if (!status.isOK())
+ return status;
- status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_configVersion);
- if (!status.isOK())
- return status;
+ status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
+ if (!status.isOK())
+ return status;
- status = bsonExtractIntegerFieldWithDefault(argsObj, kSenderIdFieldName, -1, &_senderId);
- if (!status.isOK())
- return status;
+ return Status::OK();
+}
- std::string hostAndPortString;
- status = bsonExtractStringField(argsObj, kSenderHostFieldName, &hostAndPortString);
- if (!status.isOK())
- return status;
- if (!hostAndPortString.empty()) {
- status = _senderHost.initialize(hostAndPortString);
- if (!status.isOK())
- return status;
- _hasSender = true;
- }
-
- status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
- if (!status.isOK())
- return status;
+bool ReplSetHeartbeatArgsV1::isInitialized() const {
+ return _configVersion != -1 && _term != -1 && !_setName.empty();
+}
- status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
- if (!status.isOK())
- return status;
+void ReplSetHeartbeatArgsV1::setConfigVersion(long long newVal) {
+ _configVersion = newVal;
+}
- return Status::OK();
- }
-
- bool ReplSetHeartbeatArgsV1::isInitialized() const {
- return _configVersion != -1 && _term != -1 && !_setName.empty();
- }
-
- void ReplSetHeartbeatArgsV1::setConfigVersion(long long newVal) {
- _configVersion = newVal;
- }
-
- void ReplSetHeartbeatArgsV1::setSenderHost(const HostAndPort& newVal) {
- _senderHost = newVal;
- }
+void ReplSetHeartbeatArgsV1::setSenderHost(const HostAndPort& newVal) {
+ _senderHost = newVal;
+}
- void ReplSetHeartbeatArgsV1::setSenderId(long long newVal) {
- _senderId = newVal;
- }
+void ReplSetHeartbeatArgsV1::setSenderId(long long newVal) {
+ _senderId = newVal;
+}
- void ReplSetHeartbeatArgsV1::setSetName(const std::string& newVal) {
- _setName = newVal;
- }
+void ReplSetHeartbeatArgsV1::setSetName(const std::string& newVal) {
+ _setName = newVal;
+}
- void ReplSetHeartbeatArgsV1::setTerm(long long newVal) {
- _term = newVal;
- }
+void ReplSetHeartbeatArgsV1::setTerm(long long newVal) {
+ _term = newVal;
+}
- void ReplSetHeartbeatArgsV1::setCheckEmpty() {
- _checkEmpty = true;
- }
+void ReplSetHeartbeatArgsV1::setCheckEmpty() {
+ _checkEmpty = true;
+}
- BSONObj ReplSetHeartbeatArgsV1::toBSON() const {
- invariant(isInitialized());
- BSONObjBuilder builder;
- addToBSON(&builder);
- return builder.obj();
- }
+BSONObj ReplSetHeartbeatArgsV1::toBSON() const {
+ invariant(isInitialized());
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.obj();
+}
- void ReplSetHeartbeatArgsV1::addToBSON(BSONObjBuilder* builder) const {
- builder->append(kSetNameFieldName, _setName);
- if (_checkEmpty) {
- builder->append(kCheckEmptyFieldName, _checkEmpty);
- }
- builder->appendIntOrLL(kConfigVersionFieldName, _configVersion);
- builder->append(kSenderHostFieldName, _hasSender ? _senderHost.toString() : "");
- builder->appendIntOrLL(kSenderIdFieldName, _senderId);
- builder->appendIntOrLL(kTermFieldName, _term);
+void ReplSetHeartbeatArgsV1::addToBSON(BSONObjBuilder* builder) const {
+ builder->append(kSetNameFieldName, _setName);
+ if (_checkEmpty) {
+ builder->append(kCheckEmptyFieldName, _checkEmpty);
}
+ builder->appendIntOrLL(kConfigVersionFieldName, _configVersion);
+ builder->append(kSenderHostFieldName, _hasSender ? _senderHost.toString() : "");
+ builder->appendIntOrLL(kSenderIdFieldName, _senderId);
+ builder->appendIntOrLL(kTermFieldName, _term);
+}
} // namespace repl
} // namespace mongo
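
Worth noting against the V0 class above: the config version moves from "v" to "configVersion", a mandatory "term" field appears, "from" becomes required (plain bsonExtractStringField, no default), and the per-field _has flags give way to sentinel defaults that isInitialized() inspects. A sketch with illustrative values:

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/repl_set_heartbeat_args_v1.h"

    using namespace mongo;
    using namespace mongo::repl;

    Status parseV1Heartbeat() {
        BSONObj cmd = BSON("replSetHeartbeat" << "rs0" << "configVersion" << 5 << "from"
                                              << "node1.example.net:27017"
                                              << "fromId" << 1 << "term" << 2);
        ReplSetHeartbeatArgsV1 args;
        Status status = args.initialize(cmd);
        if (!status.isOK())
            return status;
        // isInitialized() is true: _configVersion, _term, and _setName have
        // all left their -1 / empty-string sentinels.
        return Status::OK();
    }
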
diff --git a/src/mongo/db/repl/repl_set_heartbeat_args_v1.h b/src/mongo/db/repl/repl_set_heartbeat_args_v1.h
index a7f6691a31a..2a6dbddd3dd 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_args_v1.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_args_v1.h
@@ -34,86 +34,98 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class Status;
+class BSONObj;
+class BSONObjBuilder;
+class Status;
namespace repl {
+/**
+ * Arguments to the replSetHeartbeat command.
+ */
+class ReplSetHeartbeatArgsV1 {
+public:
+ /**
+ * Initializes this ReplSetHeartbeatArgsV1 from the contents of args.
+ */
+ Status initialize(const BSONObj& argsObj);
+
+ /**
+ * Returns true if all required fields have been initialized.
+ */
+ bool isInitialized() const;
+
+ /**
+ * Gets the ReplSetConfig version number of the sender.
+ */
+ long long getConfigVersion() const {
+ return _configVersion;
+ }
+
+ /**
+ * Gets the _id of the sender in their ReplSetConfig.
+ */
+ long long getSenderId() const {
+ return _senderId;
+ }
+
+ /**
+ * Gets the HostAndPort of the sender.
+ */
+ HostAndPort getSenderHost() const {
+ return _senderHost;
+ }
+
+ /**
+ * Gets the replSet name of the sender's replica set.
+ */
+ std::string getSetName() const {
+ return _setName;
+ }
+
+ /**
+     * Gets the term that the sender believes it is in.
+ */
+ long long getTerm() const {
+ return _term;
+ }
+
+ /**
+ * Returns whether or not the sender is checking for emptiness.
+ */
+ bool hasCheckEmpty() const {
+ return _checkEmpty;
+ }
+
+ /**
+ * The below methods set the value in the method name to 'newVal'.
+ */
+ void setConfigVersion(long long newVal);
+ void setSenderId(long long newVal);
+ void setSenderHost(const HostAndPort& newVal);
+ void setSetName(const std::string& newVal);
+ void setTerm(long long newVal);
+ void setCheckEmpty();
+
/**
- * Arguments to the replSetHeartbeat command.
+ * Returns a BSONified version of the object.
+ * Should only be called if the mandatory fields have been set.
+ * Optional fields are only included if they have been set.
*/
- class ReplSetHeartbeatArgsV1 {
- public:
- /**
- * Initializes this ReplSetHeartbeatArgsV1 from the contents of args.
- */
- Status initialize(const BSONObj& argsObj);
-
- /**
- * Returns true if all required fields have been initialized.
- */
- bool isInitialized() const;
-
- /**
- * Gets the ReplSetConfig version number of the sender.
- */
- long long getConfigVersion() const { return _configVersion; }
-
- /**
- * Gets the _id of the sender in their ReplSetConfig.
- */
- long long getSenderId() const { return _senderId; }
-
- /**
- * Gets the HostAndPort of the sender.
- */
- HostAndPort getSenderHost() const { return _senderHost; }
-
- /**
- * Gets the replSet name of the sender's replica set.
- */
- std::string getSetName() const { return _setName; }
-
- /**
- * Gets the term the sender believes it to be.
- */
- long long getTerm() const { return _term; }
-
- /**
- * Returns whether or not the sender is checking for emptiness.
- */
- bool hasCheckEmpty() const { return _checkEmpty; }
-
- /**
- * The below methods set the value in the method name to 'newVal'.
- */
- void setConfigVersion(long long newVal);
- void setSenderId(long long newVal);
- void setSenderHost(const HostAndPort& newVal);
- void setSetName(const std::string& newVal);
- void setTerm(long long newVal);
- void setCheckEmpty();
-
- /**
- * Returns a BSONified version of the object.
- * Should only be called if the mandatory fields have been set.
- * Optional fields are only included if they have been set.
- */
- BSONObj toBSON() const;
-
- void addToBSON(BSONObjBuilder* builder) const;
-
- private:
- // look at the body of the isInitialized() function to see which fields are mandatory
- long long _configVersion = -1;
- long long _senderId = -1;
- long long _term = -1;
- bool _checkEmpty = false;
- bool _hasSender = false;
- std::string _setName;
- HostAndPort _senderHost;
- };
-
-} // namespace repl
-} // namespace mongo
+ BSONObj toBSON() const;
+
+ void addToBSON(BSONObjBuilder* builder) const;
+
+private:
+    // Look at the body of the isInitialized() function to see which fields are mandatory.
+ long long _configVersion = -1;
+ long long _senderId = -1;
+ long long _term = -1;
+ bool _checkEmpty = false;
+ bool _hasSender = false;
+ std::string _setName;
+ HostAndPort _senderHost;
+};
+
+} // namespace repl
+} // namespace mongo
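
addToBSON() is exposed alongside toBSON() so the heartbeat fields can be appended into a command builder the caller already owns. A sending-side sketch; the values are illustrative:

    #include "mongo/bson/bsonobjbuilder.h"
    #include "mongo/db/repl/repl_set_heartbeat_args_v1.h"
    #include "mongo/util/net/hostandport.h"

    using namespace mongo;
    using namespace mongo::repl;

    BSONObj buildV1HeartbeatCommand() {
        ReplSetHeartbeatArgsV1 hbArgs;
        hbArgs.setSetName("rs0");
        hbArgs.setConfigVersion(5);
        hbArgs.setTerm(2);
        hbArgs.setSenderId(0);
        hbArgs.setSenderHost(HostAndPort("node0.example.net:27017"));
        BSONObjBuilder cmdBuilder;
        // Appends replSetHeartbeat, configVersion, from, fromId, and term.
        hbArgs.addToBSON(&cmdBuilder);
        return cmdBuilder.obj();
    }
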
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.cpp b/src/mongo/db/repl/repl_set_heartbeat_response.cpp
index 43b62a43f48..d2a77c58a5f 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.cpp
@@ -45,345 +45,335 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kConfigFieldName = "config";
- const std::string kConfigVersionFieldName = "v";
- const std::string kElectionTimeFieldName = "electionTime";
- const std::string kErrMsgFieldName = "errmsg";
- const std::string kErrorCodeFieldName = "code";
- const std::string kHasDataFieldName = "hasData";
- const std::string kHasStateDisagreementFieldName = "stateDisagreement";
- const std::string kHbMessageFieldName = "hbmsg";
- const std::string kIsElectableFieldName = "e";
- const std::string kIsReplSetFieldName = "rs";
- const std::string kMemberStateFieldName = "state";
- const std::string kMismatchFieldName = "mismatch";
- const std::string kOkFieldName = "ok";
- const std::string kOpTimeFieldName = "opTime";
- const std::string kPrimaryIdFieldName = "primaryId";
- const std::string kReplSetFieldName = "set";
- const std::string kSyncSourceFieldName = "syncingTo";
- const std::string kTermFieldName = "term";
- const std::string kTimeFieldName = "time";
- const std::string kTimestampFieldName = "ts";
+const std::string kConfigFieldName = "config";
+const std::string kConfigVersionFieldName = "v";
+const std::string kElectionTimeFieldName = "electionTime";
+const std::string kErrMsgFieldName = "errmsg";
+const std::string kErrorCodeFieldName = "code";
+const std::string kHasDataFieldName = "hasData";
+const std::string kHasStateDisagreementFieldName = "stateDisagreement";
+const std::string kHbMessageFieldName = "hbmsg";
+const std::string kIsElectableFieldName = "e";
+const std::string kIsReplSetFieldName = "rs";
+const std::string kMemberStateFieldName = "state";
+const std::string kMismatchFieldName = "mismatch";
+const std::string kOkFieldName = "ok";
+const std::string kOpTimeFieldName = "opTime";
+const std::string kPrimaryIdFieldName = "primaryId";
+const std::string kReplSetFieldName = "set";
+const std::string kSyncSourceFieldName = "syncingTo";
+const std::string kTermFieldName = "term";
+const std::string kTimeFieldName = "time";
+const std::string kTimestampFieldName = "ts";
} // namespace
- void ReplSetHeartbeatResponse::addToBSON(BSONObjBuilder* builder,
- bool isProtocolVersionV1) const {
- if (_mismatch) {
- *builder << kOkFieldName << 0.0;
- *builder << kMismatchFieldName << _mismatch;
- return;
- }
-
- builder->append(kOkFieldName, 1.0);
- if (_timeSet) {
- *builder << kTimeFieldName << durationCount<Seconds>(_time);
- }
- if (_electionTimeSet) {
- builder->appendDate(kElectionTimeFieldName,
- Date_t::fromMillisSinceEpoch(_electionTime.asLL()));
- }
- if (_configSet) {
- *builder << kConfigFieldName << _config.toBSON();
- }
- if (_electableSet) {
- *builder << kIsElectableFieldName << _electable;
- }
- if (_isReplSet) {
- *builder << "rs" << _isReplSet;
- }
- if (_stateDisagreement) {
- *builder << kHasStateDisagreementFieldName << _stateDisagreement;
- }
- if (_stateSet) {
- builder->appendIntOrLL(kMemberStateFieldName, _state.s);
- }
- if (_configVersion != -1) {
- *builder << kConfigVersionFieldName << _configVersion;
- }
- *builder << kHbMessageFieldName << _hbmsg;
- if (!_setName.empty()) {
- *builder << kReplSetFieldName << _setName;
- }
- if (!_syncingTo.empty()) {
- *builder << kSyncSourceFieldName << _syncingTo.toString();
- }
- if (_hasDataSet) {
- builder->append(kHasDataFieldName, _hasData);
- }
- if (_term != -1) {
- builder->append(kTermFieldName, _term);
- }
- if (_primaryIdSet) {
- builder->append(kPrimaryIdFieldName, _primaryId);
- }
- if (_opTimeSet) {
- if (isProtocolVersionV1) {
- BSONObjBuilder opTime(builder->subobjStart(kOpTimeFieldName));
- opTime.append(kTimestampFieldName, _opTime.getTimestamp());
- opTime.append(kTermFieldName, _opTime.getTerm());
- opTime.done();
- }
- else {
- builder->appendDate(kOpTimeFieldName,
- Date_t::fromMillisSinceEpoch(_opTime.getTimestamp().asLL()));
- }
- }
-
+void ReplSetHeartbeatResponse::addToBSON(BSONObjBuilder* builder, bool isProtocolVersionV1) const {
+ if (_mismatch) {
+ *builder << kOkFieldName << 0.0;
+ *builder << kMismatchFieldName << _mismatch;
+ return;
}
- BSONObj ReplSetHeartbeatResponse::toBSON(bool isProtocolVersionV1) const {
- BSONObjBuilder builder;
- addToBSON(&builder, isProtocolVersionV1);
- return builder.obj();
+ builder->append(kOkFieldName, 1.0);
+ if (_timeSet) {
+ *builder << kTimeFieldName << durationCount<Seconds>(_time);
}
-
- Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc, long long term) {
-
- // Old versions set this even though they returned not "ok"
- _mismatch = doc[kMismatchFieldName].trueValue();
- if (_mismatch)
- return Status(ErrorCodes::InconsistentReplicaSetNames,
- "replica set name doesn't match.");
-
- // Old versions sometimes set the replica set name ("set") but ok:0
- const BSONElement replSetNameElement = doc[kReplSetFieldName];
- if (replSetNameElement.eoo()) {
- _setName.clear();
- }
- else if (replSetNameElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kReplSetFieldName << "\" field in response to replSetHeartbeat to have "
- "type String, but found " << typeName(replSetNameElement.type()));
- }
- else {
- _setName = replSetNameElement.String();
- }
-
- if (_setName.empty() && !doc[kOkFieldName].trueValue()) {
- std::string errMsg = doc[kErrMsgFieldName].str();
-
- BSONElement errCodeElem = doc[kErrorCodeFieldName];
- if (errCodeElem.ok()) {
- if (!errCodeElem.isNumber())
- return Status(ErrorCodes::BadValue, "Error code is not a number!");
-
- int errorCode = errCodeElem.numberInt();
- return Status(ErrorCodes::Error(errorCode), errMsg);
- }
- return Status(ErrorCodes::UnknownError, errMsg);
- }
-
- const BSONElement hasDataElement = doc[kHasDataFieldName];
- _hasDataSet = !hasDataElement.eoo();
- _hasData = hasDataElement.trueValue();
-
- const BSONElement electionTimeElement = doc[kElectionTimeFieldName];
- if (electionTimeElement.eoo()) {
- _electionTimeSet = false;
- }
- else if (electionTimeElement.type() == bsonTimestamp) {
- _electionTimeSet = true;
- _electionTime = electionTimeElement.timestamp();
- }
- else if (electionTimeElement.type() == Date) {
- _electionTimeSet = true;
- _electionTime = Timestamp(electionTimeElement.date());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kElectionTimeFieldName << "\" field in response to replSetHeartbeat "
- "command to have type Date or Timestamp, but found type " <<
- typeName(electionTimeElement.type()));
- }
-
- const BSONElement timeElement = doc[kTimeFieldName];
- if (timeElement.eoo()) {
- _timeSet = false;
- }
- else if (timeElement.isNumber()) {
- _timeSet = true;
- _time = Seconds(timeElement.numberLong());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kTimeFieldName << "\" field in response to replSetHeartbeat "
- "command to have a numeric type, but found type " <<
- typeName(timeElement.type()));
- }
-
- _isReplSet = doc[kIsReplSetFieldName].trueValue();
-
- // In order to support both the 3.0(V0) and 3.2(V1) heartbeats we must parse the OpTime
- // field based on its type. If it is a Date, we parse it as the timestamp and use
- // initialize's term argument to complete the OpTime type. If it is an Object, then it's
- // V1 and we construct an OpTime out of its nested fields.
- const BSONElement opTimeElement = doc[kOpTimeFieldName];
- if (opTimeElement.eoo()) {
- _opTimeSet = false;
- }
- else if (opTimeElement.type() == bsonTimestamp) {
- _opTimeSet = true;
- _opTime = OpTime(opTimeElement.timestamp(), term);
- }
- else if (opTimeElement.type() == Date) {
- _opTimeSet = true;
- _opTime = OpTime(Timestamp(opTimeElement.date()), term);
- }
- else if (opTimeElement.type() == Object) {
- BSONObj opTime = opTimeElement.Obj();
- Timestamp ts;
- Status status = bsonExtractTimestampField(opTime, kTimestampFieldName, &ts);
- if (!status.isOK())
- return status;
- long long extractedTerm;
- status = bsonExtractIntegerField(opTime, kTermFieldName, &extractedTerm);
- if (!status.isOK())
- return status;
-
- _opTimeSet = true;
- _opTime = OpTime(ts, extractedTerm);
- // since a v1 OpTime was in the response, the member must be part of a replset
- _isReplSet = true;
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kOpTimeFieldName << "\" field in response to replSetHeartbeat "
- "command to have type Date or Timestamp, but found type " <<
- typeName(opTimeElement.type()));
- }
-
- const BSONElement electableElement = doc[kIsElectableFieldName];
- if (electableElement.eoo()) {
- _electableSet = false;
- }
- else {
- _electableSet = true;
- _electable = electableElement.trueValue();
- }
-
- const BSONElement memberStateElement = doc[kMemberStateFieldName];
- if (memberStateElement.eoo()) {
- _stateSet = false;
- }
- else if (memberStateElement.type() != NumberInt &&
- memberStateElement.type() != NumberLong) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kMemberStateFieldName << "\" field in response to replSetHeartbeat "
- "command to have type NumberInt or NumberLong, but found type " <<
- typeName(memberStateElement.type()));
- }
- else {
- long long stateInt = memberStateElement.numberLong();
- if (stateInt < 0 || stateInt > MemberState::RS_MAX) {
- return Status(ErrorCodes::BadValue, str::stream() << "Value for \"" <<
- kMemberStateFieldName << "\" in response to replSetHeartbeat is "
- "out of range; legal values are non-negative and no more than " <<
- MemberState::RS_MAX);
- }
- _stateSet = true;
- _state = MemberState(static_cast<int>(stateInt));
- }
-
- _stateDisagreement = doc[kHasStateDisagreementFieldName].trueValue();
-
-
- // Not required for the case of uninitialized members -- they have no config
- const BSONElement configVersionElement = doc[kConfigVersionFieldName];
-
- // If we have an optime then we must have a configVersion
- if (_opTimeSet && configVersionElement.eoo()) {
- return Status(ErrorCodes::NoSuchKey, str::stream() <<
- "Response to replSetHeartbeat missing required \"" <<
- kConfigVersionFieldName << "\" field even though initialized");
- }
-
- // If there is a "v" (config version) then it must be an int.
- if (!configVersionElement.eoo() && configVersionElement.type() != NumberInt) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kConfigVersionFieldName <<
- "\" field in response to replSetHeartbeat to have "
- "type NumberInt, but found " << typeName(configVersionElement.type()));
+ if (_electionTimeSet) {
+ builder->appendDate(kElectionTimeFieldName,
+ Date_t::fromMillisSinceEpoch(_electionTime.asLL()));
+ }
+ if (_configSet) {
+ *builder << kConfigFieldName << _config.toBSON();
+ }
+ if (_electableSet) {
+ *builder << kIsElectableFieldName << _electable;
+ }
+ if (_isReplSet) {
+ *builder << "rs" << _isReplSet;
+ }
+ if (_stateDisagreement) {
+ *builder << kHasStateDisagreementFieldName << _stateDisagreement;
+ }
+ if (_stateSet) {
+ builder->appendIntOrLL(kMemberStateFieldName, _state.s);
+ }
+ if (_configVersion != -1) {
+ *builder << kConfigVersionFieldName << _configVersion;
+ }
+ *builder << kHbMessageFieldName << _hbmsg;
+ if (!_setName.empty()) {
+ *builder << kReplSetFieldName << _setName;
+ }
+ if (!_syncingTo.empty()) {
+ *builder << kSyncSourceFieldName << _syncingTo.toString();
+ }
+ if (_hasDataSet) {
+ builder->append(kHasDataFieldName, _hasData);
+ }
+ if (_term != -1) {
+ builder->append(kTermFieldName, _term);
+ }
+ if (_primaryIdSet) {
+ builder->append(kPrimaryIdFieldName, _primaryId);
+ }
+ if (_opTimeSet) {
+ if (isProtocolVersionV1) {
+ BSONObjBuilder opTime(builder->subobjStart(kOpTimeFieldName));
+ opTime.append(kTimestampFieldName, _opTime.getTimestamp());
+ opTime.append(kTermFieldName, _opTime.getTerm());
+ opTime.done();
+ } else {
+ builder->appendDate(kOpTimeFieldName,
+ Date_t::fromMillisSinceEpoch(_opTime.getTimestamp().asLL()));
}
- _configVersion = configVersionElement.numberInt();
+ }
+}
+
+BSONObj ReplSetHeartbeatResponse::toBSON(bool isProtocolVersionV1) const {
+ BSONObjBuilder builder;
+ addToBSON(&builder, isProtocolVersionV1);
+ return builder.obj();
+}
+
+Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc, long long term) {
+    // Old versions set this even though they returned ok:0.
+ _mismatch = doc[kMismatchFieldName].trueValue();
+ if (_mismatch)
+ return Status(ErrorCodes::InconsistentReplicaSetNames, "replica set name doesn't match.");
+
+ // Old versions sometimes set the replica set name ("set") but ok:0
+ const BSONElement replSetNameElement = doc[kReplSetFieldName];
+ if (replSetNameElement.eoo()) {
+ _setName.clear();
+ } else if (replSetNameElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kReplSetFieldName
+ << "\" field in response to replSetHeartbeat to have "
+ "type String, but found "
+ << typeName(replSetNameElement.type()));
+ } else {
+ _setName = replSetNameElement.String();
+ }
- const BSONElement hbMsgElement = doc[kHbMessageFieldName];
- if (hbMsgElement.eoo()) {
- _hbmsg.clear();
- }
- else if (hbMsgElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kHbMessageFieldName << "\" field in response to replSetHeartbeat to have "
- "type String, but found " << typeName(hbMsgElement.type()));
- }
- else {
- _hbmsg = hbMsgElement.String();
- }
+ if (_setName.empty() && !doc[kOkFieldName].trueValue()) {
+ std::string errMsg = doc[kErrMsgFieldName].str();
- const BSONElement syncingToElement = doc[kSyncSourceFieldName];
- if (syncingToElement.eoo()) {
- _syncingTo = HostAndPort();
- }
- else if (syncingToElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kSyncSourceFieldName << "\" field in response to replSetHeartbeat to "
- "have type String, but found " << typeName(syncingToElement.type()));
- }
- else {
- _syncingTo = HostAndPort(syncingToElement.String());
- }
+ BSONElement errCodeElem = doc[kErrorCodeFieldName];
+ if (errCodeElem.ok()) {
+ if (!errCodeElem.isNumber())
+ return Status(ErrorCodes::BadValue, "Error code is not a number!");
- const BSONElement rsConfigElement = doc[kConfigFieldName];
- if (rsConfigElement.eoo()) {
- _configSet = false;
- _config = ReplicaSetConfig();
- return Status::OK();
+ int errorCode = errCodeElem.numberInt();
+ return Status(ErrorCodes::Error(errorCode), errMsg);
}
- else if (rsConfigElement.type() != Object) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kConfigFieldName << "\" in response to replSetHeartbeat to have type "
- "Object, but found " << typeName(rsConfigElement.type()));
- }
- _configSet = true;
+ return Status(ErrorCodes::UnknownError, errMsg);
+ }
- return _config.initialize(rsConfigElement.Obj());
+ const BSONElement hasDataElement = doc[kHasDataFieldName];
+ _hasDataSet = !hasDataElement.eoo();
+ _hasData = hasDataElement.trueValue();
+
+ const BSONElement electionTimeElement = doc[kElectionTimeFieldName];
+ if (electionTimeElement.eoo()) {
+ _electionTimeSet = false;
+ } else if (electionTimeElement.type() == bsonTimestamp) {
+ _electionTimeSet = true;
+ _electionTime = electionTimeElement.timestamp();
+ } else if (electionTimeElement.type() == Date) {
+ _electionTimeSet = true;
+ _electionTime = Timestamp(electionTimeElement.date());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kElectionTimeFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have type Date or Timestamp, but found type "
+ << typeName(electionTimeElement.type()));
}
- MemberState ReplSetHeartbeatResponse::getState() const {
- invariant(_stateSet);
- return _state;
+ const BSONElement timeElement = doc[kTimeFieldName];
+ if (timeElement.eoo()) {
+ _timeSet = false;
+ } else if (timeElement.isNumber()) {
+ _timeSet = true;
+ _time = Seconds(timeElement.numberLong());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kTimeFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have a numeric type, but found type "
+ << typeName(timeElement.type()));
}
- Timestamp ReplSetHeartbeatResponse::getElectionTime() const {
- invariant(_electionTimeSet);
- return _electionTime;
+ _isReplSet = doc[kIsReplSetFieldName].trueValue();
+
+ // In order to support both the 3.0(V0) and 3.2(V1) heartbeats we must parse the OpTime
+ // field based on its type. If it is a Date, we parse it as the timestamp and use
+ // initialize's term argument to complete the OpTime type. If it is an Object, then it's
+ // V1 and we construct an OpTime out of its nested fields.
+ const BSONElement opTimeElement = doc[kOpTimeFieldName];
+ if (opTimeElement.eoo()) {
+ _opTimeSet = false;
+ } else if (opTimeElement.type() == bsonTimestamp) {
+ _opTimeSet = true;
+ _opTime = OpTime(opTimeElement.timestamp(), term);
+ } else if (opTimeElement.type() == Date) {
+ _opTimeSet = true;
+ _opTime = OpTime(Timestamp(opTimeElement.date()), term);
+ } else if (opTimeElement.type() == Object) {
+ BSONObj opTime = opTimeElement.Obj();
+ Timestamp ts;
+ Status status = bsonExtractTimestampField(opTime, kTimestampFieldName, &ts);
+ if (!status.isOK())
+ return status;
+ long long extractedTerm;
+ status = bsonExtractIntegerField(opTime, kTermFieldName, &extractedTerm);
+ if (!status.isOK())
+ return status;
+
+ _opTimeSet = true;
+ _opTime = OpTime(ts, extractedTerm);
+ // since a v1 OpTime was in the response, the member must be part of a replset
+ _isReplSet = true;
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kOpTimeFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have type Date or Timestamp, but found type "
+ << typeName(opTimeElement.type()));
}
- bool ReplSetHeartbeatResponse::isElectable() const {
- invariant(_electableSet);
- return _electable;
+ const BSONElement electableElement = doc[kIsElectableFieldName];
+ if (electableElement.eoo()) {
+ _electableSet = false;
+ } else {
+ _electableSet = true;
+ _electable = electableElement.trueValue();
}
- Seconds ReplSetHeartbeatResponse::getTime() const {
- invariant(_timeSet);
- return _time;
+ const BSONElement memberStateElement = doc[kMemberStateFieldName];
+ if (memberStateElement.eoo()) {
+ _stateSet = false;
+ } else if (memberStateElement.type() != NumberInt && memberStateElement.type() != NumberLong) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream()
+ << "Expected \"" << kMemberStateFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have type NumberInt or NumberLong, but found type "
+ << typeName(memberStateElement.type()));
+ } else {
+ long long stateInt = memberStateElement.numberLong();
+ if (stateInt < 0 || stateInt > MemberState::RS_MAX) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "Value for \"" << kMemberStateFieldName
+ << "\" in response to replSetHeartbeat is "
+ "out of range; legal values are non-negative and no more than "
+ << MemberState::RS_MAX);
+ }
+ _stateSet = true;
+ _state = MemberState(static_cast<int>(stateInt));
}
- const ReplicaSetConfig& ReplSetHeartbeatResponse::getConfig() const {
- invariant(_configSet);
- return _config;
+ _stateDisagreement = doc[kHasStateDisagreementFieldName].trueValue();
+
+
+ // Not required for the case of uninitialized members -- they have no config
+ const BSONElement configVersionElement = doc[kConfigVersionFieldName];
+
+ // If we have an optime then we must have a configVersion
+ if (_opTimeSet && configVersionElement.eoo()) {
+ return Status(ErrorCodes::NoSuchKey,
+ str::stream() << "Response to replSetHeartbeat missing required \""
+ << kConfigVersionFieldName
+ << "\" field even though initialized");
}
- long long ReplSetHeartbeatResponse::getPrimaryId() const {
- invariant(_primaryIdSet);
- return _primaryId;
+ // If there is a "v" (config version) then it must be an int.
+ if (!configVersionElement.eoo() && configVersionElement.type() != NumberInt) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kConfigVersionFieldName
+ << "\" field in response to replSetHeartbeat to have "
+ "type NumberInt, but found "
+ << typeName(configVersionElement.type()));
+ }
+ _configVersion = configVersionElement.numberInt();
+
+ const BSONElement hbMsgElement = doc[kHbMessageFieldName];
+ if (hbMsgElement.eoo()) {
+ _hbmsg.clear();
+ } else if (hbMsgElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kHbMessageFieldName
+ << "\" field in response to replSetHeartbeat to have "
+ "type String, but found " << typeName(hbMsgElement.type()));
+ } else {
+ _hbmsg = hbMsgElement.String();
}
- OpTime ReplSetHeartbeatResponse::getOpTime() const {
- invariant(_opTimeSet);
- return _opTime;
+ const BSONElement syncingToElement = doc[kSyncSourceFieldName];
+ if (syncingToElement.eoo()) {
+ _syncingTo = HostAndPort();
+ } else if (syncingToElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kSyncSourceFieldName
+ << "\" field in response to replSetHeartbeat to "
+ "have type String, but found "
+ << typeName(syncingToElement.type()));
+ } else {
+ _syncingTo = HostAndPort(syncingToElement.String());
}
-} // namespace repl
-} // namespace mongo
+ const BSONElement rsConfigElement = doc[kConfigFieldName];
+ if (rsConfigElement.eoo()) {
+ _configSet = false;
+ _config = ReplicaSetConfig();
+ return Status::OK();
+ } else if (rsConfigElement.type() != Object) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kConfigFieldName
+ << "\" in response to replSetHeartbeat to have type "
+ "Object, but found " << typeName(rsConfigElement.type()));
+ }
+ _configSet = true;
+
+ return _config.initialize(rsConfigElement.Obj());
+}
+
+MemberState ReplSetHeartbeatResponse::getState() const {
+ invariant(_stateSet);
+ return _state;
+}
+
+Timestamp ReplSetHeartbeatResponse::getElectionTime() const {
+ invariant(_electionTimeSet);
+ return _electionTime;
+}
+
+bool ReplSetHeartbeatResponse::isElectable() const {
+ invariant(_electableSet);
+ return _electable;
+}
+
+Seconds ReplSetHeartbeatResponse::getTime() const {
+ invariant(_timeSet);
+ return _time;
+}
+
+const ReplicaSetConfig& ReplSetHeartbeatResponse::getConfig() const {
+ invariant(_configSet);
+ return _config;
+}
+
+long long ReplSetHeartbeatResponse::getPrimaryId() const {
+ invariant(_primaryIdSet);
+ return _primaryId;
+}
+
+OpTime ReplSetHeartbeatResponse::getOpTime() const {
+ invariant(_opTimeSet);
+ return _opTime;
+}
+
+} // namespace repl
+} // namespace mongo
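
The subtlest part of initialize() above is the dual-format opTime handling: a Timestamp or Date value is a 3.0-style (V0) optime, completed with the caller-supplied term, while an Object is a 3.2-style (V1) {ts, term} pair whose presence also marks the sender as a replica-set member. A hedged sketch of the V1 branch; the document is illustrative, and streaming a Timestamp into the BSON macro is assumed to work as it does elsewhere in the tree:

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/repl_set_heartbeat_response.h"

    using namespace mongo;
    using namespace mongo::repl;

    Status parseV1HeartbeatResponse() {
        BSONObj doc = BSON("ok" << 1.0 << "set"
                                << "rs0"
                                << "state" << 1 << "v" << 5 << "opTime"
                                << BSON("ts" << Timestamp(100, 1) << "term" << 2LL));
        ReplSetHeartbeatResponse response;
        // The term argument (-1) is ignored here: a V1 opTime carries its own term.
        Status status = response.initialize(doc, -1);
        if (!status.isOK())
            return status;
        // getOpTime() == OpTime(Timestamp(100, 1), 2); isReplSet() is true even
        // with no "rs" field, and "v" is mandatory whenever an opTime is present.
        return Status::OK();
    }
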
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.h b/src/mongo/db/repl/repl_set_heartbeat_response.h
index 8d8fa04b4b5..b3fba2a4803 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.h
@@ -37,169 +37,244 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class Status;
+class BSONObj;
+class BSONObjBuilder;
+class Status;
namespace repl {
+/**
+ * Response structure for the replSetHeartbeat command.
+ */
+class ReplSetHeartbeatResponse {
+public:
+ /**
+ * Initializes this ReplSetHeartbeatResponse from the contents of "doc".
+ * "term" is only used to complete a V0 OpTime (which is really a Timestamp).
+ */
+ Status initialize(const BSONObj& doc, long long term);
+
+ /**
+ * Appends all non-default values to "builder".
+ */
+ void addToBSON(BSONObjBuilder* builder, bool isProtocolVersionV1) const;
+
+ /**
+     * Returns a BSONObj consisting of all non-default values.
+ */
+ BSONObj toBSON(bool isProtocolVersionV1) const;
+
+ /**
+ * Returns toBSON().toString()
+ */
+ const std::string toString() const {
+ return toBSON(true).toString();
+ }
+
+ bool hasDataSet() const {
+ return _hasDataSet;
+ }
+ bool hasData() const {
+ return _hasData;
+ }
+ bool isMismatched() const {
+ return _mismatch;
+ }
+ bool isReplSet() const {
+ return _isReplSet;
+ }
+ bool isStateDisagreement() const {
+ return _stateDisagreement;
+ }
+ const std::string& getReplicaSetName() const {
+ return _setName;
+ }
+ bool hasState() const {
+ return _stateSet;
+ }
+ MemberState getState() const;
+ bool hasElectionTime() const {
+ return _electionTimeSet;
+ }
+ Timestamp getElectionTime() const;
+ bool hasIsElectable() const {
+ return _electableSet;
+ }
+ bool isElectable() const;
+ const std::string& getHbMsg() const {
+ return _hbmsg;
+ }
+ bool hasTime() const {
+ return _timeSet;
+ }
+ Seconds getTime() const;
+ const HostAndPort& getSyncingTo() const {
+ return _syncingTo;
+ }
+ int getConfigVersion() const {
+ return _configVersion;
+ }
+ bool hasConfig() const {
+ return _configSet;
+ }
+ const ReplicaSetConfig& getConfig() const;
+ bool hasPrimaryId() const {
+ return _primaryIdSet;
+ }
+ long long getPrimaryId() const;
+ long long getTerm() const {
+ return _term;
+ }
+ bool hasOpTime() const {
+ return _opTimeSet;
+ }
+ OpTime getOpTime() const;
+
+ /**
+ * Sets _mismatch to true.
+ */
+ void noteMismatched() {
+ _mismatch = true;
+ }
+
+ /**
+ * Sets _isReplSet to true.
+ */
+ void noteReplSet() {
+ _isReplSet = true;
+ }
+
+ /**
+ * Sets _stateDisagreement to true.
+ */
+ void noteStateDisagreement() {
+ _stateDisagreement = true;
+ }
+
+ /**
+     * Sets _hasData to true, and _hasDataSet to true to indicate _hasData has been modified.
+ */
+ void noteHasData() {
+ _hasDataSet = _hasData = true;
+ }
+
/**
- * Response structure for the replSetHeartbeat command.
- */
- class ReplSetHeartbeatResponse {
- public:
-
- /**
- * Initializes this ReplSetHeartbeatResponse from the contents of "doc".
- * "term" is only used to complete a V0 OpTime (which is really a Timestamp).
- */
- Status initialize(const BSONObj& doc, long long term);
-
- /**
- * Appends all non-default values to "builder".
- */
- void addToBSON(BSONObjBuilder* builder, bool isProtocolVersionV1) const;
-
- /**
- * Returns a BSONObj consisting of all non-default values to "builder".
- */
- BSONObj toBSON(bool isProtocolVersionV1) const;
-
- /**
- * Returns toBSON().toString()
- */
- const std::string toString() const { return toBSON(true).toString(); }
-
- bool hasDataSet() const { return _hasDataSet; }
- bool hasData() const { return _hasData; }
- bool isMismatched() const { return _mismatch; }
- bool isReplSet() const { return _isReplSet; }
- bool isStateDisagreement() const { return _stateDisagreement; }
- const std::string& getReplicaSetName() const { return _setName; }
- bool hasState() const { return _stateSet; }
- MemberState getState() const;
- bool hasElectionTime() const { return _electionTimeSet; }
- Timestamp getElectionTime() const;
- bool hasIsElectable() const { return _electableSet; }
- bool isElectable() const;
- const std::string& getHbMsg() const { return _hbmsg; }
- bool hasTime() const { return _timeSet; }
- Seconds getTime() const;
- const HostAndPort& getSyncingTo() const { return _syncingTo; }
- int getConfigVersion() const { return _configVersion; }
- bool hasConfig() const { return _configSet; }
- const ReplicaSetConfig& getConfig() const;
- bool hasPrimaryId() const { return _primaryIdSet; }
- long long getPrimaryId() const;
- long long getTerm() const { return _term; }
- bool hasOpTime() const { return _opTimeSet; }
- OpTime getOpTime() const;
-
- /**
- * Sets _mismatch to true.
- */
- void noteMismatched() { _mismatch = true; }
-
- /**
- * Sets _isReplSet to true.
- */
- void noteReplSet() { _isReplSet = true; }
-
- /**
- * Sets _stateDisagreement to true.
- */
- void noteStateDisagreement() { _stateDisagreement = true; }
-
- /**
- * Sets _hasData to true, and _hasDataSet to true to indicate _hasData has been modified
- */
- void noteHasData() { _hasDataSet = _hasData = true;}
-
- /**
- * Sets _setName to "name".
- */
- void setSetName(std::string name) { _setName = name; }
-
- /**
- * Sets _state to "state".
- */
- void setState(MemberState state) { _stateSet = true; _state = state; }
-
- /**
- * Sets the optional "electionTime" field to the given Timestamp.
- */
- void setElectionTime(Timestamp time) { _electionTimeSet = true; _electionTime = time; }
-
- /**
- * Sets _electable to "electable" and sets _electableSet to true to indicate
- * that the value of _electable has been modified.
- */
- void setElectable(bool electable) { _electableSet = true; _electable = electable; }
-
- /**
- * Sets _hbmsg to "hbmsg".
- */
- void setHbMsg(std::string hbmsg) { _hbmsg = hbmsg; }
-
- /**
- * Sets the optional "time" field of the response to "theTime", which is
- * a count of seconds since the UNIX epoch.
- */
- void setTime(Seconds theTime) { _timeSet = true; _time = theTime; }
-
- /**
- * Sets _syncingTo to "syncingTo".
- */
- void setSyncingTo(const HostAndPort& syncingTo) { _syncingTo = syncingTo; }
-
- /**
- * Sets _configVersion to "configVersion".
- */
- void setConfigVersion(int configVersion) { _configVersion = configVersion; }
-
- /**
- * Initializes _config with "config".
- */
- void setConfig(const ReplicaSetConfig& config) { _configSet = true; _config = config; }
-
- void setPrimaryId(long long primaryId) { _primaryIdSet = true; _primaryId = primaryId; }
- void setOpTime(OpTime time) { _opTimeSet = true; _opTime = time; }
- void setTerm(long long term) { _term = term; }
- private:
- bool _electionTimeSet = false;
- Timestamp _electionTime;
-
- bool _timeSet = false;
- Seconds _time = Seconds(0); // Seconds since UNIX epoch.
-
- bool _opTimeSet = false;
- OpTime _opTime;
-
- bool _electableSet = false;
- bool _electable = false;
-
- bool _hasDataSet = false;
- bool _hasData = false;
-
- bool _mismatch = false;
- bool _isReplSet = false;
- bool _stateDisagreement = false;
-
- bool _stateSet = false;
- MemberState _state;
-
- int _configVersion = -1;
- std::string _setName;
- std::string _hbmsg;
- HostAndPort _syncingTo;
-
- bool _configSet = false;
- ReplicaSetConfig _config;
-
- bool _primaryIdSet = false;
- long long _primaryId = -1;
- long long _term = -1;
- };
-
-} // namespace repl
-} // namespace mongo
+ * Sets _setName to "name".
+ */
+ void setSetName(std::string name) {
+ _setName = name;
+ }
+
+ /**
+ * Sets _state to "state".
+ */
+ void setState(MemberState state) {
+ _stateSet = true;
+ _state = state;
+ }
+
+ /**
+ * Sets the optional "electionTime" field to the given Timestamp.
+ */
+ void setElectionTime(Timestamp time) {
+ _electionTimeSet = true;
+ _electionTime = time;
+ }
+
+ /**
+ * Sets _electable to "electable" and sets _electableSet to true to indicate
+ * that the value of _electable has been modified.
+ */
+ void setElectable(bool electable) {
+ _electableSet = true;
+ _electable = electable;
+ }
+
+ /**
+ * Sets _hbmsg to "hbmsg".
+ */
+ void setHbMsg(std::string hbmsg) {
+ _hbmsg = hbmsg;
+ }
+
+ /**
+ * Sets the optional "time" field of the response to "theTime", which is
+ * a count of seconds since the UNIX epoch.
+ */
+ void setTime(Seconds theTime) {
+ _timeSet = true;
+ _time = theTime;
+ }
+
+ /**
+ * Sets _syncingTo to "syncingTo".
+ */
+ void setSyncingTo(const HostAndPort& syncingTo) {
+ _syncingTo = syncingTo;
+ }
+
+ /**
+ * Sets _configVersion to "configVersion".
+ */
+ void setConfigVersion(int configVersion) {
+ _configVersion = configVersion;
+ }
+
+ /**
+ * Initializes _config with "config".
+ */
+ void setConfig(const ReplicaSetConfig& config) {
+ _configSet = true;
+ _config = config;
+ }
+
+ void setPrimaryId(long long primaryId) {
+ _primaryIdSet = true;
+ _primaryId = primaryId;
+ }
+ void setOpTime(OpTime time) {
+ _opTimeSet = true;
+ _opTime = time;
+ }
+ void setTerm(long long term) {
+ _term = term;
+ }
+
+private:
+ bool _electionTimeSet = false;
+ Timestamp _electionTime;
+
+ bool _timeSet = false;
+ Seconds _time = Seconds(0); // Seconds since UNIX epoch.
+
+ bool _opTimeSet = false;
+ OpTime _opTime;
+
+ bool _electableSet = false;
+ bool _electable = false;
+
+ bool _hasDataSet = false;
+ bool _hasData = false;
+
+ bool _mismatch = false;
+ bool _isReplSet = false;
+ bool _stateDisagreement = false;
+
+ bool _stateSet = false;
+ MemberState _state;
+
+ int _configVersion = -1;
+ std::string _setName;
+ std::string _hbmsg;
+ HostAndPort _syncingTo;
+
+ bool _configSet = false;
+ ReplicaSetConfig _config;
+
+ bool _primaryIdSet = false;
+ long long _primaryId = -1;
+ long long _term = -1;
+};
+
+} // namespace repl
+} // namespace mongo
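
For orientation, the class above is a plain set-then-serialize value type: producers populate fields through the setters, serialize with toBSON(), and consumers rebuild the response via initialize(). The following is a minimal sketch of that round trip, not part of this diff; it assumes compilation inside the mongo source tree with the usual repl headers, and roundTripExample is a hypothetical helper name.

// Minimal usage sketch (hypothetical; assumes the mongo source tree).
#include "mongo/db/repl/repl_set_heartbeat_response.h"

namespace mongo {
namespace repl {

Status roundTripExample() {
    ReplSetHeartbeatResponse original;
    original.setConfigVersion(1);                // appears on the wire as "v"
    original.setSetName("rs0");                  // appears on the wire as "set"
    original.setElectionTime(Timestamp(10, 0));  // optional "electionTime" field

    // Serialize with the pre-protocol-version-1 field layout, as the
    // accompanying tests do via toBSON(false).
    BSONObj wire = original.toBSON(false);

    // Rebuild on the receiving side; the second argument is the term used
    // only to complete a V0 OpTime, so 0 suffices when no opTime is set.
    ReplSetHeartbeatResponse parsed;
    return parsed.initialize(wire, 0);
}

}  // namespace repl
}  // namespace mongo
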
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
index aed87536dcc..45c8dba3e1f 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
@@ -37,730 +37,732 @@ namespace mongo {
namespace repl {
namespace {
- using std::unique_ptr;
-
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
- }
-
- TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
- int fieldsSet = 2;
- ReplSetHeartbeatResponse hbResponse;
- ReplSetHeartbeatResponse hbResponseObjRoundTripChecker;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(false, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(-1, hbResponse.getConfigVersion());
-
- BSONObj hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
-
- Status initializeResult = Status::OK();
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
-
- // set version
- hbResponse.setConfigVersion(1);
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(false, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
-
- // set setname
- hbResponse.setSetName("rs0");
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(false, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
-
- // set electionTime
- hbResponse.setElectionTime(Timestamp(10,0));
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
-
- // set opTime
- hbResponse.setOpTime(OpTime(Timestamp(10), 0));
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set time
- hbResponse.setTime(Seconds(10));
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set electable
- hbResponse.setElectable(true);
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set config
- ReplicaSetConfig config;
- hbResponse.setConfig(config);
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set state
- hbResponse.setState(MemberState(MemberState::RS_SECONDARY));
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set stateDisagreement
- hbResponse.noteStateDisagreement();
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set replSet
- hbResponse.noteReplSet();
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set syncingTo
- hbResponse.setSyncingTo(HostAndPort("syncTarget"));
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
- ASSERT_EQUALS("syncTarget:27017", hbResponseObj["syncingTo"].String());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set hbmsg
- hbResponse.setHbMsg("lub dub");
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("lub dub", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(Timestamp(10,0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0,10), hbResponseObj["opTime"].timestamp());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
- ASSERT_EQUALS("syncTarget:27017", hbResponseObj["syncingTo"].String());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(),
- hbResponseObjRoundTripChecker.toBSON(false).toString());
-
- // set mismatched
- hbResponse.noteMismatched();
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(true, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
- ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getConfigVersion());
- ASSERT_EQUALS(Timestamp(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0,10), 0), hbResponse.getOpTime());
- ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON(false);
- ASSERT_EQUALS(2, hbResponseObj.nFields());
- ASSERT_EQUALS(true, hbResponseObj["mismatch"].trueValue());
-
-        // NOTE: Does not check round-trip. Once noteMismatched is set, initialize() will
-        // return an error when parsing the resulting BSON.
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
- ASSERT_NOT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, initializeResult.code());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeWrongElectionTimeType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "electionTime" << "hello");
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"electionTime\" field in response to replSetHeartbeat command to "
- "have type Date or Timestamp, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeWrongTimeType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "time" << "hello");
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"time\" field in response to replSetHeartbeat command to "
- "have a numeric type, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeWrongOpTimeType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "opTime" << "hello");
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"opTime\" field in response to replSetHeartbeat command to "
- "have type Date or Timestamp, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeMemberStateWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "state" << "hello");
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"state\" field in response to replSetHeartbeat command to "
- "have type NumberInt or NumberLong, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooLow) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "state" << -1);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("Value for \"state\" in response to replSetHeartbeat is out of range; "
- "legal values are non-negative and no more than 10",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooHigh) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "state" << 11);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("Value for \"state\" in response to replSetHeartbeat is out of range; "
- "legal values are non-negative and no more than 10",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeVersionWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << "hello");
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"v\" field in response to replSetHeartbeat to "
- "have type NumberInt, but found String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeReplSetNameWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "set" << 4);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"set\" field in response to replSetHeartbeat to "
- "have type String, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeHeartbeatMeessageWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "hbmsg" << 4);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"hbmsg\" field in response to replSetHeartbeat to "
- "have type String, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeSyncingToWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "syncingTo" << 4);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"syncingTo\" field in response to replSetHeartbeat to "
- "have type String, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeConfigWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "config" << 4);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"config\" in response to replSetHeartbeat to "
- "have type Object, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeBadConfig) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "config" << BSON("illegalFieldName" << 2));
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("Unexpected field illegalFieldName in replica set configuration",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeBothElectionTimeTypesSameResult) {
- ReplSetHeartbeatResponse hbResponseDate;
- ReplSetHeartbeatResponse hbResponseTimestamp;
- BSONObjBuilder initializerDate;
- BSONObjBuilder initializerTimestamp;
- Date_t electionTime = Date_t::fromMillisSinceEpoch(974132);
-
- initializerDate.append("ok", 1.0);
- initializerDate.append("v", 1);
- initializerDate.appendDate("electionTime", electionTime);
- Status result = hbResponseDate.initialize(initializerDate.obj(), 0);
- ASSERT_EQUALS(Status::OK(), result);
-
- initializerTimestamp.append("ok", 1.0);
- initializerTimestamp.append("v", 1);
- initializerTimestamp.appendTimestamp("electionTime", electionTime.toULL());
- result = hbResponseTimestamp.initialize(initializerTimestamp.obj(), 0);
- ASSERT_EQUALS(Status::OK(), result);
-
-        ASSERT_EQUALS(hbResponseDate.getElectionTime(), hbResponseTimestamp.getElectionTime());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeBothOpTimeTypesSameResult) {
- ReplSetHeartbeatResponse hbResponseDate;
- ReplSetHeartbeatResponse hbResponseTimestamp;
- BSONObjBuilder initializerDate;
- BSONObjBuilder initializerTimestamp;
- Date_t opTime = Date_t::fromMillisSinceEpoch(974132);
-
- initializerDate.append("ok", 1.0);
- initializerDate.append("v", 1);
- initializerDate.appendDate("opTime", opTime);
- Status result = hbResponseDate.initialize(initializerDate.obj(), 0);
- ASSERT_EQUALS(Status::OK(), result);
-
- initializerTimestamp.append("ok", 1.0);
- initializerTimestamp.append("v", 1);
- initializerTimestamp.appendTimestamp("opTime", opTime.toULL());
- result = hbResponseTimestamp.initialize(initializerTimestamp.obj(), 0);
- ASSERT_EQUALS(Status::OK(), result);
-
-        ASSERT_EQUALS(hbResponseDate.getOpTime(), hbResponseTimestamp.getOpTime());
- }
-
- TEST(ReplSetHeartbeatResponse, NoConfigStillInitializing) {
- ReplSetHeartbeatResponse hbResp;
- std::string msg = "still initializing";
- Status result = hbResp.initialize(BSON("ok" << 1.0 <<
- "rs" << true <<
- "hbmsg" << msg), 0);
- ASSERT_EQUALS(Status::OK(), result);
- ASSERT_EQUALS(true, hbResp.isReplSet());
- ASSERT_EQUALS(msg, hbResp.getHbMsg());
- }
-
- TEST(ReplSetHeartbeatResponse, InvalidResponseOpTimeMissesConfigVersion) {
- ReplSetHeartbeatResponse hbResp;
- std::string msg = "still initializing";
- Status result = hbResp.initialize(BSON("ok" << 1.0 <<
- "opTime" << Timestamp()), 0);
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.code());
- ASSERT_TRUE(stringContains(result.reason(), "\"v\""))
- << result.reason() << " doesn't contain 'v' field required error msg";
- }
-
- TEST(ReplSetHeartbeatResponse, MismatchedRepliSetNames) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 0.0 << "mismatch" << true);
- Status result = hbResponse.initialize(initializerObj, 0);
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result.code());
- }
-
- TEST(ReplSetHeartbeatResponse, AuthFailure) {
- ReplSetHeartbeatResponse hbResp;
- std::string errMsg = "Unauthorized";
- Status result = hbResp.initialize(BSON("ok" << 0.0 <<
- "errmsg" << errMsg <<
- "code" << ErrorCodes::Unauthorized), 0);
- ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
- ASSERT_EQUALS(errMsg, result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, ServerError) {
- ReplSetHeartbeatResponse hbResp;
- std::string errMsg = "Random Error";
- Status result = hbResp.initialize(BSON("ok" << 0.0 << "errmsg" << errMsg ), 0);
- ASSERT_EQUALS(ErrorCodes::UnknownError, result.code());
- ASSERT_EQUALS(errMsg, result.reason());
- }
+using std::unique_ptr;
+
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
+ int fieldsSet = 2;
+ ReplSetHeartbeatResponse hbResponse;
+ ReplSetHeartbeatResponse hbResponseObjRoundTripChecker;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(false, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(-1, hbResponse.getConfigVersion());
+
+ BSONObj hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+
+ Status initializeResult = Status::OK();
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
+
+ // set version
+ hbResponse.setConfigVersion(1);
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(false, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
+
+ // set setname
+ hbResponse.setSetName("rs0");
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(false, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
+
+ // set electionTime
+ hbResponse.setElectionTime(Timestamp(10, 0));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
+
+ // set opTime
+ hbResponse.setOpTime(OpTime(Timestamp(10), 0));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set time
+ hbResponse.setTime(Seconds(10));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set electable
+ hbResponse.setElectable(true);
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set config
+ ReplicaSetConfig config;
+ hbResponse.setConfig(config);
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set state
+ hbResponse.setState(MemberState(MemberState::RS_SECONDARY));
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set stateDisagreement
+ hbResponse.noteStateDisagreement();
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set replSet
+ hbResponse.noteReplSet();
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set syncingTo
+ hbResponse.setSyncingTo(HostAndPort("syncTarget"));
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
+ ASSERT_EQUALS("syncTarget:27017", hbResponseObj["syncingTo"].String());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set hbmsg
+ hbResponse.setHbMsg("lub dub");
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("lub dub", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
+ ASSERT_EQUALS("syncTarget:27017", hbResponseObj["syncingTo"].String());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set mismatched
+ hbResponse.noteMismatched();
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(true, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(2, hbResponseObj.nFields());
+ ASSERT_EQUALS(true, hbResponseObj["mismatch"].trueValue());
+
+    // NOTE: Does not check the round trip. Once noteMismatched() has been called, parsing
+    // the resulting BSON in initialize() returns an error.
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_NOT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, initializeResult.code());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeWrongElectionTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "electionTime"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"electionTime\" field in response to replSetHeartbeat command to "
+ "have type Date or Timestamp, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeWrongTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "time"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"time\" field in response to replSetHeartbeat command to "
+ "have a numeric type, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeWrongOpTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "opTime"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"opTime\" field in response to replSetHeartbeat command to "
+ "have type Date or Timestamp, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeMemberStateWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "state"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"state\" field in response to replSetHeartbeat command to "
+ "have type NumberInt or NumberLong, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooLow) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "state" << -1);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS(
+ "Value for \"state\" in response to replSetHeartbeat is out of range; "
+ "legal values are non-negative and no more than 10",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooHigh) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "state" << 11);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS(
+ "Value for \"state\" in response to replSetHeartbeat is out of range; "
+ "legal values are non-negative and no more than 10",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeVersionWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "v"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"v\" field in response to replSetHeartbeat to "
+ "have type NumberInt, but found String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeReplSetNameWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "set" << 4);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"set\" field in response to replSetHeartbeat to "
+ "have type String, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeHeartbeatMessageWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "hbmsg" << 4);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"hbmsg\" field in response to replSetHeartbeat to "
+ "have type String, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeSyncingToWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "syncingTo" << 4);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"syncingTo\" field in response to replSetHeartbeat to "
+ "have type String, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeConfigWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "config" << 4);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"config\" in response to replSetHeartbeat to "
+ "have type Object, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeBadConfig) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "config" << BSON("illegalFieldName" << 2));
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS("Unexpected field illegalFieldName in replica set configuration",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeBothElectionTimeTypesSameResult) {
+ ReplSetHeartbeatResponse hbResponseDate;
+ ReplSetHeartbeatResponse hbResponseTimestamp;
+ BSONObjBuilder initializerDate;
+ BSONObjBuilder initializerTimestamp;
+ Date_t electionTime = Date_t::fromMillisSinceEpoch(974132);
+
+ initializerDate.append("ok", 1.0);
+ initializerDate.append("v", 1);
+ initializerDate.appendDate("electionTime", electionTime);
+ Status result = hbResponseDate.initialize(initializerDate.obj(), 0);
+ ASSERT_EQUALS(Status::OK(), result);
+
+ initializerTimestamp.append("ok", 1.0);
+ initializerTimestamp.append("v", 1);
+ initializerTimestamp.appendTimestamp("electionTime", electionTime.toULL());
+ result = hbResponseTimestamp.initialize(initializerTimestamp.obj(), 0);
+ ASSERT_EQUALS(Status::OK(), result);
+
+    ASSERT_EQUALS(hbResponseDate.getElectionTime(), hbResponseTimestamp.getElectionTime());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeBothOpTimeTypesSameResult) {
+ ReplSetHeartbeatResponse hbResponseDate;
+ ReplSetHeartbeatResponse hbResponseTimestamp;
+ BSONObjBuilder initializerDate;
+ BSONObjBuilder initializerTimestamp;
+ Date_t opTime = Date_t::fromMillisSinceEpoch(974132);
+
+ initializerDate.append("ok", 1.0);
+ initializerDate.append("v", 1);
+ initializerDate.appendDate("opTime", opTime);
+ Status result = hbResponseDate.initialize(initializerDate.obj(), 0);
+ ASSERT_EQUALS(Status::OK(), result);
+
+ initializerTimestamp.append("ok", 1.0);
+ initializerTimestamp.append("v", 1);
+ initializerTimestamp.appendTimestamp("opTime", opTime.toULL());
+ result = hbResponseTimestamp.initialize(initializerTimestamp.obj(), 0);
+ ASSERT_EQUALS(Status::OK(), result);
+
+    ASSERT_EQUALS(hbResponseDate.getOpTime(), hbResponseTimestamp.getOpTime());
+}
+
+TEST(ReplSetHeartbeatResponse, NoConfigStillInitializing) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string msg = "still initializing";
+ Status result = hbResp.initialize(BSON("ok" << 1.0 << "rs" << true << "hbmsg" << msg), 0);
+ ASSERT_EQUALS(Status::OK(), result);
+ ASSERT_EQUALS(true, hbResp.isReplSet());
+ ASSERT_EQUALS(msg, hbResp.getHbMsg());
+}
+
+TEST(ReplSetHeartbeatResponse, InvalidResponseOpTimeMissesConfigVersion) {
+ ReplSetHeartbeatResponse hbResp;
+ Status result = hbResp.initialize(BSON("ok" << 1.0 << "opTime" << Timestamp()), 0);
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.code());
+ ASSERT_TRUE(stringContains(result.reason(), "\"v\""))
+ << result.reason() << " doesn't contain 'v' field required error msg";
+}
+
+TEST(ReplSetHeartbeatResponse, MismatchedReplicaSetNames) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 0.0 << "mismatch" << true);
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result.code());
+}
+
+TEST(ReplSetHeartbeatResponse, AuthFailure) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string errMsg = "Unauthorized";
+ Status result = hbResp.initialize(
+ BSON("ok" << 0.0 << "errmsg" << errMsg << "code" << ErrorCodes::Unauthorized), 0);
+ ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
+ ASSERT_EQUALS(errMsg, result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, ServerError) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string errMsg = "Random Error";
+ Status result = hbResp.initialize(BSON("ok" << 0.0 << "errmsg" << errMsg), 0);
+ ASSERT_EQUALS(ErrorCodes::UnknownError, result.code());
+ ASSERT_EQUALS(errMsg, result.reason());
+}
} // namespace
} // namespace repl
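
The tests above walk ReplSetHeartbeatResponse through every setter and then re-parse the serialized form. A minimal round-trip sketch, assuming only the API the tests exercise (toBSON(bool) and initialize(BSONObj, long long)); the header path is a guess, not verified against the tree:

    #include "mongo/db/repl/repl_set_heartbeat_response.h"

    namespace mongo {
    namespace repl {

    Status roundTrip(const ReplSetHeartbeatResponse& original) {
        // Serialize with the same overload the tests call.
        BSONObj wire = original.toBSON(false);

        // Re-parse on the receiving side. After noteMismatched(), this fails
        // with InconsistentReplicaSetNames, which is why the final test above
        // skips the round-trip check.
        ReplSetHeartbeatResponse parsed;
        return parsed.initialize(wire, 0);
    }

    }  // namespace repl
    }  // namespace mongo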
diff --git a/src/mongo/db/repl/repl_set_html_summary.cpp b/src/mongo/db/repl/repl_set_html_summary.cpp
index e0d95a6e8db..821e8258d2e 100644
--- a/src/mongo/db/repl/repl_set_html_summary.cpp
+++ b/src/mongo/db/repl/repl_set_html_summary.cpp
@@ -42,181 +42,175 @@
namespace mongo {
namespace repl {
- ReplSetHtmlSummary::ReplSetHtmlSummary() : _selfIndex(-1), _primaryIndex(-1), _selfUptime(0) {}
+ReplSetHtmlSummary::ReplSetHtmlSummary() : _selfIndex(-1), _primaryIndex(-1), _selfUptime(0) {}
namespace {
- /**
- * Turns an unsigned int representing a duration of time in milliseconds and turns it into
- * a human readable time string representation.
- */
- std::string ago(unsigned int duration) {
- std::stringstream s;
- if( duration < 180 ) {
- s << duration << " sec";
- if( duration != 1 ) s << 's';
- }
- else if( duration < 3600 ) {
- s.precision(2);
- s << duration / 60.0 << " mins";
- }
- else {
- s.precision(2);
- s << duration / 3600.0 << " hrs";
- }
- return s.str();
+/**
+ * Takes an unsigned int representing a duration in seconds and turns it into
+ * a human-readable time string.
+ */
+std::string ago(unsigned int duration) {
+ std::stringstream s;
+ if (duration < 180) {
+ s << duration << " sec";
+ if (duration != 1)
+ s << 's';
+ } else if (duration < 3600) {
+ s.precision(2);
+ s << duration / 60.0 << " mins";
+ } else {
+ s.precision(2);
+ s << duration / 3600.0 << " hrs";
}
+ return s.str();
+}
- unsigned int timeDifference(Date_t now, Date_t past) {
- return static_cast<unsigned int>(past != Date_t() ? durationCount<Seconds>(now - past) : 0);
- }
+unsigned int timeDifference(Date_t now, Date_t past) {
+ return static_cast<unsigned int>(past != Date_t() ? durationCount<Seconds>(now - past) : 0);
+}
- std::string stateAsHtml(const MemberState& s) {
- using namespace html;
-
- if( s.s == MemberState::RS_STARTUP )
- return a("",
- "server still starting up, or still trying to initiate the set",
- "STARTUP");
- if( s.s == MemberState::RS_PRIMARY )
- return a("", "this server thinks it is primary", "PRIMARY");
- if( s.s == MemberState::RS_SECONDARY )
- return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY");
- if( s.s == MemberState::RS_RECOVERING )
- return a("",
- "recovering/resyncing; after recovery usually auto-transitions to secondary",
- "RECOVERING");
- if( s.s == MemberState::RS_STARTUP2 )
- return a("", "loaded config, still determining who is primary", "STARTUP2");
- if( s.s == MemberState::RS_ARBITER )
- return a("", "this server is an arbiter only", "ARBITER");
- if( s.s == MemberState::RS_DOWN )
- return a("", "member is down, slow, or unreachable", "DOWN");
- if( s.s == MemberState::RS_ROLLBACK )
- return a("", "rolling back operations to get in sync", "ROLLBACK");
- if( s.s == MemberState::RS_UNKNOWN)
- return a("", "we do not know what state this node is in", "UNKNOWN");
- if( s.s == MemberState::RS_REMOVED)
- return a("", "this server has been removed from the replica set config", "ROLLBACK");
- return "";
- }
+std::string stateAsHtml(const MemberState& s) {
+ using namespace html;
+
+ if (s.s == MemberState::RS_STARTUP)
+ return a("", "server still starting up, or still trying to initiate the set", "STARTUP");
+ if (s.s == MemberState::RS_PRIMARY)
+ return a("", "this server thinks it is primary", "PRIMARY");
+ if (s.s == MemberState::RS_SECONDARY)
+ return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY");
+ if (s.s == MemberState::RS_RECOVERING)
+ return a("",
+ "recovering/resyncing; after recovery usually auto-transitions to secondary",
+ "RECOVERING");
+ if (s.s == MemberState::RS_STARTUP2)
+ return a("", "loaded config, still determining who is primary", "STARTUP2");
+ if (s.s == MemberState::RS_ARBITER)
+ return a("", "this server is an arbiter only", "ARBITER");
+ if (s.s == MemberState::RS_DOWN)
+ return a("", "member is down, slow, or unreachable", "DOWN");
+ if (s.s == MemberState::RS_ROLLBACK)
+ return a("", "rolling back operations to get in sync", "ROLLBACK");
+ if (s.s == MemberState::RS_UNKNOWN)
+ return a("", "we do not know what state this node is in", "UNKNOWN");
+ if (s.s == MemberState::RS_REMOVED)
+ return a("", "this server has been removed from the replica set config", "ROLLBACK");
+ return "";
+}
}
- const std::string ReplSetHtmlSummary::toHtmlString() const {
- using namespace html;
+const std::string ReplSetHtmlSummary::toHtmlString() const {
+ using namespace html;
- std::stringstream s;
+ std::stringstream s;
- if (!_config.isInitialized()) {
- s << p("Still starting up, or else replset is not yet initiated.");
- return s.str();
- }
- if (_selfIndex < 0) {
- s << p("This node is not a member of its replica set configuration, it most likely was"
- " removed recently");
- return s.str();
- }
+ if (!_config.isInitialized()) {
+ s << p("Still starting up, or else replset is not yet initiated.");
+ return s.str();
+ }
+ if (_selfIndex < 0) {
+ s << p(
+ "This node is not a member of its replica set configuration, it most likely was"
+ " removed recently");
+ return s.str();
+ }
- int votesUp = 0;
- int totalVotes = 0;
- // Build table of node information.
- std::stringstream memberTable;
- const char *h[] =
- {"Member",
- "<a title=\"member id in the replset config\">id</a>",
- "Up",
- "<a title=\"length of time we have been continuously connected to the other member "
- "with no reconnects (for self, shows uptime)\">cctime</a>",
- "<a title=\"when this server last received a heartbeat response - includes error code "
- "responses\">Last heartbeat</a>",
- "Votes",
- "Priority",
- "State",
- "Messages",
- "<a title=\"how up to date this server is. this value polled every few seconds so "
- "actually lag is typically lower than value shown here.\">optime</a>",
- 0
- };
- memberTable << table(h);
-
- for (int i = 0; i < _config.getNumMembers(); ++i) {
- const MemberConfig& memberConfig = _config.getMemberAt(i);
- const MemberHeartbeatData& memberHB = _hbData[i];
- bool isSelf = _selfIndex == i;
- bool up = memberHB.getHealth() > 0;
-
- totalVotes += memberConfig.getNumVotes();
- if (up || isSelf) {
- votesUp += memberConfig.getNumVotes();
- }
+ int votesUp = 0;
+ int totalVotes = 0;
+ // Build table of node information.
+ std::stringstream memberTable;
+ const char* h[] = {
+ "Member",
+ "<a title=\"member id in the replset config\">id</a>",
+ "Up",
+ "<a title=\"length of time we have been continuously connected to the other member "
+ "with no reconnects (for self, shows uptime)\">cctime</a>",
+ "<a title=\"when this server last received a heartbeat response - includes error code "
+ "responses\">Last heartbeat</a>",
+ "Votes",
+ "Priority",
+ "State",
+ "Messages",
+ "<a title=\"how up to date this server is. this value polled every few seconds so "
+ "actually lag is typically lower than value shown here.\">optime</a>",
+ 0};
+ memberTable << table(h);
+
+ for (int i = 0; i < _config.getNumMembers(); ++i) {
+ const MemberConfig& memberConfig = _config.getMemberAt(i);
+ const MemberHeartbeatData& memberHB = _hbData[i];
+ bool isSelf = _selfIndex == i;
+ bool up = memberHB.getHealth() > 0;
+
+ totalVotes += memberConfig.getNumVotes();
+ if (up || isSelf) {
+ votesUp += memberConfig.getNumVotes();
+ }
- memberTable << tr();
- if (isSelf) {
- memberTable << td(memberConfig.getHostAndPort().toString() + " (me)");
- memberTable << td(memberConfig.getId());
- memberTable << td("1"); // up
- memberTable << td(ago(_selfUptime));
- memberTable << td(""); // last heartbeat
- memberTable << td(std::to_string(memberConfig.getNumVotes()));
- memberTable << td(std::to_string(memberConfig.getPriority()));
- memberTable << td(stateAsHtml(_selfState) +
- (memberConfig.isHidden() ? " (hidden)" : ""));
- memberTable << td(_selfHeartbeatMessage);
- memberTable << td(_selfOptime.toString());
+ memberTable << tr();
+ if (isSelf) {
+ memberTable << td(memberConfig.getHostAndPort().toString() + " (me)");
+ memberTable << td(memberConfig.getId());
+ memberTable << td("1"); // up
+ memberTable << td(ago(_selfUptime));
+ memberTable << td(""); // last heartbeat
+ memberTable << td(std::to_string(memberConfig.getNumVotes()));
+ memberTable << td(std::to_string(memberConfig.getPriority()));
+ memberTable << td(stateAsHtml(_selfState) +
+ (memberConfig.isHidden() ? " (hidden)" : ""));
+ memberTable << td(_selfHeartbeatMessage);
+ memberTable << td(_selfOptime.toString());
+ } else {
+ std::stringstream link;
+ link << "http://" << memberConfig.getHostAndPort().host() << ':'
+ << (memberConfig.getHostAndPort().port() + 1000) << "/_replSet";
+ memberTable << td(a(link.str(), "", memberConfig.getHostAndPort().toString()));
+ memberTable << td(memberConfig.getId());
+ memberTable << td(red(str::stream() << memberHB.getHealth(), !up));
+ const unsigned int uptime = timeDifference(_now, memberHB.getUpSince());
+ memberTable << td(ago(uptime));
+ if (memberHB.getLastHeartbeat() == Date_t()) {
+ memberTable << td("never");
+ } else {
+ memberTable << td(ago(timeDifference(_now, memberHB.getLastHeartbeat())));
}
- else {
- std::stringstream link;
- link << "http://" << memberConfig.getHostAndPort().host() << ':' <<
- (memberConfig.getHostAndPort().port() + 1000) << "/_replSet";
- memberTable << td( a(link.str(), "", memberConfig.getHostAndPort().toString()) );
- memberTable << td(memberConfig.getId());
- memberTable << td(red(str::stream() << memberHB.getHealth(), !up));
- const unsigned int uptime = timeDifference(_now, memberHB.getUpSince());
- memberTable << td(ago(uptime));
- if (memberHB.getLastHeartbeat() == Date_t()) {
- memberTable << td("never");
- }
- else {
- memberTable << td(ago(timeDifference(_now, memberHB.getLastHeartbeat())));
- }
- memberTable << td(std::to_string(memberConfig.getNumVotes()));
- memberTable << td(std::to_string(memberConfig.getPriority()));
- std::string state = memberHB.getState().toString() +
- (memberConfig.isHidden() ? " (hidden)" : "");
- if (up) {
- memberTable << td(state);
- }
- else {
- memberTable << td( grey(str::stream() << "(was " << state << ')', true) );
- }
- memberTable << td(grey(memberHB.getLastHeartbeatMsg(), !up));
- // TODO(dannenberg): change timestamp to optime in V1
- memberTable << td(memberHB.getLastHeartbeat() == Date_t() ?
- "?" : memberHB.getOpTime().toString());
+ memberTable << td(std::to_string(memberConfig.getNumVotes()));
+ memberTable << td(std::to_string(memberConfig.getPriority()));
+ std::string state =
+ memberHB.getState().toString() + (memberConfig.isHidden() ? " (hidden)" : "");
+ if (up) {
+ memberTable << td(state);
+ } else {
+ memberTable << td(grey(str::stream() << "(was " << state << ')', true));
}
- memberTable << _tr();
+ memberTable << td(grey(memberHB.getLastHeartbeatMsg(), !up));
+ // TODO(dannenberg): change timestamp to optime in V1
+ memberTable << td(
+ memberHB.getLastHeartbeat() == Date_t() ? "?" : memberHB.getOpTime().toString());
}
- memberTable << _table();
+ memberTable << _tr();
+ }
+ memberTable << _table();
- s << table(0, false);
- s << tr("Set name:", _config.getReplSetName());
- bool majorityUp = votesUp * 2 > totalVotes;
- s << tr("Majority up:", majorityUp ? "yes" : "no" );
+ s << table(0, false);
+ s << tr("Set name:", _config.getReplSetName());
+ bool majorityUp = votesUp * 2 > totalVotes;
+ s << tr("Majority up:", majorityUp ? "yes" : "no");
- const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex);
+ const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex);
- if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) {
- int lag = _hbData[_primaryIndex].getOpTime().getTimestamp().getSecs() -
- _selfOptime.getTimestamp().getSecs();
- s << tr("Lag: ", str::stream() << lag << " secs");
- }
+ if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) {
+ int lag = _hbData[_primaryIndex].getOpTime().getTimestamp().getSecs() -
+ _selfOptime.getTimestamp().getSecs();
+ s << tr("Lag: ", str::stream() << lag << " secs");
+ }
- s << _table();
+ s << _table();
- s << memberTable.str();
+ s << memberTable.str();
- return s.str();
- }
+ return s.str();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
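
The ago() helper above buckets a duration in seconds into three human-readable forms. The same logic as a standalone program (plain C++, no mongo dependencies), with the thresholds spelled out:

    #include <iostream>
    #include <sstream>
    #include <string>

    // Standalone copy of the ago() bucketing: under 3 minutes prints whole
    // seconds, under 1 hour prints fractional minutes, otherwise fractional hours.
    std::string ago(unsigned int durationSecs) {
        std::stringstream s;
        if (durationSecs < 180) {
            s << durationSecs << " sec";
            if (durationSecs != 1)
                s << 's';
        } else if (durationSecs < 3600) {
            s.precision(2);
            s << durationSecs / 60.0 << " mins";
        } else {
            s.precision(2);
            s << durationSecs / 3600.0 << " hrs";
        }
        return s.str();
    }

    int main() {
        std::cout << ago(1) << '\n';     // 1 sec
        std::cout << ago(90) << '\n';    // 90 secs
        std::cout << ago(600) << '\n';   // 10 mins
        std::cout << ago(7200) << '\n';  // 2 hrs
    }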
diff --git a/src/mongo/db/repl/repl_set_html_summary.h b/src/mongo/db/repl/repl_set_html_summary.h
index 997c5a3ceac..70e3c3eae3b 100644
--- a/src/mongo/db/repl/repl_set_html_summary.h
+++ b/src/mongo/db/repl/repl_set_html_summary.h
@@ -39,64 +39,63 @@ namespace mongo {
namespace repl {
- /**
- * Class containing all the information needed to build the replSet page on http interface,
- * and the logic to generate that page.
- */
- class ReplSetHtmlSummary {
- public:
- ReplSetHtmlSummary();
-
- const std::string toHtmlString() const;
-
- void setConfig(const ReplicaSetConfig& config) {
- _config = config;
- }
-
- void setHBData(const std::vector<MemberHeartbeatData>& hbData) {
- _hbData = hbData;
- }
-
- void setSelfIndex(int index) {
- _selfIndex = index;
- }
-
- void setPrimaryIndex(int index) {
- _primaryIndex = index;
- }
-
- void setSelfOptime(const OpTime& ts) {
- _selfOptime = ts;
- }
-
- void setSelfUptime(unsigned int time) {
- _selfUptime = time;
- }
-
- void setNow(Date_t now) {
- _now = now;
- }
-
- void setSelfState(const MemberState& state) {
- _selfState = state;
- }
-
- void setSelfHeartbeatMessage(StringData msg) {
- _selfHeartbeatMessage = msg.toString();
- }
-
- private:
-
- ReplicaSetConfig _config;
- std::vector<MemberHeartbeatData> _hbData;
- Date_t _now;
- int _selfIndex;
- int _primaryIndex;
- OpTime _selfOptime;
- unsigned int _selfUptime;
- MemberState _selfState;
- std::string _selfHeartbeatMessage;
- };
-
-} // namespace repl
-} // namespace mongo
+/**
+ * Class containing all the information needed to build the replSet page on the HTTP interface,
+ * and the logic to generate that page.
+ */
+class ReplSetHtmlSummary {
+public:
+ ReplSetHtmlSummary();
+
+ const std::string toHtmlString() const;
+
+ void setConfig(const ReplicaSetConfig& config) {
+ _config = config;
+ }
+
+ void setHBData(const std::vector<MemberHeartbeatData>& hbData) {
+ _hbData = hbData;
+ }
+
+ void setSelfIndex(int index) {
+ _selfIndex = index;
+ }
+
+ void setPrimaryIndex(int index) {
+ _primaryIndex = index;
+ }
+
+ void setSelfOptime(const OpTime& ts) {
+ _selfOptime = ts;
+ }
+
+ void setSelfUptime(unsigned int time) {
+ _selfUptime = time;
+ }
+
+ void setNow(Date_t now) {
+ _now = now;
+ }
+
+ void setSelfState(const MemberState& state) {
+ _selfState = state;
+ }
+
+ void setSelfHeartbeatMessage(StringData msg) {
+ _selfHeartbeatMessage = msg.toString();
+ }
+
+private:
+ ReplicaSetConfig _config;
+ std::vector<MemberHeartbeatData> _hbData;
+ Date_t _now;
+ int _selfIndex;
+ int _primaryIndex;
+ OpTime _selfOptime;
+ unsigned int _selfUptime;
+ MemberState _selfState;
+ std::string _selfHeartbeatMessage;
+};
+
+} // namespace repl
+} // namespace mongo
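
A hypothetical caller fills in every field before rendering; the variable names below are illustrative, not taken from the tree, and this is a fragment that assumes config, hbData, and the self-state values are already in scope (as they are for the http interface code):

    ReplSetHtmlSummary summary;
    summary.setConfig(config);              // ReplicaSetConfig for the set
    summary.setHBData(hbData);              // one MemberHeartbeatData per member
    summary.setSelfIndex(selfIndex);        // our slot in the config; -1 if removed
    summary.setPrimaryIndex(primaryIndex);  // -1 if no known primary
    summary.setSelfOptime(lastAppliedOpTime);
    summary.setSelfUptime(uptimeSecs);      // seconds, fed to ago() above
    summary.setNow(Date_t::now());
    summary.setSelfState(selfState);
    summary.setSelfHeartbeatMessage("");
    std::string page = summary.toHtmlString();  // HTML for the replSet page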
diff --git a/src/mongo/db/repl/repl_set_request_votes.cpp b/src/mongo/db/repl/repl_set_request_votes.cpp
index 3d823de4930..92a72a1a6c4 100644
--- a/src/mongo/db/repl/repl_set_request_votes.cpp
+++ b/src/mongo/db/repl/repl_set_request_votes.cpp
@@ -36,36 +36,35 @@
namespace mongo {
namespace repl {
- class CmdReplSetRequestVotes : public ReplSetCommand {
- public:
- CmdReplSetRequestVotes() : ReplSetCommand("replSetRequestVotes") { }
- private:
- bool run(OperationContext* txn,
- const std::string&,
- BSONObj& cmdObj,
- int,
- std::string& errmsg,
- BSONObjBuilder& result) final {
+class CmdReplSetRequestVotes : public ReplSetCommand {
+public:
+ CmdReplSetRequestVotes() : ReplSetCommand("replSetRequestVotes") {}
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
-
- ReplSetRequestVotesArgs parsedArgs;
- status = parsedArgs.initialize(cmdObj);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
+private:
+ bool run(OperationContext* txn,
+ const std::string&,
+ BSONObj& cmdObj,
+ int,
+ std::string& errmsg,
+ BSONObjBuilder& result) final {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK()) {
+ return appendCommandStatus(result, status);
+ }
- ReplSetRequestVotesResponse response;
- status = getGlobalReplicationCoordinator()->processReplSetRequestVotes(txn,
- parsedArgs,
- &response);
- response.addToBSON(&result);
+ ReplSetRequestVotesArgs parsedArgs;
+ status = parsedArgs.initialize(cmdObj);
+ if (!status.isOK()) {
return appendCommandStatus(result, status);
}
- } cmdReplSetRequestVotes;
-} // namespace repl
-} // namespace mongo
+ ReplSetRequestVotesResponse response;
+ status = getGlobalReplicationCoordinator()->processReplSetRequestVotes(
+ txn, parsedArgs, &response);
+ response.addToBSON(&result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetRequestVotes;
+
+} // namespace repl
+} // namespace mongo
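
The handler above passes cmdObj straight to ReplSetRequestVotesArgs::initialize(). A sketch of a well-formed request document, using the field names defined in repl_set_request_votes_args.cpp below; the values are illustrative only:

    BSONObjBuilder bob;
    bob.append("replSetRequestVotes", 1);
    bob.append("setName", "rs0");
    bob.append("dryRun", true);  // pre-election check (see the args header below)
    bob.append("term", 1LL);
    bob.appendIntOrLL("candidateId", 0);
    bob.appendIntOrLL("configVersion", 1);
    {
        BSONObjBuilder lastCommittedOp(bob.subobjStart("lastCommittedOp"));
        lastCommittedOp.append("ts", Timestamp(100, 1));
        lastCommittedOp.append("term", 1LL);
    }
    BSONObj cmd = bob.obj();

    ReplSetRequestVotesArgs parsedArgs;
    Status status = parsedArgs.initialize(cmd);  // Status::OK() for this shape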
diff --git a/src/mongo/db/repl/repl_set_request_votes_args.cpp b/src/mongo/db/repl/repl_set_request_votes_args.cpp
index 8c29e37b29d..8a119ff0013 100644
--- a/src/mongo/db/repl/repl_set_request_votes_args.cpp
+++ b/src/mongo/db/repl/repl_set_request_votes_args.cpp
@@ -36,173 +36,167 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kCandidateIdFieldName = "candidateId";
- const std::string kCommandName = "replSetRequestVotes";
- const std::string kConfigVersionFieldName = "configVersion";
- const std::string kDryRunFieldName = "dryRun";
- const std::string kLastCommittedOpFieldName = "lastCommittedOp";
- const std::string kOkFieldName = "ok";
- const std::string kOpTimeFieldName = "ts";
- const std::string kReasonFieldName = "reason";
- const std::string kSetNameFieldName = "setName";
- const std::string kTermFieldName = "term";
- const std::string kVoteGrantedFieldName = "voteGranted";
-
- const std::string kLegalArgsFieldNames[] = {
- kCandidateIdFieldName,
- kCommandName,
- kConfigVersionFieldName,
- kDryRunFieldName,
- kLastCommittedOpFieldName,
- kOpTimeFieldName,
- kSetNameFieldName,
- kTermFieldName,
- };
-
- const std::string kLegalResponseFieldNames[] = {
- kOkFieldName,
- kReasonFieldName,
- kTermFieldName,
- kVoteGrantedFieldName,
- };
+const std::string kCandidateIdFieldName = "candidateId";
+const std::string kCommandName = "replSetRequestVotes";
+const std::string kConfigVersionFieldName = "configVersion";
+const std::string kDryRunFieldName = "dryRun";
+const std::string kLastCommittedOpFieldName = "lastCommittedOp";
+const std::string kOkFieldName = "ok";
+const std::string kOpTimeFieldName = "ts";
+const std::string kReasonFieldName = "reason";
+const std::string kSetNameFieldName = "setName";
+const std::string kTermFieldName = "term";
+const std::string kVoteGrantedFieldName = "voteGranted";
+
+const std::string kLegalArgsFieldNames[] = {
+ kCandidateIdFieldName,
+ kCommandName,
+ kConfigVersionFieldName,
+ kDryRunFieldName,
+ kLastCommittedOpFieldName,
+ kOpTimeFieldName,
+ kSetNameFieldName,
+ kTermFieldName,
+};
+
+const std::string kLegalResponseFieldNames[] = {
+ kOkFieldName, kReasonFieldName, kTermFieldName, kVoteGrantedFieldName,
+};
} // namespace
- Status ReplSetRequestVotesArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetRequestVotes",
- argsObj,
- kLegalArgsFieldNames);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kCandidateIdFieldName, &_candidateId);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_cfgver);
- if (!status.isOK())
- return status;
-
- status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
- if (!status.isOK())
- return status;
-
- status = bsonExtractBooleanField(argsObj, kDryRunFieldName, &_dryRun);
- if (!status.isOK())
- return status;
-
- // extracting the lastCommittedOp is a bit of a process
- BSONObj lastCommittedOp = argsObj[kLastCommittedOpFieldName].Obj();
- Timestamp ts;
- status = bsonExtractTimestampField(lastCommittedOp, kOpTimeFieldName, &ts);
- if (!status.isOK())
- return status;
- long long term;
- status = bsonExtractIntegerField(lastCommittedOp, kTermFieldName, &term);
- if (!status.isOK())
- return status;
- _lastCommittedOp = OpTime(lastCommittedOp[kOpTimeFieldName].timestamp(),
- lastCommittedOp[kTermFieldName].Long());
-
- return Status::OK();
- }
-
- const std::string& ReplSetRequestVotesArgs::getSetName() const {
- return _setName;
- }
-
- long long ReplSetRequestVotesArgs::getTerm() const {
- return _term;
- }
-
- long long ReplSetRequestVotesArgs::getCandidateId() const {
- return _candidateId;
- }
-
- long long ReplSetRequestVotesArgs::getConfigVersion() const {
- return _cfgver;
- }
-
- OpTime ReplSetRequestVotesArgs::getLastCommittedOp() const {
- return _lastCommittedOp;
- }
-
- bool ReplSetRequestVotesArgs::isADryRun() const {
- return _dryRun;
- }
-
- void ReplSetRequestVotesArgs::addToBSON(BSONObjBuilder* builder) const {
- builder->append(kCommandName, 1);
- builder->append(kSetNameFieldName, _setName);
- builder->append(kDryRunFieldName, _dryRun);
- builder->append(kTermFieldName, _term);
- builder->appendIntOrLL(kCandidateIdFieldName, _candidateId);
- builder->appendIntOrLL(kConfigVersionFieldName, _cfgver);
- BSONObjBuilder lastCommittedOp(builder->subobjStart(kLastCommittedOpFieldName));
- lastCommittedOp.append(kOpTimeFieldName, _lastCommittedOp.getTimestamp());
- lastCommittedOp.append(kTermFieldName, _lastCommittedOp.getTerm());
- lastCommittedOp.done();
- }
-
- Status ReplSetRequestVotesResponse::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetRequestVotes",
- argsObj,
- kLegalResponseFieldNames);
- if (!status.isOK())
- return status;
-
- status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
- if (!status.isOK())
- return status;
-
- status = bsonExtractBooleanField(argsObj, kVoteGrantedFieldName, &_voteGranted);
- if (!status.isOK())
- return status;
-
- status = bsonExtractStringField(argsObj, kReasonFieldName, &_reason);
- if (!status.isOK())
- return status;
-
- status = bsonExtractBooleanField(argsObj, kOkFieldName, &_ok);
- if (!status.isOK())
- return status;
-
- return Status::OK();
- }
-
- bool ReplSetRequestVotesResponse::getOk() const {
- return _ok;
- }
-
- long long ReplSetRequestVotesResponse::getTerm() const {
- return _term;
- }
-
- bool ReplSetRequestVotesResponse::getVoteGranted() const {
- return _voteGranted;
- }
-
- const std::string& ReplSetRequestVotesResponse::getReason() const {
- return _reason;
- }
-
- void ReplSetRequestVotesResponse::addToBSON(BSONObjBuilder* builder) const {
- builder->append(kOkFieldName, _ok);
- builder->append(kTermFieldName, _term);
- builder->append(kVoteGrantedFieldName, _voteGranted);
- builder->append(kReasonFieldName, _reason);
- }
-
- BSONObj ReplSetRequestVotesResponse::toBSON() const {
- BSONObjBuilder builder;
- addToBSON(&builder);
- return builder.obj();
- }
-
-} // namespace repl
-} // namespace mongo
+Status ReplSetRequestVotesArgs::initialize(const BSONObj& argsObj) {
+ Status status = bsonCheckOnlyHasFields("ReplSetRequestVotes", argsObj, kLegalArgsFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kCandidateIdFieldName, &_candidateId);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_cfgver);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanField(argsObj, kDryRunFieldName, &_dryRun);
+ if (!status.isOK())
+ return status;
+
+    // lastCommittedOp arrives as a subdocument; validate its fields, then build the OpTime
+ BSONObj lastCommittedOp = argsObj[kLastCommittedOpFieldName].Obj();
+ Timestamp ts;
+ status = bsonExtractTimestampField(lastCommittedOp, kOpTimeFieldName, &ts);
+ if (!status.isOK())
+ return status;
+ long long term;
+ status = bsonExtractIntegerField(lastCommittedOp, kTermFieldName, &term);
+ if (!status.isOK())
+ return status;
+    _lastCommittedOp = OpTime(ts, term);
+
+ return Status::OK();
+}
+
+const std::string& ReplSetRequestVotesArgs::getSetName() const {
+ return _setName;
+}
+
+long long ReplSetRequestVotesArgs::getTerm() const {
+ return _term;
+}
+
+long long ReplSetRequestVotesArgs::getCandidateId() const {
+ return _candidateId;
+}
+
+long long ReplSetRequestVotesArgs::getConfigVersion() const {
+ return _cfgver;
+}
+
+OpTime ReplSetRequestVotesArgs::getLastCommittedOp() const {
+ return _lastCommittedOp;
+}
+
+bool ReplSetRequestVotesArgs::isADryRun() const {
+ return _dryRun;
+}
+
+void ReplSetRequestVotesArgs::addToBSON(BSONObjBuilder* builder) const {
+ builder->append(kCommandName, 1);
+ builder->append(kSetNameFieldName, _setName);
+ builder->append(kDryRunFieldName, _dryRun);
+ builder->append(kTermFieldName, _term);
+ builder->appendIntOrLL(kCandidateIdFieldName, _candidateId);
+ builder->appendIntOrLL(kConfigVersionFieldName, _cfgver);
+ BSONObjBuilder lastCommittedOp(builder->subobjStart(kLastCommittedOpFieldName));
+ lastCommittedOp.append(kOpTimeFieldName, _lastCommittedOp.getTimestamp());
+ lastCommittedOp.append(kTermFieldName, _lastCommittedOp.getTerm());
+ lastCommittedOp.done();
+}
+
+Status ReplSetRequestVotesResponse::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("ReplSetRequestVotes", argsObj, kLegalResponseFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanField(argsObj, kVoteGrantedFieldName, &_voteGranted);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractStringField(argsObj, kReasonFieldName, &_reason);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanField(argsObj, kOkFieldName, &_ok);
+ if (!status.isOK())
+ return status;
+
+ return Status::OK();
+}
+
+bool ReplSetRequestVotesResponse::getOk() const {
+ return _ok;
+}
+
+long long ReplSetRequestVotesResponse::getTerm() const {
+ return _term;
+}
+
+bool ReplSetRequestVotesResponse::getVoteGranted() const {
+ return _voteGranted;
+}
+
+const std::string& ReplSetRequestVotesResponse::getReason() const {
+ return _reason;
+}
+
+void ReplSetRequestVotesResponse::addToBSON(BSONObjBuilder* builder) const {
+ builder->append(kOkFieldName, _ok);
+ builder->append(kTermFieldName, _term);
+ builder->append(kVoteGrantedFieldName, _voteGranted);
+ builder->append(kReasonFieldName, _reason);
+}
+
+BSONObj ReplSetRequestVotesResponse::toBSON() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.obj();
+}
+
+} // namespace repl
+} // namespace mongo
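
And the response side, again a sketch using only the API shown above:

    ReplSetRequestVotesResponse response;
    response.setOk(true);
    response.setTerm(1);
    response.setVoteGranted(true);
    response.setReason("");

    BSONObj wire = response.toBSON();

    ReplSetRequestVotesResponse parsed;
    Status status = parsed.initialize(wire);  // Status::OK(): all four fields present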
diff --git a/src/mongo/db/repl/repl_set_request_votes_args.h b/src/mongo/db/repl/repl_set_request_votes_args.h
index a0f2dc2157b..00ba4d06c1c 100644
--- a/src/mongo/db/repl/repl_set_request_votes_args.h
+++ b/src/mongo/db/repl/repl_set_request_votes_args.h
@@ -34,55 +34,63 @@
namespace mongo {
- class BSONObj;
+class BSONObj;
namespace repl {
- class ReplSetRequestVotesArgs {
- public:
- Status initialize(const BSONObj& argsObj);
-
- const std::string& getSetName() const;
- long long getTerm() const;
- long long getCandidateId() const;
- long long getConfigVersion() const;
- OpTime getLastCommittedOp() const;
- bool isADryRun() const;
-
- void addToBSON(BSONObjBuilder* builder) const;
-
- private:
- std::string _setName; // Name of the replset
- long long _term = -1; // Current known term of the command issuer
- long long _candidateId = -1; // replSet id of the member who sent the replSetRequestVotesCmd
- long long _cfgver = -1; // replSet config version known to the command issuer
- OpTime _lastCommittedOp; // The last known committed op of the command issuer
- bool _dryRun = false; // Indicates this is a pre-election check when true
- };
-
- class ReplSetRequestVotesResponse {
- public:
- Status initialize(const BSONObj& argsObj);
-
- void setOk(bool ok) { _ok = ok; }
- void setVoteGranted(bool voteGranted) { _voteGranted = voteGranted; }
- void setTerm(long long term) { _term = term; }
- void setReason(const std::string& reason) { _reason = reason; }
-
- bool getOk() const;
- long long getTerm() const;
- bool getVoteGranted() const;
- const std::string& getReason() const;
-
- void addToBSON(BSONObjBuilder* builder) const;
- BSONObj toBSON() const;
-
- private:
- bool _ok = false;
- long long _term = -1;
- bool _voteGranted = false;
- std::string _reason;
- };
-
-} // namespace repl
-} // namespace mongo
+class ReplSetRequestVotesArgs {
+public:
+ Status initialize(const BSONObj& argsObj);
+
+ const std::string& getSetName() const;
+ long long getTerm() const;
+ long long getCandidateId() const;
+ long long getConfigVersion() const;
+ OpTime getLastCommittedOp() const;
+ bool isADryRun() const;
+
+ void addToBSON(BSONObjBuilder* builder) const;
+
+private:
+ std::string _setName; // Name of the replset
+ long long _term = -1; // Current known term of the command issuer
+ long long _candidateId = -1; // replSet id of the member who sent the replSetRequestVotesCmd
+ long long _cfgver = -1; // replSet config version known to the command issuer
+ OpTime _lastCommittedOp; // The last known committed op of the command issuer
+ bool _dryRun = false; // Indicates this is a pre-election check when true
+};
+
+class ReplSetRequestVotesResponse {
+public:
+ Status initialize(const BSONObj& argsObj);
+
+ void setOk(bool ok) {
+ _ok = ok;
+ }
+ void setVoteGranted(bool voteGranted) {
+ _voteGranted = voteGranted;
+ }
+ void setTerm(long long term) {
+ _term = term;
+ }
+ void setReason(const std::string& reason) {
+ _reason = reason;
+ }
+
+ bool getOk() const;
+ long long getTerm() const;
+ bool getVoteGranted() const;
+ const std::string& getReason() const;
+
+ void addToBSON(BSONObjBuilder* builder) const;
+ BSONObj toBSON() const;
+
+private:
+ bool _ok = false;
+ long long _term = -1;
+ bool _voteGranted = false;
+ std::string _reason;
+};
+
+} // namespace repl
+} // namespace mongo
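
To make the args getters concrete, a hypothetical vote check built on them; this mirrors the general shape of such checks but is not the real coordinator logic:

    // Illustrative only; the real decision lives in the topology coordinator.
    bool shouldGrantVote(const ReplSetRequestVotesArgs& args,
                         const std::string& ourSetName,
                         long long ourTerm,
                         long long ourConfigVersion,
                         const OpTime& ourLastApplied) {
        if (args.getSetName() != ourSetName)
            return false;  // wrong replica set
        if (args.getTerm() < ourTerm)
            return false;  // stale term
        if (args.getConfigVersion() < ourConfigVersion)
            return false;  // stale config
        // Candidate must be at least as current as we are.
        return !(args.getLastCommittedOp() < ourLastApplied);
    }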
diff --git a/src/mongo/db/repl/repl_settings.cpp b/src/mongo/db/repl/repl_settings.cpp
index 3b22a3203eb..a385d89c55b 100644
--- a/src/mongo/db/repl/repl_settings.cpp
+++ b/src/mongo/db/repl/repl_settings.cpp
@@ -35,13 +35,12 @@
namespace mongo {
namespace repl {
- MONGO_EXPORT_STARTUP_SERVER_PARAMETER(maxSyncSourceLagSecs, int, 30);
- MONGO_INITIALIZER(maxSyncSourceLagSecsCheck) (InitializerContext*) {
- if (maxSyncSourceLagSecs < 1) {
- return Status(ErrorCodes::BadValue, "maxSyncSourceLagSecs must be > 0");
- }
- return Status::OK();
+MONGO_EXPORT_STARTUP_SERVER_PARAMETER(maxSyncSourceLagSecs, int, 30);
+MONGO_INITIALIZER(maxSyncSourceLagSecsCheck)(InitializerContext*) {
+ if (maxSyncSourceLagSecs < 1) {
+ return Status(ErrorCodes::BadValue, "maxSyncSourceLagSecs must be > 0");
}
-
+ return Status::OK();
+}
}
}
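
The two-step pattern above (export a startup parameter, then validate it in an initializer) generalizes; a sketch with a hypothetical parameter name:

    // Hypothetical parameter, same pattern as maxSyncSourceLagSecs above.
    MONGO_EXPORT_STARTUP_SERVER_PARAMETER(exampleBatchSize, int, 100);
    MONGO_INITIALIZER(exampleBatchSizeCheck)(InitializerContext*) {
        if (exampleBatchSize < 1) {
            return Status(ErrorCodes::BadValue, "exampleBatchSize must be > 0");
        }
        return Status::OK();
    }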
diff --git a/src/mongo/db/repl/repl_settings.h b/src/mongo/db/repl/repl_settings.h
index cec0b90040f..5c1e6032acc 100644
--- a/src/mongo/db/repl/repl_settings.h
+++ b/src/mongo/db/repl/repl_settings.h
@@ -38,90 +38,91 @@
namespace mongo {
namespace repl {
- extern int maxSyncSourceLagSecs;
-
- bool anyReplEnabled();
-
- /* replication slave? (possibly with slave)
- --slave cmd line setting -> SimpleSlave
- */
- typedef enum { NotSlave=0, SimpleSlave } SlaveTypes;
-
- class ReplSettings {
- public:
- SlaveTypes slave;
-
- /** true means we are master and doing replication. if we are not writing to oplog, this won't be true. */
- bool master;
-
- bool fastsync;
-
- bool autoresync;
-
- int slavedelay;
-
- long long oplogSize; // --oplogSize
-
- // for master/slave replication
- std::string source; // --source
- std::string only; // --only
- int pretouch; // --pretouch for replication application (experimental)
-
- std::string replSet; // --replSet[/<seedlist>]
- std::string ourSetName() const {
- std::string setname;
- size_t sl = replSet.find('/');
- if( sl == std::string::npos )
- return replSet;
- return replSet.substr(0, sl);
- }
- bool usingReplSets() const { return !replSet.empty(); }
-
- std::string rsIndexPrefetch;// --indexPrefetch
-
- ReplSettings()
- : slave(NotSlave),
- master(false),
- fastsync(),
- autoresync(false),
- slavedelay(),
- oplogSize(0),
- pretouch(0) {
- }
-
- // TODO(spencer): Remove explicit copy constructor after we no longer have mutable state
- // in ReplSettings.
- ReplSettings(const ReplSettings& other) :
- slave(other.slave),
- master(other.master),
- fastsync(other.fastsync),
- autoresync(other.autoresync),
- slavedelay(other.slavedelay),
- oplogSize(other.oplogSize),
- source(other.source),
- only(other.only),
- pretouch(other.pretouch),
- replSet(other.replSet),
- rsIndexPrefetch(other.rsIndexPrefetch) {}
-
- ReplSettings& operator=(const ReplSettings& other) {
- if (this == &other) return *this;
-
- slave = other.slave;
- master = other.master;
- fastsync = other.fastsync;
- autoresync = other.autoresync;
- slavedelay = other.slavedelay;
- oplogSize = other.oplogSize;
- source = other.source;
- only = other.only;
- pretouch = other.pretouch;
- replSet = other.replSet;
- rsIndexPrefetch = other.rsIndexPrefetch;
- return *this;
- }
+extern int maxSyncSourceLagSecs;
+
+bool anyReplEnabled();
- };
+/* Is this node a replication slave?
+   --slave command line setting -> SimpleSlave
+*/
+typedef enum { NotSlave = 0, SimpleSlave } SlaveTypes;
+
+class ReplSettings {
+public:
+ SlaveTypes slave;
+
+    /** True means we are master and doing replication; if we are not writing to the oplog,
+     *  this won't be true. */
+ bool master;
+
+ bool fastsync;
+
+ bool autoresync;
+
+ int slavedelay;
+
+ long long oplogSize; // --oplogSize
+
+ // for master/slave replication
+ std::string source; // --source
+ std::string only; // --only
+ int pretouch; // --pretouch for replication application (experimental)
+
+ std::string replSet; // --replSet[/<seedlist>]
+ std::string ourSetName() const {
+ size_t sl = replSet.find('/');
+ if (sl == std::string::npos)
+ return replSet;
+ return replSet.substr(0, sl);
+ }
+ bool usingReplSets() const {
+ return !replSet.empty();
+ }
+
+ std::string rsIndexPrefetch; // --indexPrefetch
+
+ ReplSettings()
+ : slave(NotSlave),
+ master(false),
+ fastsync(),
+ autoresync(false),
+ slavedelay(),
+ oplogSize(0),
+ pretouch(0) {}
+
+ // TODO(spencer): Remove explicit copy constructor after we no longer have mutable state
+ // in ReplSettings.
+ ReplSettings(const ReplSettings& other)
+ : slave(other.slave),
+ master(other.master),
+ fastsync(other.fastsync),
+ autoresync(other.autoresync),
+ slavedelay(other.slavedelay),
+ oplogSize(other.oplogSize),
+ source(other.source),
+ only(other.only),
+ pretouch(other.pretouch),
+ replSet(other.replSet),
+ rsIndexPrefetch(other.rsIndexPrefetch) {}
+
+ ReplSettings& operator=(const ReplSettings& other) {
+ if (this == &other)
+ return *this;
-} // namespace repl
-} // namespace mongo
+ slave = other.slave;
+ master = other.master;
+ fastsync = other.fastsync;
+ autoresync = other.autoresync;
+ slavedelay = other.slavedelay;
+ oplogSize = other.oplogSize;
+ source = other.source;
+ only = other.only;
+ pretouch = other.pretouch;
+ replSet = other.replSet;
+ rsIndexPrefetch = other.rsIndexPrefetch;
+ return *this;
+ }
+};
+
+} // namespace repl
+} // namespace mongo
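
ourSetName() above strips an optional seed list from the --replSet value, so "rs0/h1:27017,h2:27017" names the set "rs0". The same parsing as a standalone check:

    #include <cassert>
    #include <string>

    // Standalone copy of the ourSetName() parsing above.
    std::string setNameOf(const std::string& replSet) {
        size_t sl = replSet.find('/');
        return sl == std::string::npos ? replSet : replSet.substr(0, sl);
    }

    int main() {
        assert(setNameOf("rs0") == "rs0");
        assert(setNameOf("rs0/h1:27017,h2:27017") == "rs0");
        return 0;
    }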
diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp
index ab901fe5886..eef500a2328 100644
--- a/src/mongo/db/repl/replica_set_config.cpp
+++ b/src/mongo/db/repl/replica_set_config.cpp
@@ -41,542 +41,521 @@ namespace mongo {
namespace repl {
#ifndef _MSC_VER
- const size_t ReplicaSetConfig::kMaxMembers;
- const size_t ReplicaSetConfig::kMaxVotingMembers;
+const size_t ReplicaSetConfig::kMaxMembers;
+const size_t ReplicaSetConfig::kMaxVotingMembers;
#endif
- const std::string ReplicaSetConfig::kVersionFieldName = "version";
- const std::string ReplicaSetConfig::kMajorityWriteConcernModeName = "$majority";
- const Seconds ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod(10);
+const std::string ReplicaSetConfig::kVersionFieldName = "version";
+const std::string ReplicaSetConfig::kMajorityWriteConcernModeName = "$majority";
+const Seconds ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod(10);
namespace {
- const std::string kIdFieldName = "_id";
- const std::string kMembersFieldName = "members";
- const std::string kSettingsFieldName = "settings";
- const std::string kStepDownCheckWriteConcernModeName = "$stepDownCheck";
- const std::string kProtocolVersionFieldName = "protocolVersion";
-
- const std::string kLegalConfigTopFieldNames[] = {
- kIdFieldName,
- ReplicaSetConfig::kVersionFieldName,
- kMembersFieldName,
- kSettingsFieldName,
- kProtocolVersionFieldName
- };
-
- const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs";
- const std::string kChainingAllowedFieldName = "chainingAllowed";
- const std::string kGetLastErrorDefaultsFieldName = "getLastErrorDefaults";
- const std::string kGetLastErrorModesFieldName = "getLastErrorModes";
+const std::string kIdFieldName = "_id";
+const std::string kMembersFieldName = "members";
+const std::string kSettingsFieldName = "settings";
+const std::string kStepDownCheckWriteConcernModeName = "$stepDownCheck";
+const std::string kProtocolVersionFieldName = "protocolVersion";
-} // namespace
-
- ReplicaSetConfig::ReplicaSetConfig() : _isInitialized(false),
- _heartbeatTimeoutPeriod(0),
- _protocolVersion(0) {}
-
- Status ReplicaSetConfig::initialize(const BSONObj& cfg) {
- _isInitialized = false;
- _members.clear();
- Status status = bsonCheckOnlyHasFields(
- "replica set configuration", cfg, kLegalConfigTopFieldNames);
- if (!status.isOK())
- return status;
-
- //
- // Parse replSetName
- //
- status = bsonExtractStringField(cfg, kIdFieldName, &_replSetName);
- if (!status.isOK())
- return status;
+const std::string kLegalConfigTopFieldNames[] = {kIdFieldName,
+ ReplicaSetConfig::kVersionFieldName,
+ kMembersFieldName,
+ kSettingsFieldName,
+ kProtocolVersionFieldName};
- //
- // Parse version
- //
- status = bsonExtractIntegerField(cfg, kVersionFieldName, &_version);
- if (!status.isOK())
- return status;
+const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs";
+const std::string kChainingAllowedFieldName = "chainingAllowed";
+const std::string kGetLastErrorDefaultsFieldName = "getLastErrorDefaults";
+const std::string kGetLastErrorModesFieldName = "getLastErrorModes";
- //
- // Parse members
- //
- BSONElement membersElement;
- status = bsonExtractTypedField(cfg, kMembersFieldName, Array, &membersElement);
- if (!status.isOK())
- return status;
-
- for (BSONObj::iterator membersIterator(membersElement.Obj()); membersIterator.more();) {
- BSONElement memberElement = membersIterator.next();
- if (memberElement.type() != Object) {
- return Status(ErrorCodes::TypeMismatch, str::stream() <<
- "Expected type of " << kMembersFieldName << "." <<
- memberElement.fieldName() << " to be Object, but found " <<
- typeName(memberElement.type()));
- }
- _members.resize(_members.size() + 1);
- status = _members.back().initialize(memberElement.Obj(), &_tagConfig);
- if (!status.isOK())
- return status;
- }
+} // namespace
- //
- // Parse settings
- //
- BSONElement settingsElement;
- status = bsonExtractTypedField(cfg, kSettingsFieldName, Object, &settingsElement);
- BSONObj settings;
- if (status.isOK()) {
- settings = settingsElement.Obj();
- }
- else if (status != ErrorCodes::NoSuchKey) {
- return status;
- }
- status = _parseSettingsSubdocument(settings);
+ReplicaSetConfig::ReplicaSetConfig()
+ : _isInitialized(false), _heartbeatTimeoutPeriod(0), _protocolVersion(0) {}
+
+Status ReplicaSetConfig::initialize(const BSONObj& cfg) {
+ _isInitialized = false;
+ _members.clear();
+ Status status =
+ bsonCheckOnlyHasFields("replica set configuration", cfg, kLegalConfigTopFieldNames);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse replSetName
+ //
+ status = bsonExtractStringField(cfg, kIdFieldName, &_replSetName);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse version
+ //
+ status = bsonExtractIntegerField(cfg, kVersionFieldName, &_version);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse members
+ //
+ BSONElement membersElement;
+ status = bsonExtractTypedField(cfg, kMembersFieldName, Array, &membersElement);
+ if (!status.isOK())
+ return status;
+
+ for (BSONObj::iterator membersIterator(membersElement.Obj()); membersIterator.more();) {
+ BSONElement memberElement = membersIterator.next();
+ if (memberElement.type() != Object) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected type of " << kMembersFieldName << "."
+ << memberElement.fieldName() << " to be Object, but found "
+ << typeName(memberElement.type()));
+ }
+ _members.resize(_members.size() + 1);
+ status = _members.back().initialize(memberElement.Obj(), &_tagConfig);
if (!status.isOK())
return status;
+ }
- //
- // Parse protocol version
- //
- BSONElement protocolVersionElement;
- status = bsonExtractIntegerField(cfg, kProtocolVersionFieldName, &_protocolVersion);
- if (!status.isOK() && status != ErrorCodes::NoSuchKey) {
- return status;
- }
-
- _calculateMajorities();
- _addInternalWriteConcernModes();
- _isInitialized = true;
- return Status::OK();
+ //
+ // Parse settings
+ //
+ BSONElement settingsElement;
+ status = bsonExtractTypedField(cfg, kSettingsFieldName, Object, &settingsElement);
+ BSONObj settings;
+ if (status.isOK()) {
+ settings = settingsElement.Obj();
+ } else if (status != ErrorCodes::NoSuchKey) {
+ return status;
+ }
+ status = _parseSettingsSubdocument(settings);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse protocol version
+ //
+ BSONElement protocolVersionElement;
+ status = bsonExtractIntegerField(cfg, kProtocolVersionFieldName, &_protocolVersion);
+ if (!status.isOK() && status != ErrorCodes::NoSuchKey) {
+ return status;
}
- Status ReplicaSetConfig::_parseSettingsSubdocument(const BSONObj& settings) {
- //
- // Parse heartbeatTimeoutSecs
- //
- BSONElement hbTimeoutSecsElement = settings[kHeartbeatTimeoutFieldName];
- if (hbTimeoutSecsElement.eoo()) {
- _heartbeatTimeoutPeriod = Seconds(kDefaultHeartbeatTimeoutPeriod);
- }
- else if (hbTimeoutSecsElement.isNumber()) {
- _heartbeatTimeoutPeriod = Seconds(hbTimeoutSecsElement.numberInt());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected type of " <<
- kSettingsFieldName << "." << kHeartbeatTimeoutFieldName <<
- " to be a number, but found a value of type " <<
- typeName(hbTimeoutSecsElement.type()));
- }
+ _calculateMajorities();
+ _addInternalWriteConcernModes();
+ _isInitialized = true;
+ return Status::OK();
+}
+
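// Usage sketch (illustrative only; the set name and hosts are hypothetical, and
// fromjson() from "mongo/db/json.h" is assumed for brevity):
//
//     ReplicaSetConfig config;
//     Status status = config.initialize(fromjson(
//         "{_id: 'rs0', version: 1, members: ["
//         "{_id: 0, host: 'node0.example.net:27017'},"
//         "{_id: 1, host: 'node1.example.net:27017'},"
//         "{_id: 2, host: 'node2.example.net:27017'}]}"));
//     if (status.isOK()) {
//         status = config.validate();  // initialize() parses; validate() checks consistency
//     }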
+Status ReplicaSetConfig::_parseSettingsSubdocument(const BSONObj& settings) {
+ //
+ // Parse heartbeatTimeoutSecs
+ //
+ BSONElement hbTimeoutSecsElement = settings[kHeartbeatTimeoutFieldName];
+ if (hbTimeoutSecsElement.eoo()) {
+ _heartbeatTimeoutPeriod = Seconds(kDefaultHeartbeatTimeoutPeriod);
+ } else if (hbTimeoutSecsElement.isNumber()) {
+ _heartbeatTimeoutPeriod = Seconds(hbTimeoutSecsElement.numberInt());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected type of " << kSettingsFieldName << "."
+ << kHeartbeatTimeoutFieldName
+ << " to be a number, but found a value of type "
+ << typeName(hbTimeoutSecsElement.type()));
+ }
- //
- // Parse chainingAllowed
- //
- Status status = bsonExtractBooleanFieldWithDefault(settings,
- kChainingAllowedFieldName,
- true,
- &_chainingAllowed);
+ //
+ // Parse chainingAllowed
+ //
+ Status status = bsonExtractBooleanFieldWithDefault(
+ settings, kChainingAllowedFieldName, true, &_chainingAllowed);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse getLastErrorDefaults
+ //
+ BSONElement gleDefaultsElement;
+ status = bsonExtractTypedField(
+ settings, kGetLastErrorDefaultsFieldName, Object, &gleDefaultsElement);
+ if (status.isOK()) {
+ status = _defaultWriteConcern.parse(gleDefaultsElement.Obj());
if (!status.isOK())
return status;
+ } else if (status == ErrorCodes::NoSuchKey) {
+ // Default write concern is w: 1.
+ _defaultWriteConcern.reset();
+ _defaultWriteConcern.wNumNodes = 1;
+ } else {
+ return status;
+ }
- //
- // Parse getLastErrorDefaults
- //
- BSONElement gleDefaultsElement;
- status = bsonExtractTypedField(settings,
- kGetLastErrorDefaultsFieldName,
- Object,
- &gleDefaultsElement);
- if (status.isOK()) {
- status = _defaultWriteConcern.parse(gleDefaultsElement.Obj());
- if (!status.isOK())
- return status;
- }
- else if (status == ErrorCodes::NoSuchKey) {
- // Default write concern is w: 1.
- _defaultWriteConcern.reset();
- _defaultWriteConcern.wNumNodes = 1;
- }
- else {
- return status;
- }
+ //
+ // Parse getLastErrorModes
+ //
+ BSONElement gleModesElement;
+ status = bsonExtractTypedField(settings, kGetLastErrorModesFieldName, Object, &gleModesElement);
+ BSONObj gleModes;
+ if (status.isOK()) {
+ gleModes = gleModesElement.Obj();
+ } else if (status != ErrorCodes::NoSuchKey) {
+ return status;
+ }
- //
- // Parse getLastErrorModes
- //
- BSONElement gleModesElement;
- status = bsonExtractTypedField(settings,
- kGetLastErrorModesFieldName,
- Object,
- &gleModesElement);
- BSONObj gleModes;
- if (status.isOK()) {
- gleModes = gleModesElement.Obj();
- }
- else if (status != ErrorCodes::NoSuchKey) {
- return status;
+ for (BSONObj::iterator gleModeIter(gleModes); gleModeIter.more();) {
+ const BSONElement modeElement = gleModeIter.next();
+ if (_customWriteConcernModes.find(modeElement.fieldNameStringData()) !=
+ _customWriteConcernModes.end()) {
+ return Status(ErrorCodes::DuplicateKey,
+ str::stream() << kSettingsFieldName << '.' << kGetLastErrorModesFieldName
+ << " contains multiple fields named "
+ << modeElement.fieldName());
+ }
+ if (modeElement.type() != Object) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected " << kSettingsFieldName << '.'
+ << kGetLastErrorModesFieldName << '.'
+ << modeElement.fieldName() << " to be an Object, not "
+ << typeName(modeElement.type()));
}
-
- for (BSONObj::iterator gleModeIter(gleModes); gleModeIter.more();) {
- const BSONElement modeElement = gleModeIter.next();
- if (_customWriteConcernModes.find(modeElement.fieldNameStringData()) !=
- _customWriteConcernModes.end()) {
-
- return Status(ErrorCodes::DuplicateKey, str::stream() << kSettingsFieldName <<
- '.' << kGetLastErrorModesFieldName <<
- " contains multiple fields named " << modeElement.fieldName());
+ ReplicaSetTagPattern pattern = _tagConfig.makePattern();
+ for (BSONObj::iterator constraintIter(modeElement.Obj()); constraintIter.more();) {
+ const BSONElement constraintElement = constraintIter.next();
+ if (!constraintElement.isNumber()) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream()
+ << "Expected " << kSettingsFieldName << '.'
+ << kGetLastErrorModesFieldName << '.' << modeElement.fieldName()
+ << '.' << constraintElement.fieldName() << " to be a number, not "
+ << typeName(constraintElement.type()));
}
- if (modeElement.type() != Object) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected " <<
- kSettingsFieldName << '.' << kGetLastErrorModesFieldName << '.' <<
- modeElement.fieldName() << " to be an Object, not " <<
- typeName(modeElement.type()));
+ const int minCount = constraintElement.numberInt();
+ if (minCount <= 0) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Value of " << kSettingsFieldName << '.'
+ << kGetLastErrorModesFieldName << '.'
+ << modeElement.fieldName() << '.'
+ << constraintElement.fieldName()
+ << " must be positive, but found " << minCount);
}
- ReplicaSetTagPattern pattern = _tagConfig.makePattern();
- for (BSONObj::iterator constraintIter(modeElement.Obj()); constraintIter.more();) {
- const BSONElement constraintElement = constraintIter.next();
- if (!constraintElement.isNumber()) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected " <<
- kSettingsFieldName << '.' << kGetLastErrorModesFieldName << '.' <<
- modeElement.fieldName() << '.' << constraintElement.fieldName() <<
- " to be a number, not " << typeName(constraintElement.type()));
- }
- const int minCount = constraintElement.numberInt();
- if (minCount <= 0) {
- return Status(ErrorCodes::BadValue, str::stream() << "Value of " <<
- kSettingsFieldName << '.' << kGetLastErrorModesFieldName << '.' <<
- modeElement.fieldName() << '.' << constraintElement.fieldName() <<
- " must be positive, but found " << minCount);
- }
- status = _tagConfig.addTagCountConstraintToPattern(
- &pattern,
- constraintElement.fieldNameStringData(),
- minCount);
- if (!status.isOK()) {
- return status;
- }
+ status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, constraintElement.fieldNameStringData(), minCount);
+ if (!status.isOK()) {
+ return status;
}
- _customWriteConcernModes[modeElement.fieldNameStringData()] = pattern;
}
- return Status::OK();
+ _customWriteConcernModes[modeElement.fieldNameStringData()] = pattern;
+ }
+ return Status::OK();
+}
+
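// For reference, a settings subdocument accepted by _parseSettingsSubdocument
// above (values hypothetical). A tag count constraint such as {dc: 2} is
// satisfied once members holding at least two distinct values of the "dc" tag
// have acknowledged the write:
//
//     settings: {
//         heartbeatTimeoutSecs: 10,
//         chainingAllowed: true,
//         getLastErrorModes: {twoDataCenters: {dc: 2}}
//     }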
+Status ReplicaSetConfig::validate() const {
+ if (_version <= 0 || _version > std::numeric_limits<int>::max()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kVersionFieldName << " field value of " << _version
+ << " is out of range");
+ }
+ if (_replSetName.empty()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Replica set configuration must have non-empty "
+ << kIdFieldName << " field");
+ }
+ if (_heartbeatTimeoutPeriod < Seconds(0)) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kSettingsFieldName << '.' << kHeartbeatTimeoutFieldName
+ << " field value must be non-negative, "
+ "but found " << _heartbeatTimeoutPeriod.count());
+ }
+ if (_members.size() > kMaxMembers || _members.empty()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Replica set configuration contains " << _members.size()
+ << " members, but must have at least 1 and no more than "
+ << kMaxMembers);
}
- Status ReplicaSetConfig::validate() const {
- if (_version <= 0 || _version > std::numeric_limits<int>::max()) {
- return Status(ErrorCodes::BadValue, str::stream() << kVersionFieldName <<
- " field value of " << _version << " is out of range");
- }
- if (_replSetName.empty()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Replica set configuration must have non-empty " << kIdFieldName <<
- " field");
+ size_t localhostCount = 0;
+ size_t voterCount = 0;
+ size_t arbiterCount = 0;
+ size_t electableCount = 0;
+ for (size_t i = 0; i < _members.size(); ++i) {
+ const MemberConfig& memberI = _members[i];
+ Status status = memberI.validate();
+ if (!status.isOK())
+ return status;
+ if (memberI.getHostAndPort().isLocalHost()) {
+ ++localhostCount;
}
- if (_heartbeatTimeoutPeriod < Seconds(0)) {
- return Status(ErrorCodes::BadValue, str::stream() << kSettingsFieldName << '.' <<
- kHeartbeatTimeoutFieldName << " field value must be non-negative, "
- "but found " << _heartbeatTimeoutPeriod.count());
+ if (memberI.isVoter()) {
+ ++voterCount;
}
- if (_members.size() > kMaxMembers || _members.empty()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Replica set configuration contains " << _members.size() <<
- " members, but must have at least 1 and no more than " << kMaxMembers);
+ // Nodes may be arbiters or electable, or neither, but never both.
+ if (memberI.isArbiter()) {
+ ++arbiterCount;
+ } else if (memberI.getPriority() > 0) {
+ ++electableCount;
}
-
- size_t localhostCount = 0;
- size_t voterCount = 0;
- size_t arbiterCount = 0;
- size_t electableCount = 0;
- for (size_t i = 0; i < _members.size(); ++i) {
- const MemberConfig& memberI = _members[i];
- Status status = memberI.validate();
- if (!status.isOK())
- return status;
- if (memberI.getHostAndPort().isLocalHost()) {
- ++localhostCount;
- }
- if (memberI.isVoter()) {
- ++voterCount;
- }
- // Nodes may be arbiters or electable, or neither, but never both.
- if (memberI.isArbiter()) {
- ++arbiterCount;
- }
- else if (memberI.getPriority() > 0) {
- ++electableCount;
+ for (size_t j = 0; j < _members.size(); ++j) {
+ if (i == j)
+ continue;
+ const MemberConfig& memberJ = _members[j];
+ if (memberI.getId() == memberJ.getId()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "Found two member configurations with same "
+ << MemberConfig::kIdFieldName << " field, " << kMembersFieldName
+ << "." << i << "." << MemberConfig::kIdFieldName
+ << " == " << kMembersFieldName << "." << j << "."
+ << MemberConfig::kIdFieldName << " == " << memberI.getId());
}
- for (size_t j = 0; j < _members.size(); ++j) {
- if (i == j)
- continue;
- const MemberConfig& memberJ = _members[j];
- if (memberI.getId() == memberJ.getId()) {
- return Status(
- ErrorCodes::BadValue, str::stream() <<
- "Found two member configurations with same " <<
- MemberConfig::kIdFieldName << " field, " <<
- kMembersFieldName << "." << i << "." << MemberConfig::kIdFieldName <<
- " == " <<
- kMembersFieldName << "." << j << "." << MemberConfig::kIdFieldName <<
- " == " << memberI.getId());
- }
- if (memberI.getHostAndPort() == memberJ.getHostAndPort()) {
- return Status(
- ErrorCodes::BadValue, str::stream() <<
- "Found two member configurations with same " <<
- MemberConfig::kHostFieldName << " field, " <<
- kMembersFieldName << "." << i << "." << MemberConfig::kHostFieldName <<
- " == " <<
- kMembersFieldName << "." << j << "." << MemberConfig::kHostFieldName <<
- " == " << memberI.getHostAndPort().toString());
- }
+ if (memberI.getHostAndPort() == memberJ.getHostAndPort()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Found two member configurations with same "
+ << MemberConfig::kHostFieldName << " field, "
+ << kMembersFieldName << "." << i << "."
+ << MemberConfig::kHostFieldName
+ << " == " << kMembersFieldName << "." << j << "."
+ << MemberConfig::kHostFieldName
+ << " == " << memberI.getHostAndPort().toString());
}
}
+ }
- if (localhostCount != 0 && localhostCount != _members.size()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Either all host names in a replica set configuration must be localhost "
- "references, or none must be; found " << localhostCount << " out of " <<
- _members.size());
- }
+ if (localhostCount != 0 && localhostCount != _members.size()) {
+ return Status(
+ ErrorCodes::BadValue,
+ str::stream()
+ << "Either all host names in a replica set configuration must be localhost "
+ "references, or none must be; found " << localhostCount << " out of "
+ << _members.size());
+ }
- if (voterCount > kMaxVotingMembers || voterCount == 0) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Replica set configuration contains " << voterCount <<
- " voting members, but must be at least 1 and no more than " <<
- kMaxVotingMembers);
- }
+ if (voterCount > kMaxVotingMembers || voterCount == 0) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Replica set configuration contains " << voterCount
+ << " voting members, but must be at least 1 and no more than "
+ << kMaxVotingMembers);
+ }
- if (electableCount == 0) {
- return Status(ErrorCodes::BadValue, "Replica set configuration must contain at least "
- "one non-arbiter member with priority > 0");
- }
+ if (electableCount == 0) {
+ return Status(ErrorCodes::BadValue,
+ "Replica set configuration must contain at least "
+ "one non-arbiter member with priority > 0");
+ }
- // TODO(schwerin): Validate satisfiability of write modes? Omitting for backwards
- // compatibility.
- if (_defaultWriteConcern.wMode.empty()) {
- if (_defaultWriteConcern.wNumNodes == 0) {
- return Status(ErrorCodes::BadValue,
- "Default write concern mode must wait for at least 1 member");
- }
+ // TODO(schwerin): Validate satisfiability of write modes? Omitting for backwards
+ // compatibility.
+ if (_defaultWriteConcern.wMode.empty()) {
+ if (_defaultWriteConcern.wNumNodes == 0) {
+ return Status(ErrorCodes::BadValue,
+ "Default write concern mode must wait for at least 1 member");
}
- else {
- if (WriteConcernOptions::kMajority != _defaultWriteConcern.wMode &&
- !findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Default write concern requires undefined write mode " <<
- _defaultWriteConcern.wMode);
- }
- }
-
- if (_protocolVersion < 0 || _protocolVersion > std::numeric_limits<int>::max()) {
- return Status(ErrorCodes::BadValue, str::stream() << kProtocolVersionFieldName <<
- " field value of " << _protocolVersion << " is out of range");
+ } else {
+ if (WriteConcernOptions::kMajority != _defaultWriteConcern.wMode &&
+ !findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Default write concern requires undefined write mode "
+ << _defaultWriteConcern.wMode);
}
-
- return Status::OK();
}
- Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const {
- if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority) {
- StatusWith<ReplicaSetTagPattern> tagPatternStatus =
- findCustomWriteMode(writeConcern.wMode);
- if (!tagPatternStatus.isOK()) {
- return tagPatternStatus.getStatus();
- }
+ if (_protocolVersion < 0 || _protocolVersion > std::numeric_limits<int>::max()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kProtocolVersionFieldName << " field value of "
+ << _protocolVersion << " is out of range");
+ }
- ReplicaSetTagMatch matcher(tagPatternStatus.getValue());
- for (size_t j = 0; j < _members.size(); ++j) {
- const MemberConfig& memberConfig = _members[j];
- for (MemberConfig::TagIterator it = memberConfig.tagsBegin();
- it != memberConfig.tagsEnd(); ++it) {
- if (matcher.update(*it)) {
- return Status::OK();
- }
+ return Status::OK();
+}
+
+Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const {
+ if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority) {
+ StatusWith<ReplicaSetTagPattern> tagPatternStatus = findCustomWriteMode(writeConcern.wMode);
+ if (!tagPatternStatus.isOK()) {
+ return tagPatternStatus.getStatus();
+ }
+
+ ReplicaSetTagMatch matcher(tagPatternStatus.getValue());
+ for (size_t j = 0; j < _members.size(); ++j) {
+ const MemberConfig& memberConfig = _members[j];
+ for (MemberConfig::TagIterator it = memberConfig.tagsBegin();
+ it != memberConfig.tagsEnd();
+ ++it) {
+ if (matcher.update(*it)) {
+ return Status::OK();
}
}
- // Even if all the nodes in the set had a given write it still would not satisfy this
- // write concern mode.
- return Status(ErrorCodes::CannotSatisfyWriteConcern,
- str::stream() << "Not enough nodes match write concern mode \""
- << writeConcern.wMode << "\"");
}
- else {
- int nodesRemaining = writeConcern.wNumNodes;
- for (size_t j = 0; j < _members.size(); ++j) {
- if (!_members[j].isArbiter()) { // Only count data-bearing nodes
- --nodesRemaining;
- if (nodesRemaining <= 0) {
- return Status::OK();
- }
+ // Even if all the nodes in the set had a given write it still would not satisfy this
+ // write concern mode.
+ return Status(ErrorCodes::CannotSatisfyWriteConcern,
+ str::stream() << "Not enough nodes match write concern mode \""
+ << writeConcern.wMode << "\"");
+ } else {
+ int nodesRemaining = writeConcern.wNumNodes;
+ for (size_t j = 0; j < _members.size(); ++j) {
+ if (!_members[j].isArbiter()) { // Only count data-bearing nodes
+ --nodesRemaining;
+ if (nodesRemaining <= 0) {
+ return Status::OK();
}
}
- return Status(ErrorCodes::CannotSatisfyWriteConcern, "Not enough data-bearing nodes");
}
+ return Status(ErrorCodes::CannotSatisfyWriteConcern, "Not enough data-bearing nodes");
}
+}
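// Sketch of the two paths above, using the WriteConcernOptions fields this
// function reads (concrete values hypothetical):
//
//     WriteConcernOptions wc;
//     wc.wNumNodes = 3;             // numeric path: needs 3 data-bearing members
//     Status s1 = config.checkIfWriteConcernCanBeSatisfied(wc);
//
//     wc.wNumNodes = 0;
//     wc.wMode = "twoDataCenters";  // named path: mode must match enough tagged members
//     Status s2 = config.checkIfWriteConcernCanBeSatisfied(wc);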
- const MemberConfig& ReplicaSetConfig::getMemberAt(size_t i) const {
- invariant(i < _members.size());
- return _members[i];
- }
+const MemberConfig& ReplicaSetConfig::getMemberAt(size_t i) const {
+ invariant(i < _members.size());
+ return _members[i];
+}
- const MemberConfig* ReplicaSetConfig::findMemberByID(int id) const {
- for (std::vector<MemberConfig>::const_iterator it = _members.begin();
- it != _members.end(); ++it) {
- if (it->getId() == id) {
- return &(*it);
- }
+const MemberConfig* ReplicaSetConfig::findMemberByID(int id) const {
+ for (std::vector<MemberConfig>::const_iterator it = _members.begin(); it != _members.end();
+ ++it) {
+ if (it->getId() == id) {
+ return &(*it);
}
- return NULL;
}
+ return NULL;
+}
- const int ReplicaSetConfig::findMemberIndexByHostAndPort(const HostAndPort& hap) const {
- int x = 0;
- for (std::vector<MemberConfig>::const_iterator it = _members.begin();
- it != _members.end(); ++it) {
-
- if (it->getHostAndPort() == hap) {
- return x;
- }
- ++x;
+const int ReplicaSetConfig::findMemberIndexByHostAndPort(const HostAndPort& hap) const {
+ int x = 0;
+ for (std::vector<MemberConfig>::const_iterator it = _members.begin(); it != _members.end();
+ ++it) {
+ if (it->getHostAndPort() == hap) {
+ return x;
}
- return -1;
+ ++x;
}
+ return -1;
+}
- const int ReplicaSetConfig::findMemberIndexByConfigId(long long configId) const {
- int x = 0;
- for (const auto& member : _members) {
-
- if (member.getId() == configId) {
- return x;
- }
- ++x;
+const int ReplicaSetConfig::findMemberIndexByConfigId(long long configId) const {
+ int x = 0;
+ for (const auto& member : _members) {
+ if (member.getId() == configId) {
+ return x;
}
- return -1;
+ ++x;
}
-
- const MemberConfig* ReplicaSetConfig::findMemberByHostAndPort(const HostAndPort& hap) const {
- int idx = findMemberIndexByHostAndPort(hap);
- return idx != -1 ? &getMemberAt(idx) : NULL;
+ return -1;
+}
+
+const MemberConfig* ReplicaSetConfig::findMemberByHostAndPort(const HostAndPort& hap) const {
+ int idx = findMemberIndexByHostAndPort(hap);
+ return idx != -1 ? &getMemberAt(idx) : NULL;
+}
+
+ReplicaSetTag ReplicaSetConfig::findTag(StringData key, StringData value) const {
+ return _tagConfig.findTag(key, value);
+}
+
+StatusWith<ReplicaSetTagPattern> ReplicaSetConfig::findCustomWriteMode(
+ StringData patternName) const {
+ const StringMap<ReplicaSetTagPattern>::const_iterator iter =
+ _customWriteConcernModes.find(patternName);
+ if (iter == _customWriteConcernModes.end()) {
+ return StatusWith<ReplicaSetTagPattern>(
+ ErrorCodes::UnknownReplWriteConcern,
+ str::stream() << "No write concern mode named '" << escape(patternName.toString())
+ << "' found in replica set configuration");
}
-
- ReplicaSetTag ReplicaSetConfig::findTag(StringData key, StringData value) const {
- return _tagConfig.findTag(key, value);
+ return StatusWith<ReplicaSetTagPattern>(iter->second);
+}
+
+void ReplicaSetConfig::_calculateMajorities() {
+ const int voters = std::count_if(_members.begin(),
+ _members.end(),
+ stdx::bind(&MemberConfig::isVoter, stdx::placeholders::_1));
+ const int arbiters =
+ std::count_if(_members.begin(),
+ _members.end(),
+ stdx::bind(&MemberConfig::isArbiter, stdx::placeholders::_1));
+ _totalVotingMembers = voters;
+ _majorityVoteCount = voters / 2 + 1;
+ _writeMajority = std::min(_majorityVoteCount, voters - arbiters);
+}
+
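// Worked example (hypothetical set): with 5 voters of which 2 are arbiters,
// _majorityVoteCount = 5 / 2 + 1 = 3 and _writeMajority = min(3, 5 - 2) = 3.
// With 3 voters of which 2 are arbiters, _writeMajority = min(2, 3 - 2) = 1,
// which is why the $majority mode below falls back to all non-arbiter voters
// when most voters are arbiters.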
+void ReplicaSetConfig::_addInternalWriteConcernModes() {
+ // $majority: the majority of voting nodes or all non-arbiter voting nodes if
+ // the majority of voting nodes are arbiters.
+ ReplicaSetTagPattern pattern = _tagConfig.makePattern();
+
+ Status status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, MemberConfig::kInternalVoterTagName, _writeMajority);
+
+ if (status.isOK()) {
+ _customWriteConcernModes[kMajorityWriteConcernModeName] = pattern;
+ } else if (status != ErrorCodes::NoSuchKey) {
+ // NoSuchKey means we have no $voter-tagged nodes in this config;
+ // other errors are unexpected.
+ fassert(28693, status);
}
- StatusWith<ReplicaSetTagPattern> ReplicaSetConfig::findCustomWriteMode(
- StringData patternName) const {
-
- const StringMap<ReplicaSetTagPattern>::const_iterator iter = _customWriteConcernModes.find(
- patternName);
- if (iter == _customWriteConcernModes.end()) {
- return StatusWith<ReplicaSetTagPattern>(
- ErrorCodes::UnknownReplWriteConcern,
- str::stream() <<
- "No write concern mode named '" << escape(patternName.toString()) <<
- "' found in replica set configuration");
- }
- return StatusWith<ReplicaSetTagPattern>(iter->second);
+ // $stepDownCheck: one electable node plus ourselves
+ pattern = _tagConfig.makePattern();
+ status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, MemberConfig::kInternalElectableTagName, 2);
+ if (status.isOK()) {
+ _customWriteConcernModes[kStepDownCheckWriteConcernModeName] = pattern;
+ } else if (status != ErrorCodes::NoSuchKey) {
+ // NoSuchKey means we have no $electable-tagged nodes in this config;
+        // other errors are unexpected.
+ fassert(28694, status);
}
+}
- void ReplicaSetConfig::_calculateMajorities() {
- const int voters = std::count_if(
- _members.begin(),
- _members.end(),
- stdx::bind(&MemberConfig::isVoter, stdx::placeholders::_1));
- const int arbiters = std::count_if(
- _members.begin(),
- _members.end(),
- stdx::bind(&MemberConfig::isArbiter, stdx::placeholders::_1));
- _totalVotingMembers = voters;
- _majorityVoteCount = voters / 2 + 1;
- _writeMajority = std::min(_majorityVoteCount, voters - arbiters);
- }
-
- void ReplicaSetConfig::_addInternalWriteConcernModes() {
- // $majority: the majority of voting nodes or all non-arbiter voting nodes if
- // the majority of voting nodes are arbiters.
- ReplicaSetTagPattern pattern = _tagConfig.makePattern();
-
- Status status = _tagConfig.addTagCountConstraintToPattern(
- &pattern,
- MemberConfig::kInternalVoterTagName,
- _writeMajority);
-
- if (status.isOK()) {
- _customWriteConcernModes[kMajorityWriteConcernModeName] = pattern;
- }
- else if (status != ErrorCodes::NoSuchKey) {
- // NoSuchKey means we have no $voter-tagged nodes in this config;
- // other errors are unexpected.
- fassert(28693, status);
- }
+BSONObj ReplicaSetConfig::toBSON() const {
+ BSONObjBuilder configBuilder;
+ configBuilder.append(kIdFieldName, _replSetName);
+ configBuilder.appendIntOrLL(kVersionFieldName, _version);
- // $stepDownCheck: one electable node plus ourselves
- pattern = _tagConfig.makePattern();
- status = _tagConfig.addTagCountConstraintToPattern(&pattern,
- MemberConfig::kInternalElectableTagName,
- 2);
- if (status.isOK()) {
- _customWriteConcernModes[kStepDownCheckWriteConcernModeName] = pattern;
- }
- else if (status != ErrorCodes::NoSuchKey) {
- // NoSuchKey means we have no $electable-tagged nodes in this config;
- // other errors are unexpected
- fassert(28694, status);
- }
+ BSONArrayBuilder members(configBuilder.subarrayStart(kMembersFieldName));
+ for (MemberIterator mem = membersBegin(); mem != membersEnd(); mem++) {
+ members.append(mem->toBSON(getTagConfig()));
}
-
- BSONObj ReplicaSetConfig::toBSON() const {
- BSONObjBuilder configBuilder;
- configBuilder.append(kIdFieldName, _replSetName);
- configBuilder.appendIntOrLL(kVersionFieldName, _version);
-
- BSONArrayBuilder members(configBuilder.subarrayStart(kMembersFieldName));
- for (MemberIterator mem = membersBegin(); mem != membersEnd(); mem++) {
- members.append(mem->toBSON(getTagConfig()));
- }
- members.done();
-
- BSONObjBuilder settingsBuilder(configBuilder.subobjStart(kSettingsFieldName));
- settingsBuilder.append(kChainingAllowedFieldName, _chainingAllowed);
- settingsBuilder.appendIntOrLL(kHeartbeatTimeoutFieldName, _heartbeatTimeoutPeriod.count());
-
- BSONObjBuilder gleModes(settingsBuilder.subobjStart(kGetLastErrorModesFieldName));
- for (StringMap<ReplicaSetTagPattern>::const_iterator mode =
- _customWriteConcernModes.begin();
- mode != _customWriteConcernModes.end();
- ++mode) {
- if (mode->first[0] == '$') {
- // Filter out internal modes
- continue;
- }
- BSONObjBuilder modeBuilder(gleModes.subobjStart(mode->first));
- for (ReplicaSetTagPattern::ConstraintIterator itr = mode->second.constraintsBegin();
- itr != mode->second.constraintsEnd();
- itr++) {
- modeBuilder.append(_tagConfig.getTagKey(ReplicaSetTag(itr->getKeyIndex(), 0)),
- itr->getMinCount());
- }
- modeBuilder.done();
- }
- gleModes.done();
-
- settingsBuilder.append(kGetLastErrorDefaultsFieldName, _defaultWriteConcern.toBSON());
- settingsBuilder.append(kProtocolVersionFieldName, _protocolVersion);
- settingsBuilder.done();
- return configBuilder.obj();
+ members.done();
+
+ BSONObjBuilder settingsBuilder(configBuilder.subobjStart(kSettingsFieldName));
+ settingsBuilder.append(kChainingAllowedFieldName, _chainingAllowed);
+ settingsBuilder.appendIntOrLL(kHeartbeatTimeoutFieldName, _heartbeatTimeoutPeriod.count());
+
+ BSONObjBuilder gleModes(settingsBuilder.subobjStart(kGetLastErrorModesFieldName));
+ for (StringMap<ReplicaSetTagPattern>::const_iterator mode = _customWriteConcernModes.begin();
+ mode != _customWriteConcernModes.end();
+ ++mode) {
+ if (mode->first[0] == '$') {
+ // Filter out internal modes
+ continue;
+ }
+ BSONObjBuilder modeBuilder(gleModes.subobjStart(mode->first));
+ for (ReplicaSetTagPattern::ConstraintIterator itr = mode->second.constraintsBegin();
+ itr != mode->second.constraintsEnd();
+ itr++) {
+ modeBuilder.append(_tagConfig.getTagKey(ReplicaSetTag(itr->getKeyIndex(), 0)),
+ itr->getMinCount());
+ }
+ modeBuilder.done();
}
-
- std::vector<std::string> ReplicaSetConfig::getWriteConcernNames() const {
- std::vector<std::string> names;
- for (StringMap<ReplicaSetTagPattern>::const_iterator mode =
- _customWriteConcernModes.begin();
- mode != _customWriteConcernModes.end();
- ++mode) {
- names.push_back(mode->first);
- }
- return names;
- }
+ gleModes.done();
+
+ settingsBuilder.append(kGetLastErrorDefaultsFieldName, _defaultWriteConcern.toBSON());
+ settingsBuilder.append(kProtocolVersionFieldName, _protocolVersion);
+ settingsBuilder.done();
+ return configBuilder.obj();
+}
+
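// Round-trip sketch (illustrative): a successfully initialized config re-parses
// from its own serialization. Note that the loop above skips the internal
// $-prefixed modes added by _addInternalWriteConcernModes():
//
//     ReplicaSetConfig copy;
//     invariant(copy.initialize(config.toBSON()).isOK());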
+std::vector<std::string> ReplicaSetConfig::getWriteConcernNames() const {
+ std::vector<std::string> names;
+ for (StringMap<ReplicaSetTagPattern>::const_iterator mode = _customWriteConcernModes.begin();
+ mode != _customWriteConcernModes.end();
+ ++mode) {
+ names.push_back(mode->first);
+ }
+ return names;
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replica_set_config.h b/src/mongo/db/repl/replica_set_config.h
index fcd880705ea..2db58856234 100644
--- a/src/mongo/db/repl/replica_set_config.h
+++ b/src/mongo/db/repl/replica_set_config.h
@@ -41,219 +41,249 @@
namespace mongo {
- class BSONObj;
+class BSONObj;
namespace repl {
+/**
+ * Representation of the configuration information about a particular replica set.
+ */
+class ReplicaSetConfig {
+public:
+ typedef std::vector<MemberConfig>::const_iterator MemberIterator;
+
+ static const std::string kVersionFieldName;
+ static const std::string kMajorityWriteConcernModeName;
+
+ static const size_t kMaxMembers = 50;
+ static const size_t kMaxVotingMembers = 7;
+ static const Seconds kDefaultHeartbeatTimeoutPeriod;
+
+ ReplicaSetConfig();
+
+ /**
+ * Initializes this ReplicaSetConfig from the contents of "cfg".
+ */
+ Status initialize(const BSONObj& cfg);
+
+ /**
+ * Returns true if this object has been successfully initialized or copied from
+ * an initialized object.
+ */
+ bool isInitialized() const {
+ return _isInitialized;
+ }
+
+ /**
+ * Performs basic consistency checks on the replica set configuration.
+ */
+ Status validate() const;
+
+ /**
+ * Checks if this configuration can satisfy the given write concern.
+ *
+ * Things that are taken into consideration include:
+ * 1. If the set has enough data-bearing members.
+ * 2. If the write concern mode exists.
+ * 3. If there are enough members for the write concern mode specified.
+ */
+ Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
+
+ /**
+ * Gets the version of this configuration.
+ *
+ * The version number sequences configurations of the replica set, so that
+ * nodes may distinguish between "older" and "newer" configurations.
+ */
+ long long getConfigVersion() const {
+ return _version;
+ }
+
+ /**
+ * Gets the name (_id field value) of the replica set described by this configuration.
+ */
+ const std::string& getReplSetName() const {
+ return _replSetName;
+ }
+
+ /**
+ * Gets the number of members in this configuration.
+ */
+ int getNumMembers() const {
+ return _members.size();
+ }
+
+ /**
+ * Gets a begin iterator over the MemberConfigs stored in this ReplicaSetConfig.
+ */
+ MemberIterator membersBegin() const {
+ return _members.begin();
+ }
+
+ /**
+ * Gets an end iterator over the MemberConfigs stored in this ReplicaSetConfig.
+ */
+ MemberIterator membersEnd() const {
+ return _members.end();
+ }
+
+ /**
+ * Access a MemberConfig element by index.
+ */
+ const MemberConfig& getMemberAt(size_t i) const;
+
+ /**
+ * Returns a pointer to the MemberConfig corresponding to the member with the given _id in
+ * the config, or NULL if there is no member with that ID.
+ */
+ const MemberConfig* findMemberByID(int id) const;
+
+ /**
+ * Returns a pointer to the MemberConfig corresponding to the member with the given
+ * HostAndPort in the config, or NULL if there is no member with that address.
+ */
+ const MemberConfig* findMemberByHostAndPort(const HostAndPort& hap) const;
+
+ /**
+ * Returns a MemberConfig index position corresponding to the member with the given
+ * HostAndPort in the config, or -1 if there is no member with that address.
+ */
+ const int findMemberIndexByHostAndPort(const HostAndPort& hap) const;
+
+ /**
+ * Returns a MemberConfig index position corresponding to the member with the given
+ * _id in the config, or -1 if there is no member with that address.
+     * _id in the config, or -1 if there is no member with that _id.
+ const int findMemberIndexByConfigId(long long configId) const;
+
+ /**
+ * Gets the default write concern for the replica set described by this configuration.
+ */
+ const WriteConcernOptions& getDefaultWriteConcern() const {
+ return _defaultWriteConcern;
+ }
+
+ /**
+     * Gets the amount of time to wait for a response to heartbeats sent to other
+ * nodes in the replica set.
+ */
+ Seconds getHeartbeatTimeoutPeriod() const {
+ return _heartbeatTimeoutPeriod;
+ }
+
+ /**
+     * Gets the amount of time to wait for a response to heartbeats sent to other
+     * nodes in the replica set, as above, but returns a Milliseconds object
+     * instead of a Seconds object.
+ */
+ Milliseconds getHeartbeatTimeoutPeriodMillis() const {
+ return _heartbeatTimeoutPeriod;
+ }
+
+ /**
+ * Gets the number of votes required to win an election.
+ */
+ int getMajorityVoteCount() const {
+ return _majorityVoteCount;
+ }
+
+ /**
+ * Gets the number of voters.
+ */
+ int getTotalVotingMembers() const {
+ return _totalVotingMembers;
+ }
+
+ /**
+ * Returns true if automatic (not explicitly set) chaining is allowed.
+ */
+ bool isChainingAllowed() const {
+ return _chainingAllowed;
+ }
+
+ /**
+ * Returns a ReplicaSetTag with the given "key" and "value", or an invalid
+ * tag if the configuration describes no such tag.
+ */
+ ReplicaSetTag findTag(StringData key, StringData value) const;
+
+ /**
+ * Returns the pattern corresponding to "patternName" in this configuration.
+ * If "patternName" is not a valid pattern in this configuration, returns
+     * ErrorCodes::UnknownReplWriteConcern.
+ */
+ StatusWith<ReplicaSetTagPattern> findCustomWriteMode(StringData patternName) const;
+
+ /**
+     * Returns the "tags configuration" for this replica set.
+ *
+ * NOTE(schwerin): Not clear if this should be used other than for reporting/debugging.
+ */
+ const ReplicaSetTagConfig& getTagConfig() const {
+ return _tagConfig;
+ }
+
+ /**
+ * Returns the config as a BSONObj.
+ */
+ BSONObj toBSON() const;
+
+ /**
+ * Returns a vector of strings which are the names of the WriteConcernModes.
+ * Currently used in unit tests to compare two configs.
+ */
+ std::vector<std::string> getWriteConcernNames() const;
+
+ /**
+ * Returns the number of voting data-bearing members that must acknowledge a write
+ * in order to satisfy a write concern of {w: "majority"}.
+ */
+ int getWriteMajority() const {
+ return _writeMajority;
+ }
+
+ /**
+ * Gets the protocol version for this configuration.
+ *
+ * The protocol version number currently determines what election protocol is used by the
+ * cluster; 0 is the default and indicates the old 3.0 election protocol.
+ */
+ long long getProtocolVersion() const {
+ return _protocolVersion;
+ }
+
+private:
+ /**
+ * Parses the "settings" subdocument of a replica set configuration.
+ */
+ Status _parseSettingsSubdocument(const BSONObj& settings);
+
+ /**
+ * Calculates and stores the majority for electing a primary (_majorityVoteCount).
+ */
+ void _calculateMajorities();
+
/**
- * Representation of the configuration information about a particular replica set.
+ * Adds internal write concern modes to the getLastErrorModes list.
*/
- class ReplicaSetConfig {
- public:
- typedef std::vector<MemberConfig>::const_iterator MemberIterator;
-
- static const std::string kVersionFieldName;
- static const std::string kMajorityWriteConcernModeName;
-
- static const size_t kMaxMembers = 50;
- static const size_t kMaxVotingMembers = 7;
- static const Seconds kDefaultHeartbeatTimeoutPeriod;
-
- ReplicaSetConfig();
- /**
- * Initializes this ReplicaSetConfig from the contents of "cfg".
- */
- Status initialize(const BSONObj& cfg);
-
- /**
- * Returns true if this object has been successfully initialized or copied from
- * an initialized object.
- */
- bool isInitialized() const { return _isInitialized; }
-
- /**
- * Performs basic consistency checks on the replica set configuration.
- */
- Status validate() const;
-
- /**
- * Checks if this configuration can satisfy the given write concern.
- *
- * Things that are taken into consideration include:
- * 1. If the set has enough data-bearing members.
- * 2. If the write concern mode exists.
- * 3. If there are enough members for the write concern mode specified.
- */
- Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
-
- /**
- * Gets the version of this configuration.
- *
- * The version number sequences configurations of the replica set, so that
- * nodes may distinguish between "older" and "newer" configurations.
- */
- long long getConfigVersion() const { return _version; }
-
- /**
- * Gets the name (_id field value) of the replica set described by this configuration.
- */
- const std::string& getReplSetName() const { return _replSetName; }
-
- /**
- * Gets the number of members in this configuration.
- */
- int getNumMembers() const { return _members.size(); }
-
- /**
- * Gets a begin iterator over the MemberConfigs stored in this ReplicaSetConfig.
- */
- MemberIterator membersBegin() const { return _members.begin(); }
-
- /**
- * Gets an end iterator over the MemberConfigs stored in this ReplicaSetConfig.
- */
- MemberIterator membersEnd() const { return _members.end(); }
-
- /**
- * Access a MemberConfig element by index.
- */
- const MemberConfig& getMemberAt(size_t i) const;
-
- /**
- * Returns a pointer to the MemberConfig corresponding to the member with the given _id in
- * the config, or NULL if there is no member with that ID.
- */
- const MemberConfig* findMemberByID(int id) const;
-
- /**
- * Returns a pointer to the MemberConfig corresponding to the member with the given
- * HostAndPort in the config, or NULL if there is no member with that address.
- */
- const MemberConfig* findMemberByHostAndPort(const HostAndPort& hap) const;
-
- /**
- * Returns a MemberConfig index position corresponding to the member with the given
- * HostAndPort in the config, or -1 if there is no member with that address.
- */
- const int findMemberIndexByHostAndPort(const HostAndPort& hap) const;
-
- /**
- * Returns a MemberConfig index position corresponding to the member with the given
- * _id in the config, or -1 if there is no member with that address.
- */
- const int findMemberIndexByConfigId(long long configId) const;
-
- /**
- * Gets the default write concern for the replica set described by this configuration.
- */
- const WriteConcernOptions& getDefaultWriteConcern() const { return _defaultWriteConcern; }
-
- /**
- * Gets the amount of time to wait for a response to hearbeats sent to other
- * nodes in the replica set.
- */
- Seconds getHeartbeatTimeoutPeriod() const { return _heartbeatTimeoutPeriod; }
-
- /**
- * Gets the amount of time to wait for a response to hearbeats sent to other
- * nodes in the replica set, as above, but returns a Milliseconds instead of
- * Seconds object.
- */
- Milliseconds getHeartbeatTimeoutPeriodMillis() const {
- return _heartbeatTimeoutPeriod;
- }
-
- /**
- * Gets the number of votes required to win an election.
- */
- int getMajorityVoteCount() const { return _majorityVoteCount; }
-
- /**
- * Gets the number of voters.
- */
- int getTotalVotingMembers() const { return _totalVotingMembers; }
-
- /**
- * Returns true if automatic (not explicitly set) chaining is allowed.
- */
- bool isChainingAllowed() const { return _chainingAllowed; }
-
- /**
- * Returns a ReplicaSetTag with the given "key" and "value", or an invalid
- * tag if the configuration describes no such tag.
- */
- ReplicaSetTag findTag(StringData key, StringData value) const;
-
- /**
- * Returns the pattern corresponding to "patternName" in this configuration.
- * If "patternName" is not a valid pattern in this configuration, returns
- * ErrorCodes::NoSuchKey.
- */
- StatusWith<ReplicaSetTagPattern> findCustomWriteMode(StringData patternName) const;
-
- /**
- * Returns the "tags configuration" for this replicaset.
- *
- * NOTE(schwerin): Not clear if this should be used other than for reporting/debugging.
- */
- const ReplicaSetTagConfig& getTagConfig() const { return _tagConfig; }
-
- /**
- * Returns the config as a BSONObj.
- */
- BSONObj toBSON() const;
-
- /**
- * Returns a vector of strings which are the names of the WriteConcernModes.
- * Currently used in unit tests to compare two configs.
- */
- std::vector<std::string> getWriteConcernNames() const;
-
- /**
- * Returns the number of voting data-bearing members that must acknowledge a write
- * in order to satisfy a write concern of {w: "majority"}.
- */
- int getWriteMajority() const { return _writeMajority; }
-
- /**
- * Gets the protocol version for this configuration.
- *
- * The protocol version number currently determines what election protocol is used by the
- * cluster; 0 is the default and indicates the old 3.0 election protocol.
- */
- long long getProtocolVersion() const { return _protocolVersion; }
-
- private:
- /**
- * Parses the "settings" subdocument of a replica set configuration.
- */
- Status _parseSettingsSubdocument(const BSONObj& settings);
-
- /**
- * Calculates and stores the majority for electing a primary (_majorityVoteCount).
- */
- void _calculateMajorities();
-
- /**
- * Adds internal write concern modes to the getLastErrorModes list.
- */
- void _addInternalWriteConcernModes();
-
- bool _isInitialized;
- long long _version;
- std::string _replSetName;
- std::vector<MemberConfig> _members;
- WriteConcernOptions _defaultWriteConcern;
- Seconds _heartbeatTimeoutPeriod;
- bool _chainingAllowed;
- int _majorityVoteCount;
- int _writeMajority;
- int _totalVotingMembers;
- ReplicaSetTagConfig _tagConfig;
- StringMap<ReplicaSetTagPattern> _customWriteConcernModes;
- long long _protocolVersion;
- };
+ void _addInternalWriteConcernModes();
+
+ bool _isInitialized;
+ long long _version;
+ std::string _replSetName;
+ std::vector<MemberConfig> _members;
+ WriteConcernOptions _defaultWriteConcern;
+ Seconds _heartbeatTimeoutPeriod;
+ bool _chainingAllowed;
+ int _majorityVoteCount;
+ int _writeMajority;
+ int _totalVotingMembers;
+ ReplicaSetTagConfig _tagConfig;
+ StringMap<ReplicaSetTagPattern> _customWriteConcernModes;
+ long long _protocolVersion;
+};
} // namespace repl
diff --git a/src/mongo/db/repl/replica_set_config_checks.cpp b/src/mongo/db/repl/replica_set_config_checks.cpp
index 7b97d3679a3..6b972063c6a 100644
--- a/src/mongo/db/repl/replica_set_config_checks.cpp
+++ b/src/mongo/db/repl/replica_set_config_checks.cpp
@@ -40,247 +40,234 @@ namespace mongo {
namespace repl {
namespace {
- /**
- * Finds the index of the one member configuration in "newConfig" that corresponds
- * to the current node (as identified by "externalState").
- *
- * Returns an error if the current node does not appear or appears multiple times in
- * "newConfig".
- */
- StatusWith<int> findSelfInConfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
-
- std::vector<ReplicaSetConfig::MemberIterator> meConfigs;
- for (ReplicaSetConfig::MemberIterator iter = newConfig.membersBegin();
- iter != newConfig.membersEnd();
- ++iter) {
- if (externalState->isSelf(iter->getHostAndPort())) {
- meConfigs.push_back(iter);
- }
- }
- if (meConfigs.empty()) {
- return StatusWith<int>(ErrorCodes::NodeNotFound, str::stream() <<
- "No host described in new configuration " <<
- newConfig.getConfigVersion() << " for replica set " <<
- newConfig.getReplSetName() << " maps to this node");
- }
- if (meConfigs.size() > 1) {
- str::stream message;
- message << "The hosts " << meConfigs.front()->getHostAndPort().toString();
- for (size_t i = 1; i < meConfigs.size() - 1; ++i) {
- message << ", " << meConfigs[i]->getHostAndPort().toString();
- }
- message << " and " << meConfigs.back()->getHostAndPort().toString() <<
- " all map to this node in new configuration version " <<
- newConfig.getConfigVersion() << " for replica set " << newConfig.getReplSetName();
- return StatusWith<int>(ErrorCodes::DuplicateKey, message);
+/**
+ * Finds the index of the one member configuration in "newConfig" that corresponds
+ * to the current node (as identified by "externalState").
+ *
+ * Returns an error if the current node does not appear or appears multiple times in
+ * "newConfig".
+ */
+StatusWith<int> findSelfInConfig(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig) {
+ std::vector<ReplicaSetConfig::MemberIterator> meConfigs;
+ for (ReplicaSetConfig::MemberIterator iter = newConfig.membersBegin();
+ iter != newConfig.membersEnd();
+ ++iter) {
+ if (externalState->isSelf(iter->getHostAndPort())) {
+ meConfigs.push_back(iter);
}
-
- int myIndex = std::distance(newConfig.membersBegin(), meConfigs.front());
- invariant(myIndex >= 0 && myIndex < newConfig.getNumMembers());
- return StatusWith<int>(myIndex);
}
-
- /**
- * Checks if the node with the given config index is electable, returning a useful
- * status message if not.
- */
- Status checkElectable(const ReplicaSetConfig& newConfig, int configIndex) {
- const MemberConfig& myConfig = newConfig.getMemberAt(configIndex);
- if (!myConfig.isElectable()) {
- return Status(
- ErrorCodes::NodeNotElectable, str::stream() <<
- "This node, " << myConfig.getHostAndPort().toString() << ", with _id " <<
- myConfig.getId() << " is not electable under the new configuration version " <<
- newConfig.getConfigVersion() << " for replica set " <<
- newConfig.getReplSetName());
- }
- return Status::OK();
+ if (meConfigs.empty()) {
+ return StatusWith<int>(ErrorCodes::NodeNotFound,
+ str::stream() << "No host described in new configuration "
+ << newConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName() << " maps to this node");
}
-
- /**
- * Like findSelfInConfig, above, but also returns an error if the member configuration
- * for this node is not electable, as this is a requirement for nodes accepting
- * reconfig or initiate commands.
- */
- StatusWith<int> findSelfInConfigIfElectable(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
- StatusWith<int> result = findSelfInConfig(externalState, newConfig);
- if (result.isOK()) {
- Status status = checkElectable(newConfig, result.getValue());
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
+ if (meConfigs.size() > 1) {
+ str::stream message;
+ message << "The hosts " << meConfigs.front()->getHostAndPort().toString();
+ for (size_t i = 1; i < meConfigs.size() - 1; ++i) {
+ message << ", " << meConfigs[i]->getHostAndPort().toString();
}
- return result;
+ message << " and " << meConfigs.back()->getHostAndPort().toString()
+ << " all map to this node in new configuration version "
+ << newConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName();
+ return StatusWith<int>(ErrorCodes::DuplicateKey, message);
}
- /**
- * Compares two initialized and validated replica set configurations, and checks to
- * see if "newConfig" is a legal successor configuration to "oldConfig".
- *
- * Returns Status::OK() if "newConfig" may replace "oldConfig", or an indicative error
- * otherwise.
- *
- * The checks performed by this test are necessary, but may not be sufficient for
- * ensuring that "newConfig" is a legal successor to "oldConfig". For example,
- * a legal reconfiguration must typically be executed on a node that is currently
- * primary under "oldConfig" and is electable under "newConfig". Such checks that
- * require knowledge of which node is executing the configuration are out of scope
- * for this function.
- */
- Status validateOldAndNewConfigsCompatible(
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig) {
- invariant(newConfig.isInitialized());
- invariant(oldConfig.isInitialized());
-
- if (oldConfig.getConfigVersion() >= newConfig.getConfigVersion()) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New replica set configuration version must be greater than old, but " <<
- newConfig.getConfigVersion() << " is not greater than " <<
- oldConfig.getConfigVersion() << " for replica set " <<
- newConfig.getReplSetName());
- }
+ int myIndex = std::distance(newConfig.membersBegin(), meConfigs.front());
+ invariant(myIndex >= 0 && myIndex < newConfig.getNumMembers());
+ return StatusWith<int>(myIndex);
+}
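// For three matching members (hypothetical hosts a, b, c), the DuplicateKey
// message assembled above reads: "The hosts a:27017, b:27017 and c:27017 all
// map to this node in new configuration version 2 for replica set rs0".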
- if (oldConfig.getReplSetName() != newConfig.getReplSetName()) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations differ in replica set name; "
- "old was " << oldConfig.getReplSetName() << ", and new is " <<
- newConfig.getReplSetName());
- }
-
- //
- // For every member config mNew in newConfig, if there exists member config mOld
- // in oldConfig such that mNew.getHostAndPort() == mOld.getHostAndPort(), it is required
- // that mNew.getId() == mOld.getId().
- //
- // Also, one may not use reconfig to change the value of the buildIndexes or
- // arbiterOnly flags.
- //
- for (ReplicaSetConfig::MemberIterator mNew = newConfig.membersBegin();
- mNew != newConfig.membersEnd();
- ++mNew) {
- for (ReplicaSetConfig::MemberIterator mOld = oldConfig.membersBegin();
- mOld != oldConfig.membersEnd();
- ++mOld) {
-
- const bool idsEqual = mOld->getId() == mNew->getId();
- const bool hostsEqual = mOld->getHostAndPort() == mNew->getHostAndPort();
- if (!idsEqual && !hostsEqual) {
- continue;
- }
- if (hostsEqual && !idsEqual) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations both have members with " <<
- MemberConfig::kHostFieldName << " of " <<
- mOld->getHostAndPort().toString() <<
- " but in the new configuration the " <<
- MemberConfig::kIdFieldName << " field is " <<
- mNew->getId() << " and in the old configuration it is " <<
- mOld->getId() <<
- " for replica set " << newConfig.getReplSetName());
- }
- // At this point, the _id and host fields are equal, so we're looking at the old and
- // new configurations for the same member node.
- const bool buildIndexesFlagsEqual =
- mOld->shouldBuildIndexes() == mNew->shouldBuildIndexes();
- if (!buildIndexesFlagsEqual) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations differ in the setting of the "
- "buildIndexes field for member " <<
- mOld->getHostAndPort().toString() <<
- "; to make this change, remove then re-add the member");
- }
- const bool arbiterFlagsEqual = mOld->isArbiter() == mNew->isArbiter();
- if (!arbiterFlagsEqual) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations differ in the setting of the "
- "arbiterOnly field for member " <<
- mOld->getHostAndPort().toString() <<
- "; to make this change, remove then re-add the member");
-
- }
- }
- }
- return Status::OK();
+/**
+ * Checks if the node with the given config index is electable, returning a useful
+ * status message if not.
+ */
+Status checkElectable(const ReplicaSetConfig& newConfig, int configIndex) {
+ const MemberConfig& myConfig = newConfig.getMemberAt(configIndex);
+ if (!myConfig.isElectable()) {
+ return Status(ErrorCodes::NodeNotElectable,
+ str::stream() << "This node, " << myConfig.getHostAndPort().toString()
+ << ", with _id " << myConfig.getId()
+ << " is not electable under the new configuration version "
+ << newConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName());
}
-} // namespace
+ return Status::OK();
+}
- StatusWith<int> validateConfigForStartUp(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig) {
- Status status = newConfig.validate();
+/**
+ * Like findSelfInConfig, above, but also returns an error if the member configuration
+ * for this node is not electable, as this is a requirement for nodes accepting
+ * reconfig or initiate commands.
+ */
+StatusWith<int> findSelfInConfigIfElectable(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig) {
+ StatusWith<int> result = findSelfInConfig(externalState, newConfig);
+ if (result.isOK()) {
+ Status status = checkElectable(newConfig, result.getValue());
if (!status.isOK()) {
return StatusWith<int>(status);
}
- if (oldConfig.isInitialized()) {
- status = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
- }
- return findSelfInConfig(externalState, newConfig);
}
+ return result;
+}
- StatusWith<int> validateConfigForInitiate(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
- Status status = newConfig.validate();
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
- if (newConfig.getConfigVersion() != 1) {
- return StatusWith<int>(
- ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() << "Configuration used to initiate a replica set must " <<
- " have version 1, but found " << newConfig.getConfigVersion());
- }
- return findSelfInConfigIfElectable(externalState, newConfig);
+/**
+ * Compares two initialized and validated replica set configurations, and checks to
+ * see if "newConfig" is a legal successor configuration to "oldConfig".
+ *
+ * Returns Status::OK() if "newConfig" may replace "oldConfig", or an indicative error
+ * otherwise.
+ *
+ * The checks performed by this test are necessary, but may not be sufficient for
+ * ensuring that "newConfig" is a legal successor to "oldConfig". For example,
+ * a legal reconfiguration must typically be executed on a node that is currently
+ * primary under "oldConfig" and is electable under "newConfig". Such checks that
+ * require knowledge of which node is executing the configuration are out of scope
+ * for this function.
+ */
+Status validateOldAndNewConfigsCompatible(const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig) {
+ invariant(newConfig.isInitialized());
+ invariant(oldConfig.isInitialized());
+
+ if (oldConfig.getConfigVersion() >= newConfig.getConfigVersion()) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New replica set configuration version must be greater than old, but "
+ << newConfig.getConfigVersion() << " is not greater than "
+ << oldConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName());
}
- StatusWith<int> validateConfigForReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig,
- bool force) {
+ if (oldConfig.getReplSetName() != newConfig.getReplSetName()) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream() << "New and old configurations differ in replica set name; "
+ "old was " << oldConfig.getReplSetName() << ", and new is "
+ << newConfig.getReplSetName());
+ }
- Status status = newConfig.validate();
- if (!status.isOK()) {
- return StatusWith<int>(status);
+ //
+ // For every member config mNew in newConfig, if there exists member config mOld
+ // in oldConfig such that mNew.getHostAndPort() == mOld.getHostAndPort(), it is required
+ // that mNew.getId() == mOld.getId().
+ //
+ // Also, one may not use reconfig to change the value of the buildIndexes or
+ // arbiterOnly flags.
+ //
+ for (ReplicaSetConfig::MemberIterator mNew = newConfig.membersBegin();
+ mNew != newConfig.membersEnd();
+ ++mNew) {
+ for (ReplicaSetConfig::MemberIterator mOld = oldConfig.membersBegin();
+ mOld != oldConfig.membersEnd();
+ ++mOld) {
+ const bool idsEqual = mOld->getId() == mNew->getId();
+ const bool hostsEqual = mOld->getHostAndPort() == mNew->getHostAndPort();
+ if (!idsEqual && !hostsEqual) {
+ continue;
+ }
+ if (hostsEqual && !idsEqual) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New and old configurations both have members with "
+ << MemberConfig::kHostFieldName << " of "
+ << mOld->getHostAndPort().toString()
+ << " but in the new configuration the "
+ << MemberConfig::kIdFieldName << " field is " << mNew->getId()
+ << " and in the old configuration it is " << mOld->getId()
+ << " for replica set " << newConfig.getReplSetName());
+ }
+ // At this point, the _id and host fields are equal, so we're looking at the old and
+ // new configurations for the same member node.
+ const bool buildIndexesFlagsEqual =
+ mOld->shouldBuildIndexes() == mNew->shouldBuildIndexes();
+ if (!buildIndexesFlagsEqual) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New and old configurations differ in the setting of the "
+ "buildIndexes field for member "
+ << mOld->getHostAndPort().toString()
+ << "; to make this change, remove then re-add the member");
+ }
+ const bool arbiterFlagsEqual = mOld->isArbiter() == mNew->isArbiter();
+ if (!arbiterFlagsEqual) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New and old configurations differ in the setting of the "
+ "arbiterOnly field for member "
+ << mOld->getHostAndPort().toString()
+ << "; to make this change, remove then re-add the member");
+ }
}
+ }
+ return Status::OK();
+}
+} // namespace
+StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+ if (oldConfig.isInitialized()) {
status = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
if (!status.isOK()) {
return StatusWith<int>(status);
}
+ }
+ return findSelfInConfig(externalState, newConfig);
+}
- if (force) {
- return findSelfInConfig(externalState, newConfig);
- }
-
- return findSelfInConfigIfElectable(externalState, newConfig);
+StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+ if (newConfig.getConfigVersion() != 1) {
+ return StatusWith<int>(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream() << "Configuration used to initiate a replica set must "
+                                             << "have version 1, but found "
+ << newConfig.getConfigVersion());
}
+ return findSelfInConfigIfElectable(externalState, newConfig);
+}
- StatusWith<int> validateConfigForHeartbeatReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
+StatusWith<int> validateConfigForReconfig(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig,
+ bool force) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
- Status status = newConfig.validate();
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
+ status = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+ if (force) {
return findSelfInConfig(externalState, newConfig);
}
+ return findSelfInConfigIfElectable(externalState, newConfig);
+}
+
+StatusWith<int> validateConfigForHeartbeatReconfig(
+ ReplicationCoordinatorExternalState* externalState, const ReplicaSetConfig& newConfig) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+
+ return findSelfInConfig(externalState, newConfig);
+}
+
} // namespace repl
} // namespace mongo
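
Taken together, the four validators above follow one pattern: validate the new config in isolation, check successor compatibility where an old config exists, then locate this node (and, for user-initiated changes, check its electability). A minimal caller sketch, under the assumption of a coordinator that holds an external-state pointer and both configs (the surrounding names are illustrative, not part of this change):

    StatusWith<int> result =
        validateConfigForReconfig(externalState, oldConfig, newConfig, force);
    if (!result.isOK()) {
        return result.getStatus();  // e.g. NewReplicaSetConfigurationIncompatible
    }
    const int myIndex = result.getValue();  // this node's index in newConfig's member list

On success, the returned index is what a caller would use to look up its own MemberConfig in the new configuration.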
diff --git a/src/mongo/db/repl/replica_set_config_checks.h b/src/mongo/db/repl/replica_set_config_checks.h
index ba7ad90f3fc..adeb4758093 100644
--- a/src/mongo/db/repl/replica_set_config_checks.h
+++ b/src/mongo/db/repl/replica_set_config_checks.h
@@ -33,61 +33,57 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalState;
- class ReplicaSetConfig;
+class ReplicationCoordinatorExternalState;
+class ReplicaSetConfig;
- /**
- * Validates that "newConfig" is a legal configuration that the current
- * node can accept from its local storage during startup.
- *
- * Returns the index of the current node's member configuration in "newConfig",
- * on success, and an indicative error on failure.
- *
- * If "oldConfig" is valid, this method only succeds if "newConfig" is a legal
- * successor configuration.
- */
- StatusWith<int> validateConfigForStartUp(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig);
+/**
+ * Validates that "newConfig" is a legal configuration that the current
+ * node can accept from its local storage during startup.
+ *
+ * Returns the index of the current node's member configuration in "newConfig",
+ * on success, and an indicative error on failure.
+ *
+ * If "oldConfig" is valid, this method only succeds if "newConfig" is a legal
+ * successor configuration.
+ */
+StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig);
- /**
- * Validates that "newConfig" is a legal initial configuration that can be
- * initiated by the current node (identified via "externalState").
- *
- * Returns the index of the current node's member configuration in "newConfig",
- * on success, and an indicative error on failure.
- */
- StatusWith<int> validateConfigForInitiate(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig);
+/**
+ * Validates that "newConfig" is a legal initial configuration that can be
+ * initiated by the current node (identified via "externalState").
+ *
+ * Returns the index of the current node's member configuration in "newConfig",
+ * on success, and an indicative error on failure.
+ */
+StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig);
- /**
- * Validates that "newConfig" is a legal successor configuration to "oldConfig" that can be
- * initiated by the current node (identified via "externalState").
- *
- * If "force" is set to true, then compatibility with the old configuration and electability of
- * the current node in "newConfig" are not considered when determining if the reconfig is valid.
- *
- * Returns the index of the current node's member configuration in "newConfig",
- * on success, and an indicative error on failure.
- */
- StatusWith<int> validateConfigForReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig,
- bool force);
+/**
+ * Validates that "newConfig" is a legal successor configuration to "oldConfig" that can be
+ * initiated by the current node (identified via "externalState").
+ *
+ * If "force" is set to true, then compatibility with the old configuration and electability of
+ * the current node in "newConfig" are not considered when determining if the reconfig is valid.
+ *
+ * Returns the index of the current node's member configuration in "newConfig",
+ * on success, and an indicative error on failure.
+ */
+StatusWith<int> validateConfigForReconfig(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig,
+ bool force);
- /**
- * Validates that "newConfig" is an acceptable configuration when received in a heartbeat
- * reasponse.
- *
- * If the new configuration omits the current node, but is otherwise valid, returns
- * ErrorCodes::NodeNotFound. If the configuration is wholly valid, returns Status::OK().
- * Otherwise, returns some other error status.
- */
- StatusWith<int> validateConfigForHeartbeatReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig);
+/**
+ * Validates that "newConfig" is an acceptable configuration when received in a heartbeat
+ * response.
+ *
+ * If the new configuration omits the current node, but is otherwise valid, returns
+ * ErrorCodes::NodeNotFound. If the configuration is wholly valid, returns Status::OK().
+ * Otherwise, returns some other error status.
+ */
+StatusWith<int> validateConfigForHeartbeatReconfig(
+ ReplicationCoordinatorExternalState* externalState, const ReplicaSetConfig& newConfig);
} // namespace repl
} // namespace mongo
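
One behavioral detail in the last comment block deserves emphasis: for heartbeat reconfigs, NodeNotFound is a soft failure rather than a rejection. A hedged sketch of how a caller might branch on it (the removed-from-set reaction is an assumption about the caller, not something this header mandates):

    StatusWith<int> swIndex =
        validateConfigForHeartbeatReconfig(externalState, newConfig);
    if (swIndex.getStatus().code() == ErrorCodes::NodeNotFound) {
        // The config is well-formed but omits this node; the caller may still
        // install it and treat this node as removed from the set.
    } else if (!swIndex.isOK()) {
        // Any other error means the config itself is unusable.
        return;
    }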
diff --git a/src/mongo/db/repl/replica_set_config_checks_test.cpp b/src/mongo/db/repl/replica_set_config_checks_test.cpp
index efb39f5e0fa..d495421689d 100644
--- a/src/mongo/db/repl/replica_set_config_checks_test.cpp
+++ b/src/mongo/db/repl/replica_set_config_checks_test.cpp
@@ -40,660 +40,679 @@ namespace mongo {
namespace repl {
namespace {
- TEST(ValidateConfigForInitiate, VersionMustBe1) {
- ReplicationCoordinatorExternalStateMock rses;
- rses.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1")))));
- ASSERT_EQUALS(
- ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForInitiate(&rses, config).getStatus());
- }
-
- TEST(ValidateConfigForInitiate, MustFindSelf) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock notPresentExternalState;
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ReplicationCoordinatorExternalStateMock presentTwiceExternalState;
- presentTwiceExternalState.addSelf(HostAndPort("h3"));
- presentTwiceExternalState.addSelf(HostAndPort("h1"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForInitiate(&notPresentExternalState, config).getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey,
- validateConfigForInitiate(&presentTwiceExternalState, config).getStatus());
- ASSERT_EQUALS(1, unittest::assertGet(validateConfigForInitiate(&presentOnceExternalState,
- config)));
- }
-
- TEST(ValidateConfigForInitiate, SelfMustBeElectable) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
- validateConfigForInitiate(&presentOnceExternalState, config).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigVersionNumberMustBeHigherThanOld) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
-
- // Two configurations, identical except for version.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
-
- // Can reconfig from old to new.
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
-
-
- // Cannot reconfig from old to old (versions must be different).
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfig,
- true).getStatus());
-
- // Cannot reconfig from new to old (versions must increase).
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- newConfig,
- oldConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- newConfig,
- oldConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigMustNotChangeSetName) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
-
- // Two configurations, compatible except for set name.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs1" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- newConfig,
- oldConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigMustNotFlipBuildIndexesFlag) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
- ReplicaSetConfig oldConfigRefresh;
-
- // Three configurations, two compatible except that h2 flips the buildIndex flag.
- // The third, compatible with the first.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "buildIndexes" << false <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "buildIndexes" << true <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfigRefresh.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "buildIndexes" << false <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
- ASSERT_OK(oldConfigRefresh.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfigRefresh,
- false).getStatus());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
-
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigMustNotFlipArbiterFlag) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
- ReplicaSetConfig oldConfigRefresh;
-
- // Three configurations, two compatible except that h2 flips the arbiterOnly flag.
- // The third, compatible with the first.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "arbiterOnly" << false) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfigRefresh.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "arbiterOnly" << false) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
- ASSERT_OK(oldConfigRefresh.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfigRefresh,
- false).getStatus());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, HostAndIdRemappingRestricted) {
- // When reconfiguring a replica set, it is allowed to introduce (host, id) pairs
- // absent from the old config only when the hosts and ids were both individually
- // absent in the old config.
-
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig legalNewConfigWithNewHostAndId;
- ReplicaSetConfig illegalNewConfigReusingHost;
- ReplicaSetConfig illegalNewConfigReusingId;
-
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(oldConfig.validate());
-
- //
- // Here, the new config is valid because we've replaced (2, "h2") with
- // (4, "h4"), so neither the member _id or host name were reused.
- //
- ASSERT_OK(legalNewConfigWithNewHostAndId.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 4 << "host" << "h4") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(legalNewConfigWithNewHostAndId.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- legalNewConfigWithNewHostAndId,
- false).getStatus());
-
- //
- // Here, the new config is invalid because we've reused host name "h2" with
- // new _id 4.
- //
- ASSERT_OK(illegalNewConfigReusingHost.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 4 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(illegalNewConfigReusingHost.validate());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- illegalNewConfigReusingHost,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- illegalNewConfigReusingHost,
- true).getStatus());
- //
- // Here, the new config is valid, because all we've changed is the name of
- // the host representing _id 2.
- //
- ASSERT_OK(illegalNewConfigReusingId.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h4") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(illegalNewConfigReusingId.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- illegalNewConfigReusingId,
- false).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, MustFindSelf) {
- // Old and new config are same except for version change; this is just testing that we can
- // find ourself in the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock notPresentExternalState;
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ReplicationCoordinatorExternalStateMock presentThriceExternalState;
- presentThriceExternalState.addSelf(HostAndPort("h3"));
- presentThriceExternalState.addSelf(HostAndPort("h2"));
- presentThriceExternalState.addSelf(HostAndPort("h1"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForReconfig(&notPresentExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey,
- validateConfigForReconfig(&presentThriceExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- ASSERT_EQUALS(1, unittest::assertGet(validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- false)));
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForReconfig(&notPresentExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey,
- validateConfigForReconfig(&presentThriceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- ASSERT_EQUALS(1, unittest::assertGet(validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true)));
- }
-
- TEST(ValidateConfigForReconfig, SelfMustEndElectable) {
- // Old and new config are same except for version change and the electability of one node;
- // this is just testing that we must be electable in the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
- validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfig does not require electability.
- ASSERT_OK(validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForInitiate, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForInitiate will return a status indicating what is
- // wrong with the new config.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForInitiate(&presentOnceExternalState,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForReconfig will return a status indicating what is
- // wrong with the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForStartUp, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForStartUp will return a status indicating what is wrong
- // with the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForStartUp(&presentOnceExternalState,
- oldConfig,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForStartUp, OldAndNewConfigIncompatible) {
- // The new config is not compatible with the old config due to a member changing _ids. This
- // tests that validateConfigForStartUp will return a status indicating the incompatiblilty
- // between the old and new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForStartUp(&presentOnceExternalState,
- oldConfig,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForStartUp, OldAndNewConfigCompatible) {
- // The new config is compatible with the old config. This tests that
- // validateConfigForStartUp will return a Status::OK() indicating the validity of this
- // config change.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2" <<
- "priority" << 3) <<
- BSON("_id" << 1 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_OK(validateConfigForStartUp(&presentOnceExternalState,
- oldConfig,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForHeartbeatReconfig, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForHeartbeatReconfig will return a status indicating
- // what is wrong with the new config.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForHeartbeatReconfig(&presentOnceExternalState,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForHeartbeatReconfig, NewConfigValid) {
- // The new config is valid. This tests that validateConfigForHeartbeatReconfig will return
- // a Status::OK() indicating the validity of this config change.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_OK(validateConfigForHeartbeatReconfig(&presentOnceExternalState,
- newConfig).getStatus());
- }
-
- TEST(ValidateForReconfig, ForceStillNeedsValidConfig) {
- // The new config is invalid due to two nodes with the same _id value. This tests that
- // ValidateForReconfig fails with an invalid config, even if force is true.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateForReconfig, ForceStillNeedsSelfPresent) {
- // The new config does not contain self. This tests that ValidateForReconfig fails
- // if the member receiving it is absent from the config, even if force is true.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h3") <<
- BSON("_id" << 2 << "host" << "h4")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
+TEST(ValidateConfigForInitiate, VersionMustBe1) {
+ ReplicationCoordinatorExternalStateMock rses;
+ rses.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")))));
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForInitiate(&rses, config).getStatus());
+}
+
+TEST(ValidateConfigForInitiate, MustFindSelf) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock notPresentExternalState;
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ReplicationCoordinatorExternalStateMock presentTwiceExternalState;
+ presentTwiceExternalState.addSelf(HostAndPort("h3"));
+ presentTwiceExternalState.addSelf(HostAndPort("h1"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForInitiate(&notPresentExternalState, config).getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey,
+ validateConfigForInitiate(&presentTwiceExternalState, config).getStatus());
+ ASSERT_EQUALS(
+ 1, unittest::assertGet(validateConfigForInitiate(&presentOnceExternalState, config)));
+}
+
+TEST(ValidateConfigForInitiate, SelfMustBeElectable) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
+ validateConfigForInitiate(&presentOnceExternalState, config).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigVersionNumberMustBeHigherThanOld) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+
+ // Two configurations, identical except for version.
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+
+ // Can reconfig from old to new.
+ ASSERT_OK(validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+
+
+ // Cannot reconfig from old to old (versions must be different).
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, oldConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, oldConfig, true).getStatus());
+
+ // Cannot reconfig from new to old (versions must increase).
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, newConfig, oldConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, newConfig, oldConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigMustNotChangeSetName) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+
+ // Two configurations, compatible except for set name.
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs1"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, newConfig, oldConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigMustNotFlipBuildIndexesFlag) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+ ReplicaSetConfig oldConfigRefresh;
+
+    // Three configurations, two compatible except that h2 flips the buildIndexes flag.
+    // The third is compatible with the first.
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "buildIndexes" << false
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "buildIndexes" << true
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ oldConfigRefresh.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "buildIndexes" << false
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ ASSERT_OK(oldConfigRefresh.validate());
+ ASSERT_OK(
+ validateConfigForReconfig(&externalState, oldConfig, oldConfigRefresh, false).getStatus());
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigMustNotFlipArbiterFlag) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+ ReplicaSetConfig oldConfigRefresh;
+
+ // Three configurations, two compatible except that h2 flips the arbiterOnly flag.
+    // The third is compatible with the first.
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "arbiterOnly" << false)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ oldConfigRefresh.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "arbiterOnly" << false)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ ASSERT_OK(oldConfigRefresh.validate());
+ ASSERT_OK(
+ validateConfigForReconfig(&externalState, oldConfig, oldConfigRefresh, false).getStatus());
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, HostAndIdRemappingRestricted) {
+ // When reconfiguring a replica set, it is allowed to introduce (host, id) pairs
+ // absent from the old config only when the hosts and ids were both individually
+ // absent in the old config.
+
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig legalNewConfigWithNewHostAndId;
+ ReplicaSetConfig illegalNewConfigReusingHost;
+ ReplicaSetConfig illegalNewConfigReusingId;
+
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(oldConfig.validate());
+
+ //
+ // Here, the new config is valid because we've replaced (2, "h2") with
+ // (4, "h4"), so neither the member _id or host name were reused.
+ //
+ ASSERT_OK(
+ legalNewConfigWithNewHostAndId.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 4 << "host"
+ << "h4")
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(legalNewConfigWithNewHostAndId.validate());
+ ASSERT_OK(validateConfigForReconfig(
+ &externalState, oldConfig, legalNewConfigWithNewHostAndId, false).getStatus());
+
+ //
+ // Here, the new config is invalid because we've reused host name "h2" with
+ // new _id 4.
+ //
+ ASSERT_OK(illegalNewConfigReusingHost.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 4 << "host"
+ << "h2")
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(illegalNewConfigReusingHost.validate());
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(
+ &externalState, oldConfig, illegalNewConfigReusingHost, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(
+ &externalState, oldConfig, illegalNewConfigReusingHost, true).getStatus());
+ //
+    // Here, the new config is valid (despite the variable's name), because all
+    // we've changed is the name of the host representing _id 2.
+ //
+ ASSERT_OK(illegalNewConfigReusingId.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h4")
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(illegalNewConfigReusingId.validate());
+ ASSERT_OK(validateConfigForReconfig(&externalState, oldConfig, illegalNewConfigReusingId, false)
+ .getStatus());
+}
+
+TEST(ValidateConfigForReconfig, MustFindSelf) {
+    // Old and new configs are the same except for a version change; this is just testing
+    // that we can find ourselves in the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock notPresentExternalState;
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ReplicationCoordinatorExternalStateMock presentThriceExternalState;
+ presentThriceExternalState.addSelf(HostAndPort("h3"));
+ presentThriceExternalState.addSelf(HostAndPort("h2"));
+ presentThriceExternalState.addSelf(HostAndPort("h1"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForReconfig(&notPresentExternalState, oldConfig, newConfig, false)
+ .getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey,
+ validateConfigForReconfig(
+ &presentThriceExternalState, oldConfig, newConfig, false).getStatus());
+ ASSERT_EQUALS(1,
+ unittest::assertGet(validateConfigForReconfig(
+ &presentOnceExternalState, oldConfig, newConfig, false)));
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForReconfig(&notPresentExternalState, oldConfig, newConfig, true)
+ .getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey,
+ validateConfigForReconfig(&presentThriceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+ ASSERT_EQUALS(1,
+ unittest::assertGet(validateConfigForReconfig(
+ &presentOnceExternalState, oldConfig, newConfig, true)));
+}
+
+TEST(ValidateConfigForReconfig, SelfMustEndElectable) {
+    // Old and new configs are the same except for a version change and the electability of
+    // one node; this is just testing that we must be electable in the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, false)
+ .getStatus());
+ // Forced reconfig does not require electability.
+ ASSERT_OK(validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
+
+TEST(ValidateConfigForInitiate, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForInitiate will return a status indicating what is
+ // wrong with the new config.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForInitiate(&presentOnceExternalState, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForReconfig will return a status indicating what is
+ // wrong with the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, false)
+ .getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
+
+TEST(ValidateConfigForStartUp, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForStartUp will return a status indicating what is wrong
+ // with the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(
+ ErrorCodes::BadValue,
+ validateConfigForStartUp(&presentOnceExternalState, oldConfig, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForStartUp, OldAndNewConfigIncompatible) {
+ // The new config is not compatible with the old config due to a member changing _ids. This
+    // tests that validateConfigForStartUp will return a status indicating the incompatibility
+ // between the old and new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 2 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForStartUp(&presentOnceExternalState, oldConfig, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForStartUp, OldAndNewConfigCompatible) {
+ // The new config is compatible with the old config. This tests that
+ // validateConfigForStartUp will return a Status::OK() indicating the validity of this
+ // config change.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2"
+ << "priority" << 3)
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_OK(
+ validateConfigForStartUp(&presentOnceExternalState, oldConfig, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForHeartbeatReconfig, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForHeartbeatReconfig will return a status indicating
+ // what is wrong with the new config.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(
+ ErrorCodes::BadValue,
+ validateConfigForHeartbeatReconfig(&presentOnceExternalState, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForHeartbeatReconfig, NewConfigValid) {
+ // The new config is valid. This tests that validateConfigForHeartbeatReconfig will return
+ // a Status::OK() indicating the validity of this config change.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_OK(validateConfigForHeartbeatReconfig(&presentOnceExternalState, newConfig).getStatus());
+}
+
+TEST(ValidateForReconfig, ForceStillNeedsValidConfig) {
+ // The new config is invalid due to two nodes with the same _id value. This tests that
+ // ValidateForReconfig fails with an invalid config, even if force is true.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
+
+TEST(ValidateForReconfig, ForceStillNeedsSelfPresent) {
+ // The new config does not contain self. This tests that ValidateForReconfig fails
+ // if the member receiving it is absent from the config, even if force is true.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h3")
+ << BSON("_id" << 2 << "host"
+ << "h4")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
} // namespace
} // namespace repl
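
The tests above rebuild near-identical three-member config documents inline dozens of times. A hypothetical helper of the following shape (not present in this patch, shown only to illustrate how the BSON construction could be factored) would shrink that duplication:

    // Hypothetical helper: builds the three-member "h1"/"h2"/"h3" config
    // document that most of the tests above construct inline.
    BSONObj makeThreeMemberConfig(const std::string& setName, int version) {
        return BSON("_id" << setName << "version" << version << "members"
                          << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1")
                                        << BSON("_id" << 2 << "host" << "h2")
                                        << BSON("_id" << 3 << "host" << "h3")));
    }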
diff --git a/src/mongo/db/repl/replica_set_config_test.cpp b/src/mongo/db/repl/replica_set_config_test.cpp
index 8bc5a247498..734552d4ed1 100644
--- a/src/mongo/db/repl/replica_set_config_test.cpp
+++ b/src/mongo/db/repl/replica_set_config_test.cpp
@@ -36,682 +36,731 @@ namespace mongo {
namespace repl {
namespace {
- TEST(ReplicaSetConfig, ParseMinimalConfigAndCheckDefaults) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("rs0", config.getReplSetName());
- ASSERT_EQUALS(1, config.getConfigVersion());
- ASSERT_EQUALS(1, config.getNumMembers());
- ASSERT_EQUALS(0, config.membersBegin()->getId());
- ASSERT_EQUALS(1, config.getDefaultWriteConcern().wNumNodes);
- ASSERT_EQUALS("", config.getDefaultWriteConcern().wMode);
- ASSERT_EQUALS(Seconds(10), config.getHeartbeatTimeoutPeriod());
- ASSERT_TRUE(config.isChainingAllowed());
- ASSERT_EQUALS(0, config.getProtocolVersion());
- }
-
- TEST(ReplicaSetConfig, ParseLargeConfigAndCheckAccessors) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1234 <<
- "members" << BSON_ARRAY(BSON("_id" << 234 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("NYC" << "NY"))) <<
- "settings" << BSON("getLastErrorDefaults" <<
- BSON("w" << "majority") <<
- "getLastErrorModes" << BSON("eastCoast" <<
- BSON("NYC" << 1 )) <<
- "chainingAllowed" << false <<
- "heartbeatTimeoutSecs" << 120) <<
- "protocolVersion" << 2)));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("rs0", config.getReplSetName());
- ASSERT_EQUALS(1234, config.getConfigVersion());
- ASSERT_EQUALS(1, config.getNumMembers());
- ASSERT_EQUALS(234, config.membersBegin()->getId());
- ASSERT_EQUALS(0, config.getDefaultWriteConcern().wNumNodes);
- ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
- ASSERT_FALSE(config.isChainingAllowed());
- ASSERT_EQUALS(Seconds(120), config.getHeartbeatTimeoutPeriod());
- ASSERT_EQUALS(2, config.getProtocolVersion());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationThreeVotersNoArbiters) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "votes" << 0)))));
- ASSERT_OK(config.validate());
-
- ASSERT_EQUALS(2, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbiters) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "arbiterOnly" << true) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(3, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbitersOthersNoVote) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" <<
- "_id" << 0 <<
- "votes" << 0) <<
- BSON("host" << "node2:12345" <<
- "_id" << 1 <<
- "votes" << 0) <<
- BSON("host" << "node3:12345" <<
- "_id" << 2 <<
- "votes" << 0) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "arbiterOnly" << true) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(0, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationEvenNumberOfMembers) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(3, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfSecondariesNoVotes) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" <<
- "_id" << 1 <<
- "votes" << 0) <<
- BSON("host" << "node3:12345" <<
- "_id" << 2 <<
- "votes" << 0) <<
- BSON("host" << "node4:12345" << "_id" << 3) <<
- BSON("host" << "node5:12345" << "_id" << 4)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(2, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingIdField) {
- ReplicaSetConfig config;
- // Replica set name must be a string.
- ASSERT_EQUALS(
- ErrorCodes::TypeMismatch,
- config.initialize(
- BSON("_id" << 1 <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- // Replica set name must be present.
- ASSERT_EQUALS(
- ErrorCodes::NoSuchKey,
- config.initialize(
- BSON("version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- // Empty repl set name parses, but does not validate.
- ASSERT_OK(config.initialize(
- BSON("_id" << "" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingVersionField) {
- ReplicaSetConfig config;
- // Config version field must be present.
- ASSERT_EQUALS(
- ErrorCodes::NoSuchKey,
- config.initialize(
- BSON("_id" << "rs0" <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_EQUALS(
- ErrorCodes::TypeMismatch,
- config.initialize(
- BSON("_id" << "rs0" <<
- "version" << "1" <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1.0 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_OK(config.validate());
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 0.0 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" <<
- static_cast<long long>(std::numeric_limits<int>::max()) + 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithBadMembers) {
- ReplicaSetConfig config;
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345") <<
- "localhost:23456"))));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey,
- config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("host" << "localhost:12345")))));
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithLocalNonLocalHostMix) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost") <<
- BSON("_id" << 1 <<
- "host" << "otherhost")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNoElectableNodes) {
- ReplicaSetConfig config;
- const BSONObj configBsonNoElectableNodes = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "priority" << 0)));
-
- ASSERT_OK(config.initialize(configBsonNoElectableNodes));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonNoElectableNodesOneArbiter = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "arbiterOnly" << 1) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "priority" << 0)));
-
- ASSERT_OK(config.initialize(configBsonNoElectableNodesOneArbiter));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonNoElectableNodesTwoArbiters = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "arbiterOnly" << 1) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "arbiterOnly" << 1)));
-
- ASSERT_OK(config.initialize(configBsonNoElectableNodesOneArbiter));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonOneElectableNode = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "priority" << 1)));
- ASSERT_OK(config.initialize(configBsonOneElectableNode));
- ASSERT_OK(config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithTooFewVoters) {
- ReplicaSetConfig config;
- const BSONObj configBsonNoVoters = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "votes" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "votes" << 0)));
-
- ASSERT_OK(config.initialize(configBsonNoVoters));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonOneVoter = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "votes" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "votes" << 1)));
- ASSERT_OK(config.initialize(configBsonOneVoter));
- ASSERT_OK(config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithTooManyVoters) {
- ReplicaSetConfig config;
- namespace mmb = mutablebson;
- mmb::Document configDoc;
- mmb::Element configDocRoot = configDoc.root();
- ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
- ASSERT_OK(configDocRoot.appendInt("version", 1));
- mmb::Element membersArray = configDoc.makeElementArray("members");
- ASSERT_OK(configDocRoot.pushBack(membersArray));
- for (size_t i = 0; i < ReplicaSetConfig::kMaxVotingMembers + 1; ++i) {
- mmb::Element memberElement = configDoc.makeElementObject("");
- ASSERT_OK(membersArray.pushBack(memberElement));
- ASSERT_OK(memberElement.appendInt("_id", i));
- ASSERT_OK(memberElement.appendString(
- "host", std::string(str::stream() << "localhost" << i + 1)));
- ASSERT_OK(memberElement.appendInt("votes", 1));
- }
-
- const BSONObj configBsonTooManyVoters = configDoc.getObject();
-
- membersArray.leftChild().findFirstChildNamed("votes").setValueInt(0);
- const BSONObj configBsonMaxVoters = configDoc.getObject();
-
-
- ASSERT_OK(config.initialize(configBsonMaxVoters));
- ASSERT_OK(config.validate());
- ASSERT_OK(config.initialize(configBsonTooManyVoters));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithDuplicateHost) {
- ReplicaSetConfig config;
- const BSONObj configBson = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1") <<
- BSON("_id" << 1 << "host" << "localhost:1")));
- ASSERT_OK(config.initialize(configBson));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithTooManyNodes) {
- ReplicaSetConfig config;
- namespace mmb = mutablebson;
- mmb::Document configDoc;
- mmb::Element configDocRoot = configDoc.root();
- ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
- ASSERT_OK(configDocRoot.appendInt("version", 1));
- mmb::Element membersArray = configDoc.makeElementArray("members");
- ASSERT_OK(configDocRoot.pushBack(membersArray));
- for (size_t i = 0; i < ReplicaSetConfig::kMaxMembers; ++i) {
- mmb::Element memberElement = configDoc.makeElementObject("");
- ASSERT_OK(membersArray.pushBack(memberElement));
- ASSERT_OK(memberElement.appendInt("_id", i));
- ASSERT_OK(memberElement.appendString(
- "host", std::string(str::stream() << "localhost" << i + 1)));
- if (i >= ReplicaSetConfig::kMaxVotingMembers) {
- ASSERT_OK(memberElement.appendInt("votes", 0));
- }
- }
- const BSONObj configBsonMaxNodes = configDoc.getObject();
-
+TEST(ReplicaSetConfig, ParseMinimalConfigAndCheckDefaults) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("rs0", config.getReplSetName());
+ ASSERT_EQUALS(1, config.getConfigVersion());
+ ASSERT_EQUALS(1, config.getNumMembers());
+ ASSERT_EQUALS(0, config.membersBegin()->getId());
+ ASSERT_EQUALS(1, config.getDefaultWriteConcern().wNumNodes);
+ ASSERT_EQUALS("", config.getDefaultWriteConcern().wMode);
+ ASSERT_EQUALS(Seconds(10), config.getHeartbeatTimeoutPeriod());
+ ASSERT_TRUE(config.isChainingAllowed());
+ ASSERT_EQUALS(0, config.getProtocolVersion());
+}
+
+TEST(ReplicaSetConfig, ParseLargeConfigAndCheckAccessors) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON(
+ "_id"
+ << "rs0"
+ << "version" << 1234 << "members" << BSON_ARRAY(BSON("_id" << 234 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("NYC"
+ << "NY")))
+ << "settings" << BSON("getLastErrorDefaults"
+ << BSON("w"
+ << "majority") << "getLastErrorModes"
+ << BSON("eastCoast" << BSON("NYC" << 1)) << "chainingAllowed" << false
+ << "heartbeatTimeoutSecs" << 120) << "protocolVersion" << 2)));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("rs0", config.getReplSetName());
+ ASSERT_EQUALS(1234, config.getConfigVersion());
+ ASSERT_EQUALS(1, config.getNumMembers());
+ ASSERT_EQUALS(234, config.membersBegin()->getId());
+ ASSERT_EQUALS(0, config.getDefaultWriteConcern().wNumNodes);
+ ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
+ ASSERT_FALSE(config.isChainingAllowed());
+ ASSERT_EQUALS(Seconds(120), config.getHeartbeatTimeoutPeriod());
+ ASSERT_EQUALS(2, config.getProtocolVersion());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationThreeVotersNoArbiters) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "votes" << 0)))));
+ ASSERT_OK(config.validate());
+
+ ASSERT_EQUALS(2, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbiters) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "arbiterOnly" << true)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(3, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbitersOthersNoVote) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0 << "votes" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1 << "votes" << 0)
+ << BSON("host"
+ << "node3:12345"
+ << "_id" << 2 << "votes" << 0)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "arbiterOnly" << true)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(0, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationEvenNumberOfMembers) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(3, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfSecondariesNoVotes) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1 << "votes" << 0)
+ << BSON("host"
+ << "node3:12345"
+ << "_id" << 2 << "votes" << 0)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3) << BSON("host"
+ << "node5:12345"
+ << "_id" << 4)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(2, config.getWriteMajority());
+}
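+
+// Editor's note: one closed form consistent with all five majority tests
+// above (a sketch inferred from the expected values, not necessarily the
+// production implementation) is:
+//
+//   int writeMajority(int voters, int votingArbiters) {
+//       // majority of the voters, capped by the data-bearing voters
+//       return std::min(voters / 2 + 1, voters - votingArbiters);
+//   }
+//
+// Checking it against the expectations: 3 voters/0 arbiters -> min(2, 3) = 2;
+// 5/2 -> min(3, 3) = 3; 2/2 -> min(2, 0) = 0; 4/0 -> min(3, 4) = 3; and
+// non-voting members never enter the count, so the last case is again 2.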
+
+TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingIdField) {
+ ReplicaSetConfig config;
+ // Replica set name must be a string.
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ config.initialize(BSON("_id" << 1 << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ // Replica set name must be present.
+ ASSERT_EQUALS(
+ ErrorCodes::NoSuchKey,
+ config.initialize(
+ BSON("version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ // Empty repl set name parses, but does not validate.
+ ASSERT_OK(config.initialize(BSON("_id"
+ << ""
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingVersionField) {
+ ReplicaSetConfig config;
+ // Config version field must be present.
+ ASSERT_EQUALS(
+ ErrorCodes::NoSuchKey,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_EQUALS(
+ ErrorCodes::TypeMismatch,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version"
+ << "1"
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1.0 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_OK(config.validate());
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 0.0 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version"
+ << static_cast<long long>(std::numeric_limits<int>::max()) + 1
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
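+
+// Editor's note: the assertions above imply that any numeric BSON value is
+// accepted at parse time (both 1.0 and 0.0 initialize successfully), while
+// validate() enforces the range; a sketch of the implied constraint
+// (hypothetical helper, not the production code):
+//
+//   bool isValidConfigVersion(long long v) {
+//       return v >= 1 && v <= std::numeric_limits<int>::max();
+//   }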
+
+TEST(ReplicaSetConfig, ParseFailsWithBadMembers) {
+ ReplicaSetConfig config;
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")
+ << "localhost:23456"))));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "localhost:12345")))));
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithLocalNonLocalHostMix) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost")
+ << BSON("_id" << 1 << "host"
+ << "otherhost")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNoElectableNodes) {
+ ReplicaSetConfig config;
+ const BSONObj configBsonNoElectableNodes = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "priority"
+ << 0)));
+
+ ASSERT_OK(config.initialize(configBsonNoElectableNodes));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonNoElectableNodesOneArbiter =
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "arbiterOnly" << 1)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "priority" << 0)));
+
+ ASSERT_OK(config.initialize(configBsonNoElectableNodesOneArbiter));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonNoElectableNodesTwoArbiters =
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "arbiterOnly" << 1)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "arbiterOnly" << 1)));
+
+ ASSERT_OK(config.initialize(configBsonNoElectableNodesTwoArbiters));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonOneElectableNode = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "priority"
+ << 1)));
+ ASSERT_OK(config.initialize(configBsonOneElectableNode));
+ ASSERT_OK(config.validate());
+}
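+
+// Editor's note: the three failing configs above differ only in how the
+// electable node is removed (priority 0, one arbiter, two arbiters), so the
+// rule they pin down is that at least one member must be a non-arbiter voter
+// with priority > 0; a single such node (the last config) is sufficient.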
+
+TEST(ReplicaSetConfig, ParseFailsWithTooFewVoters) {
+ ReplicaSetConfig config;
+ const BSONObj configBsonNoVoters = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "votes" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "votes" << 0)));
+
+ ASSERT_OK(config.initialize(configBsonNoVoters));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonOneVoter = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "votes" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "votes" << 1)));
+ ASSERT_OK(config.initialize(configBsonOneVoter));
+ ASSERT_OK(config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithTooManyVoters) {
+ ReplicaSetConfig config;
+ namespace mmb = mutablebson;
+ mmb::Document configDoc;
+ mmb::Element configDocRoot = configDoc.root();
+ ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
+ ASSERT_OK(configDocRoot.appendInt("version", 1));
+ mmb::Element membersArray = configDoc.makeElementArray("members");
+ ASSERT_OK(configDocRoot.pushBack(membersArray));
+ for (size_t i = 0; i < ReplicaSetConfig::kMaxVotingMembers + 1; ++i) {
mmb::Element memberElement = configDoc.makeElementObject("");
ASSERT_OK(membersArray.pushBack(memberElement));
- ASSERT_OK(memberElement.appendInt("_id", ReplicaSetConfig::kMaxMembers));
- ASSERT_OK(memberElement.appendString(
- "host", std::string(str::stream() <<
- "localhost" << ReplicaSetConfig::kMaxMembers + 1)));
- ASSERT_OK(memberElement.appendInt("votes", 0));
- const BSONObj configBsonTooManyNodes = configDoc.getObject();
-
-
- ASSERT_OK(config.initialize(configBsonMaxNodes));
- ASSERT_OK(config.validate());
- ASSERT_OK(config.initialize(configBsonTooManyNodes));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithUnexpectedField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "unexpectedfield" << "value"));
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonArrayMembersField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << "value"));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonNumericHeartbeatTimeoutSecsField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonBoolChainingAllowedField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("chainingAllowed" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+ ASSERT_OK(memberElement.appendInt("_id", i));
+ ASSERT_OK(
+ memberElement.appendString("host", std::string(str::stream() << "localhost" << i + 1)));
+ ASSERT_OK(memberElement.appendInt("votes", 1));
}
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectSettingsField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << "none"));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithGetLastErrorDefaultsFieldUnparseable) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("getLastErrorDefaults" << BSON(
- "fsync" << "seven"))));
- ASSERT_EQUALS(ErrorCodes::FailedToParse, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorDefaultsField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("getLastErrorDefaults" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("getLastErrorModes" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithDuplicateGetLastErrorModesField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag" << 1) <<
- "one" << BSON("tag" << 1)))));
- ASSERT_EQUALS(ErrorCodes::DuplicateKey, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesEntryField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << 1))));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonNumericGetLastErrorModesConstraintValue) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag" << "no")))));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNegativeGetLastErrorModesConstraintValue) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag" << -1)))));
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonExistentGetLastErrorModesConstraintTag) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag2" << 1)))));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, status);
- }
-
- TEST(ReplicaSetConfig, ValidateFailsWithDuplicateMemberId) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345") <<
- BSON("_id" << 0 <<
- "host" << "someoneelse:12345"))));
- ASSERT_OK(status);
-
- status = config.validate();
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ValidateFailsWithInvalidMember) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "hidden" << true))));
- ASSERT_OK(status);
-
- status = config.validate();
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ChainingAllowedField) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("chainingAllowed" << true))));
- ASSERT_OK(config.validate());
- ASSERT_TRUE(config.isChainingAllowed());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("chainingAllowed" << false))));
- ASSERT_OK(config.validate());
- ASSERT_FALSE(config.isChainingAllowed());
- }
-
- TEST(ReplicaSetConfig, HeartbeatTimeoutField) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 20))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(Seconds(20), config.getHeartbeatTimeoutPeriod());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << -20))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, GleDefaultField) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << "majority")))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << "frim")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << 0)))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("a" << "v"))) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << "frim") <<
- "getLastErrorModes" << BSON("frim" << BSON("a" << 1))))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("frim", config.getDefaultWriteConcern().wMode);
- ASSERT_OK(config.findCustomWriteMode("frim").getStatus());
- }
-
- bool operator==(const MemberConfig& a, const MemberConfig& b) {
- // do tag comparisons
- for (MemberConfig::TagIterator itrA = a.tagsBegin(); itrA != a.tagsEnd(); ++itrA) {
- if (std::find(b.tagsBegin(), b.tagsEnd(), *itrA) == b.tagsEnd()) {
- return false;
- }
+ const BSONObj configBsonTooManyVoters = configDoc.getObject();
+
+ membersArray.leftChild().findFirstChildNamed("votes").setValueInt(0);
+ const BSONObj configBsonMaxVoters = configDoc.getObject();
+
+
+ ASSERT_OK(config.initialize(configBsonMaxVoters));
+ ASSERT_OK(config.validate());
+ ASSERT_OK(config.initialize(configBsonTooManyVoters));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithDuplicateHost) {
+ ReplicaSetConfig config;
+ const BSONObj configBson = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1")
+ << BSON("_id" << 1 << "host"
+ << "localhost:1")));
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithTooManyNodes) {
+ ReplicaSetConfig config;
+ namespace mmb = mutablebson;
+ mmb::Document configDoc;
+ mmb::Element configDocRoot = configDoc.root();
+ ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
+ ASSERT_OK(configDocRoot.appendInt("version", 1));
+ mmb::Element membersArray = configDoc.makeElementArray("members");
+ ASSERT_OK(configDocRoot.pushBack(membersArray));
+ for (size_t i = 0; i < ReplicaSetConfig::kMaxMembers; ++i) {
+ mmb::Element memberElement = configDoc.makeElementObject("");
+ ASSERT_OK(membersArray.pushBack(memberElement));
+ ASSERT_OK(memberElement.appendInt("_id", i));
+ ASSERT_OK(
+ memberElement.appendString("host", std::string(str::stream() << "localhost" << i + 1)));
+ if (i >= ReplicaSetConfig::kMaxVotingMembers) {
+ ASSERT_OK(memberElement.appendInt("votes", 0));
}
- return a.getId() == b.getId() &&
- a.getHostAndPort() == b.getHostAndPort() &&
- a.getPriority() == b.getPriority() &&
- a.getSlaveDelay() == b.getSlaveDelay() &&
- a.isVoter() == b.isVoter() &&
- a.isArbiter() == b.isArbiter() &&
- a.isHidden() == b.isHidden() &&
- a.shouldBuildIndexes() == b.shouldBuildIndexes() &&
- a.getNumTags() == b.getNumTags();
}
-
- bool operator==(const ReplicaSetConfig& a, const ReplicaSetConfig& b) {
- // compare WriteConcernModes
- std::vector<std::string> modeNames = a.getWriteConcernNames();
- for (std::vector<std::string>::iterator it = modeNames.begin();
- it != modeNames.end();
- it++) {
- ReplicaSetTagPattern patternA = a.findCustomWriteMode(*it).getValue();
- ReplicaSetTagPattern patternB = b.findCustomWriteMode(*it).getValue();
- for (ReplicaSetTagPattern::ConstraintIterator itrA = patternA.constraintsBegin();
- itrA != patternA.constraintsEnd();
- itrA++) {
- bool same = false;
- for (ReplicaSetTagPattern::ConstraintIterator itrB = patternB.constraintsBegin();
- itrB != patternB.constraintsEnd();
- itrB++) {
- if (itrA->getKeyIndex() == itrB->getKeyIndex() &&
- itrA->getMinCount() == itrB->getMinCount()) {
- same = true;
- break;
- }
- }
- if (!same) {
- return false;
- }
- }
+ const BSONObj configBsonMaxNodes = configDoc.getObject();
+
+ mmb::Element memberElement = configDoc.makeElementObject("");
+ ASSERT_OK(membersArray.pushBack(memberElement));
+ ASSERT_OK(memberElement.appendInt("_id", ReplicaSetConfig::kMaxMembers));
+ ASSERT_OK(memberElement.appendString(
+ "host", std::string(str::stream() << "localhost" << ReplicaSetConfig::kMaxMembers + 1)));
+ ASSERT_OK(memberElement.appendInt("votes", 0));
+ const BSONObj configBsonTooManyNodes = configDoc.getObject();
+
+
+ ASSERT_OK(config.initialize(configBsonMaxNodes));
+ ASSERT_OK(config.validate());
+ ASSERT_OK(config.initialize(configBsonTooManyNodes));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
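+
+// Editor's note: the two loop-driven tests above probe the size limits as
+// exact boundaries; a sketch of what they assert (using the named constants
+// so the tests track any future change):
+//
+//   // voters  == kMaxVotingMembers     -> validate() OK
+//   // voters  == kMaxVotingMembers + 1 -> ErrorCodes::BadValue
+//   // members == kMaxMembers           -> validate() OK
+//   // members == kMaxMembers + 1       -> ErrorCodes::BadValue
+//
+// The 50-node round-trip tests at the end of this file suggest the current
+// values are kMaxMembers == 50 and kMaxVotingMembers == 7.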
+
+TEST(ReplicaSetConfig, ParseFailsWithUnexpectedField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "unexpectedfield"
+ << "value"));
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonArrayMembersField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << "value"));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonNumericHeartbeatTimeoutSecsField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("heartbeatTimeoutSecs"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonBoolChainingAllowedField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("chainingAllowed"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectSettingsField) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << "none"));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithGetLastErrorDefaultsFieldUnparseable) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("fsync"
+ << "seven"))));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorDefaultsField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("getLastErrorDefaults"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("getLastErrorModes"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithDuplicateGetLastErrorModesField) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes"
+ << BSON("one" << BSON("tag" << 1) << "one"
+ << BSON("tag" << 1)))));
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesEntryField) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << 1))));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonNumericGetLastErrorModesConstraintValue) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << BSON("tag"
+ << "no")))));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNegativeGetLastErrorModesConstraintValue) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << BSON("tag" << -1)))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonExistentGetLastErrorModesConstraintTag) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << BSON("tag2" << 1)))));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, status);
+}
+
+TEST(ReplicaSetConfig, ValidateFailsWithDuplicateMemberId) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")
+ << BSON("_id" << 0 << "host"
+ << "someoneelse:12345"))));
+ ASSERT_OK(status);
+
+ status = config.validate();
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ValidateFailsWithInvalidMember) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "hidden" << true))));
+ ASSERT_OK(status);
+
+ status = config.validate();
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ChainingAllowedField) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("chainingAllowed" << true))));
+ ASSERT_OK(config.validate());
+ ASSERT_TRUE(config.isChainingAllowed());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("chainingAllowed" << false))));
+ ASSERT_OK(config.validate());
+ ASSERT_FALSE(config.isChainingAllowed());
+}
+
+TEST(ReplicaSetConfig, HeartbeatTimeoutField) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 20))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(Seconds(20), config.getHeartbeatTimeoutPeriod());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << -20))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, GleDefaultField) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("w"
+ << "majority")))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("w"
+ << "frim")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("w" << 0)))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("a"
+ << "v")))
+ << "settings" << BSON("getLastErrorDefaults"
+ << BSON("w"
+ << "frim") << "getLastErrorModes"
+ << BSON("frim" << BSON("a" << 1))))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("frim", config.getDefaultWriteConcern().wMode);
+ ASSERT_OK(config.findCustomWriteMode("frim").getStatus());
+}
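+
+// Editor's note: GleDefaultField pins down how a default write concern mode
+// resolves: "majority" is always recognized, any other wMode must be declared
+// under settings.getLastErrorModes, and a numeric default of w:0 is rejected.
+// A sketch of that rule (hypothetical helper, not the production code):
+//
+//   Status checkDefaultWriteConcern(const ReplicaSetConfig& cfg,
+//                                   const WriteConcernOptions& wc) {
+//       if (wc.wMode.empty())
+//           return wc.wNumNodes >= 1 ? Status::OK()
+//                                    : Status(ErrorCodes::BadValue, "w >= 1");
+//       if (wc.wMode == "majority")
+//           return Status::OK();
+//       return cfg.findCustomWriteMode(wc.wMode).getStatus();
+//   }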
+
+bool operator==(const MemberConfig& a, const MemberConfig& b) {
+ // do tag comparisons
+ for (MemberConfig::TagIterator itrA = a.tagsBegin(); itrA != a.tagsEnd(); ++itrA) {
+ if (std::find(b.tagsBegin(), b.tagsEnd(), *itrA) == b.tagsEnd()) {
+ return false;
}
-
- // compare the members
- for (ReplicaSetConfig::MemberIterator memA = a.membersBegin();
- memA != a.membersEnd();
- memA++) {
+ }
+ return a.getId() == b.getId() && a.getHostAndPort() == b.getHostAndPort() &&
+ a.getPriority() == b.getPriority() && a.getSlaveDelay() == b.getSlaveDelay() &&
+ a.isVoter() == b.isVoter() && a.isArbiter() == b.isArbiter() &&
+ a.isHidden() == b.isHidden() && a.shouldBuildIndexes() == b.shouldBuildIndexes() &&
+ a.getNumTags() == b.getNumTags();
+}
+
+bool operator==(const ReplicaSetConfig& a, const ReplicaSetConfig& b) {
+ // compare WriteConcernModes
+ std::vector<std::string> modeNames = a.getWriteConcernNames();
+ for (std::vector<std::string>::iterator it = modeNames.begin(); it != modeNames.end(); it++) {
+ ReplicaSetTagPattern patternA = a.findCustomWriteMode(*it).getValue();
+ ReplicaSetTagPattern patternB = b.findCustomWriteMode(*it).getValue();
+ for (ReplicaSetTagPattern::ConstraintIterator itrA = patternA.constraintsBegin();
+ itrA != patternA.constraintsEnd();
+ itrA++) {
bool same = false;
- for (ReplicaSetConfig::MemberIterator memB = b.membersBegin();
- memB != b.membersEnd();
- memB++) {
- if (*memA == *memB) {
+ for (ReplicaSetTagPattern::ConstraintIterator itrB = patternB.constraintsBegin();
+ itrB != patternB.constraintsEnd();
+ itrB++) {
+ if (itrA->getKeyIndex() == itrB->getKeyIndex() &&
+ itrA->getMinCount() == itrB->getMinCount()) {
same = true;
break;
}
@@ -720,292 +769,446 @@ namespace {
return false;
}
}
-
- // simple comparisons
- return a.getReplSetName() == b.getReplSetName() &&
- a.getConfigVersion() == b.getConfigVersion() &&
- a.getNumMembers() == b.getNumMembers() &&
- a.getHeartbeatTimeoutPeriod() == b.getHeartbeatTimeoutPeriod() &&
- a.isChainingAllowed() == b.isChainingAllowed() &&
- a.getDefaultWriteConcern().wNumNodes == b.getDefaultWriteConcern().wNumNodes &&
- a.getDefaultWriteConcern().wMode == b.getDefaultWriteConcern().wMode &&
- a.getProtocolVersion() == b.getProtocolVersion();
}
- TEST(ReplicaSetConfig, toBSONRoundTripAbility) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 20))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_TRUE(configA == configB);
- }
-
- TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "asdf"
- << "version" << 9
- << "members" << BSON_ARRAY(
- BSON("_id" << 0
- << "host" << "localhost:12345"
- << "arbiterOnly" << true
- << "votes" << 1
- ) <<
- BSON("_id" << 3
- << "host" << "localhost:3828"
- << "arbiterOnly" << false
- << "hidden" << true
- << "buildIndexes" << false
- << "priority" << 0
- << "slaveDelay" << 17
- << "votes" << 0
- << "tags" << BSON("coast" << "east" << "ssd" << "true")
- ) <<
- BSON("_id" << 2
- << "host" << "foo.com:3828"
- << "priority" << 9
- << "votes" << 0
- << "tags" << BSON("coast" << "west" << "hdd" << "true")
- ))
- << "settings" << BSON("heartbeatTimeoutSecs" << 20
- << "chainingAllowd" << true
- << "getLastErrorDefaults" << BSON("w" << "majority")
- << "getLastErrorModes" << BSON(
- "disks" << BSON("ssd" << 1 << "hdd" << 1)
- << "coasts" << BSON("coast" << 2)))
- )));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_TRUE(configA == configB);
- }
-
- TEST(ReplicaSetConfig, toBSONRoundTripAbilityInvalid) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << ""
- << "version" << -3
- << "members" << BSON_ARRAY(
- BSON("_id" << 0
- << "host" << "localhost:12345"
- << "arbiterOnly" << true
- << "votes" << 0
- ) <<
- BSON("_id" << 0
- << "host" << "localhost:3828"
- << "arbiterOnly" << false
- << "buildIndexes" << false
- << "priority" << 2
- ) <<
- BSON("_id" << 2
- << "host" << "localhost:3828"
- << "priority" << 9
- << "votes" << 0
- ))
- << "settings" << BSON("heartbeatTimeoutSecs" << -20))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_NOT_OK(configA.validate());
- ASSERT_NOT_OK(configB.validate());
- ASSERT_TRUE(configA == configB);
- }
-
- TEST(ReplicaSetConfig, CheckIfWriteConcernCanBeSatisfied) {
- ReplicaSetConfig configA;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "node0" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA1")) <<
- BSON("_id" << 1 <<
- "host" << "node1" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA2")) <<
- BSON("_id" << 2 <<
- "host" << "node2" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA3")) <<
- BSON("_id" << 3 <<
- "host" << "node3" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU1")) <<
- BSON("_id" << 4 <<
- "host" << "node4" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU2")) <<
- BSON("_id" << 5 <<
- "host" << "node5" <<
- "arbiterOnly" << true)) <<
- "settings" << BSON("getLastErrorModes" <<
- BSON("valid" << BSON("dc" << 2 << "rack" << 3) <<
- "invalidNotEnoughValues" << BSON("dc" << 3) <<
- "invalidNotEnoughNodes" << BSON("rack" << 6))))));
-
- WriteConcernOptions validNumberWC;
- validNumberWC.wNumNodes = 5;
- ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validNumberWC));
-
- WriteConcernOptions invalidNumberWC;
- invalidNumberWC.wNumNodes = 6;
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(invalidNumberWC));
-
- WriteConcernOptions majorityWC;
- majorityWC.wMode = "majority";
- ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(majorityWC));
-
- WriteConcernOptions validModeWC;
- validModeWC.wMode = "valid";
- ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validModeWC));
-
- WriteConcernOptions fakeModeWC;
- fakeModeWC.wMode = "fake";
- ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(fakeModeWC));
-
- WriteConcernOptions invalidModeNotEnoughValuesWC;
- invalidModeNotEnoughValuesWC.wMode = "invalidNotEnoughValues";
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughValuesWC));
-
- WriteConcernOptions invalidModeNotEnoughNodesWC;
- invalidModeNotEnoughNodesWC.wMode = "invalidNotEnoughNodes";
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughNodesWC));
- }
-
- TEST(ReplicaSetConfig, CheckMaximumNodesOkay) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node0") <<
- BSON("_id" << 1 << "host" << "node1") <<
- BSON("_id" << 2 << "host" << "node2") <<
- BSON("_id" << 3 << "host" << "node3") <<
- BSON("_id" << 4 << "host" << "node4") <<
- BSON("_id" << 5 << "host" << "node5") <<
- BSON("_id" << 6 << "host" << "node6") <<
- BSON("_id" << 7 << "host" << "node7" << "votes" << 0) <<
- BSON("_id" << 8 << "host" << "node8" << "votes" << 0) <<
- BSON("_id" << 9 << "host" << "node9" << "votes" << 0) <<
- BSON("_id" << 10 << "host" << "node10" << "votes" << 0) <<
- BSON("_id" << 11 << "host" << "node11" << "votes" << 0) <<
- BSON("_id" << 12 << "host" << "node12" << "votes" << 0) <<
- BSON("_id" << 13 << "host" << "node13" << "votes" << 0) <<
- BSON("_id" << 14 << "host" << "node14" << "votes" << 0) <<
- BSON("_id" << 15 << "host" << "node15" << "votes" << 0) <<
- BSON("_id" << 16 << "host" << "node16" << "votes" << 0) <<
- BSON("_id" << 17 << "host" << "node17" << "votes" << 0) <<
- BSON("_id" << 18 << "host" << "node18" << "votes" << 0) <<
- BSON("_id" << 19 << "host" << "node19" << "votes" << 0) <<
- BSON("_id" << 20 << "host" << "node20" << "votes" << 0) <<
- BSON("_id" << 21 << "host" << "node21" << "votes" << 0) <<
- BSON("_id" << 22 << "host" << "node22" << "votes" << 0) <<
- BSON("_id" << 23 << "host" << "node23" << "votes" << 0) <<
- BSON("_id" << 24 << "host" << "node24" << "votes" << 0) <<
- BSON("_id" << 25 << "host" << "node25" << "votes" << 0) <<
- BSON("_id" << 26 << "host" << "node26" << "votes" << 0) <<
- BSON("_id" << 27 << "host" << "node27" << "votes" << 0) <<
- BSON("_id" << 28 << "host" << "node28" << "votes" << 0) <<
- BSON("_id" << 29 << "host" << "node29" << "votes" << 0) <<
- BSON("_id" << 30 << "host" << "node30" << "votes" << 0) <<
- BSON("_id" << 31 << "host" << "node31" << "votes" << 0) <<
- BSON("_id" << 32 << "host" << "node32" << "votes" << 0) <<
- BSON("_id" << 33 << "host" << "node33" << "votes" << 0) <<
- BSON("_id" << 34 << "host" << "node34" << "votes" << 0) <<
- BSON("_id" << 35 << "host" << "node35" << "votes" << 0) <<
- BSON("_id" << 36 << "host" << "node36" << "votes" << 0) <<
- BSON("_id" << 37 << "host" << "node37" << "votes" << 0) <<
- BSON("_id" << 38 << "host" << "node38" << "votes" << 0) <<
- BSON("_id" << 39 << "host" << "node39" << "votes" << 0) <<
- BSON("_id" << 40 << "host" << "node40" << "votes" << 0) <<
- BSON("_id" << 41 << "host" << "node41" << "votes" << 0) <<
- BSON("_id" << 42 << "host" << "node42" << "votes" << 0) <<
- BSON("_id" << 43 << "host" << "node43" << "votes" << 0) <<
- BSON("_id" << 44 << "host" << "node44" << "votes" << 0) <<
- BSON("_id" << 45 << "host" << "node45" << "votes" << 0) <<
- BSON("_id" << 46 << "host" << "node46" << "votes" << 0) <<
- BSON("_id" << 47 << "host" << "node47" << "votes" << 0) <<
- BSON("_id" << 48 << "host" << "node48" << "votes" << 0) <<
- BSON("_id" << 49 << "host" << "node49" << "votes" << 0)))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_OK(configA.validate());
- ASSERT_OK(configB.validate());
- ASSERT_TRUE(configA == configB);
+ // compare the members
+ for (ReplicaSetConfig::MemberIterator memA = a.membersBegin(); memA != a.membersEnd(); memA++) {
+ bool same = false;
+ for (ReplicaSetConfig::MemberIterator memB = b.membersBegin(); memB != b.membersEnd();
+ memB++) {
+ if (*memA == *memB) {
+ same = true;
+ break;
+ }
+ }
+ if (!same) {
+ return false;
+ }
}
- TEST(ReplicaSetConfig, CheckBeyondMaximumNodesFailsValidate) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node0") <<
- BSON("_id" << 1 << "host" << "node1") <<
- BSON("_id" << 2 << "host" << "node2") <<
- BSON("_id" << 3 << "host" << "node3") <<
- BSON("_id" << 4 << "host" << "node4") <<
- BSON("_id" << 5 << "host" << "node5") <<
- BSON("_id" << 6 << "host" << "node6") <<
- BSON("_id" << 7 << "host" << "node7" << "votes" << 0) <<
- BSON("_id" << 8 << "host" << "node8" << "votes" << 0) <<
- BSON("_id" << 9 << "host" << "node9" << "votes" << 0) <<
- BSON("_id" << 10 << "host" << "node10" << "votes" << 0) <<
- BSON("_id" << 11 << "host" << "node11" << "votes" << 0) <<
- BSON("_id" << 12 << "host" << "node12" << "votes" << 0) <<
- BSON("_id" << 13 << "host" << "node13" << "votes" << 0) <<
- BSON("_id" << 14 << "host" << "node14" << "votes" << 0) <<
- BSON("_id" << 15 << "host" << "node15" << "votes" << 0) <<
- BSON("_id" << 16 << "host" << "node16" << "votes" << 0) <<
- BSON("_id" << 17 << "host" << "node17" << "votes" << 0) <<
- BSON("_id" << 18 << "host" << "node18" << "votes" << 0) <<
- BSON("_id" << 19 << "host" << "node19" << "votes" << 0) <<
- BSON("_id" << 20 << "host" << "node20" << "votes" << 0) <<
- BSON("_id" << 21 << "host" << "node21" << "votes" << 0) <<
- BSON("_id" << 22 << "host" << "node22" << "votes" << 0) <<
- BSON("_id" << 23 << "host" << "node23" << "votes" << 0) <<
- BSON("_id" << 24 << "host" << "node24" << "votes" << 0) <<
- BSON("_id" << 25 << "host" << "node25" << "votes" << 0) <<
- BSON("_id" << 26 << "host" << "node26" << "votes" << 0) <<
- BSON("_id" << 27 << "host" << "node27" << "votes" << 0) <<
- BSON("_id" << 28 << "host" << "node28" << "votes" << 0) <<
- BSON("_id" << 29 << "host" << "node29" << "votes" << 0) <<
- BSON("_id" << 30 << "host" << "node30" << "votes" << 0) <<
- BSON("_id" << 31 << "host" << "node31" << "votes" << 0) <<
- BSON("_id" << 32 << "host" << "node32" << "votes" << 0) <<
- BSON("_id" << 33 << "host" << "node33" << "votes" << 0) <<
- BSON("_id" << 34 << "host" << "node34" << "votes" << 0) <<
- BSON("_id" << 35 << "host" << "node35" << "votes" << 0) <<
- BSON("_id" << 36 << "host" << "node36" << "votes" << 0) <<
- BSON("_id" << 37 << "host" << "node37" << "votes" << 0) <<
- BSON("_id" << 38 << "host" << "node38" << "votes" << 0) <<
- BSON("_id" << 39 << "host" << "node39" << "votes" << 0) <<
- BSON("_id" << 40 << "host" << "node40" << "votes" << 0) <<
- BSON("_id" << 41 << "host" << "node41" << "votes" << 0) <<
- BSON("_id" << 42 << "host" << "node42" << "votes" << 0) <<
- BSON("_id" << 43 << "host" << "node43" << "votes" << 0) <<
- BSON("_id" << 44 << "host" << "node44" << "votes" << 0) <<
- BSON("_id" << 45 << "host" << "node45" << "votes" << 0) <<
- BSON("_id" << 46 << "host" << "node46" << "votes" << 0) <<
- BSON("_id" << 47 << "host" << "node47" << "votes" << 0) <<
- BSON("_id" << 48 << "host" << "node48" << "votes" << 0) <<
- BSON("_id" << 49 << "host" << "node49" << "votes" << 0) <<
- BSON("_id" << 50 << "host" << "node50" << "votes" << 0)))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_NOT_OK(configA.validate());
- ASSERT_NOT_OK(configB.validate());
- ASSERT_TRUE(configA == configB);
- }
+    // Compare the remaining scalar fields directly.
+ return a.getReplSetName() == b.getReplSetName() &&
+ a.getConfigVersion() == b.getConfigVersion() && a.getNumMembers() == b.getNumMembers() &&
+ a.getHeartbeatTimeoutPeriod() == b.getHeartbeatTimeoutPeriod() &&
+ a.isChainingAllowed() == b.isChainingAllowed() &&
+ a.getDefaultWriteConcern().wNumNodes == b.getDefaultWriteConcern().wNumNodes &&
+ a.getDefaultWriteConcern().wMode == b.getDefaultWriteConcern().wMode &&
+ a.getProtocolVersion() == b.getProtocolVersion();
+}
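+
+// Note on the helper above: the member loop checks that every member of 'a'
+// appears somewhere in 'b'; together with the getNumMembers() comparison this
+// matches the member sets, assuming a config never contains duplicate member
+// entries (an assumption of this note, not something the helper checks).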
+
+TEST(ReplicaSetConfig, toBSONRoundTripAbility) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 20))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(BSON(
+ "_id"
+ << "asdf"
+ << "version" << 9 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "arbiterOnly" << true << "votes" << 1)
+ << BSON("_id" << 3 << "host"
+ << "localhost:3828"
+ << "arbiterOnly" << false << "hidden" << true << "buildIndexes"
+ << false << "priority" << 0 << "slaveDelay" << 17 << "votes"
+ << 0 << "tags" << BSON("coast"
+ << "east"
+ << "ssd"
+ << "true"))
+ << BSON("_id" << 2 << "host"
+ << "foo.com:3828"
+ << "priority" << 9 << "votes" << 0 << "tags"
+ << BSON("coast"
+ << "west"
+ << "hdd"
+ << "true"))) << "settings"
+        << BSON("heartbeatTimeoutSecs" << 20 << "chainingAllowed" << true << "getLastErrorDefaults"
+ << BSON("w"
+ << "majority") << "getLastErrorModes"
+ << BSON("disks" << BSON("ssd" << 1 << "hdd" << 1) << "coasts"
+ << BSON("coast" << 2))))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, toBSONRoundTripAbilityInvalid) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(
+ configA.initialize(BSON("_id"
+ << ""
+ << "version" << -3 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "arbiterOnly" << true << "votes" << 0)
+ << BSON("_id" << 0 << "host"
+ << "localhost:3828"
+ << "arbiterOnly" << false
+ << "buildIndexes" << false << "priority"
+ << 2)
+ << BSON("_id" << 2 << "host"
+ << "localhost:3828"
+ << "priority" << 9 << "votes" << 0))
+ << "settings" << BSON("heartbeatTimeoutSecs" << -20))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_NOT_OK(configA.validate());
+ ASSERT_NOT_OK(configB.validate());
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, CheckIfWriteConcernCanBeSatisfied) {
+ ReplicaSetConfig configA;
+ ASSERT_OK(configA.initialize(BSON(
+ "_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA1"))
+ << BSON("_id" << 1 << "host"
+ << "node1"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA2"))
+ << BSON("_id" << 2 << "host"
+ << "node2"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA3"))
+ << BSON("_id" << 3 << "host"
+ << "node3"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU1"))
+ << BSON("_id" << 4 << "host"
+ << "node4"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU2"))
+ << BSON("_id" << 5 << "host"
+ << "node5"
+ << "arbiterOnly" << true))
+ << "settings" << BSON("getLastErrorModes"
+ << BSON("valid" << BSON("dc" << 2 << "rack" << 3)
+ << "invalidNotEnoughValues" << BSON("dc" << 3)
+ << "invalidNotEnoughNodes" << BSON("rack" << 6))))));
+
+ WriteConcernOptions validNumberWC;
+ validNumberWC.wNumNodes = 5;
+ ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validNumberWC));
+
+ WriteConcernOptions invalidNumberWC;
+ invalidNumberWC.wNumNodes = 6;
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(invalidNumberWC));
+
+ WriteConcernOptions majorityWC;
+ majorityWC.wMode = "majority";
+ ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(majorityWC));
+
+ WriteConcernOptions validModeWC;
+ validModeWC.wMode = "valid";
+ ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validModeWC));
+
+ WriteConcernOptions fakeModeWC;
+ fakeModeWC.wMode = "fake";
+ ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(fakeModeWC));
+
+ WriteConcernOptions invalidModeNotEnoughValuesWC;
+ invalidModeNotEnoughValuesWC.wMode = "invalidNotEnoughValues";
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughValuesWC));
+
+ WriteConcernOptions invalidModeNotEnoughNodesWC;
+ invalidModeNotEnoughNodesWC.wMode = "invalidNotEnoughNodes";
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughNodesWC));
+}
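+
+// Descriptive note, derived only from the config above: "dc" takes two distinct
+// values (NA, EU) and "rack" five (rackNA1-3, rackEU1-2), and the arbiter
+// carries no tags. So "valid" ({dc: 2, rack: 3}) is satisfiable, while
+// "invalidNotEnoughValues" ({dc: 3}) and "invalidNotEnoughNodes" ({rack: 6})
+// can never be satisfied, matching the assertions above.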
+
+TEST(ReplicaSetConfig, CheckMaximumNodesOkay) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0")
+ << BSON("_id" << 1 << "host"
+ << "node1") << BSON("_id" << 2 << "host"
+ << "node2")
+ << BSON("_id" << 3 << "host"
+ << "node3") << BSON("_id" << 4 << "host"
+ << "node4")
+ << BSON("_id" << 5 << "host"
+ << "node5") << BSON("_id" << 6 << "host"
+ << "node6")
+ << BSON("_id" << 7 << "host"
+ << "node7"
+ << "votes" << 0) << BSON("_id" << 8 << "host"
+ << "node8"
+ << "votes" << 0)
+ << BSON("_id" << 9 << "host"
+ << "node9"
+ << "votes" << 0) << BSON("_id" << 10 << "host"
+ << "node10"
+ << "votes" << 0)
+ << BSON("_id" << 11 << "host"
+ << "node11"
+ << "votes" << 0) << BSON("_id" << 12 << "host"
+ << "node12"
+ << "votes" << 0)
+ << BSON("_id" << 13 << "host"
+ << "node13"
+ << "votes" << 0) << BSON("_id" << 14 << "host"
+ << "node14"
+ << "votes" << 0)
+ << BSON("_id" << 15 << "host"
+ << "node15"
+ << "votes" << 0) << BSON("_id" << 16 << "host"
+ << "node16"
+ << "votes" << 0)
+ << BSON("_id" << 17 << "host"
+ << "node17"
+ << "votes" << 0) << BSON("_id" << 18 << "host"
+ << "node18"
+ << "votes" << 0)
+ << BSON("_id" << 19 << "host"
+ << "node19"
+ << "votes" << 0) << BSON("_id" << 20 << "host"
+ << "node20"
+ << "votes" << 0)
+ << BSON("_id" << 21 << "host"
+ << "node21"
+ << "votes" << 0) << BSON("_id" << 22 << "host"
+ << "node22"
+ << "votes" << 0)
+ << BSON("_id" << 23 << "host"
+ << "node23"
+ << "votes" << 0) << BSON("_id" << 24 << "host"
+ << "node24"
+ << "votes" << 0)
+ << BSON("_id" << 25 << "host"
+ << "node25"
+ << "votes" << 0) << BSON("_id" << 26 << "host"
+ << "node26"
+ << "votes" << 0)
+ << BSON("_id" << 27 << "host"
+ << "node27"
+ << "votes" << 0) << BSON("_id" << 28 << "host"
+ << "node28"
+ << "votes" << 0)
+ << BSON("_id" << 29 << "host"
+ << "node29"
+ << "votes" << 0) << BSON("_id" << 30 << "host"
+ << "node30"
+ << "votes" << 0)
+ << BSON("_id" << 31 << "host"
+ << "node31"
+ << "votes" << 0) << BSON("_id" << 32 << "host"
+ << "node32"
+ << "votes" << 0)
+ << BSON("_id" << 33 << "host"
+ << "node33"
+ << "votes" << 0) << BSON("_id" << 34 << "host"
+ << "node34"
+ << "votes" << 0)
+ << BSON("_id" << 35 << "host"
+ << "node35"
+ << "votes" << 0) << BSON("_id" << 36 << "host"
+ << "node36"
+ << "votes" << 0)
+ << BSON("_id" << 37 << "host"
+ << "node37"
+ << "votes" << 0) << BSON("_id" << 38 << "host"
+ << "node38"
+ << "votes" << 0)
+ << BSON("_id" << 39 << "host"
+ << "node39"
+ << "votes" << 0) << BSON("_id" << 40 << "host"
+ << "node40"
+ << "votes" << 0)
+ << BSON("_id" << 41 << "host"
+ << "node41"
+ << "votes" << 0) << BSON("_id" << 42 << "host"
+ << "node42"
+ << "votes" << 0)
+ << BSON("_id" << 43 << "host"
+ << "node43"
+ << "votes" << 0) << BSON("_id" << 44 << "host"
+ << "node44"
+ << "votes" << 0)
+ << BSON("_id" << 45 << "host"
+ << "node45"
+ << "votes" << 0) << BSON("_id" << 46 << "host"
+ << "node46"
+ << "votes" << 0)
+ << BSON("_id" << 47 << "host"
+ << "node47"
+ << "votes" << 0) << BSON("_id" << 48 << "host"
+ << "node48"
+ << "votes" << 0)
+ << BSON("_id" << 49 << "host"
+ << "node49"
+ << "votes" << 0)))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_OK(configA.validate());
+ ASSERT_OK(configB.validate());
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, CheckBeyondMaximumNodesFailsValidate) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0")
+ << BSON("_id" << 1 << "host"
+ << "node1") << BSON("_id" << 2 << "host"
+ << "node2")
+ << BSON("_id" << 3 << "host"
+ << "node3") << BSON("_id" << 4 << "host"
+ << "node4")
+ << BSON("_id" << 5 << "host"
+ << "node5") << BSON("_id" << 6 << "host"
+ << "node6")
+ << BSON("_id" << 7 << "host"
+ << "node7"
+ << "votes" << 0) << BSON("_id" << 8 << "host"
+ << "node8"
+ << "votes" << 0)
+ << BSON("_id" << 9 << "host"
+ << "node9"
+ << "votes" << 0) << BSON("_id" << 10 << "host"
+ << "node10"
+ << "votes" << 0)
+ << BSON("_id" << 11 << "host"
+ << "node11"
+ << "votes" << 0) << BSON("_id" << 12 << "host"
+ << "node12"
+ << "votes" << 0)
+ << BSON("_id" << 13 << "host"
+ << "node13"
+ << "votes" << 0) << BSON("_id" << 14 << "host"
+ << "node14"
+ << "votes" << 0)
+ << BSON("_id" << 15 << "host"
+ << "node15"
+ << "votes" << 0) << BSON("_id" << 16 << "host"
+ << "node16"
+ << "votes" << 0)
+ << BSON("_id" << 17 << "host"
+ << "node17"
+ << "votes" << 0) << BSON("_id" << 18 << "host"
+ << "node18"
+ << "votes" << 0)
+ << BSON("_id" << 19 << "host"
+ << "node19"
+ << "votes" << 0) << BSON("_id" << 20 << "host"
+ << "node20"
+ << "votes" << 0)
+ << BSON("_id" << 21 << "host"
+ << "node21"
+ << "votes" << 0) << BSON("_id" << 22 << "host"
+ << "node22"
+ << "votes" << 0)
+ << BSON("_id" << 23 << "host"
+ << "node23"
+ << "votes" << 0) << BSON("_id" << 24 << "host"
+ << "node24"
+ << "votes" << 0)
+ << BSON("_id" << 25 << "host"
+ << "node25"
+ << "votes" << 0) << BSON("_id" << 26 << "host"
+ << "node26"
+ << "votes" << 0)
+ << BSON("_id" << 27 << "host"
+ << "node27"
+ << "votes" << 0) << BSON("_id" << 28 << "host"
+ << "node28"
+ << "votes" << 0)
+ << BSON("_id" << 29 << "host"
+ << "node29"
+ << "votes" << 0) << BSON("_id" << 30 << "host"
+ << "node30"
+ << "votes" << 0)
+ << BSON("_id" << 31 << "host"
+ << "node31"
+ << "votes" << 0) << BSON("_id" << 32 << "host"
+ << "node32"
+ << "votes" << 0)
+ << BSON("_id" << 33 << "host"
+ << "node33"
+ << "votes" << 0) << BSON("_id" << 34 << "host"
+ << "node34"
+ << "votes" << 0)
+ << BSON("_id" << 35 << "host"
+ << "node35"
+ << "votes" << 0) << BSON("_id" << 36 << "host"
+ << "node36"
+ << "votes" << 0)
+ << BSON("_id" << 37 << "host"
+ << "node37"
+ << "votes" << 0) << BSON("_id" << 38 << "host"
+ << "node38"
+ << "votes" << 0)
+ << BSON("_id" << 39 << "host"
+ << "node39"
+ << "votes" << 0) << BSON("_id" << 40 << "host"
+ << "node40"
+ << "votes" << 0)
+ << BSON("_id" << 41 << "host"
+ << "node41"
+ << "votes" << 0) << BSON("_id" << 42 << "host"
+ << "node42"
+ << "votes" << 0)
+ << BSON("_id" << 43 << "host"
+ << "node43"
+ << "votes" << 0) << BSON("_id" << 44 << "host"
+ << "node44"
+ << "votes" << 0)
+ << BSON("_id" << 45 << "host"
+ << "node45"
+ << "votes" << 0) << BSON("_id" << 46 << "host"
+ << "node46"
+ << "votes" << 0)
+ << BSON("_id" << 47 << "host"
+ << "node47"
+ << "votes" << 0) << BSON("_id" << 48 << "host"
+ << "node48"
+ << "votes" << 0)
+ << BSON("_id" << 49 << "host"
+ << "node49"
+ << "votes" << 0) << BSON("_id" << 50 << "host"
+ << "node50"
+ << "votes" << 0)))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_NOT_OK(configA.validate());
+ ASSERT_NOT_OK(configB.validate());
+ ASSERT_TRUE(configA == configB);
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replica_set_tag.cpp b/src/mongo/db/repl/replica_set_tag.cpp
index 1d6fcc0766d..ed6781cfc95 100644
--- a/src/mongo/db/repl/replica_set_tag.cpp
+++ b/src/mongo/db/repl/replica_set_tag.cpp
@@ -41,206 +41,198 @@
namespace mongo {
namespace repl {
- bool ReplicaSetTag::operator==(const ReplicaSetTag& other) const {
- return _keyIndex == other._keyIndex && _valueIndex == other._valueIndex;
- }
-
- bool ReplicaSetTag::operator!=(const ReplicaSetTag& other) const {
- return !(*this == other);
- }
-
- void ReplicaSetTagPattern::addTagCountConstraint(int32_t keyIndex, int32_t minCount) {
- const std::vector<TagCountConstraint>::iterator iter = std::find_if(
- _constraints.begin(),
- _constraints.end(),
- stdx::bind(std::equal_to<int32_t>(),
- keyIndex,
- stdx::bind(&TagCountConstraint::getKeyIndex, stdx::placeholders::_1)));
- if (iter == _constraints.end()) {
- _constraints.push_back(TagCountConstraint(keyIndex, minCount));
- }
- else if (iter->getMinCount() < minCount) {
- *iter = TagCountConstraint(keyIndex, minCount);
- }
- }
-
- ReplicaSetTagPattern::TagCountConstraint::TagCountConstraint(int32_t keyIndex,
- int32_t minCount) :
- _keyIndex(keyIndex), _minCount(minCount) {}
-
- ReplicaSetTagMatch::ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern) {
- for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
- iter != pattern.constraintsEnd();
- ++iter) {
-
- _boundTagValues.push_back(BoundTagValue(*iter));
- }
- }
-
- bool ReplicaSetTagMatch::update(const ReplicaSetTag& tag) {
- const std::vector<BoundTagValue>::iterator iter = std::find_if(
- _boundTagValues.begin(),
- _boundTagValues.end(),
- stdx::bind(std::equal_to<int32_t>(), tag.getKeyIndex(), stdx::bind(
- &BoundTagValue::getKeyIndex, stdx::placeholders::_1)));
- if (iter != _boundTagValues.end()) {
- if (!sequenceContains(iter->boundValues, tag.getValueIndex())) {
- iter->boundValues.push_back(tag.getValueIndex());
- }
+bool ReplicaSetTag::operator==(const ReplicaSetTag& other) const {
+ return _keyIndex == other._keyIndex && _valueIndex == other._valueIndex;
+}
+
+bool ReplicaSetTag::operator!=(const ReplicaSetTag& other) const {
+ return !(*this == other);
+}
+
+void ReplicaSetTagPattern::addTagCountConstraint(int32_t keyIndex, int32_t minCount) {
+ const std::vector<TagCountConstraint>::iterator iter = std::find_if(
+ _constraints.begin(),
+ _constraints.end(),
+ stdx::bind(std::equal_to<int32_t>(),
+ keyIndex,
+ stdx::bind(&TagCountConstraint::getKeyIndex, stdx::placeholders::_1)));
+ if (iter == _constraints.end()) {
+ _constraints.push_back(TagCountConstraint(keyIndex, minCount));
+ } else if (iter->getMinCount() < minCount) {
+ *iter = TagCountConstraint(keyIndex, minCount);
+ }
+}
+
+ReplicaSetTagPattern::TagCountConstraint::TagCountConstraint(int32_t keyIndex, int32_t minCount)
+ : _keyIndex(keyIndex), _minCount(minCount) {}
+
+ReplicaSetTagMatch::ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern) {
+ for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
+ iter != pattern.constraintsEnd();
+ ++iter) {
+ _boundTagValues.push_back(BoundTagValue(*iter));
+ }
+}
+
+bool ReplicaSetTagMatch::update(const ReplicaSetTag& tag) {
+ const std::vector<BoundTagValue>::iterator iter =
+ std::find_if(_boundTagValues.begin(),
+ _boundTagValues.end(),
+ stdx::bind(std::equal_to<int32_t>(),
+ tag.getKeyIndex(),
+ stdx::bind(&BoundTagValue::getKeyIndex, stdx::placeholders::_1)));
+ if (iter != _boundTagValues.end()) {
+ if (!sequenceContains(iter->boundValues, tag.getValueIndex())) {
+ iter->boundValues.push_back(tag.getValueIndex());
}
- return isSatisfied();
}
-
- bool ReplicaSetTagMatch::isSatisfied() const {
- const std::vector<BoundTagValue>::const_iterator iter = std::find_if(
- _boundTagValues.begin(),
- _boundTagValues.end(),
- stdx::bind(std::logical_not<bool>(),
- stdx::bind(&BoundTagValue::isSatisfied, stdx::placeholders::_1)));
- return iter == _boundTagValues.end();
- }
-
- bool ReplicaSetTagMatch::BoundTagValue::isSatisfied() const {
- return constraint.getMinCount() <= int32_t(boundValues.size());
- }
-
- ReplicaSetTag ReplicaSetTagConfig::makeTag(StringData key, StringData value) {
- int32_t keyIndex = _findKeyIndex(key);
- if (size_t(keyIndex) == _tagData.size()) {
- _tagData.push_back(make_pair(key.toString(), ValueVector()));
- }
- ValueVector& values = _tagData[keyIndex].second;
- for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
- if (values[valueIndex] != value)
- continue;
- return ReplicaSetTag(keyIndex, int32_t(valueIndex));
- }
- values.push_back(value.toString());
- return ReplicaSetTag(keyIndex, int32_t(values.size()) - 1);
- }
-
- ReplicaSetTag ReplicaSetTagConfig::findTag(StringData key,
- StringData value) const {
- int32_t keyIndex = _findKeyIndex(key);
- if (size_t(keyIndex) == _tagData.size())
- return ReplicaSetTag(-1, -1);
- const ValueVector& values = _tagData[keyIndex].second;
- for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
- if (values[valueIndex] == value) {
- return ReplicaSetTag(keyIndex, int32_t(valueIndex));
- }
- }
+ return isSatisfied();
+}
+
+bool ReplicaSetTagMatch::isSatisfied() const {
+ const std::vector<BoundTagValue>::const_iterator iter =
+ std::find_if(_boundTagValues.begin(),
+ _boundTagValues.end(),
+ stdx::bind(std::logical_not<bool>(),
+ stdx::bind(&BoundTagValue::isSatisfied, stdx::placeholders::_1)));
+ return iter == _boundTagValues.end();
+}
+
+bool ReplicaSetTagMatch::BoundTagValue::isSatisfied() const {
+ return constraint.getMinCount() <= int32_t(boundValues.size());
+}
+
+ReplicaSetTag ReplicaSetTagConfig::makeTag(StringData key, StringData value) {
+ int32_t keyIndex = _findKeyIndex(key);
+ if (size_t(keyIndex) == _tagData.size()) {
+ _tagData.push_back(make_pair(key.toString(), ValueVector()));
+ }
+ ValueVector& values = _tagData[keyIndex].second;
+ for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
+ if (values[valueIndex] != value)
+ continue;
+ return ReplicaSetTag(keyIndex, int32_t(valueIndex));
+ }
+ values.push_back(value.toString());
+ return ReplicaSetTag(keyIndex, int32_t(values.size()) - 1);
+}
+
+ReplicaSetTag ReplicaSetTagConfig::findTag(StringData key, StringData value) const {
+ int32_t keyIndex = _findKeyIndex(key);
+ if (size_t(keyIndex) == _tagData.size())
return ReplicaSetTag(-1, -1);
- }
-
- ReplicaSetTagPattern ReplicaSetTagConfig::makePattern() const {
- return ReplicaSetTagPattern();
- }
-
- Status ReplicaSetTagConfig::addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
- StringData tagKey,
- int32_t minCount) const {
- int32_t keyIndex = _findKeyIndex(tagKey);
- if (size_t(keyIndex) == _tagData.size()) {
- return Status(ErrorCodes::NoSuchKey,
- str::stream() << "No replica set tag key " << tagKey << " in config");
- }
- pattern->addTagCountConstraint(keyIndex, minCount);
- return Status::OK();
- }
-
- int32_t ReplicaSetTagConfig::_findKeyIndex(StringData key) const {
- size_t i;
- for (i = 0; i < _tagData.size(); ++i) {
- if (_tagData[i].first == key) {
- break;
- }
- }
- return int32_t(i);
- }
-
- std::string ReplicaSetTagConfig::getTagKey(const ReplicaSetTag& tag) const {
- invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
- return _tagData[tag.getKeyIndex()].first;
- }
-
- std::string ReplicaSetTagConfig::getTagValue(const ReplicaSetTag& tag) const {
- invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
- const ValueVector& values = _tagData[tag.getKeyIndex()].second;
- invariant(tag.getValueIndex() >= 0 && size_t(tag.getValueIndex()) < values.size());
- return values[tag.getValueIndex()];
- }
-
- void ReplicaSetTagConfig::put(const ReplicaSetTag& tag, std::ostream& os) const {
- BSONObjBuilder builder;
- _appendTagKey(tag.getKeyIndex(), &builder);
- _appendTagValue(tag.getKeyIndex(), tag.getValueIndex(), &builder);
- os << builder.done();
- }
-
- void ReplicaSetTagConfig::put(const ReplicaSetTagPattern& pattern, std::ostream& os) const {
- BSONObjBuilder builder;
- BSONArrayBuilder allConstraintsBuilder(builder.subarrayStart("constraints"));
- for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
- iter != pattern.constraintsEnd();
- ++iter) {
-
- BSONObjBuilder constraintBuilder(allConstraintsBuilder.subobjStart());
- _appendConstraint(*iter, &constraintBuilder);
+ const ValueVector& values = _tagData[keyIndex].second;
+ for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
+ if (values[valueIndex] == value) {
+ return ReplicaSetTag(keyIndex, int32_t(valueIndex));
}
- allConstraintsBuilder.doneFast();
- os << builder.done();
}
-
- void ReplicaSetTagConfig::put(const ReplicaSetTagMatch& matcher, std::ostream& os) const {
- BSONObjBuilder builder;
- BSONArrayBuilder allBindingsBuilder(builder.subarrayStart("bindings"));
- for (size_t i = 0; i < matcher._boundTagValues.size(); ++i) {
-
- BSONObjBuilder bindingBuilder(allBindingsBuilder.subobjStart());
- _appendConstraint(matcher._boundTagValues[i].constraint, &bindingBuilder);
- BSONArrayBuilder boundValues(bindingBuilder.subarrayStart("boundValues"));
- for (size_t j = 0; j < matcher._boundTagValues[i].boundValues.size(); ++j) {
- BSONObjBuilder bvb(boundValues.subobjStart());
- _appendTagValue(matcher._boundTagValues[i].constraint.getKeyIndex(),
- matcher._boundTagValues[i].boundValues[j],
- &bvb);
- }
+ return ReplicaSetTag(-1, -1);
+}
+
+ReplicaSetTagPattern ReplicaSetTagConfig::makePattern() const {
+ return ReplicaSetTagPattern();
+}
+
+Status ReplicaSetTagConfig::addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
+ StringData tagKey,
+ int32_t minCount) const {
+ int32_t keyIndex = _findKeyIndex(tagKey);
+ if (size_t(keyIndex) == _tagData.size()) {
+ return Status(ErrorCodes::NoSuchKey,
+ str::stream() << "No replica set tag key " << tagKey << " in config");
+ }
+ pattern->addTagCountConstraint(keyIndex, minCount);
+ return Status::OK();
+}
+
+int32_t ReplicaSetTagConfig::_findKeyIndex(StringData key) const {
+ size_t i;
+ for (i = 0; i < _tagData.size(); ++i) {
+ if (_tagData[i].first == key) {
+ break;
}
- allBindingsBuilder.doneFast();
- os << builder.done();
}
-
- void ReplicaSetTagConfig::_appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const {
- if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
- builder->append("tagKey", int(keyIndex));
- }
- else {
- builder->append("tagKey", _tagData[keyIndex].first);
+ return int32_t(i);
+}
+
+std::string ReplicaSetTagConfig::getTagKey(const ReplicaSetTag& tag) const {
+ invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
+ return _tagData[tag.getKeyIndex()].first;
+}
+
+std::string ReplicaSetTagConfig::getTagValue(const ReplicaSetTag& tag) const {
+ invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
+ const ValueVector& values = _tagData[tag.getKeyIndex()].second;
+ invariant(tag.getValueIndex() >= 0 && size_t(tag.getValueIndex()) < values.size());
+ return values[tag.getValueIndex()];
+}
+
+void ReplicaSetTagConfig::put(const ReplicaSetTag& tag, std::ostream& os) const {
+ BSONObjBuilder builder;
+ _appendTagKey(tag.getKeyIndex(), &builder);
+ _appendTagValue(tag.getKeyIndex(), tag.getValueIndex(), &builder);
+ os << builder.done();
+}
+
+void ReplicaSetTagConfig::put(const ReplicaSetTagPattern& pattern, std::ostream& os) const {
+ BSONObjBuilder builder;
+ BSONArrayBuilder allConstraintsBuilder(builder.subarrayStart("constraints"));
+ for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
+ iter != pattern.constraintsEnd();
+ ++iter) {
+ BSONObjBuilder constraintBuilder(allConstraintsBuilder.subobjStart());
+ _appendConstraint(*iter, &constraintBuilder);
+ }
+ allConstraintsBuilder.doneFast();
+ os << builder.done();
+}
+
+void ReplicaSetTagConfig::put(const ReplicaSetTagMatch& matcher, std::ostream& os) const {
+ BSONObjBuilder builder;
+ BSONArrayBuilder allBindingsBuilder(builder.subarrayStart("bindings"));
+ for (size_t i = 0; i < matcher._boundTagValues.size(); ++i) {
+ BSONObjBuilder bindingBuilder(allBindingsBuilder.subobjStart());
+ _appendConstraint(matcher._boundTagValues[i].constraint, &bindingBuilder);
+ BSONArrayBuilder boundValues(bindingBuilder.subarrayStart("boundValues"));
+ for (size_t j = 0; j < matcher._boundTagValues[i].boundValues.size(); ++j) {
+ BSONObjBuilder bvb(boundValues.subobjStart());
+ _appendTagValue(matcher._boundTagValues[i].constraint.getKeyIndex(),
+ matcher._boundTagValues[i].boundValues[j],
+ &bvb);
}
}
-
- void ReplicaSetTagConfig::_appendTagValue(int32_t keyIndex,
- int32_t valueIndex,
- BSONObjBuilder* builder) const {
- if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
- builder->append("tagValue", valueIndex);
- return;
- }
- KeyValueVector::const_reference keyEntry = _tagData[keyIndex];
- if (valueIndex < 0 || size_t(valueIndex) < keyEntry.second.size()) {
- builder->append("tagValue", valueIndex);
- }
- builder->append("tagValue", keyEntry.second[valueIndex]);
- }
-
- void ReplicaSetTagConfig::_appendConstraint(
- const ReplicaSetTagPattern::TagCountConstraint& constraint,
- BSONObjBuilder* builder) const {
-
- _appendTagKey(constraint.getKeyIndex(), builder);
- builder->append("minCount", int(constraint.getMinCount()));
- }
+ allBindingsBuilder.doneFast();
+ os << builder.done();
+}
+
+void ReplicaSetTagConfig::_appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const {
+ if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
+ builder->append("tagKey", int(keyIndex));
+ } else {
+ builder->append("tagKey", _tagData[keyIndex].first);
+ }
+}
+
+void ReplicaSetTagConfig::_appendTagValue(int32_t keyIndex,
+ int32_t valueIndex,
+ BSONObjBuilder* builder) const {
+ if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
+ builder->append("tagValue", valueIndex);
+ return;
+ }
+ KeyValueVector::const_reference keyEntry = _tagData[keyIndex];
+    if (valueIndex < 0 || size_t(valueIndex) >= keyEntry.second.size()) {
+        // Out-of-range value index: fall back to the numeric index rather than
+        // reading past the end of the value vector.
+        builder->append("tagValue", valueIndex);
+        return;
+    }
+    builder->append("tagValue", keyEntry.second[valueIndex]);
+}
+
+void ReplicaSetTagConfig::_appendConstraint(
+ const ReplicaSetTagPattern::TagCountConstraint& constraint, BSONObjBuilder* builder) const {
+ _appendTagKey(constraint.getKeyIndex(), builder);
+ builder->append("minCount", int(constraint.getMinCount()));
+}
} // namespace repl
diff --git a/src/mongo/db/repl/replica_set_tag.h b/src/mongo/db/repl/replica_set_tag.h
index 8c93f62cbc0..4227ec56956 100644
--- a/src/mongo/db/repl/replica_set_tag.h
+++ b/src/mongo/db/repl/replica_set_tag.h
@@ -38,265 +38,282 @@
#include "mongo/platform/cstdint.h"
namespace mongo {
- class BSONObjBuilder;
+class BSONObjBuilder;
namespace repl {
+/**
+ * Representation of a tag on a replica set node.
+ *
+ * Tags are only meaningful when used with a copy of the ReplicaSetTagConfig that
+ * created them.
+ */
+class ReplicaSetTag {
+public:
/**
- * Representation of a tag on a replica set node.
- *
- * Tags are only meaningful when used with a copy of the ReplicaSetTagConfig that
- * created them.
+ * Default constructor, produces an uninitialized tag.
*/
- class ReplicaSetTag {
- public:
- /**
- * Default constructor, produces an uninitialized tag.
- */
- ReplicaSetTag() {}
-
- /**
- * Constructs a tag with the given key and value indexes.
- * Do not call directly; used by ReplicaSetTagConfig.
- */
- ReplicaSetTag(int32_t keyIndex, int32_t valueIndex) :
- _keyIndex(keyIndex),
- _valueIndex(valueIndex) {}
-
- /**
- * Returns true if the tag is not explicitly invalid.
- */
- bool isValid() const { return _keyIndex >= 0; }
-
- /**
- * Gets the key index of the tag.
- */
- int32_t getKeyIndex() const { return _keyIndex; }
-
- /**
- * Gets the value index of the tag.
- */
- int32_t getValueIndex() const { return _valueIndex; }
-
- /**
- * Compares two tags from the *same* ReplicaSetTagConfig for equality.
- */
- bool operator==(const ReplicaSetTag& other) const;
-
- /**
- * Compares two tags from the *same* ReplicaSetTagConfig for inequality.
- */
- bool operator!=(const ReplicaSetTag& other) const;
+ ReplicaSetTag() {}
- private:
- // The index of the key in the associated ReplicaSetTagConfig.
- int32_t _keyIndex;
+ /**
+ * Constructs a tag with the given key and value indexes.
+ * Do not call directly; used by ReplicaSetTagConfig.
+ */
+ ReplicaSetTag(int32_t keyIndex, int32_t valueIndex)
+ : _keyIndex(keyIndex), _valueIndex(valueIndex) {}
- // The index of the value in the entry for the key in the associated ReplicaSetTagConfig.
- int32_t _valueIndex;
- };
+ /**
+ * Returns true if the tag is not explicitly invalid.
+ */
+ bool isValid() const {
+ return _keyIndex >= 0;
+ }
+
+ /**
+ * Gets the key index of the tag.
+ */
+ int32_t getKeyIndex() const {
+ return _keyIndex;
+ }
+
+ /**
+ * Gets the value index of the tag.
+ */
+ int32_t getValueIndex() const {
+ return _valueIndex;
+ }
+
+ /**
+ * Compares two tags from the *same* ReplicaSetTagConfig for equality.
+ */
+ bool operator==(const ReplicaSetTag& other) const;
+
+ /**
+ * Compares two tags from the *same* ReplicaSetTagConfig for inequality.
+ */
+ bool operator!=(const ReplicaSetTag& other) const;
+
+private:
+ // The index of the key in the associated ReplicaSetTagConfig.
+ int32_t _keyIndex;
+
+ // The index of the value in the entry for the key in the associated ReplicaSetTagConfig.
+ int32_t _valueIndex;
+};
+
+/**
+ * Representation of a tag matching pattern, like { "dc": 2, "rack": 3 }, of the form
+ * used for tagged replica set writes.
+ */
+class ReplicaSetTagPattern {
+public:
/**
- * Representation of a tag matching pattern, like { "dc": 2, "rack": 3 }, of the form
- * used for tagged replica set writes.
+ * Representation of a single tag's minimum count constraint in a pattern.
*/
- class ReplicaSetTagPattern {
+ class TagCountConstraint {
public:
- /**
- * Representation of a single tag's minimum count constraint in a pattern.
- */
- class TagCountConstraint {
- public:
- TagCountConstraint() {}
- TagCountConstraint(int32_t keyIndex, int32_t minCount);
- int32_t getKeyIndex() const { return _keyIndex; }
- int32_t getMinCount() const { return _minCount; }
- private:
- int32_t _keyIndex;
- int32_t _minCount;
- };
-
- typedef std::vector<TagCountConstraint>::const_iterator ConstraintIterator;
-
- /**
- * Adds a count constraint for the given key index with the given count.
- *
- * Do not call directly, but use the addTagCountConstraintToPattern method
- * of ReplicaSetTagConfig.
- */
- void addTagCountConstraint(int32_t keyIndex, int32_t minCount);
-
- /**
- * Gets the begin iterator over the constraints in this pattern.
- */
- ConstraintIterator constraintsBegin() const { return _constraints.begin(); }
-
- /**
- * Gets the end iterator over the constraints in this pattern.
- */
- ConstraintIterator constraintsEnd() const { return _constraints.end(); }
+ TagCountConstraint() {}
+ TagCountConstraint(int32_t keyIndex, int32_t minCount);
+ int32_t getKeyIndex() const {
+ return _keyIndex;
+ }
+ int32_t getMinCount() const {
+ return _minCount;
+ }
private:
- std::vector<TagCountConstraint> _constraints;
+ int32_t _keyIndex;
+ int32_t _minCount;
};
+ typedef std::vector<TagCountConstraint>::const_iterator ConstraintIterator;
+
/**
- * State object for progressive detection of ReplicaSetTagPattern constraint satisfaction.
+ * Adds a count constraint for the given key index with the given count.
*
- * This is an abstraction of the replica set write tag satisfaction problem.
+ * Do not call directly, but use the addTagCountConstraintToPattern method
+ * of ReplicaSetTagConfig.
+ */
+ void addTagCountConstraint(int32_t keyIndex, int32_t minCount);
+
+ /**
+ * Gets the begin iterator over the constraints in this pattern.
+ */
+ ConstraintIterator constraintsBegin() const {
+ return _constraints.begin();
+ }
+
+ /**
+ * Gets the end iterator over the constraints in this pattern.
+ */
+ ConstraintIterator constraintsEnd() const {
+ return _constraints.end();
+ }
+
+private:
+ std::vector<TagCountConstraint> _constraints;
+};
+
+/**
+ * State object for progressive detection of ReplicaSetTagPattern constraint satisfaction.
+ *
+ * This is an abstraction of the replica set write tag satisfaction problem.
+ *
+ * Replica set tag matching is an event-driven constraint satisfaction process. This type
+ * represents the state of that process. It is initialized from a pattern object, then
+ * progressively updated with tags. After processing a sequence of tags sufficient to satisfy
+ * the pattern, isSatisfied() becomes true.
+ */
+class ReplicaSetTagMatch {
+ friend class ReplicaSetTagConfig;
+
+public:
+ /**
+ * Constructs an empty match object, equivalent to one that matches an
+ * empty pattern.
+ */
+ ReplicaSetTagMatch() {}
+
+ /**
+ * Constructs a clean match object for the given pattern.
+ */
+ explicit ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern);
+
+ /**
+ * Updates the match state based on the data for the given tag.
*
- * Replica set tag matching is an event-driven constraint satisfaction process. This type
- * represents the state of that process. It is initialized from a pattern object, then
- * progressively updated with tags. After processing a sequence of tags sufficient to satisfy
- * the pattern, isSatisfied() becomes true.
+ * Returns true if, after this update, isSatisfied() is true.
*/
- class ReplicaSetTagMatch {
- friend class ReplicaSetTagConfig;
- public:
- /**
- * Constructs an empty match object, equivalent to one that matches an
- * empty pattern.
- */
- ReplicaSetTagMatch() {}
-
- /**
- * Constructs a clean match object for the given pattern.
- */
- explicit ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern);
-
- /**
- * Updates the match state based on the data for the given tag.
- *
- * Returns true if, after this update, isSatisfied() is true.
- */
- bool update(const ReplicaSetTag& tag);
-
- /**
- * Returns true if the match has received a sequence of tags sufficient to satisfy the
- * pattern.
- */
+ bool update(const ReplicaSetTag& tag);
+
+ /**
+ * Returns true if the match has received a sequence of tags sufficient to satisfy the
+ * pattern.
+ */
+ bool isSatisfied() const;
+
+private:
+ /**
+ * Representation of the state related to a single tag key in the match pattern.
+ * Consists of a constraint (key index and min count for satisfaction) and a list
+ * of already observed values.
+ *
+ * A BoundTagValue is satisfied when the size of boundValues is at least
+ * constraint.getMinCount().
+ */
+ struct BoundTagValue {
+ BoundTagValue() {}
+ explicit BoundTagValue(const ReplicaSetTagPattern::TagCountConstraint& aConstraint)
+ : constraint(aConstraint) {}
+
+ int32_t getKeyIndex() const {
+ return constraint.getKeyIndex();
+ }
bool isSatisfied() const;
- private:
- /**
- * Representation of the state related to a single tag key in the match pattern.
- * Consists of a constraint (key index and min count for satisfaction) and a list
- * of already observed values.
- *
- * A BoundTagValue is satisfied when the size of boundValues is at least
- * constraint.getMinCount().
- */
- struct BoundTagValue {
- BoundTagValue() {}
- explicit BoundTagValue(const ReplicaSetTagPattern::TagCountConstraint& aConstraint) :
- constraint(aConstraint) {}
-
- int32_t getKeyIndex() const { return constraint.getKeyIndex(); }
- bool isSatisfied() const;
-
- ReplicaSetTagPattern::TagCountConstraint constraint;
- std::vector<int32_t> boundValues;
- };
- std::vector<BoundTagValue> _boundTagValues;
+ ReplicaSetTagPattern::TagCountConstraint constraint;
+ std::vector<int32_t> boundValues;
};
+ std::vector<BoundTagValue> _boundTagValues;
+};
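+
+// Typical use (a minimal sketch; 'pattern' and 'ackedTags' stand for a pattern
+// built through ReplicaSetTagConfig and the tags of nodes that have acknowledged
+// a write, and are assumptions of this sketch, not part of the API above):
+//
+//     ReplicaSetTagMatch match(pattern);
+//     for (size_t i = 0; i < ackedTags.size(); ++i) {
+//         if (match.update(ackedTags[i])) {
+//             break;  // the pattern is satisfied
+//         }
+//     }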
+
+/**
+ * Representation of the tag configuration information for a replica set.
+ *
+ * This type, like all in this file, is copyable. Tags and patterns from one instance of this
+ * class are compatible with other instances of this class that are *copies* of the original
+ * instance.
+ */
+class ReplicaSetTagConfig {
+public:
+ /**
+ * Finds or allocates a tag with the given "key" and "value" strings.
+ */
+ ReplicaSetTag makeTag(StringData key, StringData value);
+
+ /**
+ * Finds a tag with the given key and value strings, or returns a tag whose isValid() method
+ * returns false if the configuration has never allocated such a tag via makeTag().
+ */
+ ReplicaSetTag findTag(StringData key, StringData value) const;
+
+ /**
+ * Makes a new, empty pattern object.
+ */
+ ReplicaSetTagPattern makePattern() const;
/**
- * Representation of the tag configuration information for a replica set.
+ * Adds a constraint clause to the given "pattern". This particular
+ * constraint requires that at least "minCount" distinct tags with the given "tagKey"
+ * be observed. Two tags "t1" and "t2" are distinct if "t1 != t2", so this constraint
+ * means that we must see at least "minCount" tags with the specified "tagKey".
+ */
+ Status addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
+ StringData tagKey,
+ int32_t minCount) const;
+
+ /**
+ * Gets the string key for the given "tag".
*
- * This type, like all in this file, is copyable. Tags and patterns from one instance of this
- * class are compatible with other instances of this class that are *copies* of the original
- * instance.
+ * Behavior is undefined if "tag" is not valid or was not from this
+ * config or one of its copies.
*/
- class ReplicaSetTagConfig {
- public:
- /**
- * Finds or allocates a tag with the given "key" and "value" strings.
- */
- ReplicaSetTag makeTag(StringData key, StringData value);
-
- /**
- * Finds a tag with the given key and value strings, or returns a tag whose isValid() method
- * returns false if the configuration has never allocated such a tag via makeTag().
- */
- ReplicaSetTag findTag(StringData key, StringData value) const;
-
- /**
- * Makes a new, empty pattern object.
- */
- ReplicaSetTagPattern makePattern() const;
-
- /**
- * Adds a constraint clause to the given "pattern". This particular
- * constraint requires that at least "minCount" distinct tags with the given "tagKey"
- * be observed. Two tags "t1" and "t2" are distinct if "t1 != t2", so this constraint
- * means that we must see at least "minCount" tags with the specified "tagKey".
- */
- Status addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
- StringData tagKey,
- int32_t minCount) const;
-
- /**
- * Gets the string key for the given "tag".
- *
- * Behavior is undefined if "tag" is not valid or was not from this
- * config or one of its copies.
- */
- std::string getTagKey(const ReplicaSetTag& tag) const;
-
- /**
- * Gets the string value for the given "tag".
- *
- * Like getTagKey, above, behavior is undefined if "tag" is not valid or was not from this
- * config or one of its copies.
- */
- std::string getTagValue(const ReplicaSetTag& tag) const;
-
- /**
- * Helper that writes a string debugging representation of "tag" to "os".
- */
- void put(const ReplicaSetTag& tag, std::ostream& os) const;
-
- /**
- * Helper that writes a string debugging representation of "pattern" to "os".
- */
- void put(const ReplicaSetTagPattern& pattern, std::ostream& os) const;
-
- /**
- * Helper that writes a string debugging representation of "matcher" to "os".
- */
- void put(const ReplicaSetTagMatch& matcher, std::ostream& os) const;
+ std::string getTagKey(const ReplicaSetTag& tag) const;
- private:
- typedef std::vector<std::string> ValueVector;
- typedef std::vector<std::pair<std::string, ValueVector> > KeyValueVector;
-
- /**
- * Returns the index corresponding to "key", or _tagData.size() if there is no
- * such index.
- */
- int32_t _findKeyIndex(StringData key) const;
-
- /**
- * Helper that writes a "tagKey" field for the given "keyIndex" to "builder".
- */
- void _appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const;
-
- /**
- * Helper that writes a "tagValue" field for the given "keyIndex" and "valueIndex"
- * to "builder".
- */
- void _appendTagValue(int32_t keyIndex, int32_t valueIndex, BSONObjBuilder* builder) const;
-
- /**
- * Helper that writes a constraint object to "builder".
- */
- void _appendConstraint(const ReplicaSetTagPattern::TagCountConstraint& constraint,
- BSONObjBuilder* builder) const;
-
- // Data about known tags. Conceptually, it maps between keys and their indexes,
- // keys and their associated values, and (key, value) pairs and the values' indexes.
- KeyValueVector _tagData;
- };
+ /**
+ * Gets the string value for the given "tag".
+ *
+ * Like getTagKey, above, behavior is undefined if "tag" is not valid or was not from this
+ * config or one of its copies.
+ */
+ std::string getTagValue(const ReplicaSetTag& tag) const;
+
+ /**
+ * Helper that writes a string debugging representation of "tag" to "os".
+ */
+ void put(const ReplicaSetTag& tag, std::ostream& os) const;
+
+ /**
+ * Helper that writes a string debugging representation of "pattern" to "os".
+ */
+ void put(const ReplicaSetTagPattern& pattern, std::ostream& os) const;
+
+ /**
+ * Helper that writes a string debugging representation of "matcher" to "os".
+ */
+ void put(const ReplicaSetTagMatch& matcher, std::ostream& os) const;
+
+private:
+ typedef std::vector<std::string> ValueVector;
+ typedef std::vector<std::pair<std::string, ValueVector>> KeyValueVector;
+
+ /**
+ * Returns the index corresponding to "key", or _tagData.size() if there is no
+ * such index.
+ */
+ int32_t _findKeyIndex(StringData key) const;
+
+ /**
+ * Helper that writes a "tagKey" field for the given "keyIndex" to "builder".
+ */
+ void _appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const;
+
+ /**
+ * Helper that writes a "tagValue" field for the given "keyIndex" and "valueIndex"
+ * to "builder".
+ */
+ void _appendTagValue(int32_t keyIndex, int32_t valueIndex, BSONObjBuilder* builder) const;
+
+ /**
+ * Helper that writes a constraint object to "builder".
+ */
+ void _appendConstraint(const ReplicaSetTagPattern::TagCountConstraint& constraint,
+ BSONObjBuilder* builder) const;
+
+ // Data about known tags. Conceptually, it maps between keys and their indexes,
+ // keys and their associated values, and (key, value) pairs and the values' indexes.
+ KeyValueVector _tagData;
+};
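+
+// Minimal sketch of the interning behavior (uses only the API declared above):
+// makeTag() allocates key/value indexes on first use, while findTag() only
+// looks them up.
+//
+//     ReplicaSetTagConfig config;
+//     ReplicaSetTag ny = config.makeTag("dc", "ny");     // allocates "dc"/"ny"
+//     invariant(ny == config.findTag("dc", "ny"));       // same indexes, equal
+//     invariant(!config.findTag("dc", "sf").isValid());  // never allocated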
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replica_set_tag_test.cpp b/src/mongo/db/repl/replica_set_tag_test.cpp
index 1a2bdf9e120..1d70ee39bbe 100644
--- a/src/mongo/db/repl/replica_set_tag_test.cpp
+++ b/src/mongo/db/repl/replica_set_tag_test.cpp
@@ -33,129 +33,129 @@ namespace mongo {
namespace repl {
namespace {
- template <typename T>
- class StreamPutter {
- public:
- StreamPutter(const ReplicaSetTagConfig& tagConfig, const T& item) :
- _tagConfig(&tagConfig), _item(&item) {}
- void put(std::ostream& os) const {
- _tagConfig->put(*_item, os);
- }
-
- private:
- const ReplicaSetTagConfig* _tagConfig;
- const T* _item;
- };
-
- template <typename T>
- StreamPutter<T> streamput(const ReplicaSetTagConfig& tagConfig, const T& item) {
- return StreamPutter<T>(tagConfig, item);
+template <typename T>
+class StreamPutter {
+public:
+ StreamPutter(const ReplicaSetTagConfig& tagConfig, const T& item)
+ : _tagConfig(&tagConfig), _item(&item) {}
+ void put(std::ostream& os) const {
+ _tagConfig->put(*_item, os);
}
- template <typename T>
- std::ostream& operator<<(std::ostream& os, const StreamPutter<T>& putter) {
- putter.put(os);
- return os;
+private:
+ const ReplicaSetTagConfig* _tagConfig;
+ const T* _item;
+};
+
+template <typename T>
+StreamPutter<T> streamput(const ReplicaSetTagConfig& tagConfig, const T& item) {
+ return StreamPutter<T>(tagConfig, item);
+}
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const StreamPutter<T>& putter) {
+ putter.put(os);
+ return os;
+}
+
+TEST(ReplicaSetTagConfigTest, MakeAndFindTags) {
+ ReplicaSetTagConfig tagConfig;
+ ReplicaSetTag dcNY = tagConfig.makeTag("dc", "ny");
+ ReplicaSetTag dcRI = tagConfig.makeTag("dc", "ri");
+ ReplicaSetTag rack1 = tagConfig.makeTag("rack", "1");
+ ReplicaSetTag rack2 = tagConfig.makeTag("rack", "2");
+ ASSERT_TRUE(dcNY.isValid());
+ ASSERT_EQUALS("dc", tagConfig.getTagKey(dcNY));
+ ASSERT_EQUALS("ny", tagConfig.getTagValue(dcNY));
+ ASSERT_EQUALS("dc", tagConfig.getTagKey(dcRI));
+ ASSERT_EQUALS("ri", tagConfig.getTagValue(dcRI));
+ ASSERT_EQUALS("rack", tagConfig.getTagKey(rack1));
+ ASSERT_EQUALS("1", tagConfig.getTagValue(rack1));
+ ASSERT_EQUALS("rack", tagConfig.getTagKey(rack2));
+ ASSERT_EQUALS("2", tagConfig.getTagValue(rack2));
+
+ ASSERT_EQUALS(rack1.getKeyIndex(), rack2.getKeyIndex());
+ ASSERT_NOT_EQUALS(rack1.getKeyIndex(), dcRI.getKeyIndex());
+ ASSERT_NOT_EQUALS(rack1.getValueIndex(), rack2.getValueIndex());
+
+ ASSERT_TRUE(rack1 == tagConfig.makeTag("rack", "1"));
+ ASSERT_TRUE(rack1 == tagConfig.findTag("rack", "1"));
+ ASSERT_FALSE(tagConfig.findTag("rack", "7").isValid());
+ ASSERT_FALSE(tagConfig.findTag("country", "us").isValid());
+}
+
+class ReplicaSetTagMatchTest : public unittest::Test {
+public:
+ void setUp() {
+ dcNY = tagConfig.makeTag("dc", "ny");
+ dcVA = tagConfig.makeTag("dc", "va");
+ dcRI = tagConfig.makeTag("dc", "ri");
+ rack1 = tagConfig.makeTag("rack", "1");
+ rack2 = tagConfig.makeTag("rack", "2");
+ rack3 = tagConfig.makeTag("rack", "3");
+ rack4 = tagConfig.makeTag("rack", "4");
}
- TEST(ReplicaSetTagConfigTest, MakeAndFindTags) {
- ReplicaSetTagConfig tagConfig;
- ReplicaSetTag dcNY = tagConfig.makeTag("dc", "ny");
- ReplicaSetTag dcRI = tagConfig.makeTag("dc", "ri");
- ReplicaSetTag rack1 = tagConfig.makeTag("rack", "1");
- ReplicaSetTag rack2 = tagConfig.makeTag("rack", "2");
- ASSERT_TRUE(dcNY.isValid());
- ASSERT_EQUALS("dc", tagConfig.getTagKey(dcNY));
- ASSERT_EQUALS("ny", tagConfig.getTagValue(dcNY));
- ASSERT_EQUALS("dc", tagConfig.getTagKey(dcRI));
- ASSERT_EQUALS("ri", tagConfig.getTagValue(dcRI));
- ASSERT_EQUALS("rack", tagConfig.getTagKey(rack1));
- ASSERT_EQUALS("1", tagConfig.getTagValue(rack1));
- ASSERT_EQUALS("rack", tagConfig.getTagKey(rack2));
- ASSERT_EQUALS("2", tagConfig.getTagValue(rack2));
-
- ASSERT_EQUALS(rack1.getKeyIndex(), rack2.getKeyIndex());
- ASSERT_NOT_EQUALS(rack1.getKeyIndex(), dcRI.getKeyIndex());
- ASSERT_NOT_EQUALS(rack1.getValueIndex(), rack2.getValueIndex());
-
- ASSERT_TRUE(rack1 == tagConfig.makeTag("rack", "1"));
- ASSERT_TRUE(rack1 == tagConfig.findTag("rack", "1"));
- ASSERT_FALSE(tagConfig.findTag("rack", "7").isValid());
- ASSERT_FALSE(tagConfig.findTag("country", "us").isValid());
- }
-
- class ReplicaSetTagMatchTest : public unittest::Test {
- public:
- void setUp() {
- dcNY = tagConfig.makeTag("dc", "ny");
- dcVA = tagConfig.makeTag("dc", "va");
- dcRI = tagConfig.makeTag("dc", "ri");
- rack1 = tagConfig.makeTag("rack", "1");
- rack2 = tagConfig.makeTag("rack", "2");
- rack3 = tagConfig.makeTag("rack", "3");
- rack4 = tagConfig.makeTag("rack", "4");
- }
-
- protected:
- ReplicaSetTagConfig tagConfig;
- ReplicaSetTag dcNY;
- ReplicaSetTag dcVA;
- ReplicaSetTag dcRI;
- ReplicaSetTag rack1;
- ReplicaSetTag rack2;
- ReplicaSetTag rack3;
- ReplicaSetTag rack4;
- };
-
- TEST_F(ReplicaSetTagMatchTest, EmptyPatternAlwaysSatisfied) {
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 0));
- ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
- }
-
- TEST_F(ReplicaSetTagMatchTest, SingleTagConstraint) {
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 2));
- ReplicaSetTagMatch matcher(pattern);
- ASSERT_FALSE(matcher.isSatisfied());
- ASSERT_FALSE(matcher.update(dcVA)); // One DC alone won't satisfy "dc: 2".
- ASSERT_FALSE(matcher.update(rack2)); // Adding one rack won't satisfy.
- ASSERT_FALSE(matcher.update(rack3)); // Two racks won't satisfy "dc: 2".
- ASSERT_FALSE(matcher.update(dcVA)); // Same tag twice won't satisfy.
- ASSERT_TRUE(matcher.update(dcRI)); // Two DCs satisfies.
- ASSERT_TRUE(matcher.isSatisfied());
- ASSERT_TRUE(matcher.update(dcNY)); // Three DCs satisfies.
- ASSERT_TRUE(matcher.update(rack1)); // Once matcher is satisfied, it stays satisfied.
- }
-
- TEST_F(ReplicaSetTagMatchTest, MaskingConstraints) {
- // The highest count constraint for a tag key is the only one that matters.
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 3));
- ReplicaSetTagMatch matcher(pattern);
- ASSERT_FALSE(matcher.isSatisfied());
- ASSERT_FALSE(matcher.update(rack2));
- ASSERT_FALSE(matcher.update(rack3));
- ASSERT_FALSE(matcher.update(rack2));
- ASSERT_TRUE(matcher.update(rack1));
- }
-
- TEST_F(ReplicaSetTagMatchTest, MultipleConstraints) {
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 3));
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
- ReplicaSetTagMatch matcher(pattern);
- ASSERT_FALSE(matcher.isSatisfied());
- ASSERT_FALSE(matcher.update(dcVA));
- ASSERT_FALSE(matcher.update(rack2));
- ASSERT_FALSE(matcher.update(rack3));
- ASSERT_FALSE(matcher.update(dcVA));
- ASSERT_FALSE(matcher.update(dcRI));
- ASSERT_TRUE(matcher.update(dcNY));
- ASSERT_TRUE(matcher.isSatisfied());
- }
+protected:
+ ReplicaSetTagConfig tagConfig;
+ ReplicaSetTag dcNY;
+ ReplicaSetTag dcVA;
+ ReplicaSetTag dcRI;
+ ReplicaSetTag rack1;
+ ReplicaSetTag rack2;
+ ReplicaSetTag rack3;
+ ReplicaSetTag rack4;
+};
+
+TEST_F(ReplicaSetTagMatchTest, EmptyPatternAlwaysSatisfied) {
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 0));
+ ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
+}
+
+TEST_F(ReplicaSetTagMatchTest, SingleTagConstraint) {
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 2));
+ ReplicaSetTagMatch matcher(pattern);
+ ASSERT_FALSE(matcher.isSatisfied());
+ ASSERT_FALSE(matcher.update(dcVA)); // One DC alone won't satisfy "dc: 2".
+ ASSERT_FALSE(matcher.update(rack2)); // Adding one rack won't satisfy.
+ ASSERT_FALSE(matcher.update(rack3)); // Two racks won't satisfy "dc: 2".
+ ASSERT_FALSE(matcher.update(dcVA)); // Same tag twice won't satisfy.
+    ASSERT_TRUE(matcher.update(dcRI));   // Two distinct DCs satisfy "dc: 2".
+    ASSERT_TRUE(matcher.isSatisfied());
+    ASSERT_TRUE(matcher.update(dcNY));   // Three DCs satisfy, too.
+ ASSERT_TRUE(matcher.update(rack1)); // Once matcher is satisfied, it stays satisfied.
+}
+
+TEST_F(ReplicaSetTagMatchTest, MaskingConstraints) {
+ // The highest count constraint for a tag key is the only one that matters.
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 3));
+ ReplicaSetTagMatch matcher(pattern);
+ ASSERT_FALSE(matcher.isSatisfied());
+ ASSERT_FALSE(matcher.update(rack2));
+ ASSERT_FALSE(matcher.update(rack3));
+ ASSERT_FALSE(matcher.update(rack2));
+ ASSERT_TRUE(matcher.update(rack1));
+}
+
+TEST_F(ReplicaSetTagMatchTest, MultipleConstraints) {
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 3));
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
+ ReplicaSetTagMatch matcher(pattern);
+ ASSERT_FALSE(matcher.isSatisfied());
+ ASSERT_FALSE(matcher.update(dcVA));
+ ASSERT_FALSE(matcher.update(rack2));
+ ASSERT_FALSE(matcher.update(rack3));
+ ASSERT_FALSE(matcher.update(dcVA));
+ ASSERT_FALSE(matcher.update(dcRI));
+ ASSERT_TRUE(matcher.update(dcNY));
+ ASSERT_TRUE(matcher.isSatisfied());
+}
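+
+// A minimal end-to-end sketch using only the fixtures and APIs already
+// exercised in this file: distinct values are what count toward a constraint,
+// so a repeated tag never advances the match.
+TEST_F(ReplicaSetTagMatchTest, DuplicateTagsDoNotAdvanceMatch) {
+    ReplicaSetTagPattern pattern = tagConfig.makePattern();
+    ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 2));
+    ReplicaSetTagMatch matcher(pattern);
+    ASSERT_FALSE(matcher.update(dcNY));  // First distinct "dc" value.
+    ASSERT_FALSE(matcher.update(dcNY));  // A duplicate value does not count twice.
+    ASSERT_TRUE(matcher.update(dcVA));   // Second distinct "dc" value satisfies.
+    ASSERT_TRUE(matcher.isSatisfied());
+}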
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator.cpp b/src/mongo/db/repl/replication_coordinator.cpp
index 8d1ab039fea..f7c3c200245 100644
--- a/src/mongo/db/repl/replication_coordinator.cpp
+++ b/src/mongo/db/repl/replication_coordinator.cpp
@@ -36,29 +36,29 @@ namespace repl {
namespace {
- const auto getReplicationCoordinator =
- ServiceContext::declareDecoration<std::unique_ptr<ReplicationCoordinator>>();
+const auto getReplicationCoordinator =
+ ServiceContext::declareDecoration<std::unique_ptr<ReplicationCoordinator>>();
}
- ReplicationCoordinator::ReplicationCoordinator() {}
- ReplicationCoordinator::~ReplicationCoordinator() {}
+ReplicationCoordinator::ReplicationCoordinator() {}
+ReplicationCoordinator::~ReplicationCoordinator() {}
- // TODO(dannenberg) remove when master slave is removed
- const char *replAllDead = 0;
+// TODO(dannenberg) remove when master slave is removed
+const char* replAllDead = 0;
- ReplicationCoordinator* ReplicationCoordinator::get(ServiceContext* service) {
- return getReplicationCoordinator(service).get();
- }
+ReplicationCoordinator* ReplicationCoordinator::get(ServiceContext* service) {
+ return getReplicationCoordinator(service).get();
+}
- ReplicationCoordinator* ReplicationCoordinator::get(ServiceContext& service) {
- return getReplicationCoordinator(service).get();
- }
+ReplicationCoordinator* ReplicationCoordinator::get(ServiceContext& service) {
+ return getReplicationCoordinator(service).get();
+}
- void ReplicationCoordinator::set(ServiceContext* service,
- std::unique_ptr<ReplicationCoordinator> replCoord) {
- auto& coordinator = getReplicationCoordinator(service);
- coordinator = std::move(replCoord);
- }
+void ReplicationCoordinator::set(ServiceContext* service,
+ std::unique_ptr<ReplicationCoordinator> replCoord) {
+ auto& coordinator = getReplicationCoordinator(service);
+ coordinator = std::move(replCoord);
+}
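+
+// Typical wiring at startup (a sketch; the concrete coordinator type and the
+// startup call shown here are assumptions for illustration, not part of this
+// file):
+//
+//     auto service = getGlobalServiceContext();
+//     ReplicationCoordinator::set(service,
+//                                 stdx::make_unique<SomeReplCoordinatorImpl>());
+//     ReplicationCoordinator::get(service)->startReplication(txn);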
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 44d78da3374..9bc7179bb90 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -40,617 +40,605 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class IndexDescriptor;
- class NamespaceString;
- class OperationContext;
- class ServiceContext;
- class Timestamp;
- struct WriteConcernOptions;
+class BSONObj;
+class BSONObjBuilder;
+class IndexDescriptor;
+class NamespaceString;
+class OperationContext;
+class ServiceContext;
+class Timestamp;
+struct WriteConcernOptions;
namespace repl {
- class BackgroundSync;
- class HandshakeArgs;
- class IsMasterResponse;
- class OplogReader;
- class OpTime;
- class ReadAfterOpTimeArgs;
- class ReadAfterOpTimeResponse;
- class ReplSetDeclareElectionWinnerArgs;
- class ReplSetDeclareElectionWinnerResponse;
- class ReplSetHeartbeatArgs;
- class ReplSetHeartbeatArgsV1;
- class ReplSetHeartbeatResponse;
- class ReplSetHtmlSummary;
- class ReplSetRequestVotesArgs;
- class ReplSetRequestVotesResponse;
- class ReplicaSetConfig;
- class UpdatePositionArgs;
-
- /**
- * Global variable that contains a std::string telling why master/slave halted
+class BackgroundSync;
+class HandshakeArgs;
+class IsMasterResponse;
+class OplogReader;
+class OpTime;
+class ReadAfterOpTimeArgs;
+class ReadAfterOpTimeResponse;
+class ReplSetDeclareElectionWinnerArgs;
+class ReplSetDeclareElectionWinnerResponse;
+class ReplSetHeartbeatArgs;
+class ReplSetHeartbeatArgsV1;
+class ReplSetHeartbeatResponse;
+class ReplSetHtmlSummary;
+class ReplSetRequestVotesArgs;
+class ReplSetRequestVotesResponse;
+class ReplicaSetConfig;
+class UpdatePositionArgs;
+
+/**
+ * Global variable containing a string that tells why master/slave replication halted.
+ *
+ * "Dead" means something really bad happened, like replication falling completely out of
+ * sync. When non-null, we are dead and the string is informational.
+ *
+ * TODO(dannenberg) remove when master slave goes
+ */
+extern const char* replAllDead;
+
+/**
+ * The ReplicationCoordinator is responsible for coordinating the interaction of replication
+ * with the rest of the system. The public methods on ReplicationCoordinator are the public
+ * API that the replication subsystem presents to the rest of the codebase.
+ */
+class ReplicationCoordinator : public ReplicationProgressManager {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinator);
+
+public:
+ static ReplicationCoordinator* get(ServiceContext* service);
+ static ReplicationCoordinator* get(ServiceContext& service);
+ static void set(ServiceContext* service,
+ std::unique_ptr<ReplicationCoordinator> replCoordinator);
+
+ struct StatusAndDuration {
+ public:
+ Status status;
+ Milliseconds duration;
+
+ StatusAndDuration(const Status& stat, Milliseconds ms) : status(stat), duration(ms) {}
+ };
+
+ virtual ~ReplicationCoordinator();
+
+ /**
+ * Does any initial bookkeeping needed to start replication, and instructs the other
+ * components of the replication system to start up whatever threads and do whatever
+ * initialization they need.
+ */
+ virtual void startReplication(OperationContext* txn) = 0;
+
+ /**
+ * Does whatever cleanup is required to stop replication, including instructing the other
+ * components of the replication system to shut down and stop any threads they are using,
+ * blocking until all replication-related shutdown tasks are complete.
+ */
+ virtual void shutdown() = 0;
+
+ /**
+ * Returns a reference to the parsed command line arguments that are related to replication.
+ */
+ virtual const ReplSettings& getSettings() const = 0;
+
+ enum Mode { modeNone = 0, modeReplSet, modeMasterSlave };
+
+ /**
+ * Returns a value indicating whether this node was configured at start-up to run
+ * standalone, as part of a master-slave pair, or as a member of a replica set.
+ */
+ virtual Mode getReplicationMode() const = 0;
+
+ /**
+ * Returns true if this node is configured to be a member of a replica set or master/slave
+ * setup.
+ */
+ virtual bool isReplEnabled() const = 0;
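+
+ // A minimal sketch of branching on the mode reported above (hypothetical call
+ // site; 'replCoord' is assumed to be a valid ReplicationCoordinator*):
+ //
+ //     switch (replCoord->getReplicationMode()) {
+ //         case ReplicationCoordinator::modeReplSet: /* replica set member */ break;
+ //         case ReplicationCoordinator::modeMasterSlave: /* master/slave pair */ break;
+ //         case ReplicationCoordinator::modeNone: /* standalone */ break;
+ //     }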
+
+ /**
+ * Returns the current replica set state of this node (PRIMARY, SECONDARY, STARTUP, etc).
+ * It is invalid to call this unless getReplicationMode() == modeReplSet.
+ */
+ virtual MemberState getMemberState() const = 0;
+
+ /**
+ * Returns true if this node is in state PRIMARY or SECONDARY.
*
- * "dead" means something really bad happened like replication falling completely out of sync.
- * when non-null, we are dead and the string is informational
+ * It is invalid to call this unless getReplicationMode() == modeReplSet.
*
- * TODO(dannenberg) remove when master slave goes
+ * This method may be optimized to reduce synchronization overhead compared to
+ * reading the current member state with getMemberState().
*/
- extern const char *replAllDead;
-
+ virtual bool isInPrimaryOrSecondaryState() const = 0;
+
+
/**
- * The ReplicationCoordinator is responsible for coordinating the interaction of replication
- * with the rest of the system. The public methods on ReplicationCoordinator are the public
- * API that the replication subsystem presents to the rest of the codebase.
+ * Returns the slave delay that this node is configured with.
+ *
+ * Raises a DBException if this node is not a member of the current replica set
+ * configuration.
*/
- class ReplicationCoordinator : public ReplicationProgressManager {
- MONGO_DISALLOW_COPYING(ReplicationCoordinator);
+ virtual Seconds getSlaveDelaySecs() const = 0;
- public:
- static ReplicationCoordinator* get(ServiceContext* service);
- static ReplicationCoordinator* get(ServiceContext& service);
- static void set(ServiceContext* service,
- std::unique_ptr<ReplicationCoordinator> replCoordinator);
-
- struct StatusAndDuration {
- public:
- Status status;
- Milliseconds duration;
-
- StatusAndDuration(const Status& stat, Milliseconds ms) : status(stat),
- duration(ms) {}
- };
-
- virtual ~ReplicationCoordinator();
-
- /**
- * Does any initial bookkeeping needed to start replication, and instructs the other
- * components of the replication system to start up whatever threads and do whatever
- * initialization they need.
- */
- virtual void startReplication(OperationContext* txn) = 0;
-
- /**
- * Does whatever cleanup is required to stop replication, including instructing the other
- * components of the replication system to shut down and stop any threads they are using,
- * blocking until all replication-related shutdown tasks are complete.
- */
- virtual void shutdown() = 0;
-
- /**
- * Returns a reference to the parsed command line arguments that are related to replication.
- */
- virtual const ReplSettings& getSettings() const = 0;
-
- enum Mode {
- modeNone = 0,
- modeReplSet,
- modeMasterSlave
- };
-
- /**
- * Returns a value indicating whether this node was configured at start-up to run
- * standalone, as part of a master-slave pair, or as a member of a replica set.
- */
- virtual Mode getReplicationMode() const = 0;
-
- /**
- * Returns true if this node is configured to be a member of a replica set or master/slave
- * setup.
- */
- virtual bool isReplEnabled() const = 0;
-
- /**
- * Returns the current replica set state of this node (PRIMARY, SECONDARY, STARTUP, etc).
- * It is invalid to call this unless getReplicationMode() == modeReplSet.
- */
- virtual MemberState getMemberState() const = 0;
-
- /**
- * Returns true if this node is in state PRIMARY or SECONDARY.
- *
- * It is invalid to call this unless getReplicationMode() == modeReplSet.
- *
- * This method may be optimized to reduce synchronization overhead compared to
- * reading the current member state with getMemberState().
- */
- virtual bool isInPrimaryOrSecondaryState() const = 0;
-
-
- /**
- * Returns how slave delayed this node is configured to be.
- *
- * Raises a DBException if this node is not a member of the current replica set
- * configuration.
- */
- virtual Seconds getSlaveDelaySecs() const = 0;
-
- /**
- * Clears the list of sync sources we have blacklisted.
- */
- virtual void clearSyncSourceBlacklist() = 0;
-
- /**
- * Blocks the calling thread for up to writeConcern.wTimeout millis, or until "opTime" has
- * been replicated to at least a set of nodes that satisfies the writeConcern, whichever
- * comes first. A writeConcern.wTimeout of 0 indicates no timeout (block forever) and a
- * writeConcern.wTimeout of -1 indicates return immediately after checking. Return codes:
- * ErrorCodes::WriteConcernFailed if the writeConcern.wTimeout is reached before
- * the data has been sufficiently replicated
- * ErrorCodes::ExceededTimeLimit if the txn->getMaxTimeMicrosRemaining is reached before
- * the data has been sufficiently replicated
- * ErrorCodes::NotMaster if the node is not Primary/Master
- * ErrorCodes::UnknownReplWriteConcern if the writeConcern.wMode contains a write concern
- * mode that is not known
- * ErrorCodes::ShutdownInProgress if we are mid-shutdown
- * ErrorCodes::Interrupted if the operation was killed with killop()
- */
- virtual StatusAndDuration awaitReplication(OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern) = 0;
-
- /**
- * Like awaitReplication(), above, but waits for the replication of the last operation
- * performed on the client associated with "txn".
- */
- virtual StatusAndDuration awaitReplicationOfLastOpForClient(
- OperationContext* txn,
- const WriteConcernOptions& writeConcern) = 0;
-
- /**
- * Causes this node to relinquish being primary for at least 'stepdownTime'. If 'force' is
- * false, before doing so it will wait for 'waitTime' for one other node to be within 10
- * seconds of this node's optime before stepping down. Returns a Status with the code
- * ErrorCodes::ExceededTimeLimit if no secondary catches up within waitTime,
- * ErrorCodes::NotMaster if you are no longer primary when trying to step down,
- * ErrorCodes::SecondaryAheadOfPrimary if we are primary but there is another node that
- * seems to be ahead of us in replication, and Status::OK otherwise.
- */
- virtual Status stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime) = 0;
-
- /**
- * Returns true if the node can be considered master for the purpose of introspective
- * commands such as isMaster() and rs.status().
- */
- virtual bool isMasterForReportingPurposes() = 0;
-
- /**
- * Returns true if it is valid for this node to accept writes on the given database.
- * Currently this is true only if this node is Primary, master in master/slave,
- * a standalone, or is writing to the local database.
- *
- * If a node was started with the replSet argument, but has not yet received a config, it
- * will not be able to receive writes to a database other than local (it will not be
- * treated as standalone node).
- *
- * NOTE: This function can only be meaningfully called while the caller holds the global
- * lock in some mode other than MODE_NONE.
- */
- virtual bool canAcceptWritesForDatabase(StringData dbName) = 0;
-
- /**
- * Returns true if it is valid for this node to accept writes on the given namespace.
- *
- * The result of this function should be consistent with canAcceptWritesForDatabase()
- * for the database the namespace refers to, with additional checks on the collection.
- */
- virtual bool canAcceptWritesFor(const NamespaceString& ns) = 0;
-
- /**
- * Checks if the current replica set configuration can satisfy the given write concern.
- *
- * Things that are taken into consideration include:
- * 1. If the set has enough data-bearing members.
- * 2. If the write concern mode exists.
- * 3. If there are enough members for the write concern mode specified.
- */
- virtual Status checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const = 0;
-
- /**
- * Returns Status::OK() if it is valid for this node to serve reads on the given collection
- * and an errorcode indicating why the node cannot if it cannot.
- */
- virtual Status checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk) = 0;
-
- /**
- * Returns true if this node should ignore unique index constraints on new documents.
- * Currently this is needed for nodes in STARTUP2, RECOVERING, and ROLLBACK states.
- */
- virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx) = 0;
-
- /**
- * Updates our internal tracking of the last OpTime applied for the given slave
- * identified by "rid". Only valid to call in master/slave mode
- */
- virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts) = 0;
-
- /**
- * Updates our internal tracking of the last OpTime applied to this node.
- *
- * The new value of "opTime" must be no less than any prior value passed to this method, and
- * it is the caller's job to properly synchronize this behavior. The exception to this rule
- * is that after calls to resetLastOpTimeFromOplog(), the minimum acceptable value for
- * "opTime" is reset based on the contents of the oplog, and may go backwards due to
- * rollback.
- */
- virtual void setMyLastOptime(const OpTime& opTime) = 0;
-
- /**
- * Same as above, but used during places we need to zero our last optime.
- */
- virtual void resetMyLastOptime() = 0;
-
- /**
- * Updates our the message we include in heartbeat responses.
- */
- virtual void setMyHeartbeatMessage(const std::string& msg) = 0;
-
- /**
- * Returns the last optime recorded by setMyLastOptime.
- */
- virtual OpTime getMyLastOptime() const = 0;
-
- /**
- * Waits until the optime of the current node is at least the opTime specified in
- * 'settings'.
- *
- * The returned ReadAfterOpTimeResponse object's didWait() method returns true if
- * an attempt was made to wait for the specified opTime. Cases when this can be
- * false could include:
- *
- * 1. No read after opTime was specified.
- * 2. Attempting to do read after opTime when node is not a replica set member.
- *
- * Note: getDuration() on the returned ReadAfterOpTimeResponse will only be valid if
- * its didWait() method returns true.
- */
- virtual ReadAfterOpTimeResponse waitUntilOpTime(OperationContext* txn,
- const ReadAfterOpTimeArgs& settings) = 0;
-
- /**
- * Retrieves and returns the current election id, which is a unique id that is local to
- * this node and changes every time we become primary.
- * TODO(spencer): Use term instead.
- */
- virtual OID getElectionId() = 0;
-
- /**
- * Returns the RID for this node. The RID is used to identify this node to our sync source
- * when sending updates about our replication progress.
- */
- virtual OID getMyRID() const = 0;
-
- /**
- * Returns the id for this node as specified in the current replica set configuration.
- */
- virtual int getMyId() const = 0;
-
- /**
- * Sets this node into a specific follower mode.
- *
- * Returns true if the follower mode was successfully set. Returns false if the
- * node is or becomes a leader before setFollowerMode completes.
- *
- * Follower modes are RS_STARTUP2 (initial sync), RS_SECONDARY, RS_ROLLBACK and
- * RS_RECOVERING. They are the valid states of a node whose topology coordinator has the
- * follower role.
- *
- * This is essentially an interface that allows the applier to prevent the node from
- * becoming a candidate or accepting reads, depending on circumstances in the oplog
- * application process.
- */
- virtual bool setFollowerMode(const MemberState& newState) = 0;
-
- /**
- * Returns true if the coordinator wants the applier to pause application.
- *
- * If this returns true, the applier should call signalDrainComplete() when it has
- * completed draining its operation buffer and no further ops are being applied.
- */
- virtual bool isWaitingForApplierToDrain() = 0;
-
- /**
- * Signals that a previously requested pause and drain of the applier buffer
- * has completed.
- *
- * This is an interface that allows the applier to reenable writes after
- * a successful election triggers the draining of the applier buffer.
- */
- virtual void signalDrainComplete(OperationContext* txn) = 0;
-
- /**
- * Signals the sync source feedback thread to wake up and send a handshake and
- * replSetUpdatePosition command to our sync source.
- */
- virtual void signalUpstreamUpdater() = 0;
-
- /**
- * Prepares a BSONObj describing an invocation of the replSetUpdatePosition command that can
- * be sent to this node's sync source to update it about our progress in replication.
- *
- * The returned bool indicates whether or not the command was created.
- */
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
-
- /**
- * Handles an incoming replSetGetStatus command. Adds BSON to 'result'.
- */
- virtual Status processReplSetGetStatus(BSONObjBuilder* result) = 0;
-
- /**
- * Handles an incoming isMaster command for a replica set node. Should not be
- * called on a master-slave or standalone node.
- */
- virtual void fillIsMasterForReplSet(IsMasterResponse* result) = 0;
-
- /**
- * Adds to "result" a description of the slaveInfo data structure used to map RIDs to their
- * last known optimes.
- */
- virtual void appendSlaveInfoData(BSONObjBuilder* result) = 0;
-
- /**
- * Returns a copy of the current ReplicaSetConfig.
- */
- virtual ReplicaSetConfig getConfig() const = 0;
-
- /**
- * Handles an incoming replSetGetConfig command. Adds BSON to 'result'.
- */
- virtual void processReplSetGetConfig(BSONObjBuilder* result) = 0;
-
- /**
- * Toggles maintenanceMode to the value expressed by 'activate'
- * return Status::OK if the change worked, NotSecondary if it failed because we are
- * PRIMARY, and OperationFailed if we are not currently in maintenance mode
- */
- virtual Status setMaintenanceMode(bool activate) = 0;
-
- /**
- * Retrieves the current count of maintenanceMode and returns 'true' if greater than 0.
- */
- virtual bool getMaintenanceMode() = 0;
-
- /**
- * Handles an incoming replSetSyncFrom command. Adds BSON to 'result'
- * returns Status::OK if the sync target could be set and an ErrorCode indicating why it
- * couldn't otherwise.
- */
- virtual Status processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) = 0;
-
- /**
- * Handles an incoming replSetFreeze command. Adds BSON to 'resultObj'
- * returns Status::OK() if the node is a member of a replica set with a config and an
- * error Status otherwise
- */
- virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj) = 0;
-
- /**
- * Handles an incoming heartbeat command with arguments 'args'. Populates 'response';
- * returns a Status with either OK or an error message.
- */
- virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) = 0;
- virtual Status processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response) = 0;
-
-
- /**
- * Arguments for the replSetReconfig command.
- */
- struct ReplSetReconfigArgs {
- BSONObj newConfigObj;
- bool force;
- };
-
- /**
- * Handles an incoming replSetReconfig command. Adds BSON to 'resultObj';
- * returns a Status with either OK or an error message.
- */
- virtual Status processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) = 0;
-
- /*
- * Handles an incoming replSetInitiate command. If "configObj" is empty, generates a default
- * configuration to use.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) = 0;
-
- /*
- * Handles an incoming replSetGetRBID command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj) = 0;
-
- /**
- * Increments this process's rollback id. Called every time a rollback occurs.
- */
- virtual void incrementRollbackID() = 0;
-
- /**
- * Arguments to the replSetFresh command.
- */
- struct ReplSetFreshArgs {
- std::string setName; // Name of the replset
- HostAndPort who; // host and port of the member that sent the replSetFresh command
- unsigned id; // replSet id of the member that sent the replSetFresh command
- int cfgver; // replSet config version that the member who sent the command thinks it has
- Timestamp opTime; // last optime seen by the member who sent the replSetFresh command
- };
-
- /*
- * Handles an incoming replSetFresh command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) = 0;
-
- /**
- * Arguments to the replSetElect command.
- */
- struct ReplSetElectArgs {
- std::string set; // Name of the replset
- int whoid; // replSet id of the member that sent the replSetFresh command
- int cfgver; // replSet config version that the member who sent the command thinks it has
- OID round; // unique ID for this election
- };
-
- /*
- * Handles an incoming replSetElect command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* resultObj) = 0;
-
- /**
- * Handles an incoming replSetUpdatePosition command, updating each node's oplog progress.
- * Returns Status::OK() if all updates are processed correctly, NodeNotFound
- * if any updating node cannot be found in the config, InvalidReplicaSetConfig if the
- * "configVersion" sent in any of the updates doesn't match our config version, or
- * NotMasterOrSecondaryCode if we are in state REMOVED or otherwise don't have a valid
- * replica set config.
- * If a non-OK status is returned, it is unspecified whether none or some of the updates
- * were applied.
- * "configVersion" will be populated with our config version if and only if we return
- * InvalidReplicaSetConfig.
- */
- virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
- long long* configVersion) = 0;
-
- /**
- * Handles an incoming Handshake command. Associates the node's 'remoteID' with its
- * 'handshake' object. This association is used to update internal representation of
- * replication progress and to forward the node's replication progress upstream when this
- * node is being chained through in master/slave replication.
- *
- * Returns ErrorCodes::IllegalOperation if we're not running with master/slave replication.
- */
- virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake) = 0;
-
- /**
- * Returns a bool indicating whether or not this node builds indexes.
- */
- virtual bool buildsIndexes() = 0;
-
- /**
- * Returns a vector of members that have applied the operation with OpTime 'op'.
- */
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) = 0;
-
- /**
- * Returns a vector of the members other than ourself in the replica set, as specified in
- * the replica set config. Invalid to call if we are not in replica set mode. Returns
- * an empty vector if we do not have a valid config.
- */
- virtual std::vector<HostAndPort> getOtherNodesInReplSet() const = 0;
-
- /**
- * Returns a BSONObj containing a representation of the current default write concern.
- */
- virtual WriteConcernOptions getGetLastErrorDefault() = 0;
-
- /**
- * Checks that the --replSet flag was passed when starting up the node and that the node
- * has a valid replica set config.
- *
- * Returns a Status indicating whether those conditions are met with errorcode
- * NoReplicationEnabled if --replSet was not present during start up or with errorcode
- * NotYetInitialized in the absence of a valid config. Also adds error info to "result".
- */
- virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) = 0;
-
- /**
- * Chooses a viable sync source, or, if none available, returns empty HostAndPort.
- */
- virtual HostAndPort chooseNewSyncSource() = 0;
-
- /**
- * Blacklists choosing 'host' as a sync source until time 'until'.
- */
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
-
- /**
- * Loads the optime from the last op in the oplog into the coordinator's lastOpApplied
- * value.
- */
- virtual void resetLastOpTimeFromOplog(OperationContext* txn) = 0;
-
- /**
- * Determines if a new sync source should be considered.
- * currentSource: the current sync source
- */
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource) = 0;
-
- /**
- * Returns the OpTime of the latest replica set-committed op known to this server.
- * Committed means a majority of the voting nodes of the config are known to have the
- * operation in their oplogs. This implies such ops will never be rolled back.
- */
- virtual OpTime getLastCommittedOpTime() const = 0;
-
- /*
- * Handles an incoming replSetRequestVotes command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetRequestVotes(OperationContext* txn,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response) = 0;
-
- /*
- * Handles an incoming replSetDeclareElectionWinner command.
- * Returns a Status with either OK or an error message.
- * Populates responseTerm with the current term from our perspective.
- */
- virtual Status processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm) = 0;
-
- /**
- * Prepares a BSONObj describing the current term, primary, and lastOp information.
- */
- virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder) = 0;
-
- /**
- * Returns true if the V1 election protocol is being used and false otherwise.
- */
- virtual bool isV1ElectionProtocol() = 0;
-
- /**
- * Writes into 'output' all the information needed to generate a summary of the current
- * replication state for use by the web interface.
- */
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
-
- /**
- * Returns the current term.
- */
- virtual long long getTerm() = 0;
-
- /**
- * Attempts to update the current term for the V1 election protocol. If the term changes and
- * this node is primary, relinquishes primary.
- * Returns true if the term was updated (that is, when "term" was higher than the previously
- * recorded term) and false otherwise.
- */
- virtual bool updateTerm(long long term) = 0;
-
- protected:
-
- ReplicationCoordinator();
+ /**
+ * Clears the list of sync sources we have blacklisted.
+ */
+ virtual void clearSyncSourceBlacklist() = 0;
+
+ /**
+ * Blocks the calling thread for up to writeConcern.wTimeout millis, or until "opTime" has
+ * been replicated to at least a set of nodes that satisfies the writeConcern, whichever
+ * comes first. A writeConcern.wTimeout of 0 indicates no timeout (block forever) and a
+ * writeConcern.wTimeout of -1 indicates return immediately after checking. Return codes:
+ * ErrorCodes::WriteConcernFailed if the writeConcern.wTimeout is reached before
+ * the data has been sufficiently replicated
+ * ErrorCodes::ExceededTimeLimit if the txn->getMaxTimeMicrosRemaining is reached before
+ * the data has been sufficiently replicated
+ * ErrorCodes::NotMaster if the node is not Primary/Master
+ * ErrorCodes::UnknownReplWriteConcern if the writeConcern.wMode contains a write concern
+ * mode that is not known
+ * ErrorCodes::ShutdownInProgress if we are mid-shutdown
+ * ErrorCodes::Interrupted if the operation was killed with killop()
+ */
+ virtual StatusAndDuration awaitReplication(OperationContext* txn,
+ const OpTime& opTime,
+ const WriteConcernOptions& writeConcern) = 0;
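+
+ // A minimal usage sketch for awaitReplication(), assuming a hypothetical caller
+ // that already holds 'txn' and the OpTime 'opTime' of its last write, and that
+ // WriteConcernOptions exposes the wNumNodes/wTimeout fields referenced above:
+ //
+ //     WriteConcernOptions writeConcern;
+ //     writeConcern.wNumNodes = 2; // two data-bearing members
+ //     writeConcern.wTimeout = 10000; // give up after ten seconds
+ //     StatusAndDuration res = replCoord->awaitReplication(txn, opTime, writeConcern);
+ //     if (!res.status.isOK()) {
+ //         // e.g. WriteConcernFailed; res.duration reports how long we waited
+ //     }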
+
+ /**
+ * Like awaitReplication(), above, but waits for the replication of the last operation
+ * performed on the client associated with "txn".
+ */
+ virtual StatusAndDuration awaitReplicationOfLastOpForClient(
+ OperationContext* txn, const WriteConcernOptions& writeConcern) = 0;
+
+ /**
+ * Causes this node to relinquish being primary for at least 'stepdownTime'. If 'force' is
+ * false, before doing so it will wait for 'waitTime' for one other node to be within 10
+ * seconds of this node's optime before stepping down. Returns a Status with the code
+ * ErrorCodes::ExceededTimeLimit if no secondary catches up within waitTime,
+ * ErrorCodes::NotMaster if you are no longer primary when trying to step down,
+ * ErrorCodes::SecondaryAheadOfPrimary if we are primary but there is another node that
+ * seems to be ahead of us in replication, and Status::OK otherwise.
+ */
+ virtual Status stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime) = 0;
+
+ /**
+ * Returns true if the node can be considered master for the purpose of introspective
+ * commands such as isMaster() and rs.status().
+ */
+ virtual bool isMasterForReportingPurposes() = 0;
+
+ /**
+ * Returns true if it is valid for this node to accept writes on the given database.
+ * Currently this is true only if this node is Primary, master in master/slave,
+ * a standalone, or is writing to the local database.
+ *
+ * If a node was started with the replSet argument, but has not yet received a config, it
+ * will not be able to receive writes to a database other than local (it will not be
+ * treated as a standalone node).
+ *
+ * NOTE: This function can only be meaningfully called while the caller holds the global
+ * lock in some mode other than MODE_NONE.
+ */
+ virtual bool canAcceptWritesForDatabase(StringData dbName) = 0;
+
+ /**
+ * Returns true if it is valid for this node to accept writes on the given namespace.
+ *
+ * The result of this function should be consistent with canAcceptWritesForDatabase()
+ * for the database the namespace refers to, with additional checks on the collection.
+ */
+ virtual bool canAcceptWritesFor(const NamespaceString& ns) = 0;
+
+ /**
+ * Checks if the current replica set configuration can satisfy the given write concern.
+ *
+ * Things that are taken into consideration include:
+ * 1. If the set has enough data-bearing members.
+ * 2. If the write concern mode exists.
+ * 3. If there are enough members for the write concern mode specified.
+ */
+ virtual Status checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const = 0;
+
+ /**
+ * Returns Status::OK() if it is valid for this node to serve reads on the given collection,
+ * or an error code indicating why the node cannot serve reads if it cannot.
+ */
+ virtual Status checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk) = 0;
+
+ /**
+ * Returns true if this node should ignore unique index constraints on new documents.
+ * Currently this is needed for nodes in STARTUP2, RECOVERING, and ROLLBACK states.
+ */
+ virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx) = 0;
+
+ /**
+ * Updates our internal tracking of the last OpTime applied for the given slave
+ * identified by "rid". Only valid to call in master/slave mode
+ */
+ virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts) = 0;
+
+ /**
+ * Updates our internal tracking of the last OpTime applied to this node.
+ *
+ * The new value of "opTime" must be no less than any prior value passed to this method, and
+ * it is the caller's job to properly synchronize this behavior. The exception to this rule
+ * is that after calls to resetLastOpTimeFromOplog(), the minimum acceptable value for
+ * "opTime" is reset based on the contents of the oplog, and may go backwards due to
+ * rollback.
+ */
+ virtual void setMyLastOptime(const OpTime& opTime) = 0;
+
+ /**
+ * Same as above, but used in places where we need to zero our last optime.
+ */
+ virtual void resetMyLastOptime() = 0;
+
+ /**
+ * Updates the message that we include in heartbeat responses.
+ */
+ virtual void setMyHeartbeatMessage(const std::string& msg) = 0;
+
+ /**
+ * Returns the last optime recorded by setMyLastOptime.
+ */
+ virtual OpTime getMyLastOptime() const = 0;
+
+ /**
+ * Waits until the optime of the current node is at least the opTime specified in
+ * 'settings'.
+ *
+ * The returned ReadAfterOpTimeResponse object's didWait() method returns true if
+ * an attempt was made to wait for the specified opTime. Cases when this can be
+ * false could include:
+ *
+ * 1. No read after opTime was specified.
+ * 2. Attempting to do read after opTime when node is not a replica set member.
+ *
+ * Note: getDuration() on the returned ReadAfterOpTimeResponse will only be valid if
+ * its didWait() method returns true.
+ */
+ virtual ReadAfterOpTimeResponse waitUntilOpTime(OperationContext* txn,
+ const ReadAfterOpTimeArgs& settings) = 0;
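+
+ // A minimal sketch of consuming the response described above (hypothetical call
+ // site; didWait() and getDuration() are the accessors named in the comment):
+ //
+ //     ReadAfterOpTimeResponse rat = replCoord->waitUntilOpTime(txn, settings);
+ //     if (rat.didWait()) {
+ //         log() << "waited " << rat.getDuration() << " to satisfy read-after-opTime";
+ //     }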
+
+ /**
+ * Retrieves and returns the current election id, which is a unique id that is local to
+ * this node and changes every time we become primary.
+ * TODO(spencer): Use term instead.
+ */
+ virtual OID getElectionId() = 0;
+
+ /**
+ * Returns the RID for this node. The RID is used to identify this node to our sync source
+ * when sending updates about our replication progress.
+ */
+ virtual OID getMyRID() const = 0;
+
+ /**
+ * Returns the id for this node as specified in the current replica set configuration.
+ */
+ virtual int getMyId() const = 0;
+
+ /**
+ * Sets this node into a specific follower mode.
+ *
+ * Returns true if the follower mode was successfully set. Returns false if the
+ * node is or becomes a leader before setFollowerMode completes.
+ *
+ * Follower modes are RS_STARTUP2 (initial sync), RS_SECONDARY, RS_ROLLBACK and
+ * RS_RECOVERING. They are the valid states of a node whose topology coordinator has the
+ * follower role.
+ *
+ * This is essentially an interface that allows the applier to prevent the node from
+ * becoming a candidate or accepting reads, depending on circumstances in the oplog
+ * application process.
+ */
+ virtual bool setFollowerMode(const MemberState& newState) = 0;
+
+ /**
+ * Returns true if the coordinator wants the applier to pause application.
+ *
+ * If this returns true, the applier should call signalDrainComplete() when it has
+ * completed draining its operation buffer and no further ops are being applied.
+ */
+ virtual bool isWaitingForApplierToDrain() = 0;
+
+ /**
+ * Signals that a previously requested pause and drain of the applier buffer
+ * has completed.
+ *
+ * This is an interface that allows the applier to reenable writes after
+ * a successful election triggers the draining of the applier buffer.
+ */
+ virtual void signalDrainComplete(OperationContext* txn) = 0;
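+
+ // A minimal sketch of the drain protocol described above, as a hypothetical
+ // applier loop might implement it ('buffer' stands in for the op buffer):
+ //
+ //     if (replCoord->isWaitingForApplierToDrain() && buffer.empty()) {
+ //         // nothing left to apply; the coordinator may now re-enable writes
+ //         replCoord->signalDrainComplete(txn);
+ //     }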
+
+ /**
+ * Signals the sync source feedback thread to wake up and send a handshake and
+ * replSetUpdatePosition command to our sync source.
+ */
+ virtual void signalUpstreamUpdater() = 0;
+
+ /**
+ * Prepares a BSONObj describing an invocation of the replSetUpdatePosition command that can
+ * be sent to this node's sync source to update it about our progress in replication.
+ *
+ * The returned bool indicates whether or not the command was created.
+ */
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
+
+ /**
+ * Handles an incoming replSetGetStatus command. Adds BSON to 'result'.
+ */
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result) = 0;
+
+ /**
+ * Handles an incoming isMaster command for a replica set node. Should not be
+ * called on a master-slave or standalone node.
+ */
+ virtual void fillIsMasterForReplSet(IsMasterResponse* result) = 0;
+
+ /**
+ * Adds to "result" a description of the slaveInfo data structure used to map RIDs to their
+ * last known optimes.
+ */
+ virtual void appendSlaveInfoData(BSONObjBuilder* result) = 0;
+
+ /**
+ * Returns a copy of the current ReplicaSetConfig.
+ */
+ virtual ReplicaSetConfig getConfig() const = 0;
+
+ /**
+ * Handles an incoming replSetGetConfig command. Adds BSON to 'result'.
+ */
+ virtual void processReplSetGetConfig(BSONObjBuilder* result) = 0;
+
+ /**
+ * Toggles maintenanceMode to the value expressed by 'activate'.
+ * Returns Status::OK if the change worked, NotSecondary if it failed because we are
+ * PRIMARY, and OperationFailed if we are not currently in maintenance mode.
+ */
+ virtual Status setMaintenanceMode(bool activate) = 0;
+
+ /**
+ * Retrieves the current count of maintenanceMode and returns 'true' if greater than 0.
+ */
+ virtual bool getMaintenanceMode() = 0;
+
+ /**
+ * Handles an incoming replSetSyncFrom command. Adds BSON to 'resultObj'.
+ * Returns Status::OK if the sync target could be set, or an ErrorCode indicating why it
+ * couldn't otherwise.
+ */
+ virtual Status processReplSetSyncFrom(const HostAndPort& target, BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Handles an incoming replSetFreeze command. Adds BSON to 'resultObj'.
+ * Returns Status::OK() if the node is a member of a replica set with a config, and an
+ * error Status otherwise.
+ */
+ virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Handles an incoming heartbeat command with arguments 'args'. Populates 'response';
+ * returns a Status with either OK or an error message.
+ */
+ virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) = 0;
+ virtual Status processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response) = 0;
+
+
+ /**
+ * Arguments for the replSetReconfig command.
+ */
+ struct ReplSetReconfigArgs {
+ BSONObj newConfigObj;
+ bool force;
+ };
+
+ /**
+ * Handles an incoming replSetReconfig command. Adds BSON to 'resultObj';
+ * returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) = 0;
+
+ /*
+ * Handles an incoming replSetInitiate command. If "configObj" is empty, generates a default
+ * configuration to use.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) = 0;
+
+ /*
+ * Handles an incoming replSetGetRBID command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Increments this process's rollback id. Called every time a rollback occurs.
+ */
+ virtual void incrementRollbackID() = 0;
+
+ /**
+ * Arguments to the replSetFresh command.
+ */
+ struct ReplSetFreshArgs {
+ std::string setName; // Name of the replset
+ HostAndPort who; // host and port of the member that sent the replSetFresh command
+ unsigned id; // replSet id of the member that sent the replSetFresh command
+ int cfgver; // replSet config version that the member who sent the command thinks it has
+ Timestamp opTime; // last optime seen by the member who sent the replSetFresh command
+ };
+
+ /*
+ * Handles an incoming replSetFresh command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetFresh(const ReplSetFreshArgs& args, BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Arguments to the replSetElect command.
+ */
+ struct ReplSetElectArgs {
+ std::string set; // Name of the replset
+ int whoid; // replSet id of the member that sent the replSetElect command
+ int cfgver; // replSet config version that the member who sent the command thinks it has
+ OID round; // unique ID for this election
};
-} // namespace repl
-} // namespace mongo
+
+ /*
+ * Handles an incoming replSetElect command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Handles an incoming replSetUpdatePosition command, updating each node's oplog progress.
+ * Returns Status::OK() if all updates are processed correctly, NodeNotFound
+ * if any updating node cannot be found in the config, InvalidReplicaSetConfig if the
+ * "configVersion" sent in any of the updates doesn't match our config version, or
+ * NotMasterOrSecondary if we are in state REMOVED or otherwise don't have a valid
+ * replica set config.
+ * If a non-OK status is returned, it is unspecified whether none or some of the updates
+ * were applied.
+ * "configVersion" will be populated with our config version if and only if we return
+ * InvalidReplicaSetConfig.
+ */
+ virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
+ long long* configVersion) = 0;
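+
+ // A minimal sketch of the InvalidReplicaSetConfig contract documented above
+ // (hypothetical command handler; 'updates' is parsed from the request body):
+ //
+ //     long long configVersion = -1;
+ //     Status status = replCoord->processReplSetUpdatePosition(updates, &configVersion);
+ //     if (status.code() == ErrorCodes::InvalidReplicaSetConfig) {
+ //         // configVersion now holds our config version; report it to the sender
+ //     }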
+
+ /**
+ * Handles an incoming Handshake command. Associates the node's 'remoteID' with its
+ * 'handshake' object. This association is used to update internal representation of
+ * replication progress and to forward the node's replication progress upstream when this
+ * node is being chained through in master/slave replication.
+ *
+ * Returns ErrorCodes::IllegalOperation if we're not running with master/slave replication.
+ */
+ virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake) = 0;
+
+ /**
+ * Returns a bool indicating whether or not this node builds indexes.
+ */
+ virtual bool buildsIndexes() = 0;
+
+ /**
+ * Returns a vector of members that have applied the operation with OpTime 'op'.
+ */
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) = 0;
+
+ /**
+ * Returns a vector of the members other than ourself in the replica set, as specified in
+ * the replica set config. Invalid to call if we are not in replica set mode. Returns
+ * an empty vector if we do not have a valid config.
+ */
+ virtual std::vector<HostAndPort> getOtherNodesInReplSet() const = 0;
+
+ /**
+ * Returns a BSONObj containing a representation of the current default write concern.
+ */
+ virtual WriteConcernOptions getGetLastErrorDefault() = 0;
+
+ /**
+ * Checks that the --replSet flag was passed when starting up the node and that the node
+ * has a valid replica set config.
+ *
+ * Returns a Status indicating whether those conditions are met, with error code
+ * NoReplicationEnabled if --replSet was not present during start up, or with error code
+ * NotYetInitialized in the absence of a valid config. Also adds error info to "result".
+ */
+ virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) = 0;
+
+ /**
+ * Chooses a viable sync source, or, if none available, returns empty HostAndPort.
+ */
+ virtual HostAndPort chooseNewSyncSource() = 0;
+
+ /**
+ * Blacklists choosing 'host' as a sync source until time 'until'.
+ */
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
+
+ /**
+ * Loads the optime from the last op in the oplog into the coordinator's lastOpApplied
+ * value.
+ */
+ virtual void resetLastOpTimeFromOplog(OperationContext* txn) = 0;
+
+ /**
+ * Determines if a new sync source should be considered.
+ * currentSource: the current sync source
+ */
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource) = 0;
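+
+ // A minimal sketch of how the sync-source hooks above work together, as a
+ // hypothetical fetcher loop might use them:
+ //
+ //     HostAndPort source = replCoord->chooseNewSyncSource();
+ //     if (source.empty()) {
+ //         return; // no viable sync source right now; retry later
+ //     }
+ //     if (replCoord->shouldChangeSyncSource(source)) {
+ //         replCoord->blacklistSyncSource(source, Date_t::now() + Seconds(10));
+ //         source = replCoord->chooseNewSyncSource();
+ //     }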
+
+ /**
+ * Returns the OpTime of the latest replica set-committed op known to this server.
+ * Committed means a majority of the voting nodes of the config are known to have the
+ * operation in their oplogs. This implies such ops will never be rolled back.
+ */
+ virtual OpTime getLastCommittedOpTime() const = 0;
+
+ /*
+ * Handles an incoming replSetRequestVotes command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetRequestVotes(OperationContext* txn,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response) = 0;
+
+ /*
+ * Handles an incoming replSetDeclareElectionWinner command.
+ * Returns a Status with either OK or an error message.
+ * Populates responseTerm with the current term from our perspective.
+ */
+ virtual Status processReplSetDeclareElectionWinner(const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm) = 0;
+
+ /**
+ * Prepares a BSONObj describing the current term, primary, and lastOp information.
+ */
+ virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder) = 0;
+
+ /**
+ * Returns true if the V1 election protocol is being used and false otherwise.
+ */
+ virtual bool isV1ElectionProtocol() = 0;
+
+ /**
+ * Writes into 'output' all the information needed to generate a summary of the current
+ * replication state for use by the web interface.
+ */
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
+
+ /**
+ * Returns the current term.
+ */
+ virtual long long getTerm() = 0;
+
+ /**
+ * Attempts to update the current term for the V1 election protocol. If the term changes and
+ * this node is primary, relinquishes primary.
+ * Returns true if the term was updated (that is, when "term" was higher than the previously
+ * recorded term) and false otherwise.
+ */
+ virtual bool updateTerm(long long term) = 0;
+
+protected:
+ ReplicationCoordinator();
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.cpp b/src/mongo/db/repl/replication_coordinator_external_state.cpp
index 68403755b07..fbeddfba68a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state.cpp
@@ -33,8 +33,8 @@
namespace mongo {
namespace repl {
- ReplicationCoordinatorExternalState::ReplicationCoordinatorExternalState() {}
- ReplicationCoordinatorExternalState::~ReplicationCoordinatorExternalState() {}
+ReplicationCoordinatorExternalState::ReplicationCoordinatorExternalState() {}
+ReplicationCoordinatorExternalState::~ReplicationCoordinatorExternalState() {}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index 10e842bf36a..44e60c9b113 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -36,150 +36,150 @@
namespace mongo {
- class BSONObj;
- class OID;
- class OperationContext;
- class Status;
- struct HostAndPort;
- template <typename T> class StatusWith;
+class BSONObj;
+class OID;
+class OperationContext;
+class Status;
+struct HostAndPort;
+template <typename T>
+class StatusWith;
namespace repl {
- class LastVote;
-
- /**
- * This class represents the interface the ReplicationCoordinator uses to interact with the
- * rest of the system. All functionality of the ReplicationCoordinatorImpl that would introduce
- * dependencies on large sections of the server code and thus break the unit testability of
- * ReplicationCoordinatorImpl should be moved here.
- */
- class ReplicationCoordinatorExternalState {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalState);
- public:
-
- ReplicationCoordinatorExternalState();
- virtual ~ReplicationCoordinatorExternalState();
-
- /**
- * Starts the background sync, producer, and sync source feedback threads
- *
- * NOTE: Only starts threads if they are not already started,
- */
- virtual void startThreads() = 0;
-
- /**
- * Starts the Master/Slave threads and sets up logOp
- */
- virtual void startMasterSlave(OperationContext* txn) = 0;
-
- /**
- * Performs any necessary external state specific shutdown tasks, such as cleaning up
- * the threads it started.
- */
- virtual void shutdown() = 0;
-
- /**
- * Creates the oplog and writes the first entry.
- */
- virtual void initiateOplog(OperationContext* txn) = 0;
-
- /**
- * Simple wrapper around SyncSourceFeedback::forwardSlaveProgress. Signals to the
- * SyncSourceFeedback thread that it needs to wake up and send a replSetUpdatePosition
- * command upstream.
- */
- virtual void forwardSlaveProgress() = 0;
-
- /**
- * Queries the singleton document in local.me. If it exists and our hostname has not
- * changed since we wrote, returns the RID stored in the object. If the document does not
- * exist or our hostname doesn't match what was recorded in local.me, generates a new OID
- * to use as our RID, stores it in local.me, and returns it.
- */
- virtual OID ensureMe(OperationContext*) = 0;
-
- /**
- * Returns true if "host" is one of the network identities of this node.
- */
- virtual bool isSelf(const HostAndPort& host) = 0;
-
- /**
- * Gets the replica set config document from local storage, or returns an error.
- */
- virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn) = 0;
-
- /**
- * Stores the replica set config document in local storage, or returns an error.
- */
- virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config) = 0;
-
- /**
- * Gets the replica set lastVote document from local storage, or returns an error.
- */
- virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* txn) = 0;
-
- /**
- * Stores the replica set lastVote document in local storage, or returns an error.
- */
- virtual Status storeLocalLastVoteDocument(OperationContext* txn,
- const LastVote& lastVote) = 0;
-
- /**
- * Sets the global opTime to be 'newTime'.
- */
- virtual void setGlobalTimestamp(const Timestamp& newTime) = 0;
-
- /**
- * Gets the last optime of an operation performed on this host, from stable
- * storage.
- */
- virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn) = 0;
-
- /**
- * Returns the HostAndPort of the remote client connected to us that initiated the operation
- * represented by "txn".
- */
- virtual HostAndPort getClientHostAndPort(const OperationContext* txn) = 0;
-
- /**
- * Closes all connections except those marked with the keepOpen property, which should
- * just be connections used for heartbeating.
- * This is used during stepdown, and transition out of primary.
- */
- virtual void closeConnections() = 0;
-
- /**
- * Kills all operations that have a Client that is associated with an incoming user
- * connection. Used during stepdown.
- */
- virtual void killAllUserOperations(OperationContext* txn) = 0;
-
- /**
- * Clears all cached sharding metadata on this server. This is called after stepDown to
- * ensure that if the node becomes primary again in the future it will reload an up-to-date
- * version of the sharding data.
- */
- virtual void clearShardingState() = 0;
-
- /**
- * Notifies the bgsync and syncSourceFeedback threads to choose a new sync source.
- */
- virtual void signalApplierToChooseNewSyncSource() = 0;
-
- /**
- * Returns an OperationContext, owned by the caller, that may be used in methods of
- * the same instance that require an OperationContext.
- */
- virtual OperationContext* createOperationContext(const std::string& threadName) = 0;
-
- /**
- * Drops all temporary collections on all databases except "local".
- *
- * The implementation may assume that the caller has acquired the global exclusive lock
- * for "txn".
- */
- virtual void dropAllTempCollections(OperationContext* txn) = 0;
- };
-
-} // namespace repl
-} // namespace mongo
+class LastVote;
+
+/**
+ * This class represents the interface the ReplicationCoordinator uses to interact with the
+ * rest of the system. All functionality of the ReplicationCoordinatorImpl that would introduce
+ * dependencies on large sections of the server code and thus break the unit testability of
+ * ReplicationCoordinatorImpl should be moved here.
+ */
+class ReplicationCoordinatorExternalState {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalState);
+
+public:
+ ReplicationCoordinatorExternalState();
+ virtual ~ReplicationCoordinatorExternalState();
+
+ /**
+ * Starts the background sync, producer, and sync source feedback threads
+ *
+ * NOTE: Only starts threads if they are not already started.
+ */
+ virtual void startThreads() = 0;
+
+ /**
+ * Starts the Master/Slave threads and sets up logOp
+ */
+ virtual void startMasterSlave(OperationContext* txn) = 0;
+
+ /**
+ * Performs any necessary external state specific shutdown tasks, such as cleaning up
+ * the threads it started.
+ */
+ virtual void shutdown() = 0;
+
+ /**
+ * Creates the oplog and writes the first entry.
+ */
+ virtual void initiateOplog(OperationContext* txn) = 0;
+
+ /**
+ * Simple wrapper around SyncSourceFeedback::forwardSlaveProgress. Signals to the
+ * SyncSourceFeedback thread that it needs to wake up and send a replSetUpdatePosition
+ * command upstream.
+ */
+ virtual void forwardSlaveProgress() = 0;
+
+ /**
+ * Queries the singleton document in local.me. If it exists and our hostname has not
+ * changed since we wrote, returns the RID stored in the object. If the document does not
+ * exist or our hostname doesn't match what was recorded in local.me, generates a new OID
+ * to use as our RID, stores it in local.me, and returns it.
+ */
+ virtual OID ensureMe(OperationContext*) = 0;
+
+ /**
+ * Returns true if "host" is one of the network identities of this node.
+ */
+ virtual bool isSelf(const HostAndPort& host) = 0;
+
+ /**
+ * Gets the replica set config document from local storage, or returns an error.
+ */
+ virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn) = 0;
+
+ /**
+ * Stores the replica set config document in local storage, or returns an error.
+ */
+ virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config) = 0;
+
+ /**
+ * Gets the replica set lastVote document from local storage, or returns an error.
+ */
+ virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* txn) = 0;
+
+ /**
+ * Stores the replica set lastVote document in local storage, or returns an error.
+ */
+ virtual Status storeLocalLastVoteDocument(OperationContext* txn, const LastVote& lastVote) = 0;
+
+ /**
+ * Sets the global timestamp to be 'newTime'.
+ */
+ virtual void setGlobalTimestamp(const Timestamp& newTime) = 0;
+
+ /**
+ * Gets the last optime of an operation performed on this host, from stable
+ * storage.
+ */
+ virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn) = 0;
+
+ /**
+ * Returns the HostAndPort of the remote client connected to us that initiated the operation
+ * represented by "txn".
+ */
+ virtual HostAndPort getClientHostAndPort(const OperationContext* txn) = 0;
+
+ /**
+ * Closes all connections except those marked with the keepOpen property, which should
+ * just be connections used for heartbeating.
+ * This is used during stepdown and the transition out of primary.
+ */
+ virtual void closeConnections() = 0;
+
+ /**
+ * Kills all operations that have a Client that is associated with an incoming user
+ * connection. Used during stepdown.
+ */
+ virtual void killAllUserOperations(OperationContext* txn) = 0;
+
+ /**
+ * Clears all cached sharding metadata on this server. This is called after stepDown to
+ * ensure that if the node becomes primary again in the future it will reload an up-to-date
+ * version of the sharding data.
+ */
+ virtual void clearShardingState() = 0;
+
+ /**
+ * Notifies the bgsync and syncSourceFeedback threads to choose a new sync source.
+ */
+ virtual void signalApplierToChooseNewSyncSource() = 0;
+
+ /**
+ * Returns an OperationContext, owned by the caller, that may be used in methods of
+ * the same instance that require an OperationContext.
+ */
+ virtual OperationContext* createOperationContext(const std::string& threadName) = 0;
+
+ /**
+ * Drops all temporary collections on all databases except "local".
+ *
+ * The implementation may assume that the caller has acquired the global exclusive lock
+ * for "txn".
+ */
+ virtual void dropAllTempCollections(OperationContext* txn) = 0;
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index a1b5c609bf8..34976b02ba5 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -70,272 +70,256 @@ namespace mongo {
namespace repl {
namespace {
- const char configCollectionName[] = "local.system.replset";
- const char configDatabaseName[] = "local";
- const char lastVoteCollectionName[] = "local.replset.election";
- const char lastVoteDatabaseName[] = "local";
- const char meCollectionName[] = "local.me";
- const char meDatabaseName[] = "local";
- const char tsFieldName[] = "ts";
+const char configCollectionName[] = "local.system.replset";
+const char configDatabaseName[] = "local";
+const char lastVoteCollectionName[] = "local.replset.election";
+const char lastVoteDatabaseName[] = "local";
+const char meCollectionName[] = "local.me";
+const char meDatabaseName[] = "local";
+const char tsFieldName[] = "ts";
} // namespace
- ReplicationCoordinatorExternalStateImpl::ReplicationCoordinatorExternalStateImpl() :
- _startedThreads(false)
- , _nextThreadId(0) {}
- ReplicationCoordinatorExternalStateImpl::~ReplicationCoordinatorExternalStateImpl() {}
+ReplicationCoordinatorExternalStateImpl::ReplicationCoordinatorExternalStateImpl()
+ : _startedThreads(false), _nextThreadId(0) {}
+ReplicationCoordinatorExternalStateImpl::~ReplicationCoordinatorExternalStateImpl() {}
- void ReplicationCoordinatorExternalStateImpl::startThreads() {
- stdx::lock_guard<stdx::mutex> lk(_threadMutex);
- if (_startedThreads) {
- return;
- }
- log() << "Starting replication applier threads";
- _applierThread.reset(new stdx::thread(runSyncThread));
+void ReplicationCoordinatorExternalStateImpl::startThreads() {
+ stdx::lock_guard<stdx::mutex> lk(_threadMutex);
+ if (_startedThreads) {
+ return;
+ }
+ log() << "Starting replication applier threads";
+ _applierThread.reset(new stdx::thread(runSyncThread));
+ BackgroundSync* bgsync = BackgroundSync::get();
+ _producerThread.reset(new stdx::thread(stdx::bind(&BackgroundSync::producerThread, bgsync)));
+ _syncSourceFeedbackThread.reset(
+ new stdx::thread(stdx::bind(&SyncSourceFeedback::run, &_syncSourceFeedback)));
+ _startedThreads = true;
+}
+
+void ReplicationCoordinatorExternalStateImpl::startMasterSlave(OperationContext* txn) {
+ repl::startMasterSlave(txn);
+}
+
+void ReplicationCoordinatorExternalStateImpl::shutdown() {
+ stdx::lock_guard<stdx::mutex> lk(_threadMutex);
+ if (_startedThreads) {
+ log() << "Stopping replication applier threads";
+ _syncSourceFeedback.shutdown();
+ _syncSourceFeedbackThread->join();
+ _applierThread->join();
BackgroundSync* bgsync = BackgroundSync::get();
- _producerThread.reset(new stdx::thread(stdx::bind(&BackgroundSync::producerThread,
- bgsync)));
- _syncSourceFeedbackThread.reset(new stdx::thread(stdx::bind(&SyncSourceFeedback::run,
- &_syncSourceFeedback)));
- _startedThreads = true;
+ bgsync->shutdown();
+ _producerThread->join();
}
+}
- void ReplicationCoordinatorExternalStateImpl::startMasterSlave(OperationContext* txn) {
- repl::startMasterSlave(txn);
- }
+void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
+ createOplog(txn);
- void ReplicationCoordinatorExternalStateImpl::shutdown() {
- stdx::lock_guard<stdx::mutex> lk(_threadMutex);
- if (_startedThreads) {
- log() << "Stopping replication applier threads";
- _syncSourceFeedback.shutdown();
- _syncSourceFeedbackThread->join();
- _applierThread->join();
- BackgroundSync* bgsync = BackgroundSync::get();
- bgsync->shutdown();
- _producerThread->join();
- }
- }
-
- void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
- createOplog(txn);
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction scopedXact(txn, MODE_X);
+ Lock::GlobalWrite globalWrite(txn->lockState());
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction scopedXact(txn, MODE_X);
- Lock::GlobalWrite globalWrite(txn->lockState());
-
- WriteUnitOfWork wuow(txn);
- getGlobalServiceContext()->getOpObserver()->onOpMessage(txn, BSON("msg" << "initiating set"));
- wuow.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
+ WriteUnitOfWork wuow(txn);
+ getGlobalServiceContext()->getOpObserver()->onOpMessage(txn,
+ BSON("msg"
+ << "initiating set"));
+ wuow.commit();
}
-
- void ReplicationCoordinatorExternalStateImpl::forwardSlaveProgress() {
- _syncSourceFeedback.forwardSlaveProgress();
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
+}
+
+void ReplicationCoordinatorExternalStateImpl::forwardSlaveProgress() {
+ _syncSourceFeedback.forwardSlaveProgress();
+}
+
+OID ReplicationCoordinatorExternalStateImpl::ensureMe(OperationContext* txn) {
+ std::string myname = getHostName();
+ OID myRID;
+ {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lock(txn->lockState(), meDatabaseName, MODE_X);
+
+ BSONObj me;
+ // local.me is an identifier for a server for getLastError w:2+
+ // TODO: handle WriteConflictExceptions below
+ if (!Helpers::getSingleton(txn, meCollectionName, me) || !me.hasField("host") ||
+ me["host"].String() != myname) {
+ myRID = OID::gen();
+
+ // clean out local.me
+ Helpers::emptyCollection(txn, meCollectionName);
+
+ // repopulate
+ BSONObjBuilder b;
+ b.append("_id", myRID);
+ b.append("host", myname);
+ Helpers::putSingleton(txn, meCollectionName, b.done());
+ } else {
+ myRID = me["_id"].OID();
+ }
}
+ return myRID;
+}
- OID ReplicationCoordinatorExternalStateImpl::ensureMe(OperationContext* txn) {
- std::string myname = getHostName();
- OID myRID;
- {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lock(txn->lockState(), meDatabaseName, MODE_X);
-
- BSONObj me;
- // local.me is an identifier for a server for getLastError w:2+
- // TODO: handle WriteConflictExceptions below
- if (!Helpers::getSingleton(txn, meCollectionName, me) ||
- !me.hasField("host") ||
- me["host"].String() != myname) {
-
- myRID = OID::gen();
-
- // clean out local.me
- Helpers::emptyCollection(txn, meCollectionName);
-
- // repopulate
- BSONObjBuilder b;
- b.append("_id", myRID);
- b.append("host", myname);
- Helpers::putSingleton(txn, meCollectionName, b.done());
- } else {
- myRID = me["_id"].OID();
+StatusWith<BSONObj> ReplicationCoordinatorExternalStateImpl::loadLocalConfigDocument(
+ OperationContext* txn) {
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ BSONObj config;
+ if (!Helpers::getSingleton(txn, configCollectionName, config)) {
+ return StatusWith<BSONObj>(
+ ErrorCodes::NoMatchingDocument,
+ str::stream() << "Did not find replica set configuration document in "
+ << configCollectionName);
}
+ return StatusWith<BSONObj>(config);
}
- return myRID;
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "load replica set config", configCollectionName);
+ } catch (const DBException& ex) {
+ return StatusWith<BSONObj>(ex.toStatus());
}
+}
- StatusWith<BSONObj> ReplicationCoordinatorExternalStateImpl::loadLocalConfigDocument(
- OperationContext* txn) {
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- BSONObj config;
- if (!Helpers::getSingleton(txn, configCollectionName, config)) {
- return StatusWith<BSONObj>(
- ErrorCodes::NoMatchingDocument,
- str::stream() << "Did not find replica set configuration document in "
- << configCollectionName);
- }
- return StatusWith<BSONObj>(config);
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn,
- "load replica set config",
- configCollectionName);
- }
- catch (const DBException& ex) {
- return StatusWith<BSONObj>(ex.toStatus());
+Status ReplicationCoordinatorExternalStateImpl::storeLocalConfigDocument(OperationContext* txn,
+ const BSONObj& config) {
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dbWriteLock(txn->lockState(), configDatabaseName, MODE_X);
+ Helpers::putSingleton(txn, configCollectionName, config);
+ return Status::OK();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "save replica set config", configCollectionName);
+ } catch (const DBException& ex) {
+ return ex.toStatus();
}
+}
- Status ReplicationCoordinatorExternalStateImpl::storeLocalConfigDocument(
- OperationContext* txn,
- const BSONObj& config) {
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dbWriteLock(txn->lockState(), configDatabaseName, MODE_X);
- Helpers::putSingleton(txn, configCollectionName, config);
- return Status::OK();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn,
- "save replica set config",
- configCollectionName);
- }
- catch (const DBException& ex) {
- return ex.toStatus();
+StatusWith<LastVote> ReplicationCoordinatorExternalStateImpl::loadLocalLastVoteDocument(
+ OperationContext* txn) {
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ BSONObj lastVoteObj;
+ if (!Helpers::getSingleton(txn, lastVoteCollectionName, lastVoteObj)) {
+ return StatusWith<LastVote>(ErrorCodes::NoMatchingDocument,
+ str::stream()
+ << "Did not find replica set lastVote document in "
+ << lastVoteCollectionName);
+ }
+ LastVote lastVote;
+ lastVote.initialize(lastVoteObj);
+ return StatusWith<LastVote>(lastVote);
}
-
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
+ txn, "load replica set lastVote", lastVoteCollectionName);
+ } catch (const DBException& ex) {
+ return StatusWith<LastVote>(ex.toStatus());
}
+}
- StatusWith<LastVote> ReplicationCoordinatorExternalStateImpl::loadLocalLastVoteDocument(
- OperationContext* txn) {
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- BSONObj lastVoteObj;
- if (!Helpers::getSingleton(txn, lastVoteCollectionName, lastVoteObj)) {
- return StatusWith<LastVote>(
- ErrorCodes::NoMatchingDocument,
- str::stream() << "Did not find replica set lastVote document in "
- << lastVoteCollectionName);
- }
- LastVote lastVote;
- lastVote.initialize(lastVoteObj);
- return StatusWith<LastVote>(lastVote);
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn,
- "load replica set lastVote",
- lastVoteCollectionName);
- }
- catch (const DBException& ex) {
- return StatusWith<LastVote>(ex.toStatus());
+Status ReplicationCoordinatorExternalStateImpl::storeLocalLastVoteDocument(
+ OperationContext* txn, const LastVote& lastVote) {
+ BSONObj lastVoteObj = lastVote.toBSON();
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dbWriteLock(txn->lockState(), lastVoteDatabaseName, MODE_X);
+ Helpers::putSingleton(txn, lastVoteCollectionName, lastVoteObj);
+ return Status::OK();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
+ txn, "save replica set lastVote", lastVoteCollectionName);
+ MONGO_UNREACHABLE;
+ } catch (const DBException& ex) {
+ return ex.toStatus();
}
-
- Status ReplicationCoordinatorExternalStateImpl::storeLocalLastVoteDocument(
- OperationContext* txn,
- const LastVote& lastVote) {
- BSONObj lastVoteObj = lastVote.toBSON();
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dbWriteLock(txn->lockState(), lastVoteDatabaseName, MODE_X);
- Helpers::putSingleton(txn, lastVoteCollectionName, lastVoteObj);
- return Status::OK();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn,
- "save replica set lastVote",
- lastVoteCollectionName);
- MONGO_UNREACHABLE;
- }
- catch (const DBException& ex) {
- return ex.toStatus();
+}
+
+void ReplicationCoordinatorExternalStateImpl::setGlobalTimestamp(const Timestamp& newTime) {
+ setNewTimestamp(newTime);
+}
+
+StatusWith<OpTime> ReplicationCoordinatorExternalStateImpl::loadLastOpTime(OperationContext* txn) {
+ // TODO: handle WriteConflictExceptions below
+ try {
+ BSONObj oplogEntry;
+ if (!Helpers::getLast(txn, rsOplogName.c_str(), oplogEntry)) {
+ return StatusWith<OpTime>(ErrorCodes::NoMatchingDocument,
+ str::stream() << "Did not find any entries in "
+ << rsOplogName);
}
-
- }
-
- void ReplicationCoordinatorExternalStateImpl::setGlobalTimestamp(const Timestamp& newTime) {
- setNewTimestamp(newTime);
- }
-
- StatusWith<OpTime> ReplicationCoordinatorExternalStateImpl::loadLastOpTime(
- OperationContext* txn) {
-
- // TODO: handle WriteConflictExceptions below
- try {
- BSONObj oplogEntry;
- if (!Helpers::getLast(txn, rsOplogName.c_str(), oplogEntry)) {
- return StatusWith<OpTime>(
- ErrorCodes::NoMatchingDocument,
- str::stream() << "Did not find any entries in " << rsOplogName);
- }
- BSONElement tsElement = oplogEntry[tsFieldName];
- if (tsElement.eoo()) {
- return StatusWith<OpTime>(
- ErrorCodes::NoSuchKey,
- str::stream() << "Most recent entry in " << rsOplogName << " missing \"" <<
- tsFieldName << "\" field");
- }
- if (tsElement.type() != bsonTimestamp) {
- return StatusWith<OpTime>(
- ErrorCodes::TypeMismatch,
- str::stream() << "Expected type of \"" << tsFieldName <<
- "\" in most recent " << rsOplogName <<
- " entry to have type Timestamp, but found " << typeName(tsElement.type()));
- }
- return StatusWith<OpTime>(extractOpTime(oplogEntry));
+ BSONElement tsElement = oplogEntry[tsFieldName];
+ if (tsElement.eoo()) {
+ return StatusWith<OpTime>(ErrorCodes::NoSuchKey,
+ str::stream() << "Most recent entry in " << rsOplogName
+ << " missing \"" << tsFieldName << "\" field");
}
- catch (const DBException& ex) {
- return StatusWith<OpTime>(ex.toStatus());
+ if (tsElement.type() != bsonTimestamp) {
+ return StatusWith<OpTime>(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected type of \"" << tsFieldName
+ << "\" in most recent " << rsOplogName
+ << " entry to have type Timestamp, but found "
+ << typeName(tsElement.type()));
}
+ return StatusWith<OpTime>(extractOpTime(oplogEntry));
+ } catch (const DBException& ex) {
+ return StatusWith<OpTime>(ex.toStatus());
}
-
- bool ReplicationCoordinatorExternalStateImpl::isSelf(const HostAndPort& host) {
- return repl::isSelf(host);
-
- }
-
- HostAndPort ReplicationCoordinatorExternalStateImpl::getClientHostAndPort(
- const OperationContext* txn) {
- return HostAndPort(txn->getClient()->clientAddress(true));
- }
-
- void ReplicationCoordinatorExternalStateImpl::closeConnections() {
- MessagingPort::closeAllSockets(executor::NetworkInterface::kMessagingPortKeepOpen);
- }
-
- void ReplicationCoordinatorExternalStateImpl::killAllUserOperations(OperationContext* txn) {
- ServiceContext* environment = getGlobalServiceContext();
- environment->killAllUserOperations(txn);
- }
-
- void ReplicationCoordinatorExternalStateImpl::clearShardingState() {
- shardingState.clearCollectionMetadata();
- }
-
- void ReplicationCoordinatorExternalStateImpl::signalApplierToChooseNewSyncSource() {
- BackgroundSync::get()->clearSyncTarget();
- }
-
- OperationContext* ReplicationCoordinatorExternalStateImpl::createOperationContext(
- const std::string& threadName) {
- Client::initThreadIfNotAlready(threadName.c_str());
- return new OperationContextImpl();
- }
-
- void ReplicationCoordinatorExternalStateImpl::dropAllTempCollections(OperationContext* txn) {
- std::vector<std::string> dbNames;
- StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
- storageEngine->listDatabases(&dbNames);
-
- for (std::vector<std::string>::iterator it = dbNames.begin(); it != dbNames.end(); ++it) {
- // The local db is special because it isn't replicated. It is cleared at startup even on
- // replica set members.
- if (*it == "local")
- continue;
- LOG(2) << "Removing temporary collections from " << *it;
- Database* db = dbHolder().get(txn, *it);
- // Since we must be holding the global lock during this function, if listDatabases
- // returned this dbname, we should be able to get a reference to it - it can't have
- // been dropped.
- invariant(db);
- db->clearTmpCollections(txn);
- }
+}
+
+bool ReplicationCoordinatorExternalStateImpl::isSelf(const HostAndPort& host) {
+ return repl::isSelf(host);
+}
+
+HostAndPort ReplicationCoordinatorExternalStateImpl::getClientHostAndPort(
+ const OperationContext* txn) {
+ return HostAndPort(txn->getClient()->clientAddress(true));
+}
+
+void ReplicationCoordinatorExternalStateImpl::closeConnections() {
+ MessagingPort::closeAllSockets(executor::NetworkInterface::kMessagingPortKeepOpen);
+}
+
+void ReplicationCoordinatorExternalStateImpl::killAllUserOperations(OperationContext* txn) {
+ ServiceContext* environment = getGlobalServiceContext();
+ environment->killAllUserOperations(txn);
+}
+
+void ReplicationCoordinatorExternalStateImpl::clearShardingState() {
+ shardingState.clearCollectionMetadata();
+}
+
+void ReplicationCoordinatorExternalStateImpl::signalApplierToChooseNewSyncSource() {
+ BackgroundSync::get()->clearSyncTarget();
+}
+
+OperationContext* ReplicationCoordinatorExternalStateImpl::createOperationContext(
+ const std::string& threadName) {
+ Client::initThreadIfNotAlready(threadName.c_str());
+ return new OperationContextImpl();
+}
+
+void ReplicationCoordinatorExternalStateImpl::dropAllTempCollections(OperationContext* txn) {
+ std::vector<std::string> dbNames;
+ StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
+ storageEngine->listDatabases(&dbNames);
+
+ for (std::vector<std::string>::iterator it = dbNames.begin(); it != dbNames.end(); ++it) {
+ // The local db is special because it isn't replicated. It is cleared at startup even on
+ // replica set members.
+ if (*it == "local")
+ continue;
+ LOG(2) << "Removing temporary collections from " << *it;
+ Database* db = dbHolder().get(txn, *it);
+ // Since we must be holding the global lock during this function, if listDatabases
+ // returned this dbname, we should be able to get a reference to it - it can't have
+ // been dropped.
+ invariant(db);
+ db->clearTmpCollections(txn);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
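
Every storage helper in replication_coordinator_external_state_impl.cpp funnels its reads and writes through the MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN/END macro pair: the body is re-run until it finishes without a write conflict, and a return from the body exits the enclosing function (which is why storeLocalLastVoteDocument() can mark the line after its loop MONGO_UNREACHABLE). A minimal standalone sketch of that retry shape in portable C++; writeConflictRetry and WriteConflictException below are illustrative names, not the real macro expansion, which also logs and yields through the server's facilities:

#include <iostream>
#include <stdexcept>

struct WriteConflictException : std::runtime_error {
    WriteConflictException() : std::runtime_error("write conflict") {}
};

// Re-run the body until it completes without a write conflict, as the
// MONGO_WRITE_CONFLICT_RETRY_LOOP macros do for the storage helpers above.
template <typename Body>
auto writeConflictRetry(const char* opName, Body body) -> decltype(body()) {
    for (int attempt = 1;; ++attempt) {
        try {
            return body();  // success (or any non-conflict exception) ends the loop
        } catch (const WriteConflictException&) {
            std::cerr << opName << ": write conflict, retrying (attempt " << attempt << ")\n";
        }
    }
}

int main() {
    int failuresLeft = 2;  // simulate two conflicting attempts before success
    int result = writeConflictRetry("store singleton", [&] {
        if (failuresLeft-- > 0)
            throw WriteConflictException();
        return 42;
    });
    std::cout << "committed: " << result << "\n";
    return 0;
}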
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 4b08a2c126a..d0f64c82c1d 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -37,61 +37,61 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalStateImpl : public ReplicationCoordinatorExternalState {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateImpl);
- public:
+class ReplicationCoordinatorExternalStateImpl : public ReplicationCoordinatorExternalState {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateImpl);
- ReplicationCoordinatorExternalStateImpl();
- virtual ~ReplicationCoordinatorExternalStateImpl();
- virtual void startThreads();
- virtual void startMasterSlave(OperationContext* txn);
- virtual void shutdown();
- virtual void initiateOplog(OperationContext* txn);
- virtual void forwardSlaveProgress();
- virtual OID ensureMe(OperationContext* txn);
- virtual bool isSelf(const HostAndPort& host);
- virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
- virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
- virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* txn);
- virtual Status storeLocalLastVoteDocument(OperationContext* txn, const LastVote& lastVote);
- virtual void setGlobalTimestamp(const Timestamp& newTime);
- virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
- virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
- virtual void closeConnections();
- virtual void killAllUserOperations(OperationContext* txn);
- virtual void clearShardingState();
- virtual void signalApplierToChooseNewSyncSource();
- virtual OperationContext* createOperationContext(const std::string& threadName);
- virtual void dropAllTempCollections(OperationContext* txn);
+public:
+ ReplicationCoordinatorExternalStateImpl();
+ virtual ~ReplicationCoordinatorExternalStateImpl();
+ virtual void startThreads();
+ virtual void startMasterSlave(OperationContext* txn);
+ virtual void shutdown();
+ virtual void initiateOplog(OperationContext* txn);
+ virtual void forwardSlaveProgress();
+ virtual OID ensureMe(OperationContext* txn);
+ virtual bool isSelf(const HostAndPort& host);
+ virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
+ virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
+ virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* txn);
+ virtual Status storeLocalLastVoteDocument(OperationContext* txn, const LastVote& lastVote);
+ virtual void setGlobalTimestamp(const Timestamp& newTime);
+ virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
+ virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
+ virtual void closeConnections();
+ virtual void killAllUserOperations(OperationContext* txn);
+ virtual void clearShardingState();
+ virtual void signalApplierToChooseNewSyncSource();
+ virtual OperationContext* createOperationContext(const std::string& threadName);
+ virtual void dropAllTempCollections(OperationContext* txn);
- std::string getNextOpContextThreadName();
+ std::string getNextOpContextThreadName();
- private:
- // Guards starting threads and setting _startedThreads
- stdx::mutex _threadMutex;
+private:
+ // Guards starting threads and setting _startedThreads
+ stdx::mutex _threadMutex;
- // True when the threads have been started
- bool _startedThreads;
+ // True when the threads have been started
+ bool _startedThreads;
- // The SyncSourceFeedback class is responsible for sending replSetUpdatePosition commands
- // for forwarding replication progress information upstream when there is chained
- // replication.
- SyncSourceFeedback _syncSourceFeedback;
+ // The SyncSourceFeedback class is responsible for sending replSetUpdatePosition commands
+ // for forwarding replication progress information upstream when there is chained
+ // replication.
+ SyncSourceFeedback _syncSourceFeedback;
- // Thread running SyncSourceFeedback::run().
- std::unique_ptr<stdx::thread> _syncSourceFeedbackThread;
+ // Thread running SyncSourceFeedback::run().
+ std::unique_ptr<stdx::thread> _syncSourceFeedbackThread;
- // Thread running runSyncThread().
- std::unique_ptr<stdx::thread> _applierThread;
+ // Thread running runSyncThread().
+ std::unique_ptr<stdx::thread> _applierThread;
- // Thread running BackgroundSync::producerThread().
- std::unique_ptr<stdx::thread> _producerThread;
+ // Thread running BackgroundSync::producerThread().
+ std::unique_ptr<stdx::thread> _producerThread;
- // Mutex guarding the _nextThreadId value to prevent concurrent incrementing.
- stdx::mutex _nextThreadIdMutex;
- // Number used to uniquely name threads.
- long long _nextThreadId;
- };
+ // Mutex guarding the _nextThreadId value to prevent concurrent incrementing.
+ stdx::mutex _nextThreadIdMutex;
+ // Number used to uniquely name threads.
+ long long _nextThreadId;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 2ab0103f6b1..ee6594084c0 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -42,156 +42,147 @@
namespace mongo {
namespace repl {
- ReplicationCoordinatorExternalStateMock::ReplicationCoordinatorExternalStateMock()
- : _localRsConfigDocument(ErrorCodes::NoMatchingDocument, "No local config document"),
- _localRsLastVoteDocument(ErrorCodes::NoMatchingDocument, "No local lastVote document"),
- _lastOpTime(ErrorCodes::NoMatchingDocument, "No last oplog entry"),
- _canAcquireGlobalSharedLock(true),
- _storeLocalConfigDocumentStatus(Status::OK()),
- _storeLocalLastVoteDocumentStatus(Status::OK()),
- _storeLocalConfigDocumentShouldHang(false),
- _storeLocalLastVoteDocumentShouldHang(false),
- _connectionsClosed(false) {
- }
-
- ReplicationCoordinatorExternalStateMock::~ReplicationCoordinatorExternalStateMock() {}
-
- void ReplicationCoordinatorExternalStateMock::startThreads() {}
- void ReplicationCoordinatorExternalStateMock::startMasterSlave(OperationContext*) {}
- void ReplicationCoordinatorExternalStateMock::initiateOplog(OperationContext* txn) {}
- void ReplicationCoordinatorExternalStateMock::shutdown() {}
- void ReplicationCoordinatorExternalStateMock::forwardSlaveProgress() {}
-
- OID ReplicationCoordinatorExternalStateMock::ensureMe(OperationContext*) {
- return OID::gen();
- }
-
- bool ReplicationCoordinatorExternalStateMock::isSelf(const HostAndPort& host) {
- return sequenceContains(_selfHosts, host);
- }
-
- void ReplicationCoordinatorExternalStateMock::addSelf(const HostAndPort& host) {
- _selfHosts.push_back(host);
+ReplicationCoordinatorExternalStateMock::ReplicationCoordinatorExternalStateMock()
+ : _localRsConfigDocument(ErrorCodes::NoMatchingDocument, "No local config document"),
+ _localRsLastVoteDocument(ErrorCodes::NoMatchingDocument, "No local lastVote document"),
+ _lastOpTime(ErrorCodes::NoMatchingDocument, "No last oplog entry"),
+ _canAcquireGlobalSharedLock(true),
+ _storeLocalConfigDocumentStatus(Status::OK()),
+ _storeLocalLastVoteDocumentStatus(Status::OK()),
+ _storeLocalConfigDocumentShouldHang(false),
+ _storeLocalLastVoteDocumentShouldHang(false),
+ _connectionsClosed(false) {}
+
+ReplicationCoordinatorExternalStateMock::~ReplicationCoordinatorExternalStateMock() {}
+
+void ReplicationCoordinatorExternalStateMock::startThreads() {}
+void ReplicationCoordinatorExternalStateMock::startMasterSlave(OperationContext*) {}
+void ReplicationCoordinatorExternalStateMock::initiateOplog(OperationContext* txn) {}
+void ReplicationCoordinatorExternalStateMock::shutdown() {}
+void ReplicationCoordinatorExternalStateMock::forwardSlaveProgress() {}
+
+OID ReplicationCoordinatorExternalStateMock::ensureMe(OperationContext*) {
+ return OID::gen();
+}
+
+bool ReplicationCoordinatorExternalStateMock::isSelf(const HostAndPort& host) {
+ return sequenceContains(_selfHosts, host);
+}
+
+void ReplicationCoordinatorExternalStateMock::addSelf(const HostAndPort& host) {
+ _selfHosts.push_back(host);
+}
+
+HostAndPort ReplicationCoordinatorExternalStateMock::getClientHostAndPort(
+ const OperationContext* txn) {
+ return _clientHostAndPort;
+}
+
+void ReplicationCoordinatorExternalStateMock::setClientHostAndPort(
+ const HostAndPort& clientHostAndPort) {
+ _clientHostAndPort = clientHostAndPort;
+}
+
+StatusWith<BSONObj> ReplicationCoordinatorExternalStateMock::loadLocalConfigDocument(
+ OperationContext* txn) {
+ return _localRsConfigDocument;
+}
+
+Status ReplicationCoordinatorExternalStateMock::storeLocalConfigDocument(OperationContext* txn,
+ const BSONObj& config) {
+ {
+ stdx::unique_lock<stdx::mutex> lock(_shouldHangConfigMutex);
+ while (_storeLocalConfigDocumentShouldHang) {
+ _shouldHangConfigCondVar.wait(lock);
+ }
}
-
- HostAndPort ReplicationCoordinatorExternalStateMock::getClientHostAndPort(
- const OperationContext* txn) {
- return _clientHostAndPort;
+ if (_storeLocalConfigDocumentStatus.isOK()) {
+ setLocalConfigDocument(StatusWith<BSONObj>(config));
+ return Status::OK();
}
+ return _storeLocalConfigDocumentStatus;
+}
- void ReplicationCoordinatorExternalStateMock::setClientHostAndPort(
- const HostAndPort& clientHostAndPort) {
- _clientHostAndPort = clientHostAndPort;
- }
+void ReplicationCoordinatorExternalStateMock::setLocalConfigDocument(
+ const StatusWith<BSONObj>& localConfigDocument) {
+ _localRsConfigDocument = localConfigDocument;
+}
- StatusWith<BSONObj> ReplicationCoordinatorExternalStateMock::loadLocalConfigDocument(
- OperationContext* txn) {
- return _localRsConfigDocument;
- }
+StatusWith<LastVote> ReplicationCoordinatorExternalStateMock::loadLocalLastVoteDocument(
+ OperationContext* txn) {
+ return _localRsLastVoteDocument;
+}
- Status ReplicationCoordinatorExternalStateMock::storeLocalConfigDocument(
- OperationContext* txn,
- const BSONObj& config) {
- {
- stdx::unique_lock<stdx::mutex> lock(_shouldHangConfigMutex);
- while (_storeLocalConfigDocumentShouldHang) {
- _shouldHangConfigCondVar.wait(lock);
- }
- }
- if (_storeLocalConfigDocumentStatus.isOK()) {
- setLocalConfigDocument(StatusWith<BSONObj>(config));
- return Status::OK();
+Status ReplicationCoordinatorExternalStateMock::storeLocalLastVoteDocument(
+ OperationContext* txn, const LastVote& lastVote) {
+ {
+ stdx::unique_lock<stdx::mutex> lock(_shouldHangLastVoteMutex);
+ while (_storeLocalLastVoteDocumentShouldHang) {
+ _shouldHangLastVoteCondVar.wait(lock);
}
- return _storeLocalConfigDocumentStatus;
- }
-
- void ReplicationCoordinatorExternalStateMock::setLocalConfigDocument(
- const StatusWith<BSONObj>& localConfigDocument) {
-
- _localRsConfigDocument = localConfigDocument;
}
-
- StatusWith<LastVote> ReplicationCoordinatorExternalStateMock::loadLocalLastVoteDocument(
- OperationContext* txn) {
- return _localRsLastVoteDocument;
+ if (_storeLocalLastVoteDocumentStatus.isOK()) {
+ setLocalLastVoteDocument(StatusWith<LastVote>(lastVote));
+ return Status::OK();
}
+ return _storeLocalLastVoteDocumentStatus;
+}
- Status ReplicationCoordinatorExternalStateMock::storeLocalLastVoteDocument(
- OperationContext* txn,
- const LastVote& lastVote) {
- {
- stdx::unique_lock<stdx::mutex> lock(_shouldHangLastVoteMutex);
- while (_storeLocalLastVoteDocumentShouldHang) {
- _shouldHangLastVoteCondVar.wait(lock);
- }
- }
- if (_storeLocalLastVoteDocumentStatus.isOK()) {
- setLocalLastVoteDocument(StatusWith<LastVote>(lastVote));
- return Status::OK();
- }
- return _storeLocalLastVoteDocumentStatus;
- }
+void ReplicationCoordinatorExternalStateMock::setLocalLastVoteDocument(
+ const StatusWith<LastVote>& localLastVoteDocument) {
+ _localRsLastVoteDocument = localLastVoteDocument;
+}
- void ReplicationCoordinatorExternalStateMock::setLocalLastVoteDocument(
- const StatusWith<LastVote>& localLastVoteDocument) {
+void ReplicationCoordinatorExternalStateMock::setGlobalTimestamp(const Timestamp& newTime) {}
- _localRsLastVoteDocument = localLastVoteDocument;
- }
-
- void ReplicationCoordinatorExternalStateMock::setGlobalTimestamp(const Timestamp& newTime) {
- }
-
- StatusWith<OpTime> ReplicationCoordinatorExternalStateMock::loadLastOpTime(
- OperationContext* txn) {
- return _lastOpTime;
- }
+StatusWith<OpTime> ReplicationCoordinatorExternalStateMock::loadLastOpTime(OperationContext* txn) {
+ return _lastOpTime;
+}
- void ReplicationCoordinatorExternalStateMock::setLastOpTime(
- const StatusWith<OpTime>& lastApplied) {
- _lastOpTime = lastApplied;
- }
+void ReplicationCoordinatorExternalStateMock::setLastOpTime(const StatusWith<OpTime>& lastApplied) {
+ _lastOpTime = lastApplied;
+}
- void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentStatus(Status status) {
- _storeLocalConfigDocumentStatus = status;
- }
+void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentStatus(Status status) {
+ _storeLocalConfigDocumentStatus = status;
+}
- void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentToHang(bool hang) {
- stdx::unique_lock<stdx::mutex> lock(_shouldHangConfigMutex);
- _storeLocalConfigDocumentShouldHang = hang;
- if (!hang) {
- _shouldHangConfigCondVar.notify_all();
- }
+void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentToHang(bool hang) {
+ stdx::unique_lock<stdx::mutex> lock(_shouldHangConfigMutex);
+ _storeLocalConfigDocumentShouldHang = hang;
+ if (!hang) {
+ _shouldHangConfigCondVar.notify_all();
}
+}
- void ReplicationCoordinatorExternalStateMock::setStoreLocalLastVoteDocumentStatus(
- Status status) {
- _storeLocalLastVoteDocumentStatus = status;
- }
+void ReplicationCoordinatorExternalStateMock::setStoreLocalLastVoteDocumentStatus(Status status) {
+ _storeLocalLastVoteDocumentStatus = status;
+}
- void ReplicationCoordinatorExternalStateMock::setStoreLocalLastVoteDocumentToHang(bool hang) {
- stdx::unique_lock<stdx::mutex> lock(_shouldHangLastVoteMutex);
- _storeLocalLastVoteDocumentShouldHang = hang;
- if (!hang) {
- _shouldHangLastVoteCondVar.notify_all();
- }
+void ReplicationCoordinatorExternalStateMock::setStoreLocalLastVoteDocumentToHang(bool hang) {
+ stdx::unique_lock<stdx::mutex> lock(_shouldHangLastVoteMutex);
+ _storeLocalLastVoteDocumentShouldHang = hang;
+ if (!hang) {
+ _shouldHangLastVoteCondVar.notify_all();
}
+}
- void ReplicationCoordinatorExternalStateMock::closeConnections() {
- _connectionsClosed = true;
- }
+void ReplicationCoordinatorExternalStateMock::closeConnections() {
+ _connectionsClosed = true;
+}
- void ReplicationCoordinatorExternalStateMock::killAllUserOperations(OperationContext* txn) {}
+void ReplicationCoordinatorExternalStateMock::killAllUserOperations(OperationContext* txn) {}
- void ReplicationCoordinatorExternalStateMock::clearShardingState() {}
+void ReplicationCoordinatorExternalStateMock::clearShardingState() {}
- void ReplicationCoordinatorExternalStateMock::signalApplierToChooseNewSyncSource() {}
+void ReplicationCoordinatorExternalStateMock::signalApplierToChooseNewSyncSource() {}
- OperationContext* ReplicationCoordinatorExternalStateMock::createOperationContext(
- const std::string& threadName) {
- return new OperationContextReplMock;
- }
+OperationContext* ReplicationCoordinatorExternalStateMock::createOperationContext(
+ const std::string& threadName) {
+ return new OperationContextReplMock;
+}
- void ReplicationCoordinatorExternalStateMock::dropAllTempCollections(OperationContext* txn) {}
+void ReplicationCoordinatorExternalStateMock::dropAllTempCollections(OperationContext* txn) {}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
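
The hang switches in this mock are a plain condition-variable gate: storeLocalConfigDocument() loops on _storeLocalConfigDocumentShouldHang under _shouldHangConfigMutex, and setStoreLocalConfigDocumentToHang(false) flips the flag and wakes every waiter. A self-contained sketch of that gate in standard C++; HangGate is a hypothetical name, the real mock keeps the same state inline:

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

class HangGate {
public:
    void setHang(bool hang) {
        std::lock_guard<std::mutex> lock(_mutex);
        _shouldHang = hang;
        if (!hang)
            _condVar.notify_all();  // wake every thread parked in waitWhileHanging()
    }

    void waitWhileHanging() {
        std::unique_lock<std::mutex> lock(_mutex);
        while (_shouldHang)  // loop guards against spurious wakeups
            _condVar.wait(lock);
    }

private:
    std::mutex _mutex;
    std::condition_variable _condVar;
    bool _shouldHang = false;
};

int main() {
    HangGate gate;
    gate.setHang(true);
    std::thread storer([&] {
        gate.waitWhileHanging();  // models storeLocalConfigDocument() parking here
        std::cout << "store proceeded\n";
    });
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    gate.setHang(false);  // the test releases the store, as setStoreLocalConfigDocumentToHang(false) does
    storer.join();
    return 0;
}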
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index 1602601a33a..a93b4a5038a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -44,103 +44,104 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalStateMock : public ReplicationCoordinatorExternalState {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateMock);
- public:
- class GlobalSharedLockAcquirer;
-
- ReplicationCoordinatorExternalStateMock();
- virtual ~ReplicationCoordinatorExternalStateMock();
- virtual void startThreads();
- virtual void startMasterSlave(OperationContext*);
- virtual void shutdown();
- virtual void initiateOplog(OperationContext* txn);
- virtual void forwardSlaveProgress();
- virtual OID ensureMe(OperationContext*);
- virtual bool isSelf(const HostAndPort& host);
- virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
- virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
- virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
- virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* txn);
- virtual Status storeLocalLastVoteDocument(OperationContext* txn, const LastVote& lastVote);
- virtual void setGlobalTimestamp(const Timestamp& newTime);
- virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
- virtual void closeConnections();
- virtual void killAllUserOperations(OperationContext* txn);
- virtual void clearShardingState();
- virtual void signalApplierToChooseNewSyncSource();
- virtual OperationContext* createOperationContext(const std::string& threadName);
- virtual void dropAllTempCollections(OperationContext* txn);
-
- /**
- * Adds "host" to the list of hosts that this mock will match when responding to "isSelf"
- * messages.
- */
- void addSelf(const HostAndPort& host);
-
- /**
- * Sets the return value for subsequent calls to loadLocalConfigDocument().
- */
- void setLocalConfigDocument(const StatusWith<BSONObj>& localConfigDocument);
-
- /**
- * Sets the return value for subsequent calls to loadLocalLastVoteDocument().
- */
- void setLocalLastVoteDocument(const StatusWith<LastVote>& localLastVoteDocument);
-
- /**
- * Sets the return value for subsequent calls to getClientHostAndPort().
- */
- void setClientHostAndPort(const HostAndPort& clientHostAndPort);
-
- /**
- * Sets the return value for subsequent calls to loadLastOpTimeApplied.
- */
- void setLastOpTime(const StatusWith<OpTime>& lastApplied);
-
- /**
- * Sets the return value for subsequent calls to storeLocalConfigDocument().
- * If "status" is Status::OK(), the subsequent calls will call the underlying funtion.
- */
- void setStoreLocalConfigDocumentStatus(Status status);
-
- /**
- * Sets whether or not subsequent calls to storeLocalConfigDocument() should hang
- * indefinitely or not based on the value of "hang".
- */
- void setStoreLocalConfigDocumentToHang(bool hang);
-
- /**
- * Sets the return value for subsequent calls to storeLocalLastVoteDocument().
- * If "status" is Status::OK(), the subsequent calls will call the underlying funtion.
- */
- void setStoreLocalLastVoteDocumentStatus(Status status);
-
- /**
- * Sets whether or not subsequent calls to storeLocalLastVoteDocument() should hang
- * indefinitely or not based on the value of "hang".
- */
- void setStoreLocalLastVoteDocumentToHang(bool hang);
-
- private:
- StatusWith<BSONObj> _localRsConfigDocument;
- StatusWith<LastVote> _localRsLastVoteDocument;
- StatusWith<OpTime> _lastOpTime;
- std::vector<HostAndPort> _selfHosts;
- bool _canAcquireGlobalSharedLock;
- Status _storeLocalConfigDocumentStatus;
- Status _storeLocalLastVoteDocumentStatus;
- // mutex and cond var for controlling stroeLocalConfigDocument()'s hanging
- stdx::mutex _shouldHangConfigMutex;
- stdx::condition_variable _shouldHangConfigCondVar;
- // mutex and cond var for controlling stroeLocalLastVoteDocument()'s hanging
- stdx::mutex _shouldHangLastVoteMutex;
- stdx::condition_variable _shouldHangLastVoteCondVar;
- bool _storeLocalConfigDocumentShouldHang;
- bool _storeLocalLastVoteDocumentShouldHang;
- bool _connectionsClosed;
- HostAndPort _clientHostAndPort;
- };
-
-} // namespace repl
-} // namespace mongo
+class ReplicationCoordinatorExternalStateMock : public ReplicationCoordinatorExternalState {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateMock);
+
+public:
+ class GlobalSharedLockAcquirer;
+
+ ReplicationCoordinatorExternalStateMock();
+ virtual ~ReplicationCoordinatorExternalStateMock();
+ virtual void startThreads();
+ virtual void startMasterSlave(OperationContext*);
+ virtual void shutdown();
+ virtual void initiateOplog(OperationContext* txn);
+ virtual void forwardSlaveProgress();
+ virtual OID ensureMe(OperationContext*);
+ virtual bool isSelf(const HostAndPort& host);
+ virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
+ virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
+ virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
+ virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* txn);
+ virtual Status storeLocalLastVoteDocument(OperationContext* txn, const LastVote& lastVote);
+ virtual void setGlobalTimestamp(const Timestamp& newTime);
+ virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
+ virtual void closeConnections();
+ virtual void killAllUserOperations(OperationContext* txn);
+ virtual void clearShardingState();
+ virtual void signalApplierToChooseNewSyncSource();
+ virtual OperationContext* createOperationContext(const std::string& threadName);
+ virtual void dropAllTempCollections(OperationContext* txn);
+
+ /**
+ * Adds "host" to the list of hosts that this mock will match when responding to "isSelf"
+ * messages.
+ */
+ void addSelf(const HostAndPort& host);
+
+ /**
+ * Sets the return value for subsequent calls to loadLocalConfigDocument().
+ */
+ void setLocalConfigDocument(const StatusWith<BSONObj>& localConfigDocument);
+
+ /**
+ * Sets the return value for subsequent calls to loadLocalLastVoteDocument().
+ */
+ void setLocalLastVoteDocument(const StatusWith<LastVote>& localLastVoteDocument);
+
+ /**
+ * Sets the return value for subsequent calls to getClientHostAndPort().
+ */
+ void setClientHostAndPort(const HostAndPort& clientHostAndPort);
+
+ /**
+ * Sets the return value for subsequent calls to loadLastOpTime().
+ */
+ void setLastOpTime(const StatusWith<OpTime>& lastApplied);
+
+ /**
+ * Sets the return value for subsequent calls to storeLocalConfigDocument().
+ * If "status" is Status::OK(), the subsequent calls will call the underlying funtion.
+ */
+ void setStoreLocalConfigDocumentStatus(Status status);
+
+ /**
+ * Sets whether subsequent calls to storeLocalConfigDocument() should hang
+ * indefinitely, based on the value of "hang".
+ */
+ void setStoreLocalConfigDocumentToHang(bool hang);
+
+ /**
+ * Sets the return value for subsequent calls to storeLocalLastVoteDocument().
+ * If "status" is Status::OK(), the subsequent calls will call the underlying funtion.
+ */
+ void setStoreLocalLastVoteDocumentStatus(Status status);
+
+ /**
+ * Sets whether subsequent calls to storeLocalLastVoteDocument() should hang
+ * indefinitely, based on the value of "hang".
+ */
+ void setStoreLocalLastVoteDocumentToHang(bool hang);
+
+private:
+ StatusWith<BSONObj> _localRsConfigDocument;
+ StatusWith<LastVote> _localRsLastVoteDocument;
+ StatusWith<OpTime> _lastOpTime;
+ std::vector<HostAndPort> _selfHosts;
+ bool _canAcquireGlobalSharedLock;
+ Status _storeLocalConfigDocumentStatus;
+ Status _storeLocalLastVoteDocumentStatus;
+ // mutex and cond var for controlling storeLocalConfigDocument()'s hanging
+ stdx::mutex _shouldHangConfigMutex;
+ stdx::condition_variable _shouldHangConfigCondVar;
+ // mutex and cond var for controlling storeLocalLastVoteDocument()'s hanging
+ stdx::mutex _shouldHangLastVoteMutex;
+ stdx::condition_variable _shouldHangLastVoteCondVar;
+ bool _storeLocalConfigDocumentShouldHang;
+ bool _storeLocalLastVoteDocumentShouldHang;
+ bool _connectionsClosed;
+ HostAndPort _clientHostAndPort;
+};
+
+} // namespace repl
+} // namespace mongo
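
Everything in this mock header follows one pattern: each load*() accessor returns whatever StatusWith value the test last injected through the matching set*() mutator, and the defaults are NoMatchingDocument-style errors so an unconfigured mock behaves like an empty node. A compilable distillation of that pattern; StatusOr, Error, and ExternalStateMock are illustrative stand-ins rather than the real mongo types:

#include <iostream>
#include <string>
#include <utility>
#include <variant>

struct Error {
    std::string message;  // stand-in for mongo::Status
};

template <typename T>
class StatusOr {  // stand-in for mongo::StatusWith<T>
public:
    StatusOr(Error error) : _value(std::move(error)) {}
    StatusOr(T value) : _value(std::move(value)) {}
    bool isOK() const { return std::holds_alternative<T>(_value); }
    const T& getValue() const { return std::get<T>(_value); }
    const Error& getStatus() const { return std::get<Error>(_value); }

private:
    std::variant<Error, T> _value;
};

class ExternalStateMock {
public:
    // Defaults to a NoMatchingDocument-style failure, like the real mock's constructor.
    StatusOr<std::string> loadLocalConfigDocument() const { return _config; }
    void setLocalConfigDocument(StatusOr<std::string> doc) { _config = std::move(doc); }

private:
    StatusOr<std::string> _config{Error{"No local config document"}};
};

int main() {
    ExternalStateMock mock;
    std::cout << "before: " << (mock.loadLocalConfigDocument().isOK() ? "ok" : "error") << "\n";
    mock.setLocalConfigDocument(std::string("{ _id: 'rs0', version: 1 }"));
    std::cout << "after: " << mock.loadLocalConfigDocument().getValue() << "\n";
    return 0;
}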
diff --git a/src/mongo/db/repl/replication_coordinator_global.cpp b/src/mongo/db/repl/replication_coordinator_global.cpp
index e35ead35e96..c7a163218f7 100644
--- a/src/mongo/db/repl/replication_coordinator_global.cpp
+++ b/src/mongo/db/repl/replication_coordinator_global.cpp
@@ -34,16 +34,15 @@
namespace mongo {
namespace repl {
- ReplicationCoordinator* getGlobalReplicationCoordinator() {
- ReplicationCoordinator* globalReplCoordinator = ReplicationCoordinator::get(
- getGlobalServiceContext());
- return globalReplCoordinator;
- }
+ReplicationCoordinator* getGlobalReplicationCoordinator() {
+ ReplicationCoordinator* globalReplCoordinator =
+ ReplicationCoordinator::get(getGlobalServiceContext());
+ return globalReplCoordinator;
+}
- void setGlobalReplicationCoordinator(ReplicationCoordinator* coord) {
- repl::ReplicationCoordinator::set(getGlobalServiceContext(),
- std::move(
- std::unique_ptr<ReplicationCoordinator>(coord)));
- }
-} // namespace repl
-} // namespace mongo
+void setGlobalReplicationCoordinator(ReplicationCoordinator* coord) {
+ repl::ReplicationCoordinator::set(getGlobalServiceContext(),
+ std::move(std::unique_ptr<ReplicationCoordinator>(coord)));
+}
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_global.h b/src/mongo/db/repl/replication_coordinator_global.h
index c107959dbf6..a18033fd162 100644
--- a/src/mongo/db/repl/replication_coordinator_global.h
+++ b/src/mongo/db/repl/replication_coordinator_global.h
@@ -33,8 +33,8 @@
namespace mongo {
namespace repl {
- ReplicationCoordinator* getGlobalReplicationCoordinator();
- void setGlobalReplicationCoordinator(ReplicationCoordinator* coordinator);
+ReplicationCoordinator* getGlobalReplicationCoordinator();
+void setGlobalReplicationCoordinator(ReplicationCoordinator* coordinator);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
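
The ownership contract in these two functions is easy to miss in the churn: setGlobalReplicationCoordinator() accepts a raw pointer but immediately wraps it in a unique_ptr handed to the ServiceContext, so the caller relinquishes ownership, while getGlobalReplicationCoordinator() hands back a non-owning pointer. A standalone sketch of that contract, using a file-scope unique_ptr where the real code attaches the coordinator to the global ServiceContext:

#include <iostream>
#include <memory>

struct ReplicationCoordinator {
    virtual ~ReplicationCoordinator() = default;
};

namespace {
// Process-wide owner; illustrative only, the real code stores this on the ServiceContext.
std::unique_ptr<ReplicationCoordinator> globalCoordinator;
}  // namespace

void setGlobalReplicationCoordinator(ReplicationCoordinator* coord) {
    globalCoordinator.reset(coord);  // take ownership of the raw pointer
}

ReplicationCoordinator* getGlobalReplicationCoordinator() {
    return globalCoordinator.get();  // callers get non-owning access
}

int main() {
    setGlobalReplicationCoordinator(new ReplicationCoordinator());
    std::cout << std::boolalpha << (getGlobalReplicationCoordinator() != nullptr) << "\n";
    return 0;
}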
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index bd1378699ad..e3ba34932de 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -77,716 +77,683 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterface;
+using executor::NetworkInterface;
- void lockAndCall(stdx::unique_lock<stdx::mutex>* lk, const stdx::function<void ()>& fn) {
- if (!lk->owns_lock()) {
- lk->lock();
- }
- fn();
+void lockAndCall(stdx::unique_lock<stdx::mutex>* lk, const stdx::function<void()>& fn) {
+ if (!lk->owns_lock()) {
+ lk->lock();
}
+ fn();
+}
- /**
- * Implements the force-reconfig behavior of incrementing config version by a large random
- * number.
- */
- BSONObj incrementConfigVersionByRandom(BSONObj config) {
- BSONObjBuilder builder;
- for (BSONObjIterator iter(config); iter.more(); iter.next()) {
- BSONElement elem = *iter;
- if (elem.fieldNameStringData() == ReplicaSetConfig::kVersionFieldName &&
- elem.isNumber()) {
-
- std::unique_ptr<SecureRandom> generator(SecureRandom::create());
- const int random = std::abs(static_cast<int>(generator->nextInt64()) % 100000);
- builder.appendIntOrLL(ReplicaSetConfig::kVersionFieldName,
- elem.numberLong() + 10000 + random);
- }
- else {
- builder.append(elem);
- }
- }
- return builder.obj();
- }
-
-} //namespace
-
- struct ReplicationCoordinatorImpl::WaiterInfo {
-
- /**
- * Constructor takes the list of waiters and enqueues itself on the list, removing itself
- * in the destructor.
- */
- WaiterInfo(std::vector<WaiterInfo*>* _list,
- unsigned int _opID,
- const OpTime* _opTime,
- const WriteConcernOptions* _writeConcern,
- stdx::condition_variable* _condVar) : list(_list),
- master(true),
- opID(_opID),
- opTime(_opTime),
- writeConcern(_writeConcern),
- condVar(_condVar) {
- list->push_back(this);
- }
-
- ~WaiterInfo() {
- list->erase(std::remove(list->begin(), list->end(), this), list->end());
- }
-
- std::vector<WaiterInfo*>* list;
- bool master; // Set to false to indicate that stepDown was called while waiting
- const unsigned int opID;
- const OpTime* opTime;
- const WriteConcernOptions* writeConcern;
- stdx::condition_variable* condVar;
- };
-
-namespace {
- ReplicationCoordinator::Mode getReplicationModeFromSettings(const ReplSettings& settings) {
- if (settings.usingReplSets()) {
- return ReplicationCoordinator::modeReplSet;
- }
- if (settings.master || settings.slave) {
- return ReplicationCoordinator::modeMasterSlave;
+/**
+ * Implements the force-reconfig behavior of incrementing config version by a large random
+ * number.
+ */
+BSONObj incrementConfigVersionByRandom(BSONObj config) {
+ BSONObjBuilder builder;
+ for (BSONObjIterator iter(config); iter.more(); iter.next()) {
+ BSONElement elem = *iter;
+ if (elem.fieldNameStringData() == ReplicaSetConfig::kVersionFieldName && elem.isNumber()) {
+ std::unique_ptr<SecureRandom> generator(SecureRandom::create());
+ const int random = std::abs(static_cast<int>(generator->nextInt64()) % 100000);
+ builder.appendIntOrLL(ReplicaSetConfig::kVersionFieldName,
+ elem.numberLong() + 10000 + random);
+ } else {
+ builder.append(elem);
}
- return ReplicationCoordinator::modeNone;
}
-} // namespace
-
- ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
- const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- TopologyCoordinator* topCoord,
- int64_t prngSeed,
- NetworkInterface* network,
- StorageInterface* storage,
- ReplicationExecutor* replExec) :
- _settings(settings),
- _replMode(getReplicationModeFromSettings(settings)),
- _topCoord(topCoord),
- _replExecutorIfOwned(replExec ? nullptr :
- new ReplicationExecutor(network,
- storage,
- prngSeed)),
- _replExecutor(replExec ? *replExec : *_replExecutorIfOwned),
- _externalState(externalState),
- _inShutdown(false),
- _memberState(MemberState::RS_STARTUP),
- _isWaitingForDrainToComplete(false),
- _rsConfigState(kConfigPreStart),
- _selfIndex(-1),
- _sleptLastElection(false),
- _canAcceptNonLocalWrites(!(settings.usingReplSets() || settings.slave)),
- _canServeNonLocalReads(0U),
- _dr(DataReplicatorOptions(), &_replExecutor, this) {
-
- if (!isReplEnabled()) {
- return;
- }
+ return builder.obj();
+}
- std::unique_ptr<SecureRandom> rbidGenerator(SecureRandom::create());
- _rbid = static_cast<int>(rbidGenerator->nextInt64());
- if (_rbid < 0) {
- // Ensure _rbid is always positive
- _rbid = -_rbid;
- }
+} // namespace
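
For reference, the arithmetic in incrementConfigVersionByRandom() above is: new version = old version + 10000 + r, with r drawn from [0, 99999], so two force-reconfigs racing from the same starting version are unlikely to collide. A standalone sketch of just that bump; std::mt19937_64 stands in for mongo's SecureRandom:

#include <iostream>
#include <random>

// Force-reconfig version bump: always at least +10000, plus a random
// five-digit spread to keep concurrent force-reconfigs from colliding.
long long incrementVersionByRandom(long long oldVersion) {
    static std::mt19937_64 generator{std::random_device{}()};
    const int random = static_cast<int>(generator() % 100000);  // in [0, 99999]
    return oldVersion + 10000 + random;
}

int main() {
    // e.g. version 5 becomes something in [10005, 110004]
    std::cout << incrementVersionByRandom(5) << "\n";
    return 0;
}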
- // Make sure there is always an entry in _slaveInfo for ourself.
- SlaveInfo selfInfo;
- selfInfo.self = true;
- _slaveInfo.push_back(selfInfo);
- }
-
- ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
- const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- NetworkInterface* network,
- StorageInterface* storage,
- TopologyCoordinator* topCoord,
- int64_t prngSeed) : ReplicationCoordinatorImpl(settings,
- externalState,
- topCoord,
- prngSeed,
- network,
- storage,
- nullptr) { }
-
- ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
- const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- TopologyCoordinator* topCoord,
- ReplicationExecutor* replExec,
- int64_t prngSeed) : ReplicationCoordinatorImpl(settings,
- externalState,
- topCoord,
- prngSeed,
- nullptr,
- nullptr,
- replExec) { }
-
- ReplicationCoordinatorImpl::~ReplicationCoordinatorImpl() {}
-
- void ReplicationCoordinatorImpl::waitForStartUpComplete() {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- _rsConfigStateChange.wait(lk);
- }
- }
+struct ReplicationCoordinatorImpl::WaiterInfo {
+ /**
+ * Constructor takes the list of waiters and enqueues itself on the list, removing itself
+ * in the destructor.
+ */
+ WaiterInfo(std::vector<WaiterInfo*>* _list,
+ unsigned int _opID,
+ const OpTime* _opTime,
+ const WriteConcernOptions* _writeConcern,
+ stdx::condition_variable* _condVar)
+ : list(_list),
+ master(true),
+ opID(_opID),
+ opTime(_opTime),
+ writeConcern(_writeConcern),
+ condVar(_condVar) {
+ list->push_back(this);
+ }
+
+ ~WaiterInfo() {
+ list->erase(std::remove(list->begin(), list->end(), this), list->end());
+ }
+
+ std::vector<WaiterInfo*>* list;
+ bool master; // Set to false to indicate that stepDown was called while waiting
+ const unsigned int opID;
+ const OpTime* opTime;
+ const WriteConcernOptions* writeConcern;
+ stdx::condition_variable* condVar;
+};
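
WaiterInfo is a self-registering RAII entry: the constructor pushes the waiter onto the shared list and the destructor removes it with the erase-remove idiom, so a waiter whose stack frame unwinds (say, when awaitReplication returns or throws) can never be left dangling on the list. The same idiom in a minimal standalone form:

#include <algorithm>
#include <iostream>
#include <vector>

// Self-registering list entry, mirroring WaiterInfo's constructor/destructor pair.
struct Waiter {
    explicit Waiter(std::vector<Waiter*>* list_) : list(list_) {
        list->push_back(this);  // enqueue on construction
    }
    ~Waiter() {
        // erase-remove: drop exactly this entry on destruction
        list->erase(std::remove(list->begin(), list->end(), this), list->end());
    }
    std::vector<Waiter*>* list;
};

int main() {
    std::vector<Waiter*> waiters;
    {
        Waiter w1(&waiters);
        Waiter w2(&waiters);
        std::cout << "registered: " << waiters.size() << "\n";  // 2
    }  // both destructors ran here
    std::cout << "after scope: " << waiters.size() << "\n";  // 0
    return 0;
}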
- ReplicaSetConfig ReplicationCoordinatorImpl::getReplicaSetConfig_forTest() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _rsConfig;
+namespace {
+ReplicationCoordinator::Mode getReplicationModeFromSettings(const ReplSettings& settings) {
+ if (settings.usingReplSets()) {
+ return ReplicationCoordinator::modeReplSet;
}
-
- void ReplicationCoordinatorImpl::_updateLastVote(const LastVote& lastVote) {
- _topCoord->loadLastVote(lastVote);
+ if (settings.master || settings.slave) {
+ return ReplicationCoordinator::modeMasterSlave;
}
+ return ReplicationCoordinator::modeNone;
+}
+} // namespace
- bool ReplicationCoordinatorImpl::_startLoadLocalConfig(OperationContext* txn) {
-
- StatusWith<LastVote> lastVote = _externalState->loadLocalLastVoteDocument(txn);
- if (!lastVote.isOK()) {
- log() << "Did not find local voted for document at startup; " << lastVote.getStatus();
- }
- else {
- LastVote vote = lastVote.getValue();
- _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_updateLastVote,
- this,
- vote));
- }
-
- StatusWith<BSONObj> cfg = _externalState->loadLocalConfigDocument(txn);
- if (!cfg.isOK()) {
- log() << "Did not find local replica set configuration document at startup; " <<
- cfg.getStatus();
- return true;
- }
- ReplicaSetConfig localConfig;
- Status status = localConfig.initialize(cfg.getValue());
- if (!status.isOK()) {
- error() << "Locally stored replica set configuration does not parse; See "
- "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config "
- "for information on how to recover from this. Got \"" <<
- status << "\" while parsing " << cfg.getValue();
- fassertFailedNoTrace(28545);
- }
-
- StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
-
- // Use a callback here, because _finishLoadLocalConfig calls isself() which requires
- // that the server's networking layer be up and running and accepting connections, which
- // doesn't happen until startReplication finishes.
+ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
+ const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ TopologyCoordinator* topCoord,
+ int64_t prngSeed,
+ NetworkInterface* network,
+ StorageInterface* storage,
+ ReplicationExecutor* replExec)
+ : _settings(settings),
+ _replMode(getReplicationModeFromSettings(settings)),
+ _topCoord(topCoord),
+ _replExecutorIfOwned(replExec ? nullptr
+ : new ReplicationExecutor(network, storage, prngSeed)),
+ _replExecutor(replExec ? *replExec : *_replExecutorIfOwned),
+ _externalState(externalState),
+ _inShutdown(false),
+ _memberState(MemberState::RS_STARTUP),
+ _isWaitingForDrainToComplete(false),
+ _rsConfigState(kConfigPreStart),
+ _selfIndex(-1),
+ _sleptLastElection(false),
+ _canAcceptNonLocalWrites(!(settings.usingReplSets() || settings.slave)),
+ _canServeNonLocalReads(0U),
+ _dr(DataReplicatorOptions(), &_replExecutor, this) {
+ if (!isReplEnabled()) {
+ return;
+ }
+
+ std::unique_ptr<SecureRandom> rbidGenerator(SecureRandom::create());
+ _rbid = static_cast<int>(rbidGenerator->nextInt64());
+ if (_rbid < 0) {
+ // Ensure _rbid is always positive
+ _rbid = -_rbid;
+ }
+
+ // Make sure there is always an entry in _slaveInfo for ourself.
+ SlaveInfo selfInfo;
+ selfInfo.self = true;
+ _slaveInfo.push_back(selfInfo);
+}
+
+ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
+ const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ NetworkInterface* network,
+ StorageInterface* storage,
+ TopologyCoordinator* topCoord,
+ int64_t prngSeed)
+ : ReplicationCoordinatorImpl(
+ settings, externalState, topCoord, prngSeed, network, storage, nullptr) {}
+
+ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
+ const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ TopologyCoordinator* topCoord,
+ ReplicationExecutor* replExec,
+ int64_t prngSeed)
+ : ReplicationCoordinatorImpl(
+ settings, externalState, topCoord, prngSeed, nullptr, nullptr, replExec) {}
+
+ReplicationCoordinatorImpl::~ReplicationCoordinatorImpl() {}
+
+void ReplicationCoordinatorImpl::waitForStartUpComplete() {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ _rsConfigStateChange.wait(lk);
+ }
+}
+
+ReplicaSetConfig ReplicationCoordinatorImpl::getReplicaSetConfig_forTest() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _rsConfig;
+}
+
+void ReplicationCoordinatorImpl::_updateLastVote(const LastVote& lastVote) {
+ _topCoord->loadLastVote(lastVote);
+}
+
+bool ReplicationCoordinatorImpl::_startLoadLocalConfig(OperationContext* txn) {
+ StatusWith<LastVote> lastVote = _externalState->loadLocalLastVoteDocument(txn);
+ if (!lastVote.isOK()) {
+ log() << "Did not find local voted for document at startup; " << lastVote.getStatus();
+ } else {
+ LastVote vote = lastVote.getValue();
_replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_finishLoadLocalConfig,
- this,
- stdx::placeholders::_1,
- localConfig,
- lastOpTimeStatus));
- return false;
+ stdx::bind(&ReplicationCoordinatorImpl::_updateLastVote, this, vote));
}
- void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& localConfig,
- const StatusWith<OpTime>& lastOpTimeStatus) {
- if (!cbData.status.isOK()) {
- LOG(1) << "Loading local replica set configuration failed due to " << cbData.status;
- return;
- }
-
- StatusWith<int> myIndex = validateConfigForStartUp(_externalState.get(),
- _rsConfig,
- localConfig);
- if (!myIndex.isOK()) {
- if (myIndex.getStatus() == ErrorCodes::NodeNotFound ||
- myIndex.getStatus() == ErrorCodes::DuplicateKey) {
- warning() << "Locally stored replica set configuration does not have a valid entry "
- "for the current node; waiting for reconfig or remote heartbeat; Got \"" <<
- myIndex.getStatus() << "\" while validating " << localConfig.toBSON();
- myIndex = StatusWith<int>(-1);
- }
- else {
- error() << "Locally stored replica set configuration is invalid; See "
- "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config"
- " for information on how to recover from this. Got \"" <<
- myIndex.getStatus() << "\" while validating " << localConfig.toBSON();
- fassertFailedNoTrace(28544);
- }
- }
-
- if (localConfig.getReplSetName() != _settings.ourSetName()) {
- warning() << "Local replica set configuration document reports set name of " <<
- localConfig.getReplSetName() << ", but command line reports " <<
- _settings.ourSetName() << "; waitng for reconfig or remote heartbeat";
+ StatusWith<BSONObj> cfg = _externalState->loadLocalConfigDocument(txn);
+ if (!cfg.isOK()) {
+ log() << "Did not find local replica set configuration document at startup; "
+ << cfg.getStatus();
+ return true;
+ }
+ ReplicaSetConfig localConfig;
+ Status status = localConfig.initialize(cfg.getValue());
+ if (!status.isOK()) {
+ error() << "Locally stored replica set configuration does not parse; See "
+ "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config "
+ "for information on how to recover from this. Got \"" << status
+ << "\" while parsing " << cfg.getValue();
+ fassertFailedNoTrace(28545);
+ }
+
+ StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
+
+ // Use a callback here, because _finishLoadLocalConfig calls isself() which requires
+ // that the server's networking layer be up and running and accepting connections, which
+ // doesn't happen until startReplication finishes.
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_finishLoadLocalConfig,
+ this,
+ stdx::placeholders::_1,
+ localConfig,
+ lastOpTimeStatus));
+ return false;
+}
+
+void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& localConfig,
+ const StatusWith<OpTime>& lastOpTimeStatus) {
+ if (!cbData.status.isOK()) {
+ LOG(1) << "Loading local replica set configuration failed due to " << cbData.status;
+ return;
+ }
+
+ StatusWith<int> myIndex =
+ validateConfigForStartUp(_externalState.get(), _rsConfig, localConfig);
+ if (!myIndex.isOK()) {
+ if (myIndex.getStatus() == ErrorCodes::NodeNotFound ||
+ myIndex.getStatus() == ErrorCodes::DuplicateKey) {
+ warning() << "Locally stored replica set configuration does not have a valid entry "
+ "for the current node; waiting for reconfig or remote heartbeat; Got \""
+ << myIndex.getStatus() << "\" while validating " << localConfig.toBSON();
myIndex = StatusWith<int>(-1);
+ } else {
+ error() << "Locally stored replica set configuration is invalid; See "
+ "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config"
+ " for information on how to recover from this. Got \"" << myIndex.getStatus()
+ << "\" while validating " << localConfig.toBSON();
+ fassertFailedNoTrace(28544);
}
-
- // Do not check optime, if this node is an arbiter.
- bool isArbiter = myIndex.getValue() != -1 &&
- localConfig.getMemberAt(myIndex.getValue()).isArbiter();
- OpTime lastOpTime;
- if (!isArbiter) {
- if (!lastOpTimeStatus.isOK()) {
- warning() << "Failed to load timestamp of most recently applied operation; " <<
- lastOpTimeStatus.getStatus();
- }
- else {
- lastOpTime = lastOpTimeStatus.getValue();
- }
- }
-
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigStartingUp);
- const PostMemberStateUpdateAction action =
- _setCurrentRSConfig_inlock(localConfig, myIndex.getValue());
- _setMyLastOptime_inlock(&lk, lastOpTime, false);
- _externalState->setGlobalTimestamp(lastOpTime.getTimestamp());
- if (lk.owns_lock()) {
- lk.unlock();
- }
- _performPostMemberStateUpdateAction(action);
- _externalState->startThreads();
}
- void ReplicationCoordinatorImpl::startReplication(OperationContext* txn) {
- if (!isReplEnabled()) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _setConfigState_inlock(kConfigReplicationDisabled);
- return;
- }
-
- {
- OID rid = _externalState->ensureMe(txn);
-
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- fassert(18822, !_inShutdown);
- _setConfigState_inlock(kConfigStartingUp);
- _myRID = rid;
- _slaveInfo[_getMyIndexInSlaveInfo_inlock()].rid = rid;
- }
-
- if (!_settings.usingReplSets()) {
- // Must be Master/Slave
- invariant(_settings.master || _settings.slave);
- _externalState->startMasterSlave(txn);
- return;
- }
-
- _topCoordDriverThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run,
- &_replExecutor)));
+ if (localConfig.getReplSetName() != _settings.ourSetName()) {
+ warning() << "Local replica set configuration document reports set name of "
+ << localConfig.getReplSetName() << ", but command line reports "
+ << _settings.ourSetName() << "; waiting for reconfig or remote heartbeat";
+ myIndex = StatusWith<int>(-1);
+ }
- bool doneLoadingConfig = _startLoadLocalConfig(txn);
- if (doneLoadingConfig) {
- // If we're not done loading the config, then the config state will be set by
- // _finishLoadLocalConfig.
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- invariant(!_rsConfig.isInitialized());
- _setConfigState_inlock(kConfigUninitialized);
+ // Do not check the optime if this node is an arbiter.
+ bool isArbiter =
+ myIndex.getValue() != -1 && localConfig.getMemberAt(myIndex.getValue()).isArbiter();
+ OpTime lastOpTime;
+ if (!isArbiter) {
+ if (!lastOpTimeStatus.isOK()) {
+ warning() << "Failed to load timestamp of most recently applied operation; "
+ << lastOpTimeStatus.getStatus();
+ } else {
+ lastOpTime = lastOpTimeStatus.getValue();
}
}
- void ReplicationCoordinatorImpl::shutdown() {
- // Shutdown must:
- // * prevent new threads from blocking in awaitReplication
- // * wake up all existing threads blocking in awaitReplication
- // * tell the ReplicationExecutor to shut down
- // * wait for the thread running the ReplicationExecutor to finish
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigStartingUp);
+ const PostMemberStateUpdateAction action =
+ _setCurrentRSConfig_inlock(localConfig, myIndex.getValue());
+ _setMyLastOptime_inlock(&lk, lastOpTime, false);
+ _externalState->setGlobalTimestamp(lastOpTime.getTimestamp());
+ if (lk.owns_lock()) {
+ lk.unlock();
+ }
+ _performPostMemberStateUpdateAction(action);
+ _externalState->startThreads();
+}
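
The validation triage in _finishLoadLocalConfig treats NodeNotFound and DuplicateKey as recoverable (the node parks itself at index -1 and waits for a reconfig or heartbeat) and everything else as fatal. A standalone sketch of that triage, assuming a simplified error enum rather than the real Status type:

    #include <stdexcept>

    enum class Err { OK, NodeNotFound, DuplicateKey, BadConfig };

    // Recoverable errors demote us to "not in the config" (index -1) so we can
    // wait for a reconfig; anything else is fatal, like the fassert above.
    int resolveSelfIndex(Err validation, int foundIndex) {
        if (validation == Err::OK)
            return foundIndex;
        if (validation == Err::NodeNotFound || validation == Err::DuplicateKey)
            return -1;  // wait for reconfig or remote heartbeat
        throw std::runtime_error("invalid locally stored replica set config");
    }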
- if (!_settings.usingReplSets()) {
- return;
- }
+void ReplicationCoordinatorImpl::startReplication(OperationContext* txn) {
+ if (!isReplEnabled()) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _setConfigState_inlock(kConfigReplicationDisabled);
+ return;
+ }
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- fassert(28533, !_inShutdown);
- _inShutdown = true;
- if (_rsConfigState == kConfigPreStart) {
- warning() << "ReplicationCoordinatorImpl::shutdown() called before "
- "startReplication() finished. Shutting down without cleaning up the "
- "replication system";
- return;
- }
- fassert(18823, _rsConfigState != kConfigStartingUp);
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* waiter = *it;
- waiter->condVar->notify_all();
- }
- }
+ {
+ OID rid = _externalState->ensureMe(txn);
- _replExecutor.shutdown();
- _topCoordDriverThread->join(); // must happen outside _mutex
- _externalState->shutdown();
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ fassert(18822, !_inShutdown);
+ _setConfigState_inlock(kConfigStartingUp);
+ _myRID = rid;
+ _slaveInfo[_getMyIndexInSlaveInfo_inlock()].rid = rid;
}
- const ReplSettings& ReplicationCoordinatorImpl::getSettings() const {
- return _settings;
+ if (!_settings.usingReplSets()) {
+ // Must be Master/Slave
+ invariant(_settings.master || _settings.slave);
+ _externalState->startMasterSlave(txn);
+ return;
}
- ReplicationCoordinator::Mode ReplicationCoordinatorImpl::getReplicationMode() const {
- return _replMode;
- }
+ _topCoordDriverThread.reset(
+ new stdx::thread(stdx::bind(&ReplicationExecutor::run, &_replExecutor)));
- MemberState ReplicationCoordinatorImpl::getMemberState() const {
+ bool doneLoadingConfig = _startLoadLocalConfig(txn);
+ if (doneLoadingConfig) {
+ // We finished loading, but no valid local config document was found, so mark
+ // the config state uninitialized; otherwise _finishLoadLocalConfig sets it.
stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _getMemberState_inlock();
+ invariant(!_rsConfig.isInitialized());
+ _setConfigState_inlock(kConfigUninitialized);
}
+}
- MemberState ReplicationCoordinatorImpl::_getMemberState_inlock() const {
- return _memberState;
+void ReplicationCoordinatorImpl::shutdown() {
+ // Shutdown must:
+ // * prevent new threads from blocking in awaitReplication
+ // * wake up all existing threads blocking in awaitReplication
+ // * tell the ReplicationExecutor to shut down
+ // * wait for the thread running the ReplicationExecutor to finish
+
+ if (!_settings.usingReplSets()) {
+ return;
}
- Seconds ReplicationCoordinatorImpl::getSlaveDelaySecs() const {
+ {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- invariant(_rsConfig.isInitialized());
- uassert(28524,
- "Node not a member of the current set configuration",
- _selfIndex != -1);
- return _rsConfig.getMemberAt(_selfIndex).getSlaveDelay();
- }
-
- void ReplicationCoordinatorImpl::clearSyncSourceBlacklist() {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish,
- this,
- stdx::placeholders::_1));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ fassert(28533, !_inShutdown);
+ _inShutdown = true;
+ if (_rsConfigState == kConfigPreStart) {
+ warning() << "ReplicationCoordinatorImpl::shutdown() called before "
+ "startReplication() finished. Shutting down without cleaning up the "
+ "replication system";
return;
}
- fassert(18907, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
+ fassert(18823, _rsConfigState != kConfigStartingUp);
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* waiter = *it;
+ waiter->condVar->notify_all();
+ }
+ }
+
+ _replExecutor.shutdown();
+ _topCoordDriverThread->join(); // must happen outside _mutex
+ _externalState->shutdown();
+}
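
The ordering in shutdown() is deliberate: waiters are notified while _mutex is held, but the executor thread is joined only after the lock is released. A minimal standalone sketch of that wake-under-lock, join-outside-lock pattern, using only standard-library types (all names here are illustrative, not the real coordinator types):

    #include <condition_variable>
    #include <mutex>
    #include <thread>
    #include <vector>

    class MiniCoordinator {
    public:
        void shutdown() {
            {
                std::lock_guard<std::mutex> lk(_mutex);
                _inShutdown = true;
                for (std::condition_variable* cv : _waiters)
                    cv->notify_all();  // wake threads blocked in awaitReplication
            }
            // Joining while holding _mutex could deadlock if the worker thread
            // ever needs _mutex on its way out, hence "outside _mutex" above.
            if (_worker.joinable())
                _worker.join();
        }

    private:
        std::mutex _mutex;
        bool _inShutdown = false;
        std::vector<std::condition_variable*> _waiters;
        std::thread _worker;
    };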
+
+const ReplSettings& ReplicationCoordinatorImpl::getSettings() const {
+ return _settings;
+}
+
+ReplicationCoordinator::Mode ReplicationCoordinatorImpl::getReplicationMode() const {
+ return _replMode;
+}
+
+MemberState ReplicationCoordinatorImpl::getMemberState() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _getMemberState_inlock();
+}
+
+MemberState ReplicationCoordinatorImpl::_getMemberState_inlock() const {
+ return _memberState;
+}
+
+Seconds ReplicationCoordinatorImpl::getSlaveDelaySecs() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ invariant(_rsConfig.isInitialized());
+ uassert(28524, "Node not a member of the current set configuration", _selfIndex != -1);
+ return _rsConfig.getMemberAt(_selfIndex).getSlaveDelay();
+}
+
+void ReplicationCoordinatorImpl::clearSyncSourceBlacklist() {
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish,
+ this,
+ stdx::placeholders::_1));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18907, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
+
+void ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish(
+ const ReplicationExecutor::CallbackArgs& cbData) {
+ if (cbData.status == ErrorCodes::CallbackCanceled)
+ return;
+ _topCoord->clearSyncSourceBlacklist();
+}
+
+bool ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState) {
+ StatusWith<ReplicationExecutor::EventHandle> finishedSettingFollowerMode =
+ _replExecutor.makeEvent();
+ if (finishedSettingFollowerMode.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
-
- void ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish(
- const ReplicationExecutor::CallbackArgs& cbData) {
- if (cbData.status == ErrorCodes::CallbackCanceled)
- return;
- _topCoord->clearSyncSourceBlacklist();
+ fassert(18812, finishedSettingFollowerMode.getStatus());
+ bool success = false;
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
+ this,
+ stdx::placeholders::_1,
+ newState,
+ finishedSettingFollowerMode.getValue(),
+ &success));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
+ fassert(18699, cbh.getStatus());
+ _replExecutor.waitForEvent(finishedSettingFollowerMode.getValue());
+ return success;
+}
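
setFollowerMode shows the executor's event round trip: make an event, schedule a callback that reports through a pointer to a stack-local flag, then block on the event so the flag outlives the callback. With the standard library the same shape is usually expressed with a promise/future pair; a hedged sketch (the Executor type and its schedule() are hypothetical stand-ins, not the MongoDB API):

    #include <future>

    template <typename Executor>
    bool setFlagViaExecutor(Executor& exec, bool desired) {
        std::promise<bool> done;
        std::future<bool> result = done.get_future();
        // The callback runs on the executor thread and reports back through
        // the promise, much as _setFollowerModeFinish signals its event.
        exec.schedule([&done, desired] { done.set_value(desired); });
        return result.get();  // blocks until the callback has run
    }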
- bool ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState) {
- StatusWith<ReplicationExecutor::EventHandle> finishedSettingFollowerMode =
- _replExecutor.makeEvent();
- if (finishedSettingFollowerMode.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18812, finishedSettingFollowerMode.getStatus());
- bool success = false;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
- this,
- stdx::placeholders::_1,
- newState,
- finishedSettingFollowerMode.getValue(),
- &success));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18699, cbh.getStatus());
- _replExecutor.waitForEvent(finishedSettingFollowerMode.getValue());
- return success;
+void ReplicationCoordinatorImpl::_setFollowerModeFinish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const MemberState& newState,
+ const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
+ bool* success) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_setFollowerModeFinish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const MemberState& newState,
- const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
- bool* success) {
-
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- if (newState == _topCoord->getMemberState()) {
- *success = true;
- _replExecutor.signalEvent(finishedSettingFollowerMode);
- return;
- }
- if (_topCoord->getRole() == TopologyCoordinator::Role::leader) {
- *success = false;
- _replExecutor.signalEvent(finishedSettingFollowerMode);
- return;
- }
-
- if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- // We are a candidate, which means _topCoord believs us to be in state RS_SECONDARY, and
- // we know that newState != RS_SECONDARY because we would have returned early, above if
- // the old and new state were equal. So, cancel the running election and try again to
- // finish setting the follower mode.
- invariant(_freshnessChecker);
- _freshnessChecker->cancel(&_replExecutor);
- if (_electCmdRunner) {
- _electCmdRunner->cancel(&_replExecutor);
- }
- _replExecutor.onEvent(
- _electionFinishedEvent,
- stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
- this,
- stdx::placeholders::_1,
- newState,
- finishedSettingFollowerMode,
- success));
- return;
- }
-
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _topCoord->setFollowerMode(newState.s);
-
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ if (newState == _topCoord->getMemberState()) {
*success = true;
_replExecutor.signalEvent(finishedSettingFollowerMode);
+ return;
}
-
- bool ReplicationCoordinatorImpl::isWaitingForApplierToDrain() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _isWaitingForDrainToComplete;
- }
-
- void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* txn) {
- // This logic is a little complicated in order to avoid acquiring the global exclusive lock
- // unnecessarily. This is important because the applier may call signalDrainComplete()
- // whenever it wants, not only when the ReplicationCoordinator is expecting it.
- //
- // The steps are:
- // 1.) Check to see if we're waiting for this signal. If not, return early.
- // 2.) Otherwise, release the mutex while acquiring the global exclusive lock,
- // since that might take a while (NB there's a deadlock cycle otherwise, too).
- // 3.) Re-check to see if we've somehow left drain mode. If we have not, clear
- // _isWaitingForDrainToComplete, set the flag allowing non-local database writes and
- // drop the mutex. At this point, no writes can occur from other threads, due to the
- // global exclusive lock.
- // 4.) Drop all temp collections.
- // 5.) Drop the global exclusive lock.
- //
- // Because replicatable writes are forbidden while in drain mode, and we don't exit drain
- // mode until we have the global exclusive lock, which forbids all other threads from making
- // writes, we know that from the time that _isWaitingForDrainToComplete is set in
- // _performPostMemberStateUpdateAction(kActionWinElection) until this method returns, no
- // external writes will be processed. This is important so that a new temp collection isn't
- // introduced on the new primary before we drop all the temp collections.
-
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (!_isWaitingForDrainToComplete) {
- return;
- }
- lk.unlock();
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite globalWriteLock(txn->lockState());
- lk.lock();
- if (!_isWaitingForDrainToComplete) {
- return;
+ if (_topCoord->getRole() == TopologyCoordinator::Role::leader) {
+ *success = false;
+ _replExecutor.signalEvent(finishedSettingFollowerMode);
+ return;
+ }
+
+ if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+ // We are a candidate, which means _topCoord believes us to be in state
+ // RS_SECONDARY, and we know that newState != RS_SECONDARY because we would
+ // have returned early, above, if the old and new state were equal. So, cancel
+ // the running election and try again to finish setting the follower mode.
+ invariant(_freshnessChecker);
+ _freshnessChecker->cancel(&_replExecutor);
+ if (_electCmdRunner) {
+ _electCmdRunner->cancel(&_replExecutor);
+ }
+ _replExecutor.onEvent(_electionFinishedEvent,
+ stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
+ this,
+ stdx::placeholders::_1,
+ newState,
+ finishedSettingFollowerMode,
+ success));
+ return;
+ }
+
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _topCoord->setFollowerMode(newState.s);
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ *success = true;
+ _replExecutor.signalEvent(finishedSettingFollowerMode);
+}
+
+bool ReplicationCoordinatorImpl::isWaitingForApplierToDrain() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _isWaitingForDrainToComplete;
+}
+
+void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* txn) {
+ // This logic is a little complicated in order to avoid acquiring the global exclusive lock
+ // unnecessarily. This is important because the applier may call signalDrainComplete()
+ // whenever it wants, not only when the ReplicationCoordinator is expecting it.
+ //
+ // The steps are:
+ // 1.) Check to see if we're waiting for this signal. If not, return early.
+ // 2.) Otherwise, release the mutex while acquiring the global exclusive lock,
+ // since that might take a while (NB there's a deadlock cycle otherwise, too).
+ // 3.) Re-check to see if we've somehow left drain mode. If we have not, clear
+ // _isWaitingForDrainToComplete, set the flag allowing non-local database writes and
+ // drop the mutex. At this point, no writes can occur from other threads, due to the
+ // global exclusive lock.
+ // 4.) Drop all temp collections.
+ // 5.) Drop the global exclusive lock.
+ //
+ // Because replicatable writes are forbidden while in drain mode, and we don't exit drain
+ // mode until we have the global exclusive lock, which forbids all other threads from making
+ // writes, we know that from the time that _isWaitingForDrainToComplete is set in
+ // _performPostMemberStateUpdateAction(kActionWinElection) until this method returns, no
+ // external writes will be processed. This is important so that a new temp collection isn't
+ // introduced on the new primary before we drop all the temp collections.
+
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (!_isWaitingForDrainToComplete) {
+ return;
+ }
+ lk.unlock();
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite globalWriteLock(txn->lockState());
+ lk.lock();
+ if (!_isWaitingForDrainToComplete) {
+ return;
+ }
+ _isWaitingForDrainToComplete = false;
+ _canAcceptNonLocalWrites = true;
+ lk.unlock();
+ _externalState->dropAllTempCollections(txn);
+ log() << "transition to primary complete; database writes are now permitted" << rsLog;
+}
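
Steps 1-3 of the comment above boil down to a drop-reacquire-recheck dance: release the small mutex before taking the coarse lock, then re-verify the state because it may have changed in the window. A sketch of just that locking shape with plain std::mutex (globalLock here merely stands in for the global exclusive lock):

    #include <mutex>

    std::mutex stateMutex;  // analogue of _mutex, protects `draining`
    std::mutex globalLock;  // stand-in for the global exclusive lock
    bool draining = true;

    void signalDrainCompleteSketch() {
        std::unique_lock<std::mutex> lk(stateMutex);
        if (!draining)
            return;                                      // step 1: nothing to do
        lk.unlock();                                     // step 2: drop the mutex...
        std::lock_guard<std::mutex> global(globalLock);  // ...then take the big lock
        lk.lock();
        if (!draining)
            return;        // step 3: we left drain mode during the window
        draining = false;  // safe: no competing writer can run here
    }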
+
+void ReplicationCoordinatorImpl::signalUpstreamUpdater() {
+ _externalState->forwardSlaveProgress();
+}
+
+ReplicationCoordinatorImpl::SlaveInfo* ReplicationCoordinatorImpl::_findSlaveInfoByMemberID_inlock(
+ int memberId) {
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ if (it->memberId == memberId) {
+ return &(*it);
+ }
+ }
+ return NULL;
+}
+
+ReplicationCoordinatorImpl::SlaveInfo* ReplicationCoordinatorImpl::_findSlaveInfoByRID_inlock(
+ const OID& rid) {
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ if (it->rid == rid) {
+ return &(*it);
+ }
+ }
+ return NULL;
+}
+
+void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo) {
+ invariant(getReplicationMode() == modeMasterSlave);
+ _slaveInfo.push_back(slaveInfo);
+
+ // Wake up any threads waiting for replication that now have their replication
+ // check satisfied
+ _wakeReadyWaiters_inlock();
+}
+
+void ReplicationCoordinatorImpl::_updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo,
+ const OpTime& opTime) {
+ slaveInfo->opTime = opTime;
+
+ // Wake up any threads waiting for replication that now have their replication
+ // check satisfied
+ _wakeReadyWaiters_inlock();
+}
+
+void ReplicationCoordinatorImpl::_updateSlaveInfoFromConfig_inlock() {
+ invariant(_settings.usingReplSets());
+
+ SlaveInfoVector oldSlaveInfos;
+ _slaveInfo.swap(oldSlaveInfos);
+
+ if (_selfIndex == -1) {
+ // If we aren't in the config then the only data we care about is for ourself
+ for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin(); it != oldSlaveInfos.end();
+ ++it) {
+ if (it->self) {
+ SlaveInfo slaveInfo = *it;
+ slaveInfo.memberId = -1;
+ _slaveInfo.push_back(slaveInfo);
+ return;
+ }
}
- _isWaitingForDrainToComplete = false;
- _canAcceptNonLocalWrites = true;
- lk.unlock();
- _externalState->dropAllTempCollections(txn);
- log() << "transition to primary complete; database writes are now permitted" << rsLog;
+ invariant(false); // There should always have been an entry for ourselves
}
- void ReplicationCoordinatorImpl::signalUpstreamUpdater() {
- _externalState->forwardSlaveProgress();
- }
+ for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
+ const MemberConfig& memberConfig = _rsConfig.getMemberAt(i);
+ int memberId = memberConfig.getId();
+ const HostAndPort& memberHostAndPort = memberConfig.getHostAndPort();
- ReplicationCoordinatorImpl::SlaveInfo*
- ReplicationCoordinatorImpl::_findSlaveInfoByMemberID_inlock(int memberId) {
- for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
- if (it->memberId == memberId) {
- return &(*it);
- }
- }
- return NULL;
- }
+ SlaveInfo slaveInfo;
- ReplicationCoordinatorImpl::SlaveInfo*
- ReplicationCoordinatorImpl::_findSlaveInfoByRID_inlock(const OID& rid) {
- for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
- if (it->rid == rid) {
- return &(*it);
+ // Check if the node existed with the same member ID and hostname in the old data
+ for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin(); it != oldSlaveInfos.end();
+ ++it) {
+ if ((it->memberId == memberId && it->hostAndPort == memberHostAndPort) ||
+ (i == _selfIndex && it->self)) {
+ slaveInfo = *it;
}
}
- return NULL;
- }
- void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo) {
- invariant(getReplicationMode() == modeMasterSlave);
+ // Make sure you have the most up-to-date info for member ID and hostAndPort.
+ slaveInfo.memberId = memberId;
+ slaveInfo.hostAndPort = memberHostAndPort;
_slaveInfo.push_back(slaveInfo);
-
- // Wake up any threads waiting for replication that now have their replication
- // check satisfied
- _wakeReadyWaiters_inlock();
}
+ invariant(static_cast<int>(_slaveInfo.size()) == _rsConfig.getNumMembers());
+}
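
_updateSlaveInfoFromConfig_inlock uses a swap-and-rebuild reconciliation: move the old vector aside, then rebuild one entry per configured member, carrying progress over for entries that match on member ID and host. A self-contained sketch of the same rule (Entry and its `progress` field are simplified stand-ins for SlaveInfo and its optime):

    #include <string>
    #include <vector>

    struct Entry {
        int memberId = -1;
        std::string host;
        long long progress = 0;  // stands in for the tracked optime
    };

    void rebuildFromConfig(std::vector<Entry>& live, const std::vector<Entry>& configured) {
        std::vector<Entry> old;
        live.swap(old);
        for (const Entry& cfg : configured) {
            Entry next = cfg;
            for (const Entry& prev : old) {
                if (prev.memberId == cfg.memberId && prev.host == cfg.host)
                    next.progress = prev.progress;  // keep known progress
            }
            live.push_back(next);
        }
    }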
- void ReplicationCoordinatorImpl::_updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo,
- const OpTime& opTime) {
-
- slaveInfo->opTime = opTime;
-
- // Wake up any threads waiting for replication that now have their replication
- // check satisfied
- _wakeReadyWaiters_inlock();
- }
-
- void ReplicationCoordinatorImpl::_updateSlaveInfoFromConfig_inlock() {
+size_t ReplicationCoordinatorImpl::_getMyIndexInSlaveInfo_inlock() const {
+ if (getReplicationMode() == modeMasterSlave) {
+ // Self data always lives in the first entry in _slaveInfo for master/slave
+ return 0;
+ } else {
invariant(_settings.usingReplSets());
-
- SlaveInfoVector oldSlaveInfos;
- _slaveInfo.swap(oldSlaveInfos);
-
if (_selfIndex == -1) {
- // If we aren't in the config then the only data we care about is for ourself
- for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin();
- it != oldSlaveInfos.end(); ++it) {
- if (it->self) {
- SlaveInfo slaveInfo = *it;
- slaveInfo.memberId = -1;
- _slaveInfo.push_back(slaveInfo);
- return;
- }
- }
- invariant(false); // There should always have been an entry for ourself
- }
-
- for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
- const MemberConfig& memberConfig = _rsConfig.getMemberAt(i);
- int memberId = memberConfig.getId();
- const HostAndPort& memberHostAndPort = memberConfig.getHostAndPort();
-
- SlaveInfo slaveInfo;
-
- // Check if the node existed with the same member ID and hostname in the old data
- for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin();
- it != oldSlaveInfos.end(); ++it) {
- if ((it->memberId == memberId && it->hostAndPort == memberHostAndPort)
- || (i == _selfIndex && it->self)) {
- slaveInfo = *it;
- }
- }
-
- // Make sure you have the most up-to-date info for member ID and hostAndPort.
- slaveInfo.memberId = memberId;
- slaveInfo.hostAndPort = memberHostAndPort;
- _slaveInfo.push_back(slaveInfo);
- }
- invariant(static_cast<int>(_slaveInfo.size()) == _rsConfig.getNumMembers());
- }
-
- size_t ReplicationCoordinatorImpl::_getMyIndexInSlaveInfo_inlock() const {
- if (getReplicationMode() == modeMasterSlave) {
- // Self data always lives in the first entry in _slaveInfo for master/slave
+ invariant(_slaveInfo.size() == 1);
return 0;
- }
- else {
- invariant(_settings.usingReplSets());
- if (_selfIndex == -1) {
- invariant(_slaveInfo.size() == 1);
- return 0;
- }
- else {
- return _selfIndex;
- }
+ } else {
+ return _selfIndex;
}
}
+}
- Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid,
- const Timestamp& ts) {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- massert(28576,
- "Received an old style replication progress update, which is only used for Master/"
- "Slave replication now, but this node is not using Master/Slave replication. "
- "This is likely caused by an old (pre-2.6) member syncing from this node.",
- getReplicationMode() == modeMasterSlave);
-
- // Term == 0 for master-slave
- OpTime opTime(ts, OpTime::kDefaultTerm);
- SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid);
- if (slaveInfo) {
- if (slaveInfo->opTime < opTime) {
- _updateSlaveInfoOptime_inlock(slaveInfo, opTime);
- }
- }
- else {
- SlaveInfo newSlaveInfo;
- newSlaveInfo.rid = rid;
- newSlaveInfo.opTime = opTime;
- _addSlaveInfo_inlock(newSlaveInfo);
- }
- return Status::OK();
- }
+Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid, const Timestamp& ts) {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ massert(28576,
+ "Received an old style replication progress update, which is only used for Master/"
+ "Slave replication now, but this node is not using Master/Slave replication. "
+ "This is likely caused by an old (pre-2.6) member syncing from this node.",
+ getReplicationMode() == modeMasterSlave);
- void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&TopologyCoordinator::setMyHeartbeatMessage,
- _topCoord.get(),
- _replExecutor.now(),
- msg));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
+ // Term == 0 for master-slave
+ OpTime opTime(ts, OpTime::kDefaultTerm);
+ SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid);
+ if (slaveInfo) {
+ if (slaveInfo->opTime < opTime) {
+ _updateSlaveInfoOptime_inlock(slaveInfo, opTime);
+ }
+ } else {
+ SlaveInfo newSlaveInfo;
+ newSlaveInfo.rid = rid;
+ newSlaveInfo.opTime = opTime;
+ _addSlaveInfo_inlock(newSlaveInfo);
+ }
+ return Status::OK();
+}
+
+void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
+ CBHStatus cbh = _replExecutor.scheduleWork(stdx::bind(
+ &TopologyCoordinator::setMyHeartbeatMessage, _topCoord.get(), _replExecutor.now(), msg));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28540, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
+
+void ReplicationCoordinatorImpl::setMyLastOptime(const OpTime& opTime) {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ _setMyLastOptime_inlock(&lock, opTime, false);
+}
+
+void ReplicationCoordinatorImpl::resetMyLastOptime() {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ // Reset to uninitialized OpTime
+ _setMyLastOptime_inlock(&lock, OpTime(), true);
+}
+
+void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(stdx::unique_lock<stdx::mutex>* lock,
+ const OpTime& opTime,
+ bool isRollbackAllowed) {
+ invariant(lock->owns_lock());
+ SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()];
+ invariant(isRollbackAllowed || mySlaveInfo->opTime <= opTime);
+ _updateSlaveInfoOptime_inlock(mySlaveInfo, opTime);
+
+ if (getReplicationMode() != modeReplSet) {
+ return;
+ }
+
+ for (auto& opTimeWaiter : _opTimeWaiterList) {
+ if (*(opTimeWaiter->opTime) <= opTime) {
+ opTimeWaiter->condVar->notify_all();
}
- fassert(28540, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- }
-
- void ReplicationCoordinatorImpl::setMyLastOptime(const OpTime& opTime) {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- _setMyLastOptime_inlock(&lock, opTime, false);
}
- void ReplicationCoordinatorImpl::resetMyLastOptime() {
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- // Reset to uninitialized OpTime
- _setMyLastOptime_inlock(&lock, OpTime(), true);
+ if (_getMemberState_inlock().primary()) {
+ return;
}
- void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(
- stdx::unique_lock<stdx::mutex>* lock, const OpTime& opTime, bool isRollbackAllowed) {
- invariant(lock->owns_lock());
- SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()];
- invariant(isRollbackAllowed || mySlaveInfo->opTime <= opTime);
- _updateSlaveInfoOptime_inlock(mySlaveInfo, opTime);
-
- if (getReplicationMode() != modeReplSet) {
- return;
- }
+ lock->unlock();
- for (auto& opTimeWaiter : _opTimeWaiterList) {
- if (*(opTimeWaiter->opTime) <= opTime) {
- opTimeWaiter->condVar->notify_all();
- }
- }
+ _externalState->forwardSlaveProgress(); // Must do this outside _mutex
+}
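
Advancing our own optime wakes exactly the waiters whose target has now been reached, which is what the notify loop above does. A pure standard-library sketch of that waiter list (the long long optime is a simplification of the real OpTime ordering):

    #include <condition_variable>
    #include <mutex>
    #include <vector>

    struct OpTimeWaiter {
        long long target;             // optime the waiter needs to see
        std::condition_variable* cv;  // signaled once the target is reached
    };

    std::mutex mtx;
    long long myLastOpTime = 0;
    std::vector<OpTimeWaiter> waiters;

    void advanceOpTime(long long newOpTime) {
        std::lock_guard<std::mutex> lk(mtx);
        myLastOpTime = newOpTime;
        for (OpTimeWaiter& w : waiters) {
            if (w.target <= myLastOpTime)
                w.cv->notify_all();  // this waiter's optime is now satisfied
        }
    }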
- if (_getMemberState_inlock().primary()) {
- return;
- }
+OpTime ReplicationCoordinatorImpl::getMyLastOptime() const {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _getMyLastOptime_inlock();
+}
- lock->unlock();
+ReadAfterOpTimeResponse ReplicationCoordinatorImpl::waitUntilOpTime(
+ OperationContext* txn, const ReadAfterOpTimeArgs& settings) {
+ const auto& ts = settings.getOpTime();
- _externalState->forwardSlaveProgress(); // Must do this outside _mutex
+ if (ts.isNull()) {
+ return ReadAfterOpTimeResponse();
}
- OpTime ReplicationCoordinatorImpl::getMyLastOptime() const {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _getMyLastOptime_inlock();
+ if (getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
+ return ReadAfterOpTimeResponse(
+ Status(ErrorCodes::NotAReplicaSet,
+ "node needs to be a replica set member to use read after opTime"));
}
- ReadAfterOpTimeResponse ReplicationCoordinatorImpl::waitUntilOpTime(
- OperationContext* txn,
- const ReadAfterOpTimeArgs& settings) {
- const auto& ts = settings.getOpTime();
-
- if (ts.isNull()) {
- return ReadAfterOpTimeResponse();
- }
-
- if (getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
- return ReadAfterOpTimeResponse(Status(ErrorCodes::NotAReplicaSet,
- "node needs to be a replica set member to use read after opTime"));
- }
-
- // TODO: SERVER-18298 enable code once V1 protocol is fully implemented.
+// TODO: SERVER-18298 enable code once V1 protocol is fully implemented.
#if 0
if (!isV1ElectionProtocol()) {
return ReadAfterOpTimeResponse(Status(ErrorCodes::IncompatibleElectionProtocol,
@@ -795,1013 +762,976 @@ namespace {
}
#endif
- Timer timer;
- stdx::unique_lock<stdx::mutex> lock(_mutex);
-
- while (ts > _getMyLastOptime_inlock()) {
- Status interruptedStatus = txn->checkForInterruptNoAssert();
- if (!interruptedStatus.isOK()) {
- return ReadAfterOpTimeResponse(interruptedStatus, Milliseconds(timer.millis()));
- }
-
- if (_inShutdown) {
- return ReadAfterOpTimeResponse(
- Status(ErrorCodes::ShutdownInProgress, "shutting down"),
- Milliseconds(timer.millis()));
- }
-
- stdx::condition_variable condVar;
- WaiterInfo waitInfo(&_opTimeWaiterList,
- txn->getOpID(),
- &ts,
- nullptr, // Don't care about write concern.
- &condVar);
+ Timer timer;
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
- if (CurOp::get(txn)->isMaxTimeSet()) {
- condVar.wait_for(lock, Microseconds(txn->getRemainingMaxTimeMicros()));
- }
- else {
- condVar.wait(lock);
- }
+ while (ts > _getMyLastOptime_inlock()) {
+ Status interruptedStatus = txn->checkForInterruptNoAssert();
+ if (!interruptedStatus.isOK()) {
+ return ReadAfterOpTimeResponse(interruptedStatus, Milliseconds(timer.millis()));
}
- return ReadAfterOpTimeResponse(Status::OK(), Milliseconds(timer.millis()));
- }
+ if (_inShutdown) {
+ return ReadAfterOpTimeResponse(Status(ErrorCodes::ShutdownInProgress, "shutting down"),
+ Milliseconds(timer.millis()));
+ }
- OpTime ReplicationCoordinatorImpl::_getMyLastOptime_inlock() const {
- return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].opTime;
+ stdx::condition_variable condVar;
+ WaiterInfo waitInfo(&_opTimeWaiterList,
+ txn->getOpID(),
+ &ts,
+ nullptr, // Don't care about write concern.
+ &condVar);
+
+ if (CurOp::get(txn)->isMaxTimeSet()) {
+ condVar.wait_for(lock, Microseconds(txn->getRemainingMaxTimeMicros()));
+ } else {
+ condVar.wait(lock);
+ }
}
- Status ReplicationCoordinatorImpl::setLastOptime_forTest(long long cfgVer,
- long long memberId,
- const OpTime& opTime) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- invariant(getReplicationMode() == modeReplSet);
+ return ReadAfterOpTimeResponse(Status::OK(), Milliseconds(timer.millis()));
+}
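
The loop in waitUntilOpTime re-checks its predicate under the lock after every wakeup and bounds each wait when the operation has a max-time budget. A condensed sketch of that loop shape (the caller is assumed to update `current` and notify `cv` under the same mutex):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    bool waitUntilReaches(std::mutex& m,
                          std::condition_variable& cv,
                          const long long& current,
                          long long target,
                          std::chrono::microseconds maxTime) {
        std::unique_lock<std::mutex> lk(m);
        while (current < target) {
            if (maxTime.count() > 0) {
                // Bounded wait, mirroring the isMaxTimeSet() branch above.
                if (cv.wait_for(lk, maxTime) == std::cv_status::timeout)
                    return current >= target;
            } else {
                cv.wait(lk);  // no budget set: wait indefinitely
            }
        }
        return true;
    }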
- const UpdatePositionArgs::UpdateInfo update(OID(), opTime, cfgVer, memberId);
- long long configVersion;
- return _setLastOptime_inlock(update, &configVersion);
- }
+OpTime ReplicationCoordinatorImpl::_getMyLastOptime_inlock() const {
+ return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].opTime;
+}
- Status ReplicationCoordinatorImpl::_setLastOptime_inlock(
- const UpdatePositionArgs::UpdateInfo& args, long long* configVersion) {
+Status ReplicationCoordinatorImpl::setLastOptime_forTest(long long cfgVer,
+ long long memberId,
+ const OpTime& opTime) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ invariant(getReplicationMode() == modeReplSet);
- if (_selfIndex == -1) {
- // Ignore updates when we're in state REMOVED
- return Status(ErrorCodes::NotMasterOrSecondaryCode,
- "Received replSetUpdatePosition command but we are in state REMOVED");
- }
- invariant(getReplicationMode() == modeReplSet);
-
- if (args.memberId < 0) {
- std::string errmsg = str::stream()
- << "Received replSetUpdatePosition for node with memberId "
- << args.memberId << " which is negative and therefore invalid";
- LOG(1) << errmsg;
- return Status(ErrorCodes::NodeNotFound, errmsg);
- }
+ const UpdatePositionArgs::UpdateInfo update(OID(), opTime, cfgVer, memberId);
+ long long configVersion;
+ return _setLastOptime_inlock(update, &configVersion);
+}
- if (args.rid == _getMyRID_inlock() ||
- args.memberId == _rsConfig.getMemberAt(_selfIndex).getId()) {
- // Do not let remote nodes tell us what our optime is.
- return Status::OK();
- }
+Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args,
+ long long* configVersion) {
+ if (_selfIndex == -1) {
+ // Ignore updates when we're in state REMOVED
+ return Status(ErrorCodes::NotMasterOrSecondaryCode,
+ "Received replSetUpdatePosition command but we are in state REMOVED");
+ }
+ invariant(getReplicationMode() == modeReplSet);
- LOG(2) << "received notification that node with memberID " << args.memberId <<
- " in config with version " << args.cfgver << " has reached optime: " << args.ts;
-
- SlaveInfo* slaveInfo = NULL;
- if (args.cfgver != _rsConfig.getConfigVersion()) {
- std::string errmsg = str::stream()
- << "Received replSetUpdatePosition for node with memberId "
- << args.memberId << " whose config version of " << args.cfgver
- << " doesn't match our config version of "
- << _rsConfig.getConfigVersion();
- LOG(1) << errmsg;
- *configVersion = _rsConfig.getConfigVersion();
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
+ if (args.memberId < 0) {
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " which is negative and therefore invalid";
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::NodeNotFound, errmsg);
+ }
- slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId);
- if (!slaveInfo) {
- invariant(!_rsConfig.findMemberByID(args.memberId));
+ if (args.rid == _getMyRID_inlock() ||
+ args.memberId == _rsConfig.getMemberAt(_selfIndex).getId()) {
+ // Do not let remote nodes tell us what our optime is.
+ return Status::OK();
+ }
- std::string errmsg = str::stream()
- << "Received replSetUpdatePosition for node with memberId "
- << args.memberId << " which doesn't exist in our config";
- LOG(1) << errmsg;
- return Status(ErrorCodes::NodeNotFound, errmsg);
- }
+ LOG(2) << "received notification that node with memberID " << args.memberId
+ << " in config with version " << args.cfgver << " has reached optime: " << args.ts;
- invariant(args.memberId == slaveInfo->memberId);
+ SlaveInfo* slaveInfo = NULL;
+ if (args.cfgver != _rsConfig.getConfigVersion()) {
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " whose config version of " << args.cfgver << " doesn't match our config version of "
+ << _rsConfig.getConfigVersion();
+ LOG(1) << errmsg;
+ *configVersion = _rsConfig.getConfigVersion();
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
- LOG(3) << "Node with memberID " << args.memberId << " currently has optime " <<
- slaveInfo->opTime << "; updating to " << args.ts;
+ slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId);
+ if (!slaveInfo) {
+ invariant(!_rsConfig.findMemberByID(args.memberId));
- // Only update remote optimes if they increase.
- if (slaveInfo->opTime < args.ts) {
- _updateSlaveInfoOptime_inlock(slaveInfo, args.ts);
- }
- _updateLastCommittedOpTime_inlock();
- return Status::OK();
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " which doesn't exist in our config";
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::NodeNotFound, errmsg);
}
- void ReplicationCoordinatorImpl::interrupt(unsigned opId) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- if (info->opID == opId) {
- info->condVar->notify_all();
- return;
- }
- }
+ invariant(args.memberId == slaveInfo->memberId);
- for (auto& opTimeWaiter : _opTimeWaiterList) {
- if (opTimeWaiter->opID == opId) {
- opTimeWaiter->condVar->notify_all();
- return;
- }
- }
+ LOG(3) << "Node with memberID " << args.memberId << " currently has optime "
+ << slaveInfo->opTime << "; updating to " << args.ts;
- _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
- this,
- stdx::placeholders::_1));
+ // Only update remote optimes if they increase.
+ if (slaveInfo->opTime < args.ts) {
+ _updateSlaveInfoOptime_inlock(slaveInfo, args.ts);
}
+ _updateLastCommittedOpTime_inlock();
+ return Status::OK();
+}
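
Note the monotonic-update rule near the end: a reported optime only ever moves an entry forward, so a stale or reordered replSetUpdatePosition cannot drag a node's recorded position backwards. The rule in isolation:

    // Stale reports (lower optimes) are ignored, so out-of-order position
    // updates are harmless; entries only ever advance.
    void applyReportedOpTime(long long& stored, long long reported) {
        if (stored < reported)
            stored = reported;
    }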
- void ReplicationCoordinatorImpl::interruptAll() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
+void ReplicationCoordinatorImpl::interrupt(unsigned opId) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ if (info->opID == opId) {
info->condVar->notify_all();
+ return;
}
+ }
- for (auto& opTimeWaiter : _opTimeWaiterList) {
+ for (auto& opTimeWaiter : _opTimeWaiterList) {
+ if (opTimeWaiter->opID == opId) {
opTimeWaiter->condVar->notify_all();
+ return;
}
-
- _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
- this,
- stdx::placeholders::_1));
}
- bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
- const OpTime& opTime, const WriteConcernOptions& writeConcern) {
- Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
- if (!status.isOK()) {
- return true;
- }
+ _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
+ this,
+ stdx::placeholders::_1));
+}
- if (!writeConcern.wMode.empty()) {
- StringData patternName;
- if (writeConcern.wMode == WriteConcernOptions::kMajority) {
- patternName = ReplicaSetConfig::kMajorityWriteConcernModeName;
- }
- else {
- patternName = writeConcern.wMode;
- }
- StatusWith<ReplicaSetTagPattern> tagPattern =
- _rsConfig.findCustomWriteMode(patternName);
- if (!tagPattern.isOK()) {
- return true;
- }
- return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue());
- }
- else {
- return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes);
- }
+void ReplicationCoordinatorImpl::interruptAll() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ info->condVar->notify_all();
}
- bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& opTime,
- int numNodes) {
- if (_getMyLastOptime_inlock() < opTime) {
- // Secondaries that are for some reason ahead of us should not allow us to
- // satisfy a write concern if we aren't caught up ourselves.
- return false;
- }
+ for (auto& opTimeWaiter : _opTimeWaiterList) {
+ opTimeWaiter->condVar->notify_all();
+ }
- for (SlaveInfoVector::iterator it = _slaveInfo.begin();
- it != _slaveInfo.end(); ++it) {
+ _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
+ this,
+ stdx::placeholders::_1));
+}
- const OpTime& slaveTime = it->opTime;
- if (slaveTime >= opTime) {
- --numNodes;
- }
+bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
+ const OpTime& opTime, const WriteConcernOptions& writeConcern) {
+ Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+ if (!status.isOK()) {
+ return true;
+ }
- if (numNodes <= 0) {
- return true;
- }
+ if (!writeConcern.wMode.empty()) {
+ StringData patternName;
+ if (writeConcern.wMode == WriteConcernOptions::kMajority) {
+ patternName = ReplicaSetConfig::kMajorityWriteConcernModeName;
+ } else {
+ patternName = writeConcern.wMode;
}
- return false;
+ StatusWith<ReplicaSetTagPattern> tagPattern = _rsConfig.findCustomWriteMode(patternName);
+ if (!tagPattern.isOK()) {
+ return true;
+ }
+ return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue());
+ } else {
+ return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes);
}
+}
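
The dispatch above has two arms: a named mode is first resolved to a tag pattern (with "majority" translated to the internal majority write-concern mode), while a numeric mode just counts caught-up nodes. A compact sketch of the dispatch, with the two helpers left as hypothetical declarations:

    #include <string>

    bool taggedNodesReached(const std::string& pattern);  // hypothetical helper
    bool numNodesReached(int n);                          // hypothetical helper

    bool doneWaiting(const std::string& wMode, int wNumNodes) {
        if (!wMode.empty()) {
            // Stand-in for the kMajorityWriteConcernModeName translation above.
            const std::string pattern = (wMode == "majority") ? "$majority" : wMode;
            return taggedNodesReached(pattern);
        }
        return numNodesReached(wNumNodes);  // numeric mode: count nodes
    }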
- bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock(
- const OpTime& opTime, const ReplicaSetTagPattern& tagPattern) {
-
- ReplicaSetTagMatch matcher(tagPattern);
- for (SlaveInfoVector::iterator it = _slaveInfo.begin();
- it != _slaveInfo.end(); ++it) {
-
- const OpTime& slaveTime = it->opTime;
- if (slaveTime >= opTime) {
- // This node has reached the desired optime, now we need to check if it is a part
- // of the tagPattern.
- const MemberConfig* memberConfig = _rsConfig.findMemberByID(it->memberId);
- invariant(memberConfig);
- for (MemberConfig::TagIterator it = memberConfig->tagsBegin();
- it != memberConfig->tagsEnd(); ++it) {
- if (matcher.update(*it)) {
- return true;
- }
- }
- }
- }
+bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& opTime,
+ int numNodes) {
+ if (_getMyLastOptime_inlock() < opTime) {
+ // Secondaries that are for some reason ahead of us should not allow us to
+ // satisfy a write concern if we aren't caught up ourselves.
return false;
}
- ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
- OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern) {
- Timer timer;
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- return _awaitReplication_inlock(&timer, &lock, txn, opTime, writeConcern);
- }
-
- ReplicationCoordinator::StatusAndDuration
- ReplicationCoordinatorImpl::awaitReplicationOfLastOpForClient(
- OperationContext* txn,
- const WriteConcernOptions& writeConcern) {
- Timer timer;
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- return _awaitReplication_inlock(
- &timer,
- &lock,
- txn,
- ReplClientInfo::forClient(txn->getClient()).getLastOp(),
- writeConcern);
- }
-
- ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitReplication_inlock(
- const Timer* timer,
- stdx::unique_lock<stdx::mutex>* lock,
- OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern) {
-
- const Mode replMode = getReplicationMode();
- if (replMode == modeNone || serverGlobalParams.configsvr) {
- // no replication check needed (validated above)
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
-
- if (replMode == modeMasterSlave && writeConcern.wMode == WriteConcernOptions::kMajority) {
- // with master/slave, majority is equivalent to w=1
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
-
- if (opTime.isNull()) {
- // If waiting for the empty optime, always say it's been replicated.
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
-
- if (replMode == modeReplSet && !_memberState.primary()) {
- return StatusAndDuration(Status(ErrorCodes::NotMaster,
- "Not master while waiting for replication"),
- Milliseconds(timer->millis()));
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ const OpTime& slaveTime = it->opTime;
+ if (slaveTime >= opTime) {
+ --numNodes;
}
- if (writeConcern.wMode.empty()) {
- if (writeConcern.wNumNodes < 1) {
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
- else if (writeConcern.wNumNodes == 1 && _getMyLastOptime_inlock() >= opTime) {
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
+ if (numNodes <= 0) {
+ return true;
}
+ }
+ return false;
+}
- // Must hold _mutex before constructing waitInfo as it will modify _replicationWaiterList
- stdx::condition_variable condVar;
- WaiterInfo waitInfo(
- &_replicationWaiterList, txn->getOpID(), &opTime, &writeConcern, &condVar);
- while (!_doneWaitingForReplication_inlock(opTime, writeConcern)) {
- const Milliseconds elapsed{timer->millis()};
-
- Status interruptedStatus = txn->checkForInterruptNoAssert();
- if (!interruptedStatus.isOK()) {
- return StatusAndDuration(interruptedStatus, elapsed);
- }
-
- if (!waitInfo.master) {
- return StatusAndDuration(Status(ErrorCodes::NotMaster,
- "Not master anymore while waiting for replication"
- " - this most likely means that a step down"
- " occurred while waiting for replication"),
- elapsed);
- }
-
- if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout &&
- elapsed > Milliseconds{writeConcern.wTimeout}) {
- return StatusAndDuration(Status(ErrorCodes::WriteConcernFailed,
- "waiting for replication timed out"),
- elapsed);
- }
-
- if (_inShutdown) {
- return StatusAndDuration(Status(ErrorCodes::ShutdownInProgress,
- "Replication is being shut down"),
- elapsed);
- }
-
- const Microseconds maxTimeMicrosRemaining{txn->getRemainingMaxTimeMicros()};
- Microseconds waitTime = Microseconds::max();
- if (maxTimeMicrosRemaining != Microseconds::zero()) {
- waitTime = maxTimeMicrosRemaining;
- }
- if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout) {
- waitTime = std::min<Microseconds>(Milliseconds{writeConcern.wTimeout} - elapsed,
- waitTime);
- }
-
- if (waitTime == Microseconds::max()) {
- condVar.wait(*lock);
- }
- else {
- condVar.wait_for(*lock, waitTime);
+bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock(
+ const OpTime& opTime, const ReplicaSetTagPattern& tagPattern) {
+ ReplicaSetTagMatch matcher(tagPattern);
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ const OpTime& slaveTime = it->opTime;
+ if (slaveTime >= opTime) {
+ // This node has reached the desired optime; now we need to check whether it
+ // is part of the tagPattern.
+ const MemberConfig* memberConfig = _rsConfig.findMemberByID(it->memberId);
+ invariant(memberConfig);
+ for (MemberConfig::TagIterator it = memberConfig->tagsBegin();
+ it != memberConfig->tagsEnd();
+ ++it) {
+ if (matcher.update(*it)) {
+ return true;
+ }
}
}
-
- Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
- if (!status.isOK()) {
- return StatusAndDuration(status, Milliseconds(timer->millis()));
- }
-
+ }
+ return false;
+}
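
The tagged check only lets caught-up nodes feed their tags into the matcher, returning as soon as the pattern is satisfied. A simplified standalone version: here the pattern is just a set of required tags, each satisfied by one caught-up node (the real ReplicaSetTagPattern supports per-tag counts, which this sketch omits):

    #include <set>
    #include <string>
    #include <vector>

    struct Node {
        long long opTime;
        std::set<std::string> tags;  // e.g. {"dc:east", "rack:1"}
    };

    bool taggedNodesReached(const std::vector<Node>& nodes,
                            long long target,
                            const std::set<std::string>& requiredTags) {
        std::set<std::string> seen;
        for (const Node& n : nodes) {
            if (n.opTime < target)
                continue;  // only caught-up nodes may satisfy the pattern
            for (const std::string& t : n.tags)
                if (requiredTags.count(t))
                    seen.insert(t);
            if (seen.size() == requiredTags.size())
                return true;  // every required tag is covered
        }
        return false;
    }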
+
+ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
+ OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern) {
+ Timer timer;
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ return _awaitReplication_inlock(&timer, &lock, txn, opTime, writeConcern);
+}
+
+ReplicationCoordinator::StatusAndDuration
+ReplicationCoordinatorImpl::awaitReplicationOfLastOpForClient(
+ OperationContext* txn, const WriteConcernOptions& writeConcern) {
+ Timer timer;
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ return _awaitReplication_inlock(
+ &timer, &lock, txn, ReplClientInfo::forClient(txn->getClient()).getLastOp(), writeConcern);
+}
+
+ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitReplication_inlock(
+ const Timer* timer,
+ stdx::unique_lock<stdx::mutex>* lock,
+ OperationContext* txn,
+ const OpTime& opTime,
+ const WriteConcernOptions& writeConcern) {
+ const Mode replMode = getReplicationMode();
+ if (replMode == modeNone || serverGlobalParams.configsvr) {
+ // no replication check needed (validated above)
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- Status ReplicationCoordinatorImpl::stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime) {
- const Date_t startTime = _replExecutor.now();
- const Date_t stepDownUntil = startTime + stepdownTime;
- const Date_t waitUntil = startTime + waitTime;
-
- if (!getMemberState().primary()) {
- // Note this check is inherently racy - it's always possible for the node to
- // stepdown from some other path before we acquire the global shared lock, but
- // that's okay because we are resiliant to that happening in _stepDownContinue.
- return Status(ErrorCodes::NotMaster, "not primary so can't step down");
- }
-
- LockResult lockState = txn->lockState()->lockGlobalBegin(MODE_S);
- // We've requested the global shared lock which will stop new writes from coming in,
- // but existing writes could take a long time to finish, so kill all user operations
- // to help us get the global lock faster.
- _externalState->killAllUserOperations(txn);
-
- if (lockState == LOCK_WAITING) {
- lockState = txn->lockState()->lockGlobalComplete(
- durationCount<Milliseconds>(stepdownTime));
- if (lockState == LOCK_TIMEOUT) {
- return Status(ErrorCodes::ExceededTimeLimit,
- "Could not acquire the global shared lock within the amount of time "
- "specified that we should step down for");
- }
- }
- invariant(lockState == LOCK_OK);
- ON_BLOCK_EXIT(&Locker::unlockAll, txn->lockState());
- // From this point onward we are guaranteed to be holding the global shared lock.
+ if (replMode == modeMasterSlave && writeConcern.wMode == WriteConcernOptions::kMajority) {
+ // with master/slave, majority is equivalent to w=1
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
+ }
- StatusWith<ReplicationExecutor::EventHandle> finishedEvent = _replExecutor.makeEvent();
- if (finishedEvent.getStatus() == ErrorCodes::ShutdownInProgress) {
- return finishedEvent.getStatus();
- }
- fassert(26000, finishedEvent.getStatus());
- Status result(ErrorCodes::InternalError, "didn't set status in _stepDownContinue");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
- this,
- stdx::placeholders::_1,
- finishedEvent.getValue(),
- txn,
- waitUntil,
- stepDownUntil,
- force,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(18809, cbh.getStatus());
- cbh = _replExecutor.scheduleWorkAt(
- waitUntil,
- stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
- this,
- stdx::placeholders::_1));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(26001, cbh.getStatus());
- _replExecutor.waitForEvent(finishedEvent.getValue());
- return result;
+ if (opTime.isNull()) {
+ // If waiting for the empty optime, always say it's been replicated.
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- void ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback(
- const ReplicationExecutor::CallbackArgs& cbData) {
- if (!cbData.status.isOK()) {
- return;
- }
+ if (replMode == modeReplSet && !_memberState.primary()) {
+ return StatusAndDuration(
+ Status(ErrorCodes::NotMaster, "Not master while waiting for replication"),
+ Milliseconds(timer->millis()));
+ }
- _signalStepDownWaiters();
- }
-
- void ReplicationCoordinatorImpl::_signalStepDownWaiters() {
- std::for_each(_stepDownWaiters.begin(),
- _stepDownWaiters.end(),
- stdx::bind(&ReplicationExecutor::signalEvent,
- &_replExecutor,
- stdx::placeholders::_1));
- _stepDownWaiters.clear();
- }
-
- void ReplicationCoordinatorImpl::_stepDownContinue(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicationExecutor::EventHandle finishedEvent,
- OperationContext* txn,
- const Date_t waitUntil,
- const Date_t stepDownUntil,
- bool force,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- // Cancelation only occurs on shutdown, which will also handle signaling the event.
- *result = Status(ErrorCodes::ShutdownInProgress, "Shutting down replication");
- return;
+ if (writeConcern.wMode.empty()) {
+ if (writeConcern.wNumNodes < 1) {
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
+ } else if (writeConcern.wNumNodes == 1 && _getMyLastOptime_inlock() >= opTime) {
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
+ }
- ScopeGuard allFinishedGuard = MakeGuard(
- stdx::bind(&ReplicationExecutor::signalEvent, &_replExecutor, finishedEvent));
- if (!cbData.status.isOK()) {
- *result = cbData.status;
- return;
- }
+ // Must hold _mutex before constructing waitInfo as it will modify _replicationWaiterList
+ stdx::condition_variable condVar;
+ WaiterInfo waitInfo(&_replicationWaiterList, txn->getOpID(), &opTime, &writeConcern, &condVar);
+ while (!_doneWaitingForReplication_inlock(opTime, writeConcern)) {
+ const Milliseconds elapsed{timer->millis()};
Status interruptedStatus = txn->checkForInterruptNoAssert();
if (!interruptedStatus.isOK()) {
- *result = interruptedStatus;
- return;
+ return StatusAndDuration(interruptedStatus, elapsed);
}
- if (_topCoord->getRole() != TopologyCoordinator::Role::leader) {
- *result = Status(ErrorCodes::NotMaster,
- "Already stepped down from primary while processing step down "
- "request");
- return;
- }
- const Date_t now = _replExecutor.now();
- if (now >= stepDownUntil) {
- *result = Status(ErrorCodes::ExceededTimeLimit,
- "By the time we were ready to step down, we were already past the "
- "time we were supposed to step down until");
- return;
+ if (!waitInfo.master) {
+ return StatusAndDuration(Status(ErrorCodes::NotMaster,
+ "Not master anymore while waiting for replication"
+ " - this most likely means that a step down"
+ " occurred while waiting for replication"),
+ elapsed);
}
- bool forceNow = now >= waitUntil ? force : false;
- if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastOptime())) {
- // Schedule work to (potentially) step back up once the stepdown period has ended.
- _replExecutor.scheduleWorkAt(stepDownUntil,
- stdx::bind(&ReplicationCoordinatorImpl::_handleTimePassing,
- this,
- stdx::placeholders::_1));
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
- *result = Status::OK();
- return;
+ if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout &&
+ elapsed > Milliseconds{writeConcern.wTimeout}) {
+ return StatusAndDuration(
+ Status(ErrorCodes::WriteConcernFailed, "waiting for replication timed out"),
+ elapsed);
}
- // Step down failed. Keep waiting if we can, otherwise finish.
- if (now >= waitUntil) {
- *result = Status(ErrorCodes::ExceededTimeLimit, str::stream() <<
- "No electable secondaries caught up as of " <<
- dateToISOStringLocal(now) <<
- ". Please use {force: true} to force node to step down.");
- return;
+ if (_inShutdown) {
+ return StatusAndDuration(
+ Status(ErrorCodes::ShutdownInProgress, "Replication is being shut down"), elapsed);
}
- if (_stepDownWaiters.empty()) {
- StatusWith<ReplicationExecutor::EventHandle> reschedEvent =
- _replExecutor.makeEvent();
- if (!reschedEvent.isOK()) {
- *result = reschedEvent.getStatus();
- return;
- }
- _stepDownWaiters.push_back(reschedEvent.getValue());
+ const Microseconds maxTimeMicrosRemaining{txn->getRemainingMaxTimeMicros()};
+ Microseconds waitTime = Microseconds::max();
+ if (maxTimeMicrosRemaining != Microseconds::zero()) {
+ waitTime = maxTimeMicrosRemaining;
}
- CBHStatus cbh = _replExecutor.onEvent(
- _stepDownWaiters.back(),
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
- this,
- stdx::placeholders::_1,
- finishedEvent,
- txn,
- waitUntil,
- stepDownUntil,
- force,
- result));
- if (!cbh.isOK()) {
- *result = cbh.getStatus();
- return;
+ if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout) {
+ waitTime =
+ std::min<Microseconds>(Milliseconds{writeConcern.wTimeout} - elapsed, waitTime);
}
- allFinishedGuard.Dismiss();
- }
- void ReplicationCoordinatorImpl::_handleTimePassing(
- const ReplicationExecutor::CallbackArgs& cbData) {
- if (!cbData.status.isOK()) {
- return;
- }
+ if (waitTime == Microseconds::max()) {
+ condVar.wait(*lock);
+ } else {
+ condVar.wait_for(*lock, waitTime);
+ }
+ }
+
+ Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+ if (!status.isOK()) {
+ return StatusAndDuration(status, Milliseconds(timer->millis()));
+ }
+
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
+}
+
+Status ReplicationCoordinatorImpl::stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime) {
+ const Date_t startTime = _replExecutor.now();
+ const Date_t stepDownUntil = startTime + stepdownTime;
+ const Date_t waitUntil = startTime + waitTime;
+
+ if (!getMemberState().primary()) {
+ // Note this check is inherently racy - it's always possible for the node to
+ // step down via some other path before we acquire the global shared lock, but
+ // that's okay because we are resilient to that happening in _stepDownContinue.
+ return Status(ErrorCodes::NotMaster, "not primary so can't step down");
+ }
+
+ LockResult lockState = txn->lockState()->lockGlobalBegin(MODE_S);
+ // We've requested the global shared lock which will stop new writes from coming in,
+ // but existing writes could take a long time to finish, so kill all user operations
+ // to help us get the global lock faster.
+ _externalState->killAllUserOperations(txn);
+
+ if (lockState == LOCK_WAITING) {
+ lockState = txn->lockState()->lockGlobalComplete(durationCount<Milliseconds>(stepdownTime));
+ if (lockState == LOCK_TIMEOUT) {
+            return Status(ErrorCodes::ExceededTimeLimit,
+                          "Could not acquire the global shared lock within the specified "
+                          "stepdown period");
+ }
+ }
+ invariant(lockState == LOCK_OK);
+ ON_BLOCK_EXIT(&Locker::unlockAll, txn->lockState());
+ // From this point onward we are guaranteed to be holding the global shared lock.
+
+ StatusWith<ReplicationExecutor::EventHandle> finishedEvent = _replExecutor.makeEvent();
+ if (finishedEvent.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return finishedEvent.getStatus();
+ }
+ fassert(26000, finishedEvent.getStatus());
+ Status result(ErrorCodes::InternalError, "didn't set status in _stepDownContinue");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
+ this,
+ stdx::placeholders::_1,
+ finishedEvent.getValue(),
+ txn,
+ waitUntil,
+ stepDownUntil,
+ force,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(18809, cbh.getStatus());
+ cbh = _replExecutor.scheduleWorkAt(
+ waitUntil,
+ stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
+ this,
+ stdx::placeholders::_1));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(26001, cbh.getStatus());
+ _replExecutor.waitForEvent(finishedEvent.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback(
+ const ReplicationExecutor::CallbackArgs& cbData) {
+ if (!cbData.status.isOK()) {
+ return;
+ }
+
+ _signalStepDownWaiters();
+}
+
+void ReplicationCoordinatorImpl::_signalStepDownWaiters() {
+ std::for_each(
+ _stepDownWaiters.begin(),
+ _stepDownWaiters.end(),
+ stdx::bind(&ReplicationExecutor::signalEvent, &_replExecutor, stdx::placeholders::_1));
+ _stepDownWaiters.clear();
+}
+
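+// _stepDownContinue is the executor-side continuation of stepDown(): it retries
+// TopologyCoordinator::stepDown() until an electable secondary has caught up or the
+// waitUntil deadline passes. On a failed attempt it parks on a step-down waiter
+// event - signaled when the waitUntil timer fires and, presumably, when replication
+// progress is recorded (not shown in this hunk) - and reschedules itself; the
+// ScopeGuard signals finishedEvent on every exit path except a successful
+// reschedule.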
+void ReplicationCoordinatorImpl::_stepDownContinue(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicationExecutor::EventHandle finishedEvent,
+ OperationContext* txn,
+ const Date_t waitUntil,
+ const Date_t stepDownUntil,
+ bool force,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+        // Cancellation only occurs on shutdown, which will also handle signaling the event.
+ *result = Status(ErrorCodes::ShutdownInProgress, "Shutting down replication");
+ return;
+ }
+
+ ScopeGuard allFinishedGuard =
+ MakeGuard(stdx::bind(&ReplicationExecutor::signalEvent, &_replExecutor, finishedEvent));
+ if (!cbData.status.isOK()) {
+ *result = cbData.status;
+ return;
+ }
+
+ Status interruptedStatus = txn->checkForInterruptNoAssert();
+ if (!interruptedStatus.isOK()) {
+ *result = interruptedStatus;
+ return;
+ }
+
+ if (_topCoord->getRole() != TopologyCoordinator::Role::leader) {
+ *result = Status(ErrorCodes::NotMaster,
+ "Already stepped down from primary while processing step down "
+ "request");
+ return;
+ }
+ const Date_t now = _replExecutor.now();
+ if (now >= stepDownUntil) {
+ *result = Status(ErrorCodes::ExceededTimeLimit,
+ "By the time we were ready to step down, we were already past the "
+ "time we were supposed to step down until");
+ return;
+ }
+ bool forceNow = now >= waitUntil ? force : false;
+ if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastOptime())) {
+ // Schedule work to (potentially) step back up once the stepdown period has ended.
+ _replExecutor.scheduleWorkAt(stepDownUntil,
+ stdx::bind(&ReplicationCoordinatorImpl::_handleTimePassing,
+ this,
+ stdx::placeholders::_1));
- if (_topCoord->becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(_replExecutor.now())) {
- _performPostMemberStateUpdateAction(kActionWinElection);
- }
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ const PostMemberStateUpdateAction action =
+ _updateMemberStateFromTopologyCoordinator_inlock();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ *result = Status::OK();
+ return;
}
- bool ReplicationCoordinatorImpl::isMasterForReportingPurposes() {
- if (_settings.usingReplSets()) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (getReplicationMode() == modeReplSet && _getMemberState_inlock().primary()) {
- return true;
- }
- return false;
- }
-
- if (!_settings.slave)
- return true;
-
-
- // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
- if (replAllDead) {
- return false;
- }
-
- if (_settings.master) {
- // if running with --master --slave, allow.
- return true;
- }
-
- return false;
+ // Step down failed. Keep waiting if we can, otherwise finish.
+ if (now >= waitUntil) {
+ *result = Status(ErrorCodes::ExceededTimeLimit,
+ str::stream() << "No electable secondaries caught up as of "
+ << dateToISOStringLocal(now)
+ << ". Please use {force: true} to force node to step down.");
+ return;
}
- bool ReplicationCoordinatorImpl::canAcceptWritesForDatabase(StringData dbName) {
- // _canAcceptNonLocalWrites is always true for standalone nodes, always false for nodes
- // started with --slave, and adjusted based on primary+drain state in replica sets.
- //
- // That is, stand-alone nodes, non-slave nodes and drained replica set primaries can always
- // accept writes. Similarly, writes are always permitted to the "local" database. Finally,
- // in the event that a node is started with --slave and --master, we allow writes unless the
- // master/slave system has set the replAllDead flag.
- if (_canAcceptNonLocalWrites) {
- return true;
- }
- if (dbName == "local") {
- return true;
+ if (_stepDownWaiters.empty()) {
+ StatusWith<ReplicationExecutor::EventHandle> reschedEvent = _replExecutor.makeEvent();
+ if (!reschedEvent.isOK()) {
+ *result = reschedEvent.getStatus();
+ return;
}
- return !replAllDead && _settings.master;
+ _stepDownWaiters.push_back(reschedEvent.getValue());
}
-
- bool ReplicationCoordinatorImpl::canAcceptWritesFor(const NamespaceString& ns) {
- if (_memberState.rollback() && ns.isOplog()) {
- return false;
- }
- StringData dbName = ns.db();
- return canAcceptWritesForDatabase(dbName);
+ CBHStatus cbh = _replExecutor.onEvent(_stepDownWaiters.back(),
+ stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
+ this,
+ stdx::placeholders::_1,
+ finishedEvent,
+ txn,
+ waitUntil,
+ stepDownUntil,
+ force,
+ result));
+ if (!cbh.isOK()) {
+ *result = cbh.getStatus();
+ return;
}
+ allFinishedGuard.Dismiss();
+}
- Status ReplicationCoordinatorImpl::checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk) {
- if (_memberState.rollback() && ns.isOplog()) {
- return Status(ErrorCodes::NotMasterOrSecondaryCode,
- "cannot read from oplog collection while in rollback");
- }
- if (txn->getClient()->isInDirectClient()) {
- return Status::OK();
- }
- if (canAcceptWritesFor(ns)) {
- return Status::OK();
- }
- if (_settings.slave || _settings.master) {
- return Status::OK();
- }
- if (slaveOk) {
- if (_canServeNonLocalReads.loadRelaxed()) {
- return Status::OK();
- }
- return Status(
- ErrorCodes::NotMasterOrSecondaryCode,
- "not master or secondary; cannot currently read from this replSet member");
- }
- return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
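+// _handleTimePassing fires when the stepdown period ends; in a single-node replica
+// set the topology coordinator immediately makes this node a candidate again and it
+// wins the election unopposed.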
+void ReplicationCoordinatorImpl::_handleTimePassing(
+ const ReplicationExecutor::CallbackArgs& cbData) {
+ if (!cbData.status.isOK()) {
+ return;
}
- bool ReplicationCoordinatorImpl::isInPrimaryOrSecondaryState() const {
- return _canServeNonLocalReads.loadRelaxed();
+ if (_topCoord->becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(_replExecutor.now())) {
+ _performPostMemberStateUpdateAction(kActionWinElection);
}
+}
- bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
- if (!idx->unique()) {
- return false;
- }
- // Never ignore _id index
- if (idx->isIdIndex()) {
- return false;
- }
- if (nsToDatabaseSubstring(idx->parentNS()) == "local" ) {
- // always enforce on local
- return false;
- }
+bool ReplicationCoordinatorImpl::isMasterForReportingPurposes() {
+ if (_settings.usingReplSets()) {
stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (getReplicationMode() != modeReplSet) {
- return false;
- }
- // see SERVER-6671
- MemberState ms = _getMemberState_inlock();
- switch ( ms.s ) {
- case MemberState::RS_SECONDARY:
- case MemberState::RS_RECOVERING:
- case MemberState::RS_ROLLBACK:
- case MemberState::RS_STARTUP2:
+ if (getReplicationMode() == modeReplSet && _getMemberState_inlock().primary()) {
return true;
- default:
- return false;
}
+ return false;
}
- OID ReplicationCoordinatorImpl::getElectionId() {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _electionId;
- }
-
- OID ReplicationCoordinatorImpl::getMyRID() const {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _getMyRID_inlock();
- }
-
- OID ReplicationCoordinatorImpl::_getMyRID_inlock() const {
- return _myRID;
- }
+ if (!_settings.slave)
+ return true;
- int ReplicationCoordinatorImpl::getMyId() const {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _getMyId_inlock();
- }
- int ReplicationCoordinatorImpl::_getMyId_inlock() const {
- const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
- return self.getId();
+ // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
+ if (replAllDead) {
+ return false;
}
- bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(
- BSONObjBuilder* cmdBuilder) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- invariant(_rsConfig.isInitialized());
- // do not send updates if we have been removed from the config
- if (_selfIndex == -1) {
- return false;
- }
- cmdBuilder->append("replSetUpdatePosition", 1);
- // create an array containing objects each member connected to us and for ourself
- BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
- {
- for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin();
- itr != _slaveInfo.end(); ++itr) {
- if (itr->opTime.isNull()) {
- // Don't include info on members we haven't heard from yet.
- continue;
- }
- BSONObjBuilder entry(arrayBuilder.subobjStart());
- entry.append("_id", itr->rid);
- entry.append("optime", itr->opTime.getTimestamp());
- entry.append("memberId", itr->memberId);
- entry.append("cfgver", _rsConfig.getConfigVersion());
- // SERVER-14550 Even though the "config" field isn't used on the other end in 3.0,
- // we need to keep sending it for 2.6 compatibility.
- // TODO(spencer): Remove this after 3.0 is released.
- const MemberConfig* member = _rsConfig.findMemberByID(itr->memberId);
- fassert(18651, member);
- entry.append("config", member->toBSON(_rsConfig.getTagConfig()));
- }
- }
+ if (_settings.master) {
+ // if running with --master --slave, allow.
return true;
}
- Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) {
- Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&TopologyCoordinator::prepareStatusResponse,
- _topCoord.get(),
- stdx::placeholders::_1,
- _replExecutor.now(),
- time(0) - serverGlobalParams.started,
- getMyLastOptime(),
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18640, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
+ return false;
+}
- return result;
+bool ReplicationCoordinatorImpl::canAcceptWritesForDatabase(StringData dbName) {
+ // _canAcceptNonLocalWrites is always true for standalone nodes, always false for nodes
+ // started with --slave, and adjusted based on primary+drain state in replica sets.
+ //
+ // That is, stand-alone nodes, non-slave nodes and drained replica set primaries can always
+ // accept writes. Similarly, writes are always permitted to the "local" database. Finally,
+ // in the event that a node is started with --slave and --master, we allow writes unless the
+ // master/slave system has set the replAllDead flag.
+ if (_canAcceptNonLocalWrites) {
+ return true;
}
-
- void ReplicationCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
- invariant(getSettings().usingReplSets());
-
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish,
- this,
- stdx::placeholders::_1,
- response));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- response->markAsShutdownInProgress();
- return;
- }
- fassert(28602, cbh.getStatus());
-
- _replExecutor.wait(cbh.getValue());
- if (isWaitingForApplierToDrain()) {
- // Report that we are secondary to ismaster callers until drain completes.
- response->setIsMaster(false);
- response->setIsSecondary(true);
- }
+ if (dbName == "local") {
+ return true;
}
+ return !replAllDead && _settings.master;
+}
- void ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish(
- const ReplicationExecutor::CallbackArgs& cbData, IsMasterResponse* response) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- response->markAsShutdownInProgress();
- return;
- }
- _topCoord->fillIsMasterForReplSet(response);
+bool ReplicationCoordinatorImpl::canAcceptWritesFor(const NamespaceString& ns) {
+ if (_memberState.rollback() && ns.isOplog()) {
+ return false;
}
+ StringData dbName = ns.db();
+ return canAcceptWritesForDatabase(dbName);
+}
- void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- BSONArrayBuilder replicationProgress(result->subarrayStart("replicationProgress"));
- {
- for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin();
- itr != _slaveInfo.end(); ++itr) {
- BSONObjBuilder entry(replicationProgress.subobjStart());
- entry.append("rid", itr->rid);
- // TODO(siyuan) Output term of OpTime
- entry.append("optime", itr->opTime.getTimestamp());
- entry.append("host", itr->hostAndPort.toString());
- if (getReplicationMode() == modeReplSet) {
- if (_selfIndex == -1) {
- continue;
- }
- invariant(itr->memberId >= 0);
- entry.append("memberId", itr->memberId);
- }
- }
- }
+Status ReplicationCoordinatorImpl::checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk) {
+ if (_memberState.rollback() && ns.isOplog()) {
+ return Status(ErrorCodes::NotMasterOrSecondaryCode,
+ "cannot read from oplog collection while in rollback");
}
-
- ReplicaSetConfig ReplicationCoordinatorImpl::getConfig() const {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _rsConfig;
+ if (txn->getClient()->isInDirectClient()) {
+ return Status::OK();
}
-
- void ReplicationCoordinatorImpl::processReplSetGetConfig(BSONObjBuilder* result) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- result->append("config", _rsConfig.toBSON());
+ if (canAcceptWritesFor(ns)) {
+ return Status::OK();
}
-
- bool ReplicationCoordinatorImpl::getMaintenanceMode() {
- bool maintenanceMode(false);
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_getMaintenanceMode_helper,
- this,
- stdx::placeholders::_1,
- &maintenanceMode));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18811, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return maintenanceMode;
+ if (_settings.slave || _settings.master) {
+ return Status::OK();
}
-
- void ReplicationCoordinatorImpl::_getMaintenanceMode_helper(
- const ReplicationExecutor::CallbackArgs& cbData,
- bool* maintenanceMode) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
+ if (slaveOk) {
+ if (_canServeNonLocalReads.loadRelaxed()) {
+ return Status::OK();
}
- *maintenanceMode = _topCoord->getMaintenanceCount() > 0;
+ return Status(ErrorCodes::NotMasterOrSecondaryCode,
+ "not master or secondary; cannot currently read from this replSet member");
}
+ return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
+}
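+
+// Read admission above is evaluated in order: oplog reads are refused during
+// rollback, direct-client operations and writable namespaces are always allowed,
+// master/slave deployments are always allowed, and slaveOk reads additionally
+// require the node to be in a readable (primary or secondary) state.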
- Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
- if (getReplicationMode() != modeReplSet) {
- return Status(ErrorCodes::NoReplicationEnabled,
- "can only set maintenance mode on replica set members");
- }
+bool ReplicationCoordinatorImpl::isInPrimaryOrSecondaryState() const {
+ return _canServeNonLocalReads.loadRelaxed();
+}
- Status result(ErrorCodes::InternalError, "didn't set status in _setMaintenanceMode_helper");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_setMaintenanceMode_helper,
- this,
- stdx::placeholders::_1,
- activate,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(18698, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
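+// Unique index constraints are relaxed in the member states below (see
+// SERVER-6671); the usual rationale is that replicated operations may be applied
+// out of their original order, so uniqueness can be violated transiently even
+// though the final state is consistent. The _id index and everything in "local"
+// remain strictly enforced.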
+bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
+ if (!idx->unique()) {
+ return false;
}
-
- void ReplicationCoordinatorImpl::_setMaintenanceMode_helper(
- const ReplicationExecutor::CallbackArgs& cbData,
- bool activate,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (_getMemberState_inlock().primary()) {
- *result = Status(ErrorCodes::NotSecondary, "primaries can't modify maintenance mode");
- return;
- }
-
- int curMaintenanceCalls = _topCoord->getMaintenanceCount();
- if (activate) {
- log() << "going into maintenance mode with " << curMaintenanceCalls
- << " other maintenance mode tasks in progress" << rsLog;
- _topCoord->adjustMaintenanceCountBy(1);
- }
- else if (curMaintenanceCalls > 0) {
- invariant(_topCoord->getRole() == TopologyCoordinator::Role::follower);
-
- _topCoord->adjustMaintenanceCountBy(-1);
-
- log() << "leaving maintenance mode (" << curMaintenanceCalls-1
- << " other maintenance mode tasks ongoing)" << rsLog;
- } else {
- warning() << "Attempted to leave maintenance mode but it is not currently active";
- *result = Status(ErrorCodes::OperationFailed, "already out of maintenance mode");
- return;
- }
-
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- *result = Status::OK();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ // Never ignore _id index
+ if (idx->isIdIndex()) {
+ return false;
}
-
- Status ReplicationCoordinatorImpl::processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) {
- Status result(ErrorCodes::InternalError, "didn't set status in prepareSyncFromResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&TopologyCoordinator::prepareSyncFromResponse,
- _topCoord.get(),
- stdx::placeholders::_1,
- target,
- _getMyLastOptime_inlock(),
- resultObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18649, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- Status ReplicationCoordinatorImpl::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
- Status result(ErrorCodes::InternalError, "didn't set status in prepareFreezeResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFreeze_finish,
- this,
- stdx::placeholders::_1,
- secs,
- resultObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(18641, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- void ReplicationCoordinatorImpl::_processReplSetFreeze_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- int secs,
- BSONObjBuilder* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- _topCoord->prepareFreezeResponse(_replExecutor.now(), secs, response);
-
- if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- // If we just unfroze and ended our stepdown period and we are a one node replica set,
- // the topology coordinator will have gone into the candidate role to signal that we
- // need to elect ourself.
- _performPostMemberStateUpdateAction(kActionWinElection);
- }
- *result = Status::OK();
+ if (nsToDatabaseSubstring(idx->parentNS()) == "local") {
+ // always enforce on local
+ return false;
}
-
- Status ReplicationCoordinatorImpl::processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) {
- {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- return Status(ErrorCodes::NotYetInitialized,
- "Received heartbeat while still initializing replication system");
- }
- }
-
- Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processHeartbeatFinish,
- this,
- stdx::placeholders::_1,
- args,
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18508, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ if (getReplicationMode() != modeReplSet) {
+ return false;
}
-
- void ReplicationCoordinatorImpl::_processHeartbeatFinish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response,
- Status* outStatus) {
-
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *outStatus = Status(ErrorCodes::ShutdownInProgress, "Replication shutdown in progress");
- return;
- }
- fassert(18910, cbData.status);
- const Date_t now = _replExecutor.now();
- *outStatus = _topCoord->prepareHeartbeatResponse(
- now,
- args,
- _settings.ourSetName(),
- getMyLastOptime(),
- response);
- if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
- _selfIndex < 0) {
- // If this node does not belong to the configuration it knows about, send heartbeats
- // back to any node that sends us a heartbeat, in case one of those remote nodes has
- // a configuration that contains us. Chances are excellent that it will, since that
- // is the only reason for a remote node to send this node a heartbeat request.
- if (!args.getSenderHost().empty() && _seedList.insert(args.getSenderHost()).second) {
- _scheduleHeartbeatToTarget(args.getSenderHost(), -1, now);
+ // see SERVER-6671
+ MemberState ms = _getMemberState_inlock();
+ switch (ms.s) {
+ case MemberState::RS_SECONDARY:
+ case MemberState::RS_RECOVERING:
+ case MemberState::RS_ROLLBACK:
+ case MemberState::RS_STARTUP2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+OID ReplicationCoordinatorImpl::getElectionId() {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _electionId;
+}
+
+OID ReplicationCoordinatorImpl::getMyRID() const {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _getMyRID_inlock();
+}
+
+OID ReplicationCoordinatorImpl::_getMyRID_inlock() const {
+ return _myRID;
+}
+
+int ReplicationCoordinatorImpl::getMyId() const {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _getMyId_inlock();
+}
+
+int ReplicationCoordinatorImpl::_getMyId_inlock() const {
+ const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
+ return self.getId();
+}
+
+bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ invariant(_rsConfig.isInitialized());
+ // do not send updates if we have been removed from the config
+ if (_selfIndex == -1) {
+ return false;
+ }
+ cmdBuilder->append("replSetUpdatePosition", 1);
+    // create an array containing an object for each member connected to us and for ourself
+ BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
+ {
+ for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end();
+ ++itr) {
+ if (itr->opTime.isNull()) {
+ // Don't include info on members we haven't heard from yet.
+ continue;
+ }
+ BSONObjBuilder entry(arrayBuilder.subobjStart());
+ entry.append("_id", itr->rid);
+ entry.append("optime", itr->opTime.getTimestamp());
+ entry.append("memberId", itr->memberId);
+ entry.append("cfgver", _rsConfig.getConfigVersion());
+ // SERVER-14550 Even though the "config" field isn't used on the other end in 3.0,
+ // we need to keep sending it for 2.6 compatibility.
+ // TODO(spencer): Remove this after 3.0 is released.
+ const MemberConfig* member = _rsConfig.findMemberByID(itr->memberId);
+ fassert(18651, member);
+ entry.append("config", member->toBSON(_rsConfig.getTagConfig()));
+ }
+ }
+ return true;
+}
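+
+// For illustration, the command built above has roughly this shape (field values
+// are placeholders, not output from a real node):
+//
+//     { replSetUpdatePosition: 1,
+//       optimes: [ { _id: <rid>, optime: <Timestamp>, memberId: <int>,
+//                    cfgver: <configVersion>, config: <memberConfig> }, ... ] }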
+
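+// processReplSetGetStatus, like most of the command processors below, follows a
+// common pattern: schedule a callback on the replication executor (which serializes
+// access to _topCoord), then block the calling thread on wait() until the callback
+// has filled in the out-parameters; a ShutdownInProgress status from scheduleWork
+// short-circuits the wait.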
+Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&TopologyCoordinator::prepareStatusResponse,
+ _topCoord.get(),
+ stdx::placeholders::_1,
+ _replExecutor.now(),
+ time(0) - serverGlobalParams.started,
+ getMyLastOptime(),
+ response,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18640, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+
+ return result;
+}
+
+void ReplicationCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
+ invariant(getSettings().usingReplSets());
+
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish,
+ this,
+ stdx::placeholders::_1,
+ response));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ response->markAsShutdownInProgress();
+ return;
+ }
+ fassert(28602, cbh.getStatus());
+
+ _replExecutor.wait(cbh.getValue());
+ if (isWaitingForApplierToDrain()) {
+ // Report that we are secondary to ismaster callers until drain completes.
+ response->setIsMaster(false);
+ response->setIsSecondary(true);
+ }
+}
+
+void ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish(
+ const ReplicationExecutor::CallbackArgs& cbData, IsMasterResponse* response) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ response->markAsShutdownInProgress();
+ return;
+ }
+ _topCoord->fillIsMasterForReplSet(response);
+}
+
+void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ BSONArrayBuilder replicationProgress(result->subarrayStart("replicationProgress"));
+ {
+ for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end();
+ ++itr) {
+ BSONObjBuilder entry(replicationProgress.subobjStart());
+ entry.append("rid", itr->rid);
+ // TODO(siyuan) Output term of OpTime
+ entry.append("optime", itr->opTime.getTimestamp());
+ entry.append("host", itr->hostAndPort.toString());
+ if (getReplicationMode() == modeReplSet) {
+ if (_selfIndex == -1) {
+ continue;
+ }
+ invariant(itr->memberId >= 0);
+ entry.append("memberId", itr->memberId);
}
}
}
+}
- Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) {
-
- log() << "replSetReconfig admin command received from client";
+ReplicaSetConfig ReplicationCoordinatorImpl::getConfig() const {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _rsConfig;
+}
- stdx::unique_lock<stdx::mutex> lk(_mutex);
+void ReplicationCoordinatorImpl::processReplSetGetConfig(BSONObjBuilder* result) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ result->append("config", _rsConfig.toBSON());
+}
- while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- _rsConfigStateChange.wait(lk);
- }
-
- switch (_rsConfigState) {
+bool ReplicationCoordinatorImpl::getMaintenanceMode() {
+ bool maintenanceMode(false);
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_getMaintenanceMode_helper,
+ this,
+ stdx::placeholders::_1,
+ &maintenanceMode));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
+ }
+ fassert(18811, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return maintenanceMode;
+}
+
+void ReplicationCoordinatorImpl::_getMaintenanceMode_helper(
+ const ReplicationExecutor::CallbackArgs& cbData, bool* maintenanceMode) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ *maintenanceMode = _topCoord->getMaintenanceCount() > 0;
+}
+
+Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
+ if (getReplicationMode() != modeReplSet) {
+ return Status(ErrorCodes::NoReplicationEnabled,
+ "can only set maintenance mode on replica set members");
+ }
+
+ Status result(ErrorCodes::InternalError, "didn't set status in _setMaintenanceMode_helper");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_setMaintenanceMode_helper,
+ this,
+ stdx::placeholders::_1,
+ activate,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(18698, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_setMaintenanceMode_helper(
+ const ReplicationExecutor::CallbackArgs& cbData, bool activate, Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (_getMemberState_inlock().primary()) {
+ *result = Status(ErrorCodes::NotSecondary, "primaries can't modify maintenance mode");
+ return;
+ }
+
+ int curMaintenanceCalls = _topCoord->getMaintenanceCount();
+ if (activate) {
+ log() << "going into maintenance mode with " << curMaintenanceCalls
+ << " other maintenance mode tasks in progress" << rsLog;
+ _topCoord->adjustMaintenanceCountBy(1);
+ } else if (curMaintenanceCalls > 0) {
+ invariant(_topCoord->getRole() == TopologyCoordinator::Role::follower);
+
+ _topCoord->adjustMaintenanceCountBy(-1);
+
+ log() << "leaving maintenance mode (" << curMaintenanceCalls - 1
+ << " other maintenance mode tasks ongoing)" << rsLog;
+ } else {
+ warning() << "Attempted to leave maintenance mode but it is not currently active";
+ *result = Status(ErrorCodes::OperationFailed, "already out of maintenance mode");
+ return;
+ }
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ *result = Status::OK();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
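+
+// Maintenance mode is reference-counted: each activation must be balanced by a
+// deactivation, and the member state is recomputed after every change so the node
+// enters and leaves the maintenance (RECOVERING) state accordingly.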
+
+Status ReplicationCoordinatorImpl::processReplSetSyncFrom(const HostAndPort& target,
+ BSONObjBuilder* resultObj) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareSyncFromResponse");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&TopologyCoordinator::prepareSyncFromResponse,
+ _topCoord.get(),
+ stdx::placeholders::_1,
+ target,
+ _getMyLastOptime_inlock(),
+ resultObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18649, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+Status ReplicationCoordinatorImpl::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareFreezeResponse");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFreeze_finish,
+ this,
+ stdx::placeholders::_1,
+ secs,
+ resultObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(18641, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetFreeze_finish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ int secs,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ _topCoord->prepareFreezeResponse(_replExecutor.now(), secs, response);
+
+ if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+        // If we just unfroze and ended our stepdown period and we are a one-node replica set,
+ // the topology coordinator will have gone into the candidate role to signal that we
+ // need to elect ourself.
+ _performPostMemberStateUpdateAction(kActionWinElection);
+ }
+ *result = Status::OK();
+}
+
+Status ReplicationCoordinatorImpl::processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) {
+ {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ if (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ return Status(ErrorCodes::NotYetInitialized,
+ "Received heartbeat while still initializing replication system");
+ }
+ }
+
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_processHeartbeatFinish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ response,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18508, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processHeartbeatFinish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response,
+ Status* outStatus) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *outStatus = Status(ErrorCodes::ShutdownInProgress, "Replication shutdown in progress");
+ return;
+ }
+ fassert(18910, cbData.status);
+ const Date_t now = _replExecutor.now();
+ *outStatus = _topCoord->prepareHeartbeatResponse(
+ now, args, _settings.ourSetName(), getMyLastOptime(), response);
+ if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
+ _selfIndex < 0) {
+ // If this node does not belong to the configuration it knows about, send heartbeats
+ // back to any node that sends us a heartbeat, in case one of those remote nodes has
+ // a configuration that contains us. Chances are excellent that it will, since that
+ // is the only reason for a remote node to send this node a heartbeat request.
+ if (!args.getSenderHost().empty() && _seedList.insert(args.getSenderHost()).second) {
+ _scheduleHeartbeatToTarget(args.getSenderHost(), -1, now);
+ }
+ }
+}
+
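+// processReplSetReconfig walks a new config through several gates: wait out any
+// in-progress startup, mark the state kConfigReconfiguring, parse and validate the
+// config (with a randomized version bump for force reconfigs), check a quorum
+// unless forced, persist the document, and finally install it via
+// _finishReplSetReconfig on the executor - under the global exclusive lock for
+// force reconfigs, since this primary may no longer be electable afterward.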
+Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) {
+ log() << "replSetReconfig admin command received from client";
+
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+
+ while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ _rsConfigStateChange.wait(lk);
+ }
+
+ switch (_rsConfigState) {
case kConfigSteady:
break;
case kConfigUninitialized:
return Status(ErrorCodes::NotYetInitialized,
"Node not yet initialized; use the replSetInitiate command");
case kConfigReplicationDisabled:
- invariant(false); // should be unreachable due to !_settings.usingReplSets() check above
+            // should be unreachable due to !_settings.usingReplSets() check above
+            invariant(false);
case kConfigInitiating:
case kConfigReconfiguring:
case kConfigHBReconfiguring:
@@ -1811,283 +1741,264 @@ namespace {
default:
severe() << "Unexpected _rsConfigState " << int(_rsConfigState);
fassertFailed(18914);
- }
-
- invariant(_rsConfig.isInitialized());
-
- if (!args.force && !_getMemberState_inlock().primary()) {
- return Status(ErrorCodes::NotMaster, str::stream() <<
- "replSetReconfig should only be run on PRIMARY, but my state is " <<
- _getMemberState_inlock().toString() <<
- "; use the \"force\" argument to override");
- }
-
- _setConfigState_inlock(kConfigReconfiguring);
- ScopeGuard configStateGuard = MakeGuard(
- lockAndCall,
- &lk,
- stdx::bind(&ReplicationCoordinatorImpl::_setConfigState_inlock,
- this,
- kConfigSteady));
-
- ReplicaSetConfig oldConfig = _rsConfig;
- lk.unlock();
-
- ReplicaSetConfig newConfig;
- BSONObj newConfigObj = args.newConfigObj;
- if (args.force) {
- newConfigObj = incrementConfigVersionByRandom(newConfigObj);
- }
- Status status = newConfig.initialize(newConfigObj);
- if (!status.isOK()) {
- error() << "replSetReconfig got " << status << " while parsing " << newConfigObj;
- return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());;
- }
- if (newConfig.getReplSetName() != _settings.ourSetName()) {
- str::stream errmsg;
- errmsg << "Attempting to reconfigure a replica set with name " <<
- newConfig.getReplSetName() << ", but command line reports " <<
- _settings.ourSetName() << "; rejecting";
- error() << std::string(errmsg);
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
-
- StatusWith<int> myIndex = validateConfigForReconfig(
- _externalState.get(),
- oldConfig,
- newConfig,
- args.force);
- if (!myIndex.isOK()) {
- error() << "replSetReconfig got " << myIndex.getStatus() << " while validating " <<
- newConfigObj;
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- myIndex.getStatus().reason());
- }
-
- log() << "replSetReconfig config object with " << newConfig.getNumMembers() <<
- " members parses ok";
-
- if (!args.force) {
- status = checkQuorumForReconfig(&_replExecutor,
- newConfig,
- myIndex.getValue());
- if (!status.isOK()) {
- error() << "replSetReconfig failed; " << status;
- return status;
- }
- }
-
- status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
- if (!status.isOK()) {
- error() << "replSetReconfig failed to store config document; " << status;
- return status;
- }
-
- const stdx::function<void (const ReplicationExecutor::CallbackArgs&)> reconfigFinishFn(
- stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetReconfig,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex.getValue()));
-
- // If it's a force reconfig, the primary node may not be electable after the configuration
- // change. In case we are that primary node, finish the reconfig under the global lock,
- // so that the step down occurs safely.
- CBHStatus cbh =
- args.force ?
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn) :
- _replExecutor.scheduleWork(reconfigFinishFn);
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return status;
- }
- fassert(18824, cbh.getStatus());
- configStateGuard.Dismiss();
- _replExecutor.wait(cbh.getValue());
- return Status::OK();
}
- void ReplicationCoordinatorImpl::_finishReplSetReconfig(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex) {
+ invariant(_rsConfig.isInitialized());
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigReconfiguring);
- invariant(_rsConfig.isInitialized());
- const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ if (!args.force && !_getMemberState_inlock().primary()) {
+ return Status(ErrorCodes::NotMaster,
+ str::stream()
+ << "replSetReconfig should only be run on PRIMARY, but my state is "
+ << _getMemberState_inlock().toString()
+ << "; use the \"force\" argument to override");
}
- Status ReplicationCoordinatorImpl::processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) {
- log() << "replSetInitiate admin command received from client";
+ _setConfigState_inlock(kConfigReconfiguring);
+ ScopeGuard configStateGuard = MakeGuard(
+ lockAndCall,
+ &lk,
+ stdx::bind(&ReplicationCoordinatorImpl::_setConfigState_inlock, this, kConfigSteady));
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (!_settings.usingReplSets()) {
- return Status(ErrorCodes::NoReplicationEnabled, "server is not running with --replSet");
- }
-
- while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- _rsConfigStateChange.wait(lk);
- }
+ ReplicaSetConfig oldConfig = _rsConfig;
+ lk.unlock();
- if (_rsConfigState != kConfigUninitialized) {
- resultObj->append("info",
- "try querying local.system.replset to see current configuration");
- return Status(ErrorCodes::AlreadyInitialized, "already initialized");
- }
- invariant(!_rsConfig.isInitialized());
- _setConfigState_inlock(kConfigInitiating);
- ScopeGuard configStateGuard = MakeGuard(
- lockAndCall,
- &lk,
- stdx::bind(&ReplicationCoordinatorImpl::_setConfigState_inlock,
- this,
- kConfigUninitialized));
- lk.unlock();
-
- ReplicaSetConfig newConfig;
- Status status = newConfig.initialize(configObj);
- if (!status.isOK()) {
- error() << "replSet initiate got " << status << " while parsing " << configObj;
- return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());;
- }
- if (newConfig.getReplSetName() != _settings.ourSetName()) {
- str::stream errmsg;
- errmsg << "Attempting to initiate a replica set with name " <<
- newConfig.getReplSetName() << ", but command line reports " <<
- _settings.ourSetName() << "; rejecting";
- error() << std::string(errmsg);
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
-
- StatusWith<int> myIndex = validateConfigForInitiate(_externalState.get(), newConfig);
- if (!myIndex.isOK()) {
- error() << "replSet initiate got " << myIndex.getStatus() << " while validating " <<
- configObj;
- return Status(ErrorCodes::InvalidReplicaSetConfig, myIndex.getStatus().reason());
- }
-
- log() << "replSetInitiate config object with " << newConfig.getNumMembers() <<
- " members parses ok";
+ ReplicaSetConfig newConfig;
+ BSONObj newConfigObj = args.newConfigObj;
+ if (args.force) {
+ newConfigObj = incrementConfigVersionByRandom(newConfigObj);
+ }
+ Status status = newConfig.initialize(newConfigObj);
+ if (!status.isOK()) {
+ error() << "replSetReconfig got " << status << " while parsing " << newConfigObj;
+        return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
+ }
+ if (newConfig.getReplSetName() != _settings.ourSetName()) {
+ str::stream errmsg;
+ errmsg << "Attempting to reconfigure a replica set with name " << newConfig.getReplSetName()
+ << ", but command line reports " << _settings.ourSetName() << "; rejecting";
+ error() << std::string(errmsg);
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
- status = checkQuorumForInitiate(
- &_replExecutor,
- newConfig,
- myIndex.getValue());
+ StatusWith<int> myIndex =
+ validateConfigForReconfig(_externalState.get(), oldConfig, newConfig, args.force);
+ if (!myIndex.isOK()) {
+ error() << "replSetReconfig got " << myIndex.getStatus() << " while validating "
+ << newConfigObj;
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ myIndex.getStatus().reason());
+ }
- if (!status.isOK()) {
- error() << "replSetInitiate failed; " << status;
- return status;
- }
+ log() << "replSetReconfig config object with " << newConfig.getNumMembers()
+ << " members parses ok";
- status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
+ if (!args.force) {
+ status = checkQuorumForReconfig(&_replExecutor, newConfig, myIndex.getValue());
if (!status.isOK()) {
- error() << "replSetInitiate failed to store config document; " << status;
+ error() << "replSetReconfig failed; " << status;
return status;
}
+ }
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetInitiate,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex.getValue()));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return status;
- }
- configStateGuard.Dismiss();
- fassert(18654, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
-
- if (status.isOK()) {
- // Create the oplog with the first entry, and start repl threads.
- _externalState->initiateOplog(txn);
- _externalState->startThreads();
- }
+ status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
+ if (!status.isOK()) {
+ error() << "replSetReconfig failed to store config document; " << status;
return status;
}
- void ReplicationCoordinatorImpl::_finishReplSetInitiate(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex) {
+ const stdx::function<void(const ReplicationExecutor::CallbackArgs&)> reconfigFinishFn(
+ stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetReconfig,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex.getValue()));
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigInitiating);
- invariant(!_rsConfig.isInitialized());
- const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ // If it's a force reconfig, the primary node may not be electable after the configuration
+ // change. In case we are that primary node, finish the reconfig under the global lock,
+ // so that the step down occurs safely.
+ CBHStatus cbh = args.force ? _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn)
+ : _replExecutor.scheduleWork(reconfigFinishFn);
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return status;
+ }
+ fassert(18824, cbh.getStatus());
+ configStateGuard.Dismiss();
+ _replExecutor.wait(cbh.getValue());
+ return Status::OK();
+}
+
+void ReplicationCoordinatorImpl::_finishReplSetReconfig(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigReconfiguring);
+ invariant(_rsConfig.isInitialized());
+ const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
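+// processReplSetInitiate mirrors the reconfig flow for a first config: it requires
+// an uninitialized state, parses and validates the config, checks a quorum,
+// persists the document, installs it via _finishReplSetInitiate, and then writes
+// the first oplog entry and starts the replication threads.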
+Status ReplicationCoordinatorImpl::processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) {
+ log() << "replSetInitiate admin command received from client";
+
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (!_settings.usingReplSets()) {
+ return Status(ErrorCodes::NoReplicationEnabled, "server is not running with --replSet");
+ }
+
+ while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ _rsConfigStateChange.wait(lk);
+ }
+
+ if (_rsConfigState != kConfigUninitialized) {
+ resultObj->append("info", "try querying local.system.replset to see current configuration");
+ return Status(ErrorCodes::AlreadyInitialized, "already initialized");
+ }
+ invariant(!_rsConfig.isInitialized());
+ _setConfigState_inlock(kConfigInitiating);
+ ScopeGuard configStateGuard = MakeGuard(
+ lockAndCall,
+ &lk,
+ stdx::bind(
+ &ReplicationCoordinatorImpl::_setConfigState_inlock, this, kConfigUninitialized));
+ lk.unlock();
+
+ ReplicaSetConfig newConfig;
+ Status status = newConfig.initialize(configObj);
+ if (!status.isOK()) {
+ error() << "replSet initiate got " << status << " while parsing " << configObj;
+        return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
+ }
+ if (newConfig.getReplSetName() != _settings.ourSetName()) {
+ str::stream errmsg;
+ errmsg << "Attempting to initiate a replica set with name " << newConfig.getReplSetName()
+ << ", but command line reports " << _settings.ourSetName() << "; rejecting";
+ error() << std::string(errmsg);
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
+
+ StatusWith<int> myIndex = validateConfigForInitiate(_externalState.get(), newConfig);
+ if (!myIndex.isOK()) {
+ error() << "replSet initiate got " << myIndex.getStatus() << " while validating "
+ << configObj;
+ return Status(ErrorCodes::InvalidReplicaSetConfig, myIndex.getStatus().reason());
+ }
+
+ log() << "replSetInitiate config object with " << newConfig.getNumMembers()
+ << " members parses ok";
+
+ status = checkQuorumForInitiate(&_replExecutor, newConfig, myIndex.getValue());
+
+ if (!status.isOK()) {
+ error() << "replSetInitiate failed; " << status;
+ return status;
}
- void ReplicationCoordinatorImpl::_setConfigState_inlock(ConfigState newState) {
- if (newState != _rsConfigState) {
- _rsConfigState = newState;
- _rsConfigStateChange.notify_all();
- }
+ status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
+ if (!status.isOK()) {
+ error() << "replSetInitiate failed to store config document; " << status;
+ return status;
}
- ReplicationCoordinatorImpl::PostMemberStateUpdateAction
- ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock() {
- const MemberState newState = _topCoord->getMemberState();
- if (newState == _memberState) {
- if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- invariant(_rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(0).isElectable());
- return kActionWinElection;
- }
- return kActionNone;
- }
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetInitiate,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex.getValue()));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return status;
+ }
+ configStateGuard.Dismiss();
+ fassert(18654, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
- PostMemberStateUpdateAction result;
- if (_memberState.primary() || newState.removed() || newState.rollback()) {
- // Wake up any threads blocked in awaitReplication, close connections, etc.
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- info->master = false;
- info->condVar->notify_all();
- }
- _isWaitingForDrainToComplete = false;
- _canAcceptNonLocalWrites = false;
- result = kActionCloseAllConnections;
- }
- else {
- result = kActionFollowerModeStateChange;
+ if (status.isOK()) {
+ // Create the oplog with the first entry, and start repl threads.
+ _externalState->initiateOplog(txn);
+ _externalState->startThreads();
+ }
+ return status;
+}
+
+void ReplicationCoordinatorImpl::_finishReplSetInitiate(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigInitiating);
+ invariant(!_rsConfig.isInitialized());
+ const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+void ReplicationCoordinatorImpl::_setConfigState_inlock(ConfigState newState) {
+ if (newState != _rsConfigState) {
+ _rsConfigState = newState;
+ _rsConfigStateChange.notify_all();
+ }
+}
+
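+// _updateMemberStateFromTopologyCoordinator_inlock reconciles the cached
+// _memberState with the topology coordinator's view and returns an action for the
+// caller to perform outside the mutex: kActionWinElection for a lone electable
+// node, kActionCloseAllConnections when leaving PRIMARY or entering
+// REMOVED/ROLLBACK (replication waiters are woken and non-local writes disabled),
+// and kActionFollowerModeStateChange otherwise.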
+ReplicationCoordinatorImpl::PostMemberStateUpdateAction
+ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock() {
+ const MemberState newState = _topCoord->getMemberState();
+ if (newState == _memberState) {
+ if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+ invariant(_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
+ _rsConfig.getMemberAt(0).isElectable());
+ return kActionWinElection;
}
+ return kActionNone;
+ }
- if (_memberState.secondary() && !newState.primary()) {
- // Switching out of SECONDARY, but not to PRIMARY.
- _canServeNonLocalReads.store(0U);
- }
- else if (!_memberState.primary() && newState.secondary()) {
- // Switching into SECONDARY, but not from PRIMARY.
- _canServeNonLocalReads.store(1U);
+ PostMemberStateUpdateAction result;
+ if (_memberState.primary() || newState.removed() || newState.rollback()) {
+ // Wake up any threads blocked in awaitReplication, close connections, etc.
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ info->master = false;
+ info->condVar->notify_all();
}
+ _isWaitingForDrainToComplete = false;
+ _canAcceptNonLocalWrites = false;
+ result = kActionCloseAllConnections;
+ } else {
+ result = kActionFollowerModeStateChange;
+ }
- if (newState.secondary() && _topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- // When transitioning to SECONDARY, the only way for _topCoord to report the candidate
- // role is if the configuration represents a single-node replica set. In that case, the
- // overriding requirement is to elect this singleton node primary.
- invariant(_rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(0).isElectable());
- result = kActionWinElection;
- }
+ if (_memberState.secondary() && !newState.primary()) {
+ // Switching out of SECONDARY, but not to PRIMARY.
+ _canServeNonLocalReads.store(0U);
+ } else if (!_memberState.primary() && newState.secondary()) {
+ // Switching into SECONDARY, but not from PRIMARY.
+ _canServeNonLocalReads.store(1U);
+ }
- _memberState = newState;
- log() << "transition to " << newState.toString() << rsLog;
- return result;
+ if (newState.secondary() && _topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+ // When transitioning to SECONDARY, the only way for _topCoord to report the candidate
+ // role is if the configuration represents a single-node replica set. In that case, the
+ // overriding requirement is to elect this singleton node primary.
+ invariant(_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
+ _rsConfig.getMemberAt(0).isElectable());
+ result = kActionWinElection;
}
- void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
- PostMemberStateUpdateAction action) {
+ _memberState = newState;
+ log() << "transition to " << newState.toString() << rsLog;
+ return result;
+}
- switch (action) {
+void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
+ PostMemberStateUpdateAction action) {
+ switch (action) {
case kActionNone:
break;
case kActionFollowerModeStateChange:
@@ -2114,702 +2025,671 @@ namespace {
default:
severe() << "Unknown post member state update action " << static_cast<int>(action);
fassertFailed(26010);
- }
}
-
- Status ReplicationCoordinatorImpl::processReplSetGetRBID(BSONObjBuilder* resultObj) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- resultObj->append("rbid", _rbid);
- return Status::OK();
- }
-
- void ReplicationCoordinatorImpl::incrementRollbackID() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- ++_rbid;
- }
-
- Status ReplicationCoordinatorImpl::processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) {
-
- Status result(ErrorCodes::InternalError, "didn't set status in prepareFreshResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFresh_finish,
- this,
- stdx::placeholders::_1,
- args,
- resultObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18652, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- void ReplicationCoordinatorImpl::_processReplSetFresh_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetFreshArgs& args,
- BSONObjBuilder* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- return;
+}
+
+Status ReplicationCoordinatorImpl::processReplSetGetRBID(BSONObjBuilder* resultObj) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ resultObj->append("rbid", _rbid);
+ return Status::OK();
+}
+
+void ReplicationCoordinatorImpl::incrementRollbackID() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ ++_rbid;
+}
+
+Status ReplicationCoordinatorImpl::processReplSetFresh(const ReplSetFreshArgs& args,
+ BSONObjBuilder* resultObj) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareFreshResponse");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFresh_finish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ resultObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18652, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetFresh_finish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetFreshArgs& args,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ return;
+ }
+
+ _topCoord->prepareFreshResponse(args, _replExecutor.now(), getMyLastOptime(), response, result);
+}
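
processReplSetFresh and its _finish callback follow the executor idiom used throughout this file: schedule a callback, treat ShutdownInProgress as a clean early return, fassert any other scheduling failure, then block until the callback has filled in a caller-owned out-parameter. A minimal stand-alone sketch of that shape, using std::async as a stand-in for the ReplicationExecutor:

#include <future>
#include <iostream>
#include <string>

int main() {
    // Caller-owned result, pre-set to a sentinel like the InternalError status.
    std::string result = "didn't set status in callback";
    // scheduleWork() analogue: the callback fills in the out-parameter.
    std::future<void> work = std::async(std::launch::async, [&result] {
        result = "OK";
    });
    work.wait();                       // _replExecutor.wait(cbh.getValue()) analogue
    std::cout << result << std::endl;  // prints "OK"
    return 0;
}
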
+
+Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& args,
+ BSONObjBuilder* responseObj) {
+ Status result = Status(ErrorCodes::InternalError, "status not set by callback");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetElect_finish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ responseObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18657, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetElect_finish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetElectArgs& args,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ return;
+ }
+
+ _topCoord->prepareElectResponse(args, _replExecutor.now(), getMyLastOptime(), response, result);
+}
+
+ReplicationCoordinatorImpl::PostMemberStateUpdateAction
+ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(const ReplicaSetConfig& newConfig,
+ int myIndex) {
+ invariant(_settings.usingReplSets());
+ _cancelHeartbeats();
+ _setConfigState_inlock(kConfigSteady);
+ // Must get this before changing our config.
+ OpTime myOptime = _getMyLastOptime_inlock();
+ _topCoord->updateConfig(newConfig, myIndex, _replExecutor.now(), myOptime);
+ _rsConfig = newConfig;
+ log() << "New replica set config in use: " << _rsConfig.toBSON() << rsLog;
+ _selfIndex = myIndex;
+ if (_selfIndex >= 0) {
+ log() << "This node is " << _rsConfig.getMemberAt(_selfIndex).getHostAndPort()
+ << " in the config";
+ } else {
+ log() << "This node is not a member of the config";
+ }
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ _updateSlaveInfoFromConfig_inlock();
+ if (_selfIndex >= 0) {
+        // Don't send heartbeats if we're not in the config; if we get re-added,
+        // one of the nodes in the set will contact us.
+ _startHeartbeats();
+ }
+ _wakeReadyWaiters_inlock();
+ return action;
+}
+
+void ReplicationCoordinatorImpl::_wakeReadyWaiters_inlock() {
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ if (_doneWaitingForReplication_inlock(*info->opTime, *info->writeConcern)) {
+ info->condVar->notify_all();
}
-
- _topCoord->prepareFreshResponse(
- args, _replExecutor.now(), getMyLastOptime(), response, result);
}
+}
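
_wakeReadyWaiters_inlock pairs with waiters sleeping on per-waiter condition variables; once a waiter's opTime/writeConcern predicate is satisfied, notify_all releases it. A single-waiter reduction of that hand-off, with a plain std::condition_variable as a stand-in for the WaiterInfo machinery:

#include <condition_variable>
#include <mutex>
#include <thread>

int main() {
    std::mutex m;
    std::condition_variable cv;
    bool doneWaitingForReplication = false;

    std::thread replicationProgress([&] {
        std::lock_guard<std::mutex> lk(m);
        doneWaitingForReplication = true;  // write concern now satisfied
        cv.notify_all();                   // info->condVar->notify_all() analogue
    });

    std::unique_lock<std::mutex> lk(m);
    cv.wait(lk, [&] { return doneWaitingForReplication; });
    lk.unlock();
    replicationProgress.join();
    return 0;
}
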
- Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* responseObj) {
- Status result = Status(ErrorCodes::InternalError, "status not set by callback");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetElect_finish,
- this,
- stdx::placeholders::_1,
- args,
- responseObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18657, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- void ReplicationCoordinatorImpl::_processReplSetElect_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetElectArgs& args,
- BSONObjBuilder* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- return;
- }
-
- _topCoord->prepareElectResponse(
- args, _replExecutor.now(), getMyLastOptime(), response, result);
- }
-
- ReplicationCoordinatorImpl::PostMemberStateUpdateAction
- ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(
- const ReplicaSetConfig& newConfig,
- int myIndex) {
- invariant(_settings.usingReplSets());
- _cancelHeartbeats();
- _setConfigState_inlock(kConfigSteady);
- // Must get this before changing our config.
- OpTime myOptime = _getMyLastOptime_inlock();
- _topCoord->updateConfig(
- newConfig,
- myIndex,
- _replExecutor.now(),
- myOptime);
- _rsConfig = newConfig;
- log() << "New replica set config in use: " << _rsConfig.toBSON() << rsLog;
- _selfIndex = myIndex;
- if (_selfIndex >= 0) {
- log() << "This node is " <<
- _rsConfig.getMemberAt(_selfIndex).getHostAndPort() << " in the config";
- }
- else {
- log() << "This node is not a member of the config";
- }
-
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- _updateSlaveInfoFromConfig_inlock();
- if (_selfIndex >= 0) {
- // Don't send heartbeats if we're not in the config, if we get re-added one of the
- // nodes in the set will contact us.
- _startHeartbeats();
- }
- _wakeReadyWaiters_inlock();
- return action;
- }
-
- void ReplicationCoordinatorImpl::_wakeReadyWaiters_inlock(){
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- if (_doneWaitingForReplication_inlock(*info->opTime, *info->writeConcern)) {
- info->condVar->notify_all();
- }
+Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(const UpdatePositionArgs& updates,
+ long long* configVersion) {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ Status status = Status::OK();
+ bool somethingChanged = false;
+ for (UpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
+ update != updates.updatesEnd();
+ ++update) {
+ status = _setLastOptime_inlock(*update, configVersion);
+ if (!status.isOK()) {
+ break;
}
+ somethingChanged = true;
}
- Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(
- const UpdatePositionArgs& updates, long long* configVersion) {
-
- stdx::unique_lock<stdx::mutex> lock(_mutex);
- Status status = Status::OK();
- bool somethingChanged = false;
- for (UpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
- update != updates.updatesEnd();
- ++update) {
- status = _setLastOptime_inlock(*update, configVersion);
- if (!status.isOK()) {
- break;
- }
- somethingChanged = true;
- }
-
- if (somethingChanged && !_getMemberState_inlock().primary()) {
- lock.unlock();
- // Must do this outside _mutex
- // TODO: enable _dr, remove _externalState when DataReplicator is used excl.
- //_dr.slavesHaveProgressed();
- _externalState->forwardSlaveProgress();
- }
- return status;
+ if (somethingChanged && !_getMemberState_inlock().primary()) {
+ lock.unlock();
+ // Must do this outside _mutex
+        // TODO: enable _dr, remove _externalState when DataReplicator is used exclusively.
+ //_dr.slavesHaveProgressed();
+ _externalState->forwardSlaveProgress();
}
+ return status;
+}
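
Note the loop's failure semantics: it stops at the first bad update but still forwards progress upstream if any earlier entry applied, since somethingChanged is set per successful entry. A small sketch of that control flow, with booleans standing in for _setLastOptime_inlock results:

#include <iostream>
#include <vector>

int main() {
    // Outcomes of four position updates; the third one fails.
    std::vector<bool> updateSucceeds = {true, true, false, true};
    bool ok = true;
    bool somethingChanged = false;
    for (bool s : updateSucceeds) {
        if (!s) {
            ok = false;
            break;  // the fourth update is never attempted
        }
        somethingChanged = true;
    }
    // Prints "false true": the command reports the error, yet the two entries
    // that did apply are still forwarded as slave progress.
    std::cout << std::boolalpha << ok << " " << somethingChanged << std::endl;
    return 0;
}
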
- Status ReplicationCoordinatorImpl::processHandshake(OperationContext* txn,
- const HandshakeArgs& handshake) {
- LOG(2) << "Received handshake " << handshake.toBSON();
-
- stdx::unique_lock<stdx::mutex> lock(_mutex);
-
- if (getReplicationMode() != modeMasterSlave) {
- return Status(ErrorCodes::IllegalOperation,
- "The handshake command is only used for master/slave replication");
- }
-
- SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(handshake.getRid());
- if (slaveInfo) {
- return Status::OK(); // nothing to do
- }
-
- SlaveInfo newSlaveInfo;
- newSlaveInfo.rid = handshake.getRid();
- newSlaveInfo.memberId = -1;
- newSlaveInfo.hostAndPort = _externalState->getClientHostAndPort(txn);
- // Don't call _addSlaveInfo_inlock as that would wake sleepers unnecessarily.
- _slaveInfo.push_back(newSlaveInfo);
+Status ReplicationCoordinatorImpl::processHandshake(OperationContext* txn,
+ const HandshakeArgs& handshake) {
+ LOG(2) << "Received handshake " << handshake.toBSON();
- return Status::OK();
- }
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
- bool ReplicationCoordinatorImpl::buildsIndexes() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_selfIndex == -1) {
- return true;
- }
- const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
- return self.shouldBuildIndexes();
+ if (getReplicationMode() != modeMasterSlave) {
+ return Status(ErrorCodes::IllegalOperation,
+ "The handshake command is only used for master/slave replication");
}
- std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op) {
- std::vector<HostAndPort> hosts;
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- for (size_t i = 0; i < _slaveInfo.size(); ++i) {
- const SlaveInfo& slaveInfo = _slaveInfo[i];
- if (slaveInfo.opTime < op) {
- continue;
- }
-
- if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) {
- // Master-slave doesn't know the HostAndPort for itself at this point.
- continue;
- }
- hosts.push_back(slaveInfo.hostAndPort);
- }
- return hosts;
+ SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(handshake.getRid());
+ if (slaveInfo) {
+ return Status::OK(); // nothing to do
}
- std::vector<HostAndPort> ReplicationCoordinatorImpl::getOtherNodesInReplSet() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- invariant(_settings.usingReplSets());
+ SlaveInfo newSlaveInfo;
+ newSlaveInfo.rid = handshake.getRid();
+ newSlaveInfo.memberId = -1;
+ newSlaveInfo.hostAndPort = _externalState->getClientHostAndPort(txn);
+ // Don't call _addSlaveInfo_inlock as that would wake sleepers unnecessarily.
+ _slaveInfo.push_back(newSlaveInfo);
- std::vector<HostAndPort> nodes;
- if (_selfIndex == -1) {
- return nodes;
- }
-
- for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
- if (i == _selfIndex)
- continue;
-
- nodes.push_back(_rsConfig.getMemberAt(i).getHostAndPort());
- }
- return nodes;
- }
+ return Status::OK();
+}
- Status ReplicationCoordinatorImpl::checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+bool ReplicationCoordinatorImpl::buildsIndexes() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_selfIndex == -1) {
+ return true;
}
+ const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
+ return self.shouldBuildIndexes();
+}
- Status ReplicationCoordinatorImpl::_checkIfWriteConcernCanBeSatisfied_inlock(
- const WriteConcernOptions& writeConcern) const {
- if (getReplicationMode() == modeNone) {
- return Status(ErrorCodes::NoReplicationEnabled,
- "No replication enabled when checking if write concern can be satisfied");
+std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op) {
+ std::vector<HostAndPort> hosts;
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ for (size_t i = 0; i < _slaveInfo.size(); ++i) {
+ const SlaveInfo& slaveInfo = _slaveInfo[i];
+ if (slaveInfo.opTime < op) {
+ continue;
}
- if (getReplicationMode() == modeMasterSlave) {
- if (!writeConcern.wMode.empty()) {
- return Status(ErrorCodes::UnknownReplWriteConcern,
- "Cannot use named write concern modes in master-slave");
- }
- // No way to know how many slaves there are, so assume any numeric mode is possible.
- return Status::OK();
+ if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) {
+ // Master-slave doesn't know the HostAndPort for itself at this point.
+ continue;
}
-
- invariant(getReplicationMode() == modeReplSet);
- return _rsConfig.checkIfWriteConcernCanBeSatisfied(writeConcern);
+ hosts.push_back(slaveInfo.hostAndPort);
}
+ return hosts;
+}
- WriteConcernOptions ReplicationCoordinatorImpl::getGetLastErrorDefault() {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (_rsConfig.isInitialized()) {
- return _rsConfig.getDefaultWriteConcern();
- }
- return WriteConcernOptions();
- }
-
- Status ReplicationCoordinatorImpl::checkReplEnabledForCommand(BSONObjBuilder* result) {
- if (!_settings.usingReplSets()) {
- if (serverGlobalParams.configsvr) {
- result->append("info", "configsvr"); // for shell prompt
- }
- return Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
- }
+std::vector<HostAndPort> ReplicationCoordinatorImpl::getOtherNodesInReplSet() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ invariant(_settings.usingReplSets());
- if (getMemberState().startup()) {
- result->append("info", "run rs.initiate(...) if not yet done for the set");
- return Status(ErrorCodes::NotYetInitialized, "no replset config has been received");
- }
-
- return Status::OK();
- }
-
- bool ReplicationCoordinatorImpl::isReplEnabled() const {
- return getReplicationMode() != modeNone;
+ std::vector<HostAndPort> nodes;
+ if (_selfIndex == -1) {
+ return nodes;
}
- void ReplicationCoordinatorImpl::_chooseNewSyncSource(
- const ReplicationExecutor::CallbackArgs& cbData,
- HostAndPort* newSyncSource) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- *newSyncSource = _topCoord->chooseNewSyncSource(_replExecutor.now(),
- getMyLastOptime());
- }
+ for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
+ if (i == _selfIndex)
+ continue;
- HostAndPort ReplicationCoordinatorImpl::chooseNewSyncSource() {
- HostAndPort newSyncSource;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_chooseNewSyncSource,
- this,
- stdx::placeholders::_1,
- &newSyncSource));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return newSyncSource; // empty
- }
- fassert(18740, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return newSyncSource;
+ nodes.push_back(_rsConfig.getMemberAt(i).getHostAndPort());
}
+ return nodes;
+}
- void ReplicationCoordinatorImpl::_blacklistSyncSource(
- const ReplicationExecutor::CallbackArgs& cbData,
- const HostAndPort& host,
- Date_t until) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- _topCoord->blacklistSyncSource(host, until);
-
- CBHStatus cbh = _replExecutor.scheduleWorkAt(
- until,
- stdx::bind(&ReplicationCoordinatorImpl::_unblacklistSyncSource,
- this,
- stdx::placeholders::_1,
- host));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28610, cbh.getStatus());
- }
+Status ReplicationCoordinatorImpl::checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+}
- void ReplicationCoordinatorImpl::_unblacklistSyncSource(
- const ReplicationExecutor::CallbackArgs& cbData,
- const HostAndPort& host) {
- if (cbData.status == ErrorCodes::CallbackCanceled)
- return;
- _topCoord->unblacklistSyncSource(host, _replExecutor.now());
+Status ReplicationCoordinatorImpl::_checkIfWriteConcernCanBeSatisfied_inlock(
+ const WriteConcernOptions& writeConcern) const {
+ if (getReplicationMode() == modeNone) {
+ return Status(ErrorCodes::NoReplicationEnabled,
+ "No replication enabled when checking if write concern can be satisfied");
}
- void ReplicationCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_blacklistSyncSource,
- this,
- stdx::placeholders::_1,
- host,
- until));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18741, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- }
-
- void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) {
- StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
- OpTime lastOpTime;
- if (!lastOpTimeStatus.isOK()) {
- warning() << "Failed to load timestamp of most recently applied operation; " <<
- lastOpTimeStatus.getStatus();
- }
- else {
- lastOpTime = lastOpTimeStatus.getValue();
+ if (getReplicationMode() == modeMasterSlave) {
+ if (!writeConcern.wMode.empty()) {
+ return Status(ErrorCodes::UnknownReplWriteConcern,
+ "Cannot use named write concern modes in master-slave");
}
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _setMyLastOptime_inlock(&lk, lastOpTime, true);
- _externalState->setGlobalTimestamp(lastOpTime.getTimestamp());
+ // No way to know how many slaves there are, so assume any numeric mode is possible.
+ return Status::OK();
}
- void ReplicationCoordinatorImpl::_shouldChangeSyncSource(
- const ReplicationExecutor::CallbackArgs& cbData,
- const HostAndPort& currentSource,
- bool* shouldChange) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
-
- *shouldChange = _topCoord->shouldChangeSyncSource(currentSource, _replExecutor.now());
+ invariant(getReplicationMode() == modeReplSet);
+ return _rsConfig.checkIfWriteConcernCanBeSatisfied(writeConcern);
+}
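
The dispatch above reduces to a small decision table: no replication means no write concern can be satisfied, master-slave accepts only numeric (unnamed) modes, and replica sets defer named modes to the config's tag definitions. A hedged reduction, with a hypothetical writeConcernSatisfiable helper:

#include <cassert>
#include <string>

enum Mode { modeNone, modeMasterSlave, modeReplSet };

// Hypothetical reduction of the checks above. Returns false where the real
// code returns a non-OK Status; the modeReplSet case actually defers to
// ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied.
bool writeConcernSatisfiable(Mode mode, const std::string& wMode) {
    if (mode == modeNone) return false;                 // NoReplicationEnabled
    if (mode == modeMasterSlave) return wMode.empty();  // numeric modes only
    return true;                                        // deferred to the config
}

int main() {
    assert(!writeConcernSatisfiable(modeNone, ""));
    assert(writeConcernSatisfiable(modeMasterSlave, ""));  // e.g. w:2
    assert(!writeConcernSatisfiable(modeMasterSlave, "majority"));
    assert(writeConcernSatisfiable(modeReplSet, "majority"));
    return 0;
}
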
+
+WriteConcernOptions ReplicationCoordinatorImpl::getGetLastErrorDefault() {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ if (_rsConfig.isInitialized()) {
+ return _rsConfig.getDefaultWriteConcern();
+ }
+ return WriteConcernOptions();
+}
+
+Status ReplicationCoordinatorImpl::checkReplEnabledForCommand(BSONObjBuilder* result) {
+ if (!_settings.usingReplSets()) {
+ if (serverGlobalParams.configsvr) {
+ result->append("info", "configsvr"); // for shell prompt
+ }
+ return Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
+ }
+
+ if (getMemberState().startup()) {
+ result->append("info", "run rs.initiate(...) if not yet done for the set");
+ return Status(ErrorCodes::NotYetInitialized, "no replset config has been received");
+ }
+
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorImpl::isReplEnabled() const {
+ return getReplicationMode() != modeNone;
+}
+
+void ReplicationCoordinatorImpl::_chooseNewSyncSource(
+ const ReplicationExecutor::CallbackArgs& cbData, HostAndPort* newSyncSource) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ *newSyncSource = _topCoord->chooseNewSyncSource(_replExecutor.now(), getMyLastOptime());
+}
+
+HostAndPort ReplicationCoordinatorImpl::chooseNewSyncSource() {
+ HostAndPort newSyncSource;
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_chooseNewSyncSource,
+ this,
+ stdx::placeholders::_1,
+ &newSyncSource));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return newSyncSource; // empty
+ }
+ fassert(18740, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return newSyncSource;
+}
+
+void ReplicationCoordinatorImpl::_blacklistSyncSource(
+ const ReplicationExecutor::CallbackArgs& cbData, const HostAndPort& host, Date_t until) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ _topCoord->blacklistSyncSource(host, until);
+
+ CBHStatus cbh =
+ _replExecutor.scheduleWorkAt(until,
+ stdx::bind(&ReplicationCoordinatorImpl::_unblacklistSyncSource,
+ this,
+ stdx::placeholders::_1,
+ host));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28610, cbh.getStatus());
+}
+
+void ReplicationCoordinatorImpl::_unblacklistSyncSource(
+ const ReplicationExecutor::CallbackArgs& cbData, const HostAndPort& host) {
+ if (cbData.status == ErrorCodes::CallbackCanceled)
+ return;
+ _topCoord->unblacklistSyncSource(host, _replExecutor.now());
+}
+
+void ReplicationCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_blacklistSyncSource,
+ this,
+ stdx::placeholders::_1,
+ host,
+ until));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18741, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
+
+void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) {
+ StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
+ OpTime lastOpTime;
+ if (!lastOpTimeStatus.isOK()) {
+ warning() << "Failed to load timestamp of most recently applied operation; "
+ << lastOpTimeStatus.getStatus();
+ } else {
+ lastOpTime = lastOpTimeStatus.getValue();
+ }
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _setMyLastOptime_inlock(&lk, lastOpTime, true);
+ _externalState->setGlobalTimestamp(lastOpTime.getTimestamp());
+}
+
+void ReplicationCoordinatorImpl::_shouldChangeSyncSource(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const HostAndPort& currentSource,
+ bool* shouldChange) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+
+ *shouldChange = _topCoord->shouldChangeSyncSource(currentSource, _replExecutor.now());
+}
+
+bool ReplicationCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource) {
+ bool shouldChange(false);
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_shouldChangeSyncSource,
+ this,
+ stdx::placeholders::_1,
+ currentSource,
+ &shouldChange));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
+ fassert(18906, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return shouldChange;
+}
- bool ReplicationCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource) {
- bool shouldChange(false);
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_shouldChangeSyncSource,
- this,
- stdx::placeholders::_1,
- currentSource,
- &shouldChange));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18906, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return shouldChange;
+void ReplicationCoordinatorImpl::_updateLastCommittedOpTime_inlock() {
+ if (!_getMemberState_inlock().primary()) {
+ return;
}
+ StatusWith<ReplicaSetTagPattern> tagPattern =
+ _rsConfig.findCustomWriteMode(ReplicaSetConfig::kMajorityWriteConcernModeName);
+ invariant(tagPattern.isOK());
+ ReplicaSetTagMatch matcher{tagPattern.getValue()};
- void ReplicationCoordinatorImpl::_updateLastCommittedOpTime_inlock() {
- if (!_getMemberState_inlock().primary()) {
- return;
- }
- StatusWith<ReplicaSetTagPattern> tagPattern =
- _rsConfig.findCustomWriteMode(ReplicaSetConfig::kMajorityWriteConcernModeName);
- invariant(tagPattern.isOK());
- ReplicaSetTagMatch matcher{tagPattern.getValue()};
-
- std::vector<OpTime> votingNodesOpTimes;
+ std::vector<OpTime> votingNodesOpTimes;
- for (const auto& sI : _slaveInfo) {
- auto memberConfig = _rsConfig.findMemberByID(sI.memberId);
- invariant(memberConfig);
- for (auto tagIt = memberConfig->tagsBegin();
- tagIt != memberConfig->tagsEnd(); ++tagIt) {
- if (matcher.update(*tagIt)) {
- votingNodesOpTimes.push_back(sI.opTime);
- break;
- }
+ for (const auto& sI : _slaveInfo) {
+ auto memberConfig = _rsConfig.findMemberByID(sI.memberId);
+ invariant(memberConfig);
+ for (auto tagIt = memberConfig->tagsBegin(); tagIt != memberConfig->tagsEnd(); ++tagIt) {
+ if (matcher.update(*tagIt)) {
+ votingNodesOpTimes.push_back(sI.opTime);
+ break;
}
}
- invariant(votingNodesOpTimes.size() > 0);
- std::sort(votingNodesOpTimes.begin(), votingNodesOpTimes.end());
-
- // Use the index of the minimum quorum in the vector of nodes.
- _lastCommittedOpTime = votingNodesOpTimes[(votingNodesOpTimes.size() - 1) / 2];
}
+ invariant(votingNodesOpTimes.size() > 0);
+ std::sort(votingNodesOpTimes.begin(), votingNodesOpTimes.end());
- OpTime ReplicationCoordinatorImpl::getLastCommittedOpTime() const {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- return _lastCommittedOpTime;
- }
-
- Status ReplicationCoordinatorImpl::processReplSetRequestVotes(
- OperationContext* txn,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response) {
- if (!isV1ElectionProtocol()) {
- return {ErrorCodes::BadValue, "not using election protocol v1"};
- }
-
- updateTerm(args.getTerm());
-
- Status result{ErrorCodes::InternalError, "didn't set status in processReplSetRequestVotes"};
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetRequestVotes_finish,
- this,
- stdx::placeholders::_1,
- args,
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- _replExecutor.wait(cbh.getValue());
- if (response->getVoteGranted()) {
- LastVote lastVote;
- lastVote.setTerm(args.getTerm());
- lastVote.setCandidateId(args.getCandidateId());
-
- Status status = _externalState->storeLocalLastVoteDocument(txn, lastVote);
- if (!status.isOK()) {
- error() << "replSetRequestVotes failed to store LastVote document; " << status;
- return status;
- }
-
- }
- return result;
- }
+    // Pick the newest optime that a majority of the voting nodes have reached:
+    // in the ascending-sorted vector this is the entry at index (n - 1) / 2.
+ _lastCommittedOpTime = votingNodesOpTimes[(votingNodesOpTimes.size() - 1) / 2];
+}
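
The (size - 1) / 2 indexing deserves a worked example. With five voting nodes at optimes {7, 9, 9, 12, 15} after sorting, index (5 - 1) / 2 == 2 selects 9, and the three nodes at indices 2..4, a majority of five, are at 9 or beyond. A self-contained check, with integers standing in for OpTime:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
    // Five voting nodes, optimes expressed as plain integers for brevity.
    std::vector<int> votingNodesOpTimes = {12, 7, 9, 15, 9};
    std::sort(votingNodesOpTimes.begin(), votingNodesOpTimes.end());
    // Sorted: {7, 9, 9, 12, 15}; the entry at index (n - 1) / 2 is the newest
    // optime that a majority of the n voting nodes have reached.
    int lastCommitted = votingNodesOpTimes[(votingNodesOpTimes.size() - 1) / 2];
    assert(lastCommitted == 9);
    return 0;
}
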
- void ReplicationCoordinatorImpl::_processReplSetRequestVotes_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
+OpTime ReplicationCoordinatorImpl::getLastCommittedOpTime() const {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ return _lastCommittedOpTime;
+}
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _topCoord->processReplSetRequestVotes(args, response, getMyLastOptime());
- *result = Status::OK();
+Status ReplicationCoordinatorImpl::processReplSetRequestVotes(
+ OperationContext* txn,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response) {
+ if (!isV1ElectionProtocol()) {
+ return {ErrorCodes::BadValue, "not using election protocol v1"};
}
- Status ReplicationCoordinatorImpl::processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm) {
- if (!isV1ElectionProtocol()) {
- return {ErrorCodes::BadValue, "not using election protocol v1"};
- }
+ updateTerm(args.getTerm());
- updateTerm(args.getTerm());
-
- Status result{ErrorCodes::InternalError,
- "didn't set status in processReplSetDeclareElectionWinner"};
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetDeclareElectionWinner_finish,
- this,
- stdx::placeholders::_1,
- args,
- responseTerm,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- _replExecutor.wait(cbh.getValue());
- return result;
+ Status result{ErrorCodes::InternalError, "didn't set status in processReplSetRequestVotes"};
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetRequestVotes_finish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ response,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
}
+ _replExecutor.wait(cbh.getValue());
+ if (response->getVoteGranted()) {
+ LastVote lastVote;
+ lastVote.setTerm(args.getTerm());
+ lastVote.setCandidateId(args.getCandidateId());
- void ReplicationCoordinatorImpl::_processReplSetDeclareElectionWinner_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
- *result = _topCoord->processReplSetDeclareElectionWinner(args, responseTerm);
- }
-
- void ReplicationCoordinatorImpl::prepareCursorResponseInfo(BSONObjBuilder* objBuilder) {
- if (getReplicationMode() == modeReplSet && isV1ElectionProtocol()) {
- BSONObjBuilder replObj(objBuilder->subobjStart("repl"));
- _topCoord->prepareCursorResponseInfo(objBuilder, getLastCommittedOpTime());
- replObj.done();
+ Status status = _externalState->storeLocalLastVoteDocument(txn, lastVote);
+ if (!status.isOK()) {
+ error() << "replSetRequestVotes failed to store LastVote document; " << status;
+ return status;
}
}
-
- bool ReplicationCoordinatorImpl::isV1ElectionProtocol() {
- return getConfig().getProtocolVersion() == 1;
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetRequestVotes_finish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _topCoord->processReplSetRequestVotes(args, response, getMyLastOptime());
+ *result = Status::OK();
+}
+
+Status ReplicationCoordinatorImpl::processReplSetDeclareElectionWinner(
+ const ReplSetDeclareElectionWinnerArgs& args, long long* responseTerm) {
+ if (!isV1ElectionProtocol()) {
+ return {ErrorCodes::BadValue, "not using election protocol v1"};
+ }
+
+ updateTerm(args.getTerm());
+
+ Status result{ErrorCodes::InternalError,
+ "didn't set status in processReplSetDeclareElectionWinner"};
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetDeclareElectionWinner_finish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ responseTerm,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetDeclareElectionWinner_finish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+ *result = _topCoord->processReplSetDeclareElectionWinner(args, responseTerm);
+}
+
+void ReplicationCoordinatorImpl::prepareCursorResponseInfo(BSONObjBuilder* objBuilder) {
+ if (getReplicationMode() == modeReplSet && isV1ElectionProtocol()) {
+ BSONObjBuilder replObj(objBuilder->subobjStart("repl"));
+ _topCoord->prepareCursorResponseInfo(objBuilder, getLastCommittedOpTime());
+ replObj.done();
+ }
+}
+
+bool ReplicationCoordinatorImpl::isV1ElectionProtocol() {
+ return getConfig().getProtocolVersion() == 1;
+}
+
+Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response) {
+ {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ if (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ return Status(ErrorCodes::NotYetInitialized,
+ "Received heartbeat while still initializing replication system");
+ }
+ }
+
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processHeartbeatFinishV1,
+ this,
+ stdx::placeholders::_1,
+ args,
+ response,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return {ErrorCodes::ShutdownInProgress, "replication shutdown in progress"};
+ }
+ fassert(28645, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processHeartbeatFinishV1(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response,
+ Status* outStatus) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *outStatus = {ErrorCodes::ShutdownInProgress, "Replication shutdown in progress"};
+ return;
+ }
+ fassert(28655, cbData.status);
+ const Date_t now = _replExecutor.now();
+ *outStatus = _topCoord->prepareHeartbeatResponseV1(
+ now, args, _settings.ourSetName(), getMyLastOptime(), response);
+ if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
+ _selfIndex < 0) {
+ // If this node does not belong to the configuration it knows about, send heartbeats
+ // back to any node that sends us a heartbeat, in case one of those remote nodes has
+ // a configuration that contains us. Chances are excellent that it will, since that
+ // is the only reason for a remote node to send this node a heartbeat request.
+ if (!args.getSenderHost().empty() && _seedList.insert(args.getSenderHost()).second) {
+ _scheduleHeartbeatToTarget(args.getSenderHost(), -1, now);
+ }
+ }
+}
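
The _seedList.insert(...).second test above schedules at most one reply heartbeat per previously-unseen sender; with an associative container such as std::set, insert reports via .second whether the element was newly added. A two-line check of that dedup behavior:

#include <cassert>
#include <set>
#include <string>

int main() {
    std::set<std::string> seedList;
    assert(seedList.insert("node1:27017").second);   // first sighting: schedule
    assert(!seedList.insert("node1:27017").second);  // repeat: already tracked
    return 0;
}
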
+
+void ReplicationCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_summarizeAsHtml_finish,
+ this,
+ stdx::placeholders::_1,
+ output));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28638, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
+
+void ReplicationCoordinatorImpl::_summarizeAsHtml_finish(const CallbackArgs& cbData,
+ ReplSetHtmlSummary* output) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+
+ output->setSelfOptime(getMyLastOptime());
+ output->setSelfUptime(time(0) - serverGlobalParams.started);
+ output->setNow(_replExecutor.now());
+
+ _topCoord->summarizeAsHtml(output);
+}
+
+long long ReplicationCoordinatorImpl::getTerm() {
+ long long term = OpTime::kDefaultTerm;
+ CBHStatus cbh = _replExecutor.scheduleWork(stdx::bind(
+ &ReplicationCoordinatorImpl::_getTerm_helper, this, stdx::placeholders::_1, &term));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return term;
}
-
- Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response) {
- {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- return Status(ErrorCodes::NotYetInitialized,
- "Received heartbeat while still initializing replication system");
- }
- }
-
- Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processHeartbeatFinishV1,
- this,
- stdx::placeholders::_1,
- args,
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return {ErrorCodes::ShutdownInProgress, "replication shutdown in progress"};
- }
- fassert(28645, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
+ fassert(28660, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return term;
+}
+
+void ReplicationCoordinatorImpl::_getTerm_helper(const ReplicationExecutor::CallbackArgs& cbData,
+ long long* term) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ *term = _topCoord->getTerm();
+}
+
+bool ReplicationCoordinatorImpl::updateTerm(long long term) {
+ bool updated = false;
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_updateTerm_helper,
+ this,
+ stdx::placeholders::_1,
+ term,
+ &updated,
+ nullptr));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
-
- void ReplicationCoordinatorImpl::_processHeartbeatFinishV1(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response,
- Status* outStatus) {
-
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *outStatus = {ErrorCodes::ShutdownInProgress, "Replication shutdown in progress"};
- return;
- }
- fassert(28655, cbData.status);
- const Date_t now = _replExecutor.now();
- *outStatus = _topCoord->prepareHeartbeatResponseV1(
- now,
- args,
- _settings.ourSetName(),
- getMyLastOptime(),
- response);
- if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
- _selfIndex < 0) {
- // If this node does not belong to the configuration it knows about, send heartbeats
- // back to any node that sends us a heartbeat, in case one of those remote nodes has
- // a configuration that contains us. Chances are excellent that it will, since that
- // is the only reason for a remote node to send this node a heartbeat request.
- if (!args.getSenderHost().empty() && _seedList.insert(args.getSenderHost()).second) {
- _scheduleHeartbeatToTarget(args.getSenderHost(), -1, now);
- }
- }
+ fassert(28670, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return updated;
+}
+
+bool ReplicationCoordinatorImpl::updateTerm_forTest(long long term) {
+ bool updated = false;
+ Handle cbHandle;
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_updateTerm_helper,
+ this,
+ stdx::placeholders::_1,
+ term,
+ &updated,
+ &cbHandle));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
+ fassert(28673, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ _replExecutor.wait(cbHandle);
+ return updated;
+}
- void ReplicationCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_summarizeAsHtml_finish,
- this,
- stdx::placeholders::_1,
- output));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28638, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
+void ReplicationCoordinatorImpl::_updateTerm_helper(const ReplicationExecutor::CallbackArgs& cbData,
+ long long term,
+ bool* updated,
+ Handle* cbHandle) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ReplicationCoordinatorImpl::_summarizeAsHtml_finish(const CallbackArgs& cbData,
- ReplSetHtmlSummary* output) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
-
- output->setSelfOptime(getMyLastOptime());
- output->setSelfUptime(time(0) - serverGlobalParams.started);
- output->setNow(_replExecutor.now());
+ *updated = _updateTerm_incallback(term, cbHandle);
+}
- _topCoord->summarizeAsHtml(output);
- }
-
- long long ReplicationCoordinatorImpl::getTerm() {
- long long term = OpTime::kDefaultTerm;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_getTerm_helper,
- this,
- stdx::placeholders::_1,
- &term));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return term;
- }
- fassert(28660, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return term;
- }
+bool ReplicationCoordinatorImpl::_updateTerm_incallback(long long term, Handle* cbHandle) {
+ bool updated = _topCoord->updateTerm(term);
- void ReplicationCoordinatorImpl::_getTerm_helper(
- const ReplicationExecutor::CallbackArgs& cbData,
- long long* term) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- *term = _topCoord->getTerm();
- }
-
- bool ReplicationCoordinatorImpl::updateTerm(long long term) {
- bool updated = false;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_updateTerm_helper,
- this,
- stdx::placeholders::_1,
- term,
- &updated,
- nullptr));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(28670, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return updated;
- }
-
- bool ReplicationCoordinatorImpl::updateTerm_forTest(long long term) {
- bool updated = false;
- Handle cbHandle;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_updateTerm_helper,
- this,
- stdx::placeholders::_1,
- term,
- &updated,
- &cbHandle));
+ if (updated && getMemberState().primary()) {
+ log() << "stepping down from primary, because a new term has begun";
+ _topCoord->prepareForStepDown();
+ CBHStatus cbh = _replExecutor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish, this, stdx::placeholders::_1));
if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(28673, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- _replExecutor.wait(cbHandle);
- return updated;
- }
-
- void ReplicationCoordinatorImpl::_updateTerm_helper(
- const ReplicationExecutor::CallbackArgs& cbData,
- long long term,
- bool* updated,
- Handle* cbHandle) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
+ return true;
}
-
- *updated = _updateTerm_incallback(term, cbHandle);
- }
-
- bool ReplicationCoordinatorImpl::_updateTerm_incallback(long long term, Handle* cbHandle) {
- bool updated = _topCoord->updateTerm(term);
-
- if (updated && getMemberState().primary()) {
- log() << "stepping down from primary, because a new term has begun";
- _topCoord->prepareForStepDown();
- CBHStatus cbh = _replExecutor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish,
- this,
- stdx::placeholders::_1));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return true;
- }
- fassert(28672, cbh.getStatus());
- if (cbHandle) {
- *cbHandle = cbh.getValue();
- }
+ fassert(28672, cbh.getStatus());
+ if (cbHandle) {
+ *cbHandle = cbh.getValue();
}
- return updated;
}
+ return updated;
+}
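
The step-down trigger above boils down to one rule: a successfully adopted newer term while primary forces a step-down. A compact sketch of that rule, with a hypothetical Node stand-in for the TopologyCoordinator state:

#include <cassert>

struct Node {
    long long term = 3;
    bool primary = true;
    // Returns true only when the incoming term is newer, adopting it.
    bool updateTerm(long long t) {
        if (t <= term) return false;
        term = t;
        return true;
    }
};

int main() {
    Node n;
    assert(!n.updateTerm(3));  // same term: no change, stay primary
    if (n.updateTerm(4) && n.primary) {
        n.primary = false;     // a newer term forces the primary to step down
    }
    assert(n.term == 4 && !n.primary);
    return 0;
}
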
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 8fcd9671dae..7183145abcd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -53,991 +53,973 @@
namespace mongo {
- class Timer;
- template <typename T> class StatusWith;
+class Timer;
+template <typename T>
+class StatusWith;
namespace repl {
- class ElectCmdRunner;
- class ElectionWinnerDeclarer;
- class FreshnessChecker;
- class HandshakeArgs;
- class HeartbeatResponseAction;
- class LastVote;
- class OplogReader;
- class ReplSetDeclareElectionWinnerArgs;
- class ReplSetRequestVotesArgs;
- class ReplicaSetConfig;
- class SyncSourceFeedback;
- class TopologyCoordinator;
- class VoteRequester;
+class ElectCmdRunner;
+class ElectionWinnerDeclarer;
+class FreshnessChecker;
+class HandshakeArgs;
+class HeartbeatResponseAction;
+class LastVote;
+class OplogReader;
+class ReplSetDeclareElectionWinnerArgs;
+class ReplSetRequestVotesArgs;
+class ReplicaSetConfig;
+class SyncSourceFeedback;
+class TopologyCoordinator;
+class VoteRequester;
- class ReplicationCoordinatorImpl : public ReplicationCoordinator,
- public KillOpListenerInterface {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorImpl);
+class ReplicationCoordinatorImpl : public ReplicationCoordinator, public KillOpListenerInterface {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorImpl);
- public:
+public:
+ // Takes ownership of the "externalState", "topCoord" and "network" objects.
+ ReplicationCoordinatorImpl(const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ executor::NetworkInterface* network,
+ StorageInterface* storage,
+ TopologyCoordinator* topoCoord,
+ int64_t prngSeed);
+ // Takes ownership of the "externalState" and "topCoord" objects.
+ ReplicationCoordinatorImpl(const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ TopologyCoordinator* topoCoord,
+ ReplicationExecutor* replExec,
+ int64_t prngSeed);
+ virtual ~ReplicationCoordinatorImpl();
- // Takes ownership of the "externalState", "topCoord" and "network" objects.
- ReplicationCoordinatorImpl(const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- executor::NetworkInterface* network,
- StorageInterface* storage,
- TopologyCoordinator* topoCoord,
- int64_t prngSeed);
- // Takes ownership of the "externalState" and "topCoord" objects.
- ReplicationCoordinatorImpl(const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- TopologyCoordinator* topoCoord,
- ReplicationExecutor* replExec,
- int64_t prngSeed);
- virtual ~ReplicationCoordinatorImpl();
+ // ================== Members of public ReplicationCoordinator API ===================
- // ================== Members of public ReplicationCoordinator API ===================
+ virtual void startReplication(OperationContext* txn) override;
- virtual void startReplication(OperationContext* txn) override;
+ virtual void shutdown() override;
- virtual void shutdown() override;
+ virtual const ReplSettings& getSettings() const override;
- virtual const ReplSettings& getSettings() const override;
+ virtual Mode getReplicationMode() const override;
- virtual Mode getReplicationMode() const override;
+ virtual MemberState getMemberState() const override;
- virtual MemberState getMemberState() const override;
+ virtual bool isInPrimaryOrSecondaryState() const override;
- virtual bool isInPrimaryOrSecondaryState() const override;
+ virtual Seconds getSlaveDelaySecs() const override;
- virtual Seconds getSlaveDelaySecs() const override;
+ virtual void clearSyncSourceBlacklist() override;
- virtual void clearSyncSourceBlacklist() override;
+ /*
+ * Implementation of the KillOpListenerInterface interrupt method so that we can wake up
+ * threads blocked in awaitReplication() when a killOp command comes in.
+ */
+ virtual void interrupt(unsigned opId);
- /*
- * Implementation of the KillOpListenerInterface interrupt method so that we can wake up
- * threads blocked in awaitReplication() when a killOp command comes in.
- */
- virtual void interrupt(unsigned opId);
+ /*
+ * Implementation of the KillOpListenerInterface interruptAll method so that we can wake up
+ * threads blocked in awaitReplication() when we kill all operations.
+ */
+ virtual void interruptAll();
- /*
- * Implementation of the KillOpListenerInterface interruptAll method so that we can wake up
- * threads blocked in awaitReplication() when we kill all operations.
- */
- virtual void interruptAll();
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
+ OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern);
- virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
- OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern);
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
+ OperationContext* txn, const WriteConcernOptions& writeConcern);
- virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
- OperationContext* txn,
- const WriteConcernOptions& writeConcern);
+ virtual Status stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime);
- virtual Status stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime);
+ virtual bool isMasterForReportingPurposes();
- virtual bool isMasterForReportingPurposes();
+ virtual bool canAcceptWritesForDatabase(StringData dbName);
- virtual bool canAcceptWritesForDatabase(StringData dbName);
+ bool canAcceptWritesFor(const NamespaceString& ns) override;
- bool canAcceptWritesFor(const NamespaceString& ns) override;
+ virtual Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
- virtual Status checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const;
+ virtual Status checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk);
- virtual Status checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk);
+ virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
- virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
+ virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts);
- virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts);
+ virtual void setMyLastOptime(const OpTime& opTime);
- virtual void setMyLastOptime(const OpTime& opTime);
+ virtual void resetMyLastOptime();
- virtual void resetMyLastOptime();
+ virtual void setMyHeartbeatMessage(const std::string& msg);
- virtual void setMyHeartbeatMessage(const std::string& msg);
+ virtual OpTime getMyLastOptime() const override;
- virtual OpTime getMyLastOptime() const override;
+ virtual ReadAfterOpTimeResponse waitUntilOpTime(OperationContext* txn,
+ const ReadAfterOpTimeArgs& settings) override;
- virtual ReadAfterOpTimeResponse waitUntilOpTime(
- OperationContext* txn,
- const ReadAfterOpTimeArgs& settings) override;
+ virtual OID getElectionId() override;
- virtual OID getElectionId() override;
+ virtual OID getMyRID() const override;
- virtual OID getMyRID() const override;
+ virtual int getMyId() const override;
- virtual int getMyId() const override;
+ virtual bool setFollowerMode(const MemberState& newState) override;
- virtual bool setFollowerMode(const MemberState& newState) override;
+ virtual bool isWaitingForApplierToDrain() override;
- virtual bool isWaitingForApplierToDrain() override;
+ virtual void signalDrainComplete(OperationContext* txn) override;
- virtual void signalDrainComplete(OperationContext* txn) override;
+ virtual void signalUpstreamUpdater() override;
- virtual void signalUpstreamUpdater() override;
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) override;
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) override;
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result) override;
- virtual Status processReplSetGetStatus(BSONObjBuilder* result) override;
+ virtual void fillIsMasterForReplSet(IsMasterResponse* result) override;
- virtual void fillIsMasterForReplSet(IsMasterResponse* result) override;
+ virtual void appendSlaveInfoData(BSONObjBuilder* result) override;
- virtual void appendSlaveInfoData(BSONObjBuilder* result) override;
+ virtual ReplicaSetConfig getConfig() const override;
- virtual ReplicaSetConfig getConfig() const override;
+ virtual void processReplSetGetConfig(BSONObjBuilder* result) override;
- virtual void processReplSetGetConfig(BSONObjBuilder* result) override;
+ virtual Status setMaintenanceMode(bool activate) override;
- virtual Status setMaintenanceMode(bool activate) override;
+ virtual bool getMaintenanceMode() override;
- virtual bool getMaintenanceMode() override;
+ virtual Status processReplSetSyncFrom(const HostAndPort& target,
+ BSONObjBuilder* resultObj) override;
- virtual Status processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) override;
+ virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj) override;
- virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj) override;
+ virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) override;
- virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) override;
+ virtual Status processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) override;
- virtual Status processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) override;
+ virtual Status processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) override;
- virtual Status processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) override;
+ virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj) override;
- virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj) override;
+ virtual void incrementRollbackID() override;
- virtual void incrementRollbackID() override;
+ virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
+ BSONObjBuilder* resultObj) override;
- virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) override;
+ virtual Status processReplSetElect(const ReplSetElectArgs& args,
+ BSONObjBuilder* response) override;
- virtual Status processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* response) override;
+ virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
+ long long* configVersion) override;
- virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
- long long* configVersion) override;
+ virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake) override;
- virtual Status processHandshake(OperationContext* txn,
- const HandshakeArgs& handshake) override;
+ virtual bool buildsIndexes() override;
- virtual bool buildsIndexes() override;
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) override;
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) override;
+ virtual std::vector<HostAndPort> getOtherNodesInReplSet() const override;
- virtual std::vector<HostAndPort> getOtherNodesInReplSet() const override;
+ virtual WriteConcernOptions getGetLastErrorDefault() override;
- virtual WriteConcernOptions getGetLastErrorDefault() override;
+ virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) override;
- virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) override;
+ virtual bool isReplEnabled() const override;
- virtual bool isReplEnabled() const override;
+ virtual HostAndPort chooseNewSyncSource() override;
- virtual HostAndPort chooseNewSyncSource() override;
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) override;
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) override;
+ virtual void resetLastOpTimeFromOplog(OperationContext* txn) override;
- virtual void resetLastOpTimeFromOplog(OperationContext* txn) override;
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource) override;
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource) override;
+ virtual OpTime getLastCommittedOpTime() const override;
- virtual OpTime getLastCommittedOpTime() const override;
+ virtual Status processReplSetRequestVotes(OperationContext* txn,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response) override;
- virtual Status processReplSetRequestVotes(OperationContext* txn,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response) override;
+ virtual Status processReplSetDeclareElectionWinner(const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm) override;
- virtual Status processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm) override;
+ virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder);
- virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder);
+ virtual Status processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response) override;
- virtual Status processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response) override;
+ virtual bool isV1ElectionProtocol() override;
- virtual bool isV1ElectionProtocol() override;
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* s) override;
- virtual void summarizeAsHtml(ReplSetHtmlSummary* s) override;
+ /**
+     * Gets the current term from the topology coordinator.
+ */
+ virtual long long getTerm() override;
- /**
- * Get current term from topology coordinator
- */
- virtual long long getTerm() override;
+ virtual bool updateTerm(long long term) override;
- virtual bool updateTerm(long long term) override;
+ // ================== Test support API ===================
- // ================== Test support API ===================
+ /**
+ * If called after startReplication(), blocks until all asynchronous
+ * activities associated with replication start-up complete.
+ */
+ void waitForStartUpComplete();
- /**
- * If called after startReplication(), blocks until all asynchronous
- * activities associated with replication start-up complete.
- */
- void waitForStartUpComplete();
+ /**
+ * Gets the replica set configuration in use by the node.
+ */
+ ReplicaSetConfig getReplicaSetConfig_forTest();
- /**
- * Gets the replica set configuration in use by the node.
- */
- ReplicaSetConfig getReplicaSetConfig_forTest();
+ /**
+ * Simple wrapper around _setLastOptime_inlock to make it easier to test.
+ */
+ Status setLastOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime);
- /**
- * Simple wrapper around _setLastOptime_inlock to make it easier to test.
- */
- Status setLastOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime);
-
- bool updateTerm_forTest(long long term);
-
- private:
- ReplicationCoordinatorImpl(const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- TopologyCoordinator* topCoord,
- int64_t prngSeed,
- executor::NetworkInterface* network,
- StorageInterface* storage,
- ReplicationExecutor* replExec);
- /**
- * Configuration states for a replica set node.
- *
- * Transition diagram:
- *
- * PreStart ------------------> ReplicationDisabled
- * |
- * |
- * v
- * StartingUp -------> Uninitialized <------> Initiating
- * \ ^ |
- * ------- | |
- * | | |
- * v v |
- * Reconfig <---> Steady <----> HBReconfig |
- * ^ /
- * | /
- * \ /
- * -----------------------
- */
- enum ConfigState {
- kConfigPreStart,
- kConfigStartingUp,
- kConfigReplicationDisabled,
- kConfigUninitialized,
- kConfigSteady,
- kConfigInitiating,
- kConfigReconfiguring,
- kConfigHBReconfiguring
- };
-
- /**
- * Type describing actions to take after a change to the MemberState _memberState.
- */
- enum PostMemberStateUpdateAction {
- kActionNone,
- kActionCloseAllConnections, // Also indicates that we should clear sharding state.
- kActionFollowerModeStateChange,
- kActionWinElection
- };
-
- // Struct that holds information about clients waiting for replication.
- struct WaiterInfo;
-
- // Struct that holds information about nodes in this replication group, mainly used for
- // tracking replication progress for write concern satisfaction.
- struct SlaveInfo {
- OpTime opTime; // Our last known OpTime that this slave has replicated to.
- HostAndPort hostAndPort; // Client address of the slave.
- int memberId; // Id of the node in the replica set config, or -1 if we're not a replSet.
- OID rid; // RID of the node.
- bool self; // Whether this SlaveInfo stores the information about ourself
- SlaveInfo() : memberId(-1), self(false) {}
- };
-
- typedef std::vector<SlaveInfo> SlaveInfoVector;
-
- typedef std::vector<ReplicationExecutor::CallbackHandle> HeartbeatHandles;
-
- /**
- * Looks up the SlaveInfo in _slaveInfo associated with the given RID and returns a pointer
- * to it, or returns NULL if there is no SlaveInfo with the given RID.
- */
- SlaveInfo* _findSlaveInfoByRID_inlock(const OID& rid);
-
- /**
- * Looks up the SlaveInfo in _slaveInfo associated with the given member ID and returns a
- * pointer to it, or returns NULL if there is no SlaveInfo with the given member ID.
- */
- SlaveInfo* _findSlaveInfoByMemberID_inlock(int memberID);
-
- /**
- * Adds the given SlaveInfo to _slaveInfo and wakes up any threads waiting for replication
- * that now have their write concern satisfied. Only valid to call in master/slave setups.
- */
- void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo);
-
- /**
- * Updates the item in _slaveInfo pointed to by 'slaveInfo' with the given OpTime 'opTime'
- * and wakes up any threads waiting for replication that now have their write concern
- * satisfied.
- */
- void _updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime);
-
- /**
- * Returns the index into _slaveInfo where data corresponding to ourself is stored.
- * For more info on the rules about how we know where our entry is, see the comment for
- * _slaveInfo.
- */
- size_t _getMyIndexInSlaveInfo_inlock() const;
-
- /**
- * Helper method that removes entries from _slaveInfo if they correspond to a node
- * with a member ID that is not in the current replica set config. Will always leave an
- * entry for ourself at the beginning of _slaveInfo, even if we aren't present in the
- * config.
- */
- void _updateSlaveInfoFromConfig_inlock();
-
- /**
- * Helper to update our saved config, cancel any pending heartbeats, and kick off sending
- * new heartbeats based on the new config. Must *only* be called from within the
- * ReplicationExecutor context.
- *
- * Returns an action to be performed after unlocking _mutex, via
- * _performPostMemberStateUpdateAction.
- */
- PostMemberStateUpdateAction _setCurrentRSConfig_inlock(
- const ReplicaSetConfig& newConfig,
- int myIndex);
-
- /**
- * Helper to wake waiters in _replicationWaiterList that are doneWaitingForReplication.
- */
- void _wakeReadyWaiters_inlock();
-
- /**
- * Helper method for setting/unsetting maintenance mode. Scheduled by setMaintenanceMode()
- * to run in a global write lock in the replication executor thread.
- */
- void _setMaintenanceMode_helper(const ReplicationExecutor::CallbackArgs& cbData,
- bool activate,
- Status* result);
-
- /**
- * Helper method for retrieving maintenance mode. Scheduled by getMaintenanceMode() to run
- * in the replication executor thread.
- */
- void _getMaintenanceMode_helper(const ReplicationExecutor::CallbackArgs& cbData,
- bool* maintenanceMode);
-
- /**
- * Bottom half of fillIsMasterForReplSet.
- */
- void _fillIsMasterForReplSet_finish(const ReplicationExecutor::CallbackArgs& cbData,
- IsMasterResponse* result);
-
- /**
- * Bottom half of processReplSetFresh.
- */
- void _processReplSetFresh_finish(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetFreshArgs& args,
- BSONObjBuilder* response,
- Status* result);
-
- /**
- * Bottom half of processReplSetElect.
- */
- void _processReplSetElect_finish(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetElectArgs& args,
- BSONObjBuilder* response,
- Status* result);
-
- /**
- * Bottom half of processReplSetFreeze.
- */
- void _processReplSetFreeze_finish(const ReplicationExecutor::CallbackArgs& cbData,
- int secs,
- BSONObjBuilder* response,
- Status* result);
- /*
- * Bottom half of clearSyncSourceBlacklist
- */
- void _clearSyncSourceBlacklist_finish(const ReplicationExecutor::CallbackArgs& cbData);
-
- /**
- * Bottom half of processReplSetDeclareElectionWinner.
- */
- void _processReplSetDeclareElectionWinner_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm,
- Status* result);
-
- /**
- * Bottom half of processReplSetRequestVotes.
- */
- void _processReplSetRequestVotes_finish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response,
- Status* result);
-
- /**
- * Scheduled to cause the ReplicationCoordinator to reconsider any state that might
- * need to change as a result of time passing - for instance becoming PRIMARY when a single
- * node replica set member's stepDown period ends.
- */
- void _handleTimePassing(const ReplicationExecutor::CallbackArgs& cbData);
-
- /**
- * Helper method for _awaitReplication that takes an already locked unique_lock and a
- * Timer for timing the operation which has been counting since before the lock was
- * acquired.
- */
- ReplicationCoordinator::StatusAndDuration _awaitReplication_inlock(
- const Timer* timer,
- stdx::unique_lock<stdx::mutex>* lock,
- OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern);
-
- /*
- * Returns true if the given writeConcern is satisfied up to "optime" or is unsatisfiable.
- */
- bool _doneWaitingForReplication_inlock(const OpTime& opTime,
- const WriteConcernOptions& writeConcern);
-
- /**
- * Helper for _doneWaitingForReplication_inlock that takes an integer write concern.
- */
- bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes);
-
- /**
- * Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a
- * named write concern mode.
- */
- bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime,
- const ReplicaSetTagPattern& tagPattern);
-
- Status _checkIfWriteConcernCanBeSatisfied_inlock(
- const WriteConcernOptions& writeConcern) const;
-
- /**
- * Triggers all callbacks that are blocked waiting for new heartbeat data
- * to decide whether or not to finish a step down.
- * Should only be called from executor callbacks.
- */
- void _signalStepDownWaitersFromCallback(const ReplicationExecutor::CallbackArgs& cbData);
- void _signalStepDownWaiters();
-
- /**
- * Helper for stepDown run within a ReplicationExecutor callback. This method assumes
- * it is running within a global shared lock, and thus that no writes are going on at the
- * same time.
- */
- void _stepDownContinue(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicationExecutor::EventHandle finishedEvent,
- OperationContext* txn,
- Date_t waitUntil,
- Date_t stepdownUntil,
- bool force,
- Status* result);
-
- OID _getMyRID_inlock() const;
-
- int _getMyId_inlock() const;
-
- OpTime _getMyLastOptime_inlock() const;
-
- /**
- * Bottom half of setFollowerMode.
- *
- * May reschedule itself after the current election, so it is not sufficient to
- * wait for a callback scheduled to execute this method to complete. Instead,
- * supply an event, "finishedSettingFollowerMode", and wait for that event to
- * be signaled. Do not observe "*success" until after the event is signaled.
- */
- void _setFollowerModeFinish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const MemberState& newState,
- const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
- bool* success);
-
- /**
- * Helper method for updating our tracking of the last optime applied by a given node.
- * This is only valid to call on replica sets.
- * "configVersion" will be populated with our config version if it and the configVersion
- * of "args" differ.
- */
- Status _setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args,
- long long* configVersion);
-
- /**
- * Helper method for setMyLastOptime that takes in a unique lock on
- * _mutex. The passed in lock must already be locked. It is unspecified what state the
- * lock will be in after this method finishes.
- *
- * This function has the same rules for "opTime" as setMyLastOptime(), unless
- * "isRollbackAllowed" is true.
- */
- void _setMyLastOptime_inlock(stdx::unique_lock<stdx::mutex>* lock,
- const OpTime& opTime,
- bool isRollbackAllowed);
-
- /**
- * Schedules a heartbeat to be sent to "target" at "when". "targetIndex" is the index
- * into the replica set config members array that corresponds to the "target", or -1 if
- * "target" is not in _rsConfig.
- */
- void _scheduleHeartbeatToTarget(const HostAndPort& target, int targetIndex, Date_t when);
-
- /**
- * Processes each heartbeat response.
- *
- * Schedules additional heartbeats, triggers elections and step downs, etc.
- */
- void _handleHeartbeatResponse(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
- int targetIndex);
-
- void _handleHeartbeatResponseV1(
- const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
- int targetIndex);
-
- void _trackHeartbeatHandle(const StatusWith<ReplicationExecutor::CallbackHandle>& handle);
-
- void _untrackHeartbeatHandle(const ReplicationExecutor::CallbackHandle& handle);
-
- /**
- * Helper for _handleHeartbeatResponse.
- *
- * Updates the optime associated with the member at "memberIndex" in our config.
- */
- void _updateOpTimeFromHeartbeat_inlock(int memberIndex, const OpTime& optime);
-
- /**
- * Starts a heartbeat for each member in the current config. Called within the executor
- * context.
- */
- void _startHeartbeats();
-
- /**
- * Cancels all heartbeats. Called within executor context.
- */
- void _cancelHeartbeats();
-
- /**
- * Asynchronously sends a heartbeat to "target". "targetIndex" is the index
- * into the replica set config members array that corresponds to the "target", or -1 if
- * we don't have a valid replica set config.
- *
- * Scheduled by _scheduleHeartbeatToTarget.
- */
- void _doMemberHeartbeat(ReplicationExecutor::CallbackArgs cbData,
- const HostAndPort& target,
- int targetIndex);
-
-
- MemberState _getMemberState_inlock() const;
-
- /**
- * Callback that gives the TopologyCoordinator an initial LastVote document from
- * local storage.
- *
- * Called only during replication startup. All other updates come from the
- * TopologyCoordinator itself.
- */
- void _updateLastVote(const LastVote& lastVote);
-
- /**
- * Starts loading the replication configuration from local storage, and if it is valid,
- * schedules a callback (of _finishLoadLocalConfig) to set it as the current replica set
- * config (sets _rsConfig and _thisMembersConfigIndex).
- * Returns true if it finishes loading the local config, which most likely means there
- * was no local config at all or it was invalid in some way, and false if there was a valid
- * config detected but more work is needed to set it as the local config (which will be
- * handled by the callback to _finishLoadLocalConfig).
- */
- bool _startLoadLocalConfig(OperationContext* txn);
-
- /**
- * Callback that finishes the work started in _startLoadLocalConfig and sets _rsConfigState
- * to kConfigSteady, so that we can begin processing heartbeats and reconfigs.
- */
- void _finishLoadLocalConfig(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& localConfig,
- const StatusWith<OpTime>& lastOpTimeStatus);
-
- /**
- * Callback that finishes the work of processReplSetInitiate() inside the replication
- * executor context, in the event of a successful quorum check.
- */
- void _finishReplSetInitiate(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex);
-
- /**
- * Callback that finishes the work of processReplSetReconfig inside the replication
- * executor context, in the event of a successful quorum check.
- */
- void _finishReplSetReconfig(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex);
-
- /**
- * Changes _rsConfigState to newState, and notify any waiters.
- */
- void _setConfigState_inlock(ConfigState newState);
-
- /**
- * Updates the cached value, _memberState, to match _topCoord's reported
- * member state, from getMemberState().
- *
- * Returns an enum indicating what action to take after releasing _mutex, if any.
- * Call performPostMemberStateUpdateAction on the return value after releasing
- * _mutex.
- */
- PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator_inlock();
-
- /**
- * Performs a post member-state update action. Do not call while holding _mutex.
- */
- void _performPostMemberStateUpdateAction(PostMemberStateUpdateAction action);
-
- /**
- * Begins an attempt to elect this node.
- * Called after an incoming heartbeat changes this node's view of the set such that it
- * believes it can be elected PRIMARY.
- * For proper concurrency, must be called via a ReplicationExecutor callback.
- *
- * For old style elections the election path is:
- * _startElectSelf()
- * _onFreshnessCheckComplete()
- * _onElectCmdRunnerComplete()
- * For V1 (raft) style elections the election path is:
- * _startElectSelfV1()
- * _onDryRunComplete()
- * _onVoteRequestComplete()
- * _onElectionWinnerDeclarerComplete()
- */
- void _startElectSelf();
- void _startElectSelfV1();
-
- /**
- * Callback called when the FreshnessChecker has completed; checks the results and
- * decides whether to continue election proceedings.
- **/
- void _onFreshnessCheckComplete();
-
- /**
- * Callback called when the ElectCmdRunner has completed; checks the results and
- * decides whether to complete the election and change state to primary.
- **/
- void _onElectCmdRunnerComplete();
-
- /**
- * Callback called when the dryRun VoteRequester has completed; checks the results and
- * decides whether to conduct a proper election.
- * "originalTerm" was the term during which the dry run began, if the term has since
- * changed, do not run for election.
- */
- void _onDryRunComplete(long long originalTerm);
-
- /**
- * Callback called when the VoteRequester has completed; checks the results and
- * decides whether to change state to primary and alert other nodes of our primary-ness.
- * "originalTerm" was the term during which the election began, if the term has since
- * changed, do not step up as primary.
- */
- void _onVoteRequestComplete(long long originalTerm);
-
- /**
- * Callback called when the ElectWinnerDeclarer has completed; checks the results and
- * if we received any negative responses, relinquish primary.
- */
- void _onElectionWinnerDeclarerComplete();
-
- /**
- * Callback called after a random delay, to prevent repeated election ties.
- */
- void _recoverFromElectionTie(const ReplicationExecutor::CallbackArgs& cbData);
-
- /**
- * Chooses a new sync source. Must be scheduled as a callback.
- *
- * Calls into the Topology Coordinator, which uses its current view of the set to choose
- * the most appropriate sync source.
- */
- void _chooseNewSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
- HostAndPort* newSyncSource);
-
- /**
- * Adds 'host' to the sync source blacklist until 'until'. A blacklisted source cannot
- * be chosen as a sync source. Schedules a callback to unblacklist the sync source to be
- * run at 'until'.
- *
- * Must be scheduled as a callback.
- */
- void _blacklistSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
- const HostAndPort& host,
- Date_t until);
-
- /**
- * Removes 'host' from the sync source blacklist. If 'host' isn't found, it's simply
- * ignored and no error is thrown.
- *
- * Must be scheduled as a callback.
- */
- void _unblacklistSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
- const HostAndPort& host);
-
- /**
- * Determines if a new sync source should be considered.
- *
- * Must be scheduled as a callback.
- */
- void _shouldChangeSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
- const HostAndPort& currentSource,
- bool* shouldChange);
-
- /**
- * Schedules a request that the given host step down; logs any errors.
- */
- void _requestRemotePrimaryStepdown(const HostAndPort& target);
-
- void _heartbeatStepDownStart();
-
- /**
- * Completes a step-down of the current node. Must be run with a global
- * shared or global exclusive lock.
- */
- void _stepDownFinish(const ReplicationExecutor::CallbackArgs& cbData);
-
- /**
- * Schedules a replica set config change.
- */
- void _scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig);
-
- /**
- * Callback that continues a heartbeat-initiated reconfig after a running election
- * completes.
- */
- void _heartbeatReconfigAfterElectionCanceled(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig);
-
- /**
- * Method to write a configuration transmitted via heartbeat message to stable storage.
- */
- void _heartbeatReconfigStore(const ReplicationExecutor::CallbackArgs& cbd,
- const ReplicaSetConfig& newConfig);
-
- /**
- * Conclusion actions of a heartbeat-triggered reconfiguration.
- */
- void _heartbeatReconfigFinish(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig,
- StatusWith<int> myIndex);
-
- /**
- * Utility method that schedules or performs actions specified by a HeartbeatResponseAction
- * returned by a TopologyCoordinator::processHeartbeatResponse(V1) call with the given
- * value of "responseStatus".
- */
- void _handleHeartbeatResponseAction(
- const HeartbeatResponseAction& action,
- const StatusWith<ReplSetHeartbeatResponse>& responseStatus);
-
- /**
- * Bottom half of processHeartbeat(), which runs in the replication executor.
- */
- void _processHeartbeatFinish(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response,
- Status* outStatus);
-
- /**
- * Bottom half of processHeartbeatV1(), which runs in the replication executor.
- */
- void _processHeartbeatFinishV1(const ReplicationExecutor::CallbackArgs& cbData,
- const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response,
- Status* outStatus);
- /**
- * Scan the SlaveInfoVector and determine the highest OplogEntry present on a majority of
- * servers; set _lastCommittedOpTime to this new entry, if greater than the current entry.
- */
- void _updateLastCommittedOpTime_inlock();
-
- void _summarizeAsHtml_finish(const ReplicationExecutor::CallbackArgs& cbData,
- ReplSetHtmlSummary* output);
-
- /**
- * Callback that gets the current term from topology coordinator.
- */
- void _getTerm_helper(const ReplicationExecutor::CallbackArgs& cbData, long long* term);
-
-
- /**
- * Callback that attempts to set the current term in topology coordinator and
- * relinquishes primary if the term actually changes and we are primary.
- */
- void _updateTerm_helper(const ReplicationExecutor::CallbackArgs& cbData,
- long long term,
- bool* updated,
- Handle* cbHandle);
- bool _updateTerm_incallback(long long term, Handle* cbHandle);
-
- //
- // All member variables are labeled with one of the following codes indicating the
- // synchronization rules for accessing them.
- //
- // (R) Read-only in concurrent operation; no synchronization required.
- // (S) Self-synchronizing; access in any way from any context.
- // (PS) Pointer is read-only in concurrent operation, item pointed to is self-synchronizing;
- // Access in any context.
- // (M) Reads and writes guarded by _mutex
- // (X) Reads and writes must be performed in a callback in _replExecutor
- // (MX) Must hold _mutex and be in a callback in _replExecutor to write; must either hold
- // _mutex or be in a callback in _replExecutor to read.
- // (GX) Readable under a global intent lock. Must either hold global lock in exclusive
- // mode (MODE_X) or both hold global lock in shared mode (MODE_S) and be in executor
- // context to write.
- // (I) Independently synchronized, see member variable comment.
-
- // Protects member data of this ReplicationCoordinator.
- mutable stdx::mutex _mutex; // (S)
-
- // Handles to actively queued heartbeats.
- HeartbeatHandles _heartbeatHandles; // (X)
-
- // When this node does not know itself to be a member of a config, it adds
- // every host that sends it a heartbeat request to this set, and also starts
- // sending heartbeat requests to that host. This set is cleared whenever
- // a node discovers that it is a member of a config.
- unordered_set<HostAndPort> _seedList; // (X)
-
- // Parsed command line arguments related to replication.
- const ReplSettings _settings; // (R)
-
- // Mode of replication specified by _settings.
- const Mode _replMode; // (R)
-
- // Pointer to the TopologyCoordinator owned by this ReplicationCoordinator.
- std::unique_ptr<TopologyCoordinator> _topCoord; // (X)
-
- // If the executer is owned then this will be set, but should not be used.
- // This is only used to clean up and destroy the replExec if owned
- std::unique_ptr<ReplicationExecutor> _replExecutorIfOwned; // (S)
- // Executor that drives the topology coordinator.
- ReplicationExecutor& _replExecutor; // (S)
-
- // Pointer to the ReplicationCoordinatorExternalState owned by this ReplicationCoordinator.
- std::unique_ptr<ReplicationCoordinatorExternalState> _externalState; // (PS)
-
- // Thread that drives actions in the topology coordinator
- // Set in startReplication() and thereafter accessed in shutdown.
- std::unique_ptr<stdx::thread> _topCoordDriverThread; // (I)
-
- // Our RID, used to identify us to our sync source when sending replication progress
- // updates upstream. Set once in startReplication() and then never modified again.
- OID _myRID; // (M)
-
- // Rollback ID. Used to check if a rollback happened during some interval of time
- // TODO: ideally this should only change on rollbacks NOT on mongod restarts also.
- int _rbid; // (M)
-
- // list of information about clients waiting on replication. Does *not* own the
- // WaiterInfos.
- std::vector<WaiterInfo*> _replicationWaiterList; // (M)
-
- // list of information about clients waiting for a particular opTime.
- // Does *not* own the WaiterInfos.
- std::vector<WaiterInfo*> _opTimeWaiterList; // (M)
-
- // Set to true when we are in the process of shutting down replication.
- bool _inShutdown; // (M)
-
- // Election ID of the last election that resulted in this node becoming primary.
- OID _electionId; // (M)
-
- // Vector containing known information about each member (such as replication
- // progress and member ID) in our replica set or each member replicating from
- // us in a master-slave deployment. In master/slave, the first entry is
- // guaranteed to correspond to ourself. In replica sets where we don't have a
- // valid config or are in state REMOVED then the vector will be a single element
- // just with info about ourself. In replica sets with a valid config the elements
- // will be in the same order as the members in the replica set config, thus
- // the entry for ourself will be at _thisMemberConfigIndex.
- SlaveInfoVector _slaveInfo; // (M)
-
- // Current ReplicaSet state.
- MemberState _memberState; // (MX)
-
- // True if we are waiting for the applier to finish draining.
- bool _isWaitingForDrainToComplete; // (M)
-
- // Used to signal threads waiting for changes to _rsConfigState.
- stdx::condition_variable _rsConfigStateChange; // (M)
-
- // Represents the configuration state of the coordinator, which controls how and when
- // _rsConfig may change. See the state transition diagram in the type definition of
- // ConfigState for details.
- ConfigState _rsConfigState; // (M)
-
- // The current ReplicaSet configuration object, including the information about tag groups
- // that is used to satisfy write concern requests with named gle modes.
- ReplicaSetConfig _rsConfig; // (MX)
-
- // This member's index position in the current config.
- int _selfIndex; // (MX)
-
- // Vector of events that should be signaled whenever new heartbeat data comes in.
- std::vector<ReplicationExecutor::EventHandle> _stepDownWaiters; // (X)
+ bool updateTerm_forTest(long long term);
- // State for conducting an election of this node.
- // the presence of a non-null _freshnessChecker pointer indicates that an election is
- // currently in progress. When using the V1 protocol, a non-null _voteRequester pointer
- // indicates this instead.
- // Only one election is allowed at a time.
- std::unique_ptr<FreshnessChecker> _freshnessChecker; // (X)
+private:
+ ReplicationCoordinatorImpl(const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ TopologyCoordinator* topCoord,
+ int64_t prngSeed,
+ executor::NetworkInterface* network,
+ StorageInterface* storage,
+ ReplicationExecutor* replExec);
+ /**
+ * Configuration states for a replica set node.
+ *
+ * Transition diagram:
+ *
+ * PreStart ------------------> ReplicationDisabled
+ * |
+ * |
+ * v
+ * StartingUp -------> Uninitialized <------> Initiating
+ * \ ^ |
+ * ------- | |
+ * | | |
+ * v v |
+ * Reconfig <---> Steady <----> HBReconfig |
+ * ^ /
+ * | /
+ * \ /
+ * -----------------------
+ */
+ enum ConfigState {
+ kConfigPreStart,
+ kConfigStartingUp,
+ kConfigReplicationDisabled,
+ kConfigUninitialized,
+ kConfigSteady,
+ kConfigInitiating,
+ kConfigReconfiguring,
+ kConfigHBReconfiguring
+ };
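
The ConfigState transitions above are guarded by _mutex and announced through the _rsConfigStateChange condition variable declared further below. A minimal sketch of that wait/notify pattern, using standard-library types and hypothetical helper names rather than the real members:

    #include <condition_variable>
    #include <mutex>

    enum ConfigState { kConfigStartingUp, kConfigSteady, kConfigReconfiguring };

    std::mutex mtx;                   // stands in for _mutex
    std::condition_variable stateCV;  // stands in for _rsConfigStateChange
    ConfigState state = kConfigStartingUp;

    // Hypothetical helper: blocks the caller until the state reaches 'wanted'.
    void waitForConfigState(ConfigState wanted) {
        std::unique_lock<std::mutex> lk(mtx);
        stateCV.wait(lk, [&] { return state == wanted; });
    }

    // Mirrors _setConfigState_inlock: every transition wakes all waiters.
    void setConfigState(ConfigState next) {
        std::lock_guard<std::mutex> lk(mtx);
        state = next;
        stateCV.notify_all();
    }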
+
+ /**
+ * Type describing actions to take after a change to the MemberState _memberState.
+ */
+ enum PostMemberStateUpdateAction {
+ kActionNone,
+ kActionCloseAllConnections, // Also indicates that we should clear sharding state.
+ kActionFollowerModeStateChange,
+ kActionWinElection
+ };
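
Several methods below compute one of these actions while holding _mutex and perform it only after the lock has been released (see _updateMemberStateFromTopologyCoordinator_inlock and _performPostMemberStateUpdateAction). A minimal sketch of that compute-under-lock, act-after-unlock idiom, with illustrative names only:

    #include <mutex>

    enum PostMemberStateUpdateAction { kActionNone, kActionCloseAllConnections };

    std::mutex stateMutex;  // stands in for _mutex

    // Runs with the lock held; only decides what to do, performs no side effects.
    PostMemberStateUpdateAction updateStateInlock() {
        return kActionCloseAllConnections;
    }

    // Runs without the lock held, so it may take other locks or do slow work.
    void performAction(PostMemberStateUpdateAction action) {
        if (action == kActionCloseAllConnections) { /* drop connections... */ }
    }

    void onMemberStateChange() {
        PostMemberStateUpdateAction action;
        {
            std::lock_guard<std::mutex> lk(stateMutex);
            action = updateStateInlock();
        }  // lock released here
        performAction(action);
    }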
- std::unique_ptr<ElectCmdRunner> _electCmdRunner; // (X)
+ // Struct that holds information about clients waiting for replication.
+ struct WaiterInfo;
+
+ // Struct that holds information about nodes in this replication group, mainly used for
+ // tracking replication progress for write concern satisfaction.
+ struct SlaveInfo {
+ OpTime opTime; // Our last known OpTime that this slave has replicated to.
+ HostAndPort hostAndPort; // Client address of the slave.
+ int memberId; // Id of the node in the replica set config, or -1 if we're not a replSet.
+ OID rid; // RID of the node.
+ bool self; // Whether this SlaveInfo stores the information about ourself
+ SlaveInfo() : memberId(-1), self(false) {}
+ };
- std::unique_ptr<VoteRequester> _voteRequester; // (X)
+ typedef std::vector<SlaveInfo> SlaveInfoVector;
+
+ typedef std::vector<ReplicationExecutor::CallbackHandle> HeartbeatHandles;
+
+ /**
+ * Looks up the SlaveInfo in _slaveInfo associated with the given RID and returns a pointer
+ * to it, or returns NULL if there is no SlaveInfo with the given RID.
+ */
+ SlaveInfo* _findSlaveInfoByRID_inlock(const OID& rid);
+
+ /**
+ * Looks up the SlaveInfo in _slaveInfo associated with the given member ID and returns a
+ * pointer to it, or returns NULL if there is no SlaveInfo with the given member ID.
+ */
+ SlaveInfo* _findSlaveInfoByMemberID_inlock(int memberID);
+
+ /**
+ * Adds the given SlaveInfo to _slaveInfo and wakes up any threads waiting for replication
+ * that now have their write concern satisfied. Only valid to call in master/slave setups.
+ */
+ void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo);
+
+ /**
+ * Updates the item in _slaveInfo pointed to by 'slaveInfo' with the given OpTime 'opTime'
+ * and wakes up any threads waiting for replication that now have their write concern
+ * satisfied.
+ */
+ void _updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime);
+
+ /**
+ * Returns the index into _slaveInfo where data corresponding to ourself is stored.
+ * For more info on the rules about how we know where our entry is, see the comment for
+ * _slaveInfo.
+ */
+ size_t _getMyIndexInSlaveInfo_inlock() const;
+
+ /**
+ * Helper method that removes entries from _slaveInfo if they correspond to a node
+ * with a member ID that is not in the current replica set config. Will always leave an
+ * entry for ourself at the beginning of _slaveInfo, even if we aren't present in the
+ * config.
+ */
+ void _updateSlaveInfoFromConfig_inlock();
+
+ /**
+ * Helper to update our saved config, cancel any pending heartbeats, and kick off sending
+ * new heartbeats based on the new config. Must *only* be called from within the
+ * ReplicationExecutor context.
+ *
+ * Returns an action to be performed after unlocking _mutex, via
+ * _performPostMemberStateUpdateAction.
+ */
+ PostMemberStateUpdateAction _setCurrentRSConfig_inlock(const ReplicaSetConfig& newConfig,
+ int myIndex);
+
+ /**
+ * Helper to wake waiters in _replicationWaiterList that are doneWaitingForReplication.
+ */
+ void _wakeReadyWaiters_inlock();
+
+ /**
+ * Helper method for setting/unsetting maintenance mode. Scheduled by setMaintenanceMode()
+ * to run in a global write lock in the replication executor thread.
+ */
+ void _setMaintenanceMode_helper(const ReplicationExecutor::CallbackArgs& cbData,
+ bool activate,
+ Status* result);
+
+ /**
+ * Helper method for retrieving maintenance mode. Scheduled by getMaintenanceMode() to run
+ * in the replication executor thread.
+ */
+ void _getMaintenanceMode_helper(const ReplicationExecutor::CallbackArgs& cbData,
+ bool* maintenanceMode);
+
+ /**
+ * Bottom half of fillIsMasterForReplSet.
+ */
+ void _fillIsMasterForReplSet_finish(const ReplicationExecutor::CallbackArgs& cbData,
+ IsMasterResponse* result);
+
+ /**
+ * Bottom half of processReplSetFresh.
+ */
+ void _processReplSetFresh_finish(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetFreshArgs& args,
+ BSONObjBuilder* response,
+ Status* result);
+
+ /**
+ * Bottom half of processReplSetElect.
+ */
+ void _processReplSetElect_finish(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetElectArgs& args,
+ BSONObjBuilder* response,
+ Status* result);
+
+ /**
+ * Bottom half of processReplSetFreeze.
+ */
+ void _processReplSetFreeze_finish(const ReplicationExecutor::CallbackArgs& cbData,
+ int secs,
+ BSONObjBuilder* response,
+ Status* result);
+    /**
+     * Bottom half of clearSyncSourceBlacklist.
+     */
+ void _clearSyncSourceBlacklist_finish(const ReplicationExecutor::CallbackArgs& cbData);
+
+ /**
+ * Bottom half of processReplSetDeclareElectionWinner.
+ */
+ void _processReplSetDeclareElectionWinner_finish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm,
+ Status* result);
+
+ /**
+ * Bottom half of processReplSetRequestVotes.
+ */
+ void _processReplSetRequestVotes_finish(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response,
+ Status* result);
+
+ /**
+ * Scheduled to cause the ReplicationCoordinator to reconsider any state that might
+     * need to change as a result of time passing; for instance, becoming PRIMARY when a
+     * single-node replica set member's stepDown period ends.
+ */
+ void _handleTimePassing(const ReplicationExecutor::CallbackArgs& cbData);
+
+ /**
+ * Helper method for _awaitReplication that takes an already locked unique_lock and a
+ * Timer for timing the operation which has been counting since before the lock was
+ * acquired.
+ */
+ ReplicationCoordinator::StatusAndDuration _awaitReplication_inlock(
+ const Timer* timer,
+ stdx::unique_lock<stdx::mutex>* lock,
+ OperationContext* txn,
+ const OpTime& opTime,
+ const WriteConcernOptions& writeConcern);
+
+    /**
+     * Returns true if the given writeConcern is satisfied up to "opTime" or is unsatisfiable.
+ */
+ bool _doneWaitingForReplication_inlock(const OpTime& opTime,
+ const WriteConcernOptions& writeConcern);
+
+ /**
+ * Helper for _doneWaitingForReplication_inlock that takes an integer write concern.
+ */
+ bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes);
+
+ /**
+ * Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a
+ * named write concern mode.
+ */
+ bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime,
+ const ReplicaSetTagPattern& tagPattern);
+
+ Status _checkIfWriteConcernCanBeSatisfied_inlock(const WriteConcernOptions& writeConcern) const;
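
For a numeric write concern, satisfaction reduces to counting members whose last-applied optime has reached the awaited optime, as the name _haveNumNodesReachedOpTime_inlock suggests. A simplified sketch of that check, modeling optimes as plain integers instead of the real OpTime type:

    #include <vector>

    // Returns true once at least 'numNodes' members have applied 'target'.
    bool haveNumNodesReachedOpTime(const std::vector<long long>& memberOpTimes,
                                   long long target,
                                   int numNodes) {
        if (numNodes <= 0)
            return true;  // trivially satisfied
        int count = 0;
        for (long long applied : memberOpTimes) {
            if (applied >= target && ++count >= numNodes)
                return true;
        }
        return false;
    }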
+
+ /**
+ * Triggers all callbacks that are blocked waiting for new heartbeat data
+ * to decide whether or not to finish a step down.
+ * Should only be called from executor callbacks.
+ */
+ void _signalStepDownWaitersFromCallback(const ReplicationExecutor::CallbackArgs& cbData);
+ void _signalStepDownWaiters();
+
+ /**
+ * Helper for stepDown run within a ReplicationExecutor callback. This method assumes
+ * it is running within a global shared lock, and thus that no writes are going on at the
+ * same time.
+ */
+ void _stepDownContinue(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicationExecutor::EventHandle finishedEvent,
+ OperationContext* txn,
+ Date_t waitUntil,
+ Date_t stepdownUntil,
+ bool force,
+ Status* result);
+
+ OID _getMyRID_inlock() const;
+
+ int _getMyId_inlock() const;
+
+ OpTime _getMyLastOptime_inlock() const;
+
+ /**
+ * Bottom half of setFollowerMode.
+ *
+ * May reschedule itself after the current election, so it is not sufficient to
+ * wait for a callback scheduled to execute this method to complete. Instead,
+ * supply an event, "finishedSettingFollowerMode", and wait for that event to
+ * be signaled. Do not observe "*success" until after the event is signaled.
+ */
+ void _setFollowerModeFinish(const ReplicationExecutor::CallbackArgs& cbData,
+ const MemberState& newState,
+ const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
+ bool* success);
+
+ /**
+ * Helper method for updating our tracking of the last optime applied by a given node.
+ * This is only valid to call on replica sets.
+ * "configVersion" will be populated with our config version if it and the configVersion
+ * of "args" differ.
+ */
+ Status _setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args,
+ long long* configVersion);
+
+ /**
+ * Helper method for setMyLastOptime that takes in a unique lock on
+     * _mutex. The passed-in lock must already be locked. It is unspecified what state the
+ * lock will be in after this method finishes.
+ *
+ * This function has the same rules for "opTime" as setMyLastOptime(), unless
+ * "isRollbackAllowed" is true.
+ */
+ void _setMyLastOptime_inlock(stdx::unique_lock<stdx::mutex>* lock,
+ const OpTime& opTime,
+ bool isRollbackAllowed);
+
+ /**
+ * Schedules a heartbeat to be sent to "target" at "when". "targetIndex" is the index
+ * into the replica set config members array that corresponds to the "target", or -1 if
+ * "target" is not in _rsConfig.
+ */
+ void _scheduleHeartbeatToTarget(const HostAndPort& target, int targetIndex, Date_t when);
+
+ /**
+ * Processes each heartbeat response.
+ *
+ * Schedules additional heartbeats, triggers elections and step downs, etc.
+ */
+ void _handleHeartbeatResponse(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
+ int targetIndex);
+
+ void _handleHeartbeatResponseV1(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
+ int targetIndex);
+
+ void _trackHeartbeatHandle(const StatusWith<ReplicationExecutor::CallbackHandle>& handle);
+
+ void _untrackHeartbeatHandle(const ReplicationExecutor::CallbackHandle& handle);
+
+ /**
+ * Helper for _handleHeartbeatResponse.
+ *
+ * Updates the optime associated with the member at "memberIndex" in our config.
+ */
+ void _updateOpTimeFromHeartbeat_inlock(int memberIndex, const OpTime& optime);
+
+ /**
+ * Starts a heartbeat for each member in the current config. Called within the executor
+ * context.
+ */
+ void _startHeartbeats();
+
+ /**
+ * Cancels all heartbeats. Called within executor context.
+ */
+ void _cancelHeartbeats();
+
+ /**
+ * Asynchronously sends a heartbeat to "target". "targetIndex" is the index
+ * into the replica set config members array that corresponds to the "target", or -1 if
+ * we don't have a valid replica set config.
+ *
+ * Scheduled by _scheduleHeartbeatToTarget.
+ */
+ void _doMemberHeartbeat(ReplicationExecutor::CallbackArgs cbData,
+ const HostAndPort& target,
+ int targetIndex);
+
+
+ MemberState _getMemberState_inlock() const;
+
+ /**
+ * Callback that gives the TopologyCoordinator an initial LastVote document from
+ * local storage.
+ *
+ * Called only during replication startup. All other updates come from the
+ * TopologyCoordinator itself.
+ */
+ void _updateLastVote(const LastVote& lastVote);
+
+ /**
+ * Starts loading the replication configuration from local storage, and if it is valid,
+ * schedules a callback (of _finishLoadLocalConfig) to set it as the current replica set
+     * config (sets _rsConfig and _selfIndex).
+ * Returns true if it finishes loading the local config, which most likely means there
+ * was no local config at all or it was invalid in some way, and false if there was a valid
+ * config detected but more work is needed to set it as the local config (which will be
+ * handled by the callback to _finishLoadLocalConfig).
+ */
+ bool _startLoadLocalConfig(OperationContext* txn);
+
+ /**
+ * Callback that finishes the work started in _startLoadLocalConfig and sets _rsConfigState
+ * to kConfigSteady, so that we can begin processing heartbeats and reconfigs.
+ */
+ void _finishLoadLocalConfig(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& localConfig,
+ const StatusWith<OpTime>& lastOpTimeStatus);
+
+ /**
+ * Callback that finishes the work of processReplSetInitiate() inside the replication
+ * executor context, in the event of a successful quorum check.
+ */
+ void _finishReplSetInitiate(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex);
+
+ /**
+ * Callback that finishes the work of processReplSetReconfig inside the replication
+ * executor context, in the event of a successful quorum check.
+ */
+ void _finishReplSetReconfig(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex);
+
+ /**
+     * Changes _rsConfigState to newState, and notifies any waiters.
+ */
+ void _setConfigState_inlock(ConfigState newState);
+
+ /**
+ * Updates the cached value, _memberState, to match _topCoord's reported
+ * member state, from getMemberState().
+ *
+ * Returns an enum indicating what action to take after releasing _mutex, if any.
+     * Call _performPostMemberStateUpdateAction on the return value after releasing
+ * _mutex.
+ */
+ PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator_inlock();
+
+ /**
+ * Performs a post member-state update action. Do not call while holding _mutex.
+ */
+ void _performPostMemberStateUpdateAction(PostMemberStateUpdateAction action);
+
+ /**
+ * Begins an attempt to elect this node.
+ * Called after an incoming heartbeat changes this node's view of the set such that it
+ * believes it can be elected PRIMARY.
+ * For proper concurrency, must be called via a ReplicationExecutor callback.
+ *
+ * For old style elections the election path is:
+ * _startElectSelf()
+ * _onFreshnessCheckComplete()
+ * _onElectCmdRunnerComplete()
+ * For V1 (raft) style elections the election path is:
+ * _startElectSelfV1()
+ * _onDryRunComplete()
+ * _onVoteRequestComplete()
+ * _onElectionWinnerDeclarerComplete()
+ */
+ void _startElectSelf();
+ void _startElectSelfV1();
+
+ /**
+ * Callback called when the FreshnessChecker has completed; checks the results and
+ * decides whether to continue election proceedings.
+     */
+ void _onFreshnessCheckComplete();
+
+ /**
+ * Callback called when the ElectCmdRunner has completed; checks the results and
+ * decides whether to complete the election and change state to primary.
+     */
+ void _onElectCmdRunnerComplete();
+
+ /**
+ * Callback called when the dryRun VoteRequester has completed; checks the results and
+ * decides whether to conduct a proper election.
+ * "originalTerm" was the term during which the dry run began, if the term has since
+ * changed, do not run for election.
+ */
+ void _onDryRunComplete(long long originalTerm);
+
+ /**
+ * Callback called when the VoteRequester has completed; checks the results and
+ * decides whether to change state to primary and alert other nodes of our primary-ness.
+ * "originalTerm" was the term during which the election began, if the term has since
+ * changed, do not step up as primary.
+ */
+ void _onVoteRequestComplete(long long originalTerm);
+
+ /**
+ * Callback called when the ElectWinnerDeclarer has completed; checks the results and
+ * if we received any negative responses, relinquish primary.
+ */
+ void _onElectionWinnerDeclarerComplete();
+
+ /**
+ * Callback called after a random delay, to prevent repeated election ties.
+ */
+ void _recoverFromElectionTie(const ReplicationExecutor::CallbackArgs& cbData);
+
+ /**
+ * Chooses a new sync source. Must be scheduled as a callback.
+ *
+ * Calls into the Topology Coordinator, which uses its current view of the set to choose
+ * the most appropriate sync source.
+ */
+ void _chooseNewSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
+ HostAndPort* newSyncSource);
+
+ /**
+ * Adds 'host' to the sync source blacklist until 'until'. A blacklisted source cannot
+ * be chosen as a sync source. Schedules a callback to unblacklist the sync source to be
+ * run at 'until'.
+ *
+ * Must be scheduled as a callback.
+ */
+ void _blacklistSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
+ const HostAndPort& host,
+ Date_t until);
+
+ /**
+ * Removes 'host' from the sync source blacklist. If 'host' isn't found, it's simply
+ * ignored and no error is thrown.
+ *
+ * Must be scheduled as a callback.
+ */
+ void _unblacklistSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
+ const HostAndPort& host);
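
One way to picture the blacklist described above is a host-to-expiry map; the real implementation instead schedules _unblacklistSyncSource as an executor callback that fires at 'until'. A simplified sketch with lazy expiry and illustrative names:

    #include <chrono>
    #include <map>
    #include <string>

    using Clock = std::chrono::steady_clock;

    std::map<std::string, Clock::time_point> blacklist;  // host -> expiry

    void blacklistSyncSource(const std::string& host, Clock::time_point until) {
        blacklist[host] = until;
    }

    // A blacklisted host may not be chosen as a sync source until its entry
    // expires; here expiry happens lazily on lookup rather than via callback.
    bool isBlacklisted(const std::string& host) {
        auto it = blacklist.find(host);
        if (it == blacklist.end())
            return false;
        if (Clock::now() >= it->second) {
            blacklist.erase(it);
            return false;
        }
        return true;
    }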
+
+ /**
+ * Determines if a new sync source should be considered.
+ *
+ * Must be scheduled as a callback.
+ */
+ void _shouldChangeSyncSource(const ReplicationExecutor::CallbackArgs& cbData,
+ const HostAndPort& currentSource,
+ bool* shouldChange);
+
+ /**
+ * Schedules a request that the given host step down; logs any errors.
+ */
+ void _requestRemotePrimaryStepdown(const HostAndPort& target);
+
+ void _heartbeatStepDownStart();
+
+ /**
+ * Completes a step-down of the current node. Must be run with a global
+ * shared or global exclusive lock.
+ */
+ void _stepDownFinish(const ReplicationExecutor::CallbackArgs& cbData);
+
+ /**
+ * Schedules a replica set config change.
+ */
+ void _scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig);
+
+ /**
+ * Callback that continues a heartbeat-initiated reconfig after a running election
+ * completes.
+ */
+ void _heartbeatReconfigAfterElectionCanceled(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig);
+
+ /**
+ * Method to write a configuration transmitted via heartbeat message to stable storage.
+ */
+ void _heartbeatReconfigStore(const ReplicationExecutor::CallbackArgs& cbd,
+ const ReplicaSetConfig& newConfig);
+
+ /**
+ * Conclusion actions of a heartbeat-triggered reconfiguration.
+ */
+ void _heartbeatReconfigFinish(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig,
+ StatusWith<int> myIndex);
+
+ /**
+ * Utility method that schedules or performs actions specified by a HeartbeatResponseAction
+ * returned by a TopologyCoordinator::processHeartbeatResponse(V1) call with the given
+ * value of "responseStatus".
+ */
+ void _handleHeartbeatResponseAction(const HeartbeatResponseAction& action,
+ const StatusWith<ReplSetHeartbeatResponse>& responseStatus);
+
+ /**
+ * Bottom half of processHeartbeat(), which runs in the replication executor.
+ */
+ void _processHeartbeatFinish(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response,
+ Status* outStatus);
+
+ /**
+ * Bottom half of processHeartbeatV1(), which runs in the replication executor.
+ */
+ void _processHeartbeatFinishV1(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response,
+ Status* outStatus);
+ /**
+     * Scans the SlaveInfoVector and determines the highest OplogEntry present on a majority
+     * of servers; sets _lastCommittedOpTime to this new entry if it is greater than the
+     * current entry.
+ */
+ void _updateLastCommittedOpTime_inlock();
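
Because _slaveInfo tracks each member's last-known optime, the commit point is the highest optime that at least a majority of members have reached. A simplified sketch of that scan, modeling optimes as integers and assuming a non-empty member list:

    #include <algorithm>
    #include <functional>
    #include <vector>

    // After sorting descending, the element at index (majority - 1) has been
    // replicated by at least a majority of members.
    long long computeLastCommittedOpTime(std::vector<long long> memberOpTimes) {
        const size_t majority = memberOpTimes.size() / 2 + 1;
        std::sort(memberOpTimes.begin(), memberOpTimes.end(),
                  std::greater<long long>());
        return memberOpTimes[majority - 1];
    }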
+
+ void _summarizeAsHtml_finish(const ReplicationExecutor::CallbackArgs& cbData,
+ ReplSetHtmlSummary* output);
+
+ /**
+     * Callback that gets the current term from the topology coordinator.
+ */
+ void _getTerm_helper(const ReplicationExecutor::CallbackArgs& cbData, long long* term);
+
+
+ /**
+     * Callback that attempts to set the current term in the topology coordinator and
+ * relinquishes primary if the term actually changes and we are primary.
+ */
+ void _updateTerm_helper(const ReplicationExecutor::CallbackArgs& cbData,
+ long long term,
+ bool* updated,
+ Handle* cbHandle);
+ bool _updateTerm_incallback(long long term, Handle* cbHandle);
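
The rule the two comments above describe mirrors Raft's term handling: adopt any higher term that is observed, and give up primacy when the term moves. A simplified stand-in, with illustrative names only:

    // Adopts 'seenTerm' if it is newer and reports whether the term changed,
    // so the caller knows a primary must relinquish its position.
    struct TermState {
        long long currentTerm = 0;
        bool isPrimary = false;
    };

    bool updateTerm(TermState& state, long long seenTerm) {
        if (seenTerm <= state.currentTerm)
            return false;  // our term is already at least as high
        state.currentTerm = seenTerm;
        state.isPrimary = false;  // a newer term invalidates any primacy
        return true;              // caller may need to schedule a stepdown
    }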
+
+ //
+ // All member variables are labeled with one of the following codes indicating the
+ // synchronization rules for accessing them.
+ //
+ // (R) Read-only in concurrent operation; no synchronization required.
+ // (S) Self-synchronizing; access in any way from any context.
+ // (PS) Pointer is read-only in concurrent operation, item pointed to is self-synchronizing;
+ // Access in any context.
+ // (M) Reads and writes guarded by _mutex
+ // (X) Reads and writes must be performed in a callback in _replExecutor
+ // (MX) Must hold _mutex and be in a callback in _replExecutor to write; must either hold
+ // _mutex or be in a callback in _replExecutor to read.
+ // (GX) Readable under a global intent lock. Must either hold global lock in exclusive
+ // mode (MODE_X) or both hold global lock in shared mode (MODE_S) and be in executor
+ // context to write.
+ // (I) Independently synchronized, see member variable comment.
+
+ // Protects member data of this ReplicationCoordinator.
+ mutable stdx::mutex _mutex; // (S)
+
+ // Handles to actively queued heartbeats.
+ HeartbeatHandles _heartbeatHandles; // (X)
+
+ // When this node does not know itself to be a member of a config, it adds
+ // every host that sends it a heartbeat request to this set, and also starts
+ // sending heartbeat requests to that host. This set is cleared whenever
+ // a node discovers that it is a member of a config.
+ unordered_set<HostAndPort> _seedList; // (X)
+
+ // Parsed command line arguments related to replication.
+ const ReplSettings _settings; // (R)
+
+ // Mode of replication specified by _settings.
+ const Mode _replMode; // (R)
+
+ // Pointer to the TopologyCoordinator owned by this ReplicationCoordinator.
+ std::unique_ptr<TopologyCoordinator> _topCoord; // (X)
+
+    // If the executor is owned then this will be set, but should not be used.
+    // This is only used to clean up and destroy the replExec if owned.
+ std::unique_ptr<ReplicationExecutor> _replExecutorIfOwned; // (S)
+ // Executor that drives the topology coordinator.
+ ReplicationExecutor& _replExecutor; // (S)
+
+ // Pointer to the ReplicationCoordinatorExternalState owned by this ReplicationCoordinator.
+ std::unique_ptr<ReplicationCoordinatorExternalState> _externalState; // (PS)
+
+ // Thread that drives actions in the topology coordinator
+ // Set in startReplication() and thereafter accessed in shutdown.
+ std::unique_ptr<stdx::thread> _topCoordDriverThread; // (I)
+
+ // Our RID, used to identify us to our sync source when sending replication progress
+ // updates upstream. Set once in startReplication() and then never modified again.
+ OID _myRID; // (M)
+
+    // Rollback ID. Used to check if a rollback happened during some interval of time.
+    // TODO: ideally this should change only on rollbacks, not also on mongod restarts.
+ int _rbid; // (M)
+
+    // List of information about clients waiting on replication. Does *not* own the
+ // WaiterInfos.
+ std::vector<WaiterInfo*> _replicationWaiterList; // (M)
+
+    // List of information about clients waiting for a particular opTime.
+ // Does *not* own the WaiterInfos.
+ std::vector<WaiterInfo*> _opTimeWaiterList; // (M)
+
+ // Set to true when we are in the process of shutting down replication.
+ bool _inShutdown; // (M)
+
+ // Election ID of the last election that resulted in this node becoming primary.
+ OID _electionId; // (M)
+
+ // Vector containing known information about each member (such as replication
+ // progress and member ID) in our replica set or each member replicating from
+ // us in a master-slave deployment. In master/slave, the first entry is
+ // guaranteed to correspond to ourself. In replica sets where we don't have a
+ // valid config or are in state REMOVED then the vector will be a single element
+ // just with info about ourself. In replica sets with a valid config the elements
+ // will be in the same order as the members in the replica set config, thus
+    // the entry for ourself will be at _selfIndex.
+ SlaveInfoVector _slaveInfo; // (M)
+
+ // Current ReplicaSet state.
+ MemberState _memberState; // (MX)
+
+ // True if we are waiting for the applier to finish draining.
+ bool _isWaitingForDrainToComplete; // (M)
+
+ // Used to signal threads waiting for changes to _rsConfigState.
+ stdx::condition_variable _rsConfigStateChange; // (M)
+
+ // Represents the configuration state of the coordinator, which controls how and when
+ // _rsConfig may change. See the state transition diagram in the type definition of
+ // ConfigState for details.
+ ConfigState _rsConfigState; // (M)
+
+ // The current ReplicaSet configuration object, including the information about tag groups
+ // that is used to satisfy write concern requests with named gle modes.
+ ReplicaSetConfig _rsConfig; // (MX)
+
+ // This member's index position in the current config.
+ int _selfIndex; // (MX)
+
+ // Vector of events that should be signaled whenever new heartbeat data comes in.
+ std::vector<ReplicationExecutor::EventHandle> _stepDownWaiters; // (X)
+
+ // State for conducting an election of this node.
+    // The presence of a non-null _freshnessChecker pointer indicates that an election is
+ // currently in progress. When using the V1 protocol, a non-null _voteRequester pointer
+ // indicates this instead.
+ // Only one election is allowed at a time.
+ std::unique_ptr<FreshnessChecker> _freshnessChecker; // (X)
- std::unique_ptr<ElectionWinnerDeclarer> _electionWinnerDeclarer; // (X)
+ std::unique_ptr<ElectCmdRunner> _electCmdRunner; // (X)
- // Event that the election code will signal when the in-progress election completes.
- // Unspecified value when _freshnessChecker is NULL.
- ReplicationExecutor::EventHandle _electionFinishedEvent; // (X)
+ std::unique_ptr<VoteRequester> _voteRequester; // (X)
- // Whether we slept last time we attempted an election but possibly tied with other nodes.
- bool _sleptLastElection; // (X)
+ std::unique_ptr<ElectionWinnerDeclarer> _electionWinnerDeclarer; // (X)
- // Flag that indicates whether writes to databases other than "local" are allowed. Used to
- // answer canAcceptWritesForDatabase() and canAcceptWritesFor() questions.
- // Always true for standalone nodes and masters in master-slave relationships.
- bool _canAcceptNonLocalWrites; // (GX)
+ // Event that the election code will signal when the in-progress election completes.
+ // Unspecified value when _freshnessChecker is NULL.
+ ReplicationExecutor::EventHandle _electionFinishedEvent; // (X)
- // Flag that indicates whether reads from databases other than "local" are allowed. Unlike
- // _canAcceptNonLocalWrites, above, this question is about admission control on secondaries,
- // and we do not require that its observers be strongly synchronized. Accidentally
- // providing the prior value for a limited period of time is acceptable. Also unlike
- // _canAcceptNonLocalWrites, its value is only meaningful on replica set secondaries.
- AtomicUInt32 _canServeNonLocalReads; // (S)
+ // Whether we slept last time we attempted an election but possibly tied with other nodes.
+ bool _sleptLastElection; // (X)
- // OpTime of the latest committed operation. Matches the concurrency level of _slaveInfo.
- OpTime _lastCommittedOpTime; // (M)
+ // Flag that indicates whether writes to databases other than "local" are allowed. Used to
+ // answer canAcceptWritesForDatabase() and canAcceptWritesFor() questions.
+ // Always true for standalone nodes and masters in master-slave relationships.
+ bool _canAcceptNonLocalWrites; // (GX)
- // Data Replicator used to replicate data
- DataReplicator _dr; // (S)
+ // Flag that indicates whether reads from databases other than "local" are allowed. Unlike
+ // _canAcceptNonLocalWrites, above, this question is about admission control on secondaries,
+ // and we do not require that its observers be strongly synchronized. Accidentally
+ // providing the prior value for a limited period of time is acceptable. Also unlike
+ // _canAcceptNonLocalWrites, its value is only meaningful on replica set secondaries.
+ AtomicUInt32 _canServeNonLocalReads; // (S)
- };
+ // OpTime of the latest committed operation. Matches the concurrency level of _slaveInfo.
+ OpTime _lastCommittedOpTime; // (M)
+
+ // Data Replicator used to replicate data
+ DataReplicator _dr; // (S)
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
index 35f5fdf9f9d..d298decf65f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
@@ -42,54 +42,55 @@ namespace mongo {
namespace repl {
namespace {
- class LoseElectionGuard {
- MONGO_DISALLOW_COPYING(LoseElectionGuard);
- public:
- LoseElectionGuard(
- TopologyCoordinator* topCoord,
- ReplicationExecutor* executor,
- std::unique_ptr<FreshnessChecker>* freshnessChecker,
- std::unique_ptr<ElectCmdRunner>* electCmdRunner,
- ReplicationExecutor::EventHandle* electionFinishedEvent)
- : _topCoord(topCoord),
- _executor(executor),
- _freshnessChecker(freshnessChecker),
- _electCmdRunner(electCmdRunner),
- _electionFinishedEvent(electionFinishedEvent),
- _dismissed(false) {
+class LoseElectionGuard {
+ MONGO_DISALLOW_COPYING(LoseElectionGuard);
+
+public:
+ LoseElectionGuard(TopologyCoordinator* topCoord,
+ ReplicationExecutor* executor,
+ std::unique_ptr<FreshnessChecker>* freshnessChecker,
+ std::unique_ptr<ElectCmdRunner>* electCmdRunner,
+ ReplicationExecutor::EventHandle* electionFinishedEvent)
+ : _topCoord(topCoord),
+ _executor(executor),
+ _freshnessChecker(freshnessChecker),
+ _electCmdRunner(electCmdRunner),
+ _electionFinishedEvent(electionFinishedEvent),
+ _dismissed(false) {}
+
+ ~LoseElectionGuard() {
+ if (_dismissed) {
+ return;
}
-
- ~LoseElectionGuard() {
- if (_dismissed) {
- return;
- }
- _topCoord->processLoseElection();
- _freshnessChecker->reset(NULL);
- _electCmdRunner->reset(NULL);
- if (_electionFinishedEvent->isValid()) {
- _executor->signalEvent(*_electionFinishedEvent);
- }
+ _topCoord->processLoseElection();
+ _freshnessChecker->reset(NULL);
+ _electCmdRunner->reset(NULL);
+ if (_electionFinishedEvent->isValid()) {
+ _executor->signalEvent(*_electionFinishedEvent);
}
+ }
- void dismiss() { _dismissed = true; }
+ void dismiss() {
+ _dismissed = true;
+ }
- private:
- TopologyCoordinator* const _topCoord;
- ReplicationExecutor* const _executor;
- std::unique_ptr<FreshnessChecker>* const _freshnessChecker;
- std::unique_ptr<ElectCmdRunner>* const _electCmdRunner;
- const ReplicationExecutor::EventHandle* _electionFinishedEvent;
- bool _dismissed;
- };
+private:
+ TopologyCoordinator* const _topCoord;
+ ReplicationExecutor* const _executor;
+ std::unique_ptr<FreshnessChecker>* const _freshnessChecker;
+ std::unique_ptr<ElectCmdRunner>* const _electCmdRunner;
+ const ReplicationExecutor::EventHandle* _electionFinishedEvent;
+ bool _dismissed;
+};
} // namespace
- void ReplicationCoordinatorImpl::_startElectSelf() {
- invariant(!_freshnessChecker);
- invariant(!_electCmdRunner);
+void ReplicationCoordinatorImpl::_startElectSelf() {
+ invariant(!_freshnessChecker);
+ invariant(!_electCmdRunner);
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- switch (_rsConfigState) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ switch (_rsConfigState) {
case kConfigSteady:
break;
case kConfigInitiating:
@@ -100,183 +101,183 @@ namespace {
_topCoord->processLoseElection();
return;
default:
- severe() << "Entered replica set election code while in illegal config state " <<
- int(_rsConfigState);
+ severe() << "Entered replica set election code while in illegal config state "
+ << int(_rsConfigState);
fassertFailed(18913);
- }
+ }
- log() << "Standing for election";
- const StatusWith<ReplicationExecutor::EventHandle> finishEvh = _replExecutor.makeEvent();
- if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18680, finishEvh.getStatus());
- _electionFinishedEvent = finishEvh.getValue();
- LoseElectionGuard lossGuard(_topCoord.get(),
- &_replExecutor,
- &_freshnessChecker,
- &_electCmdRunner,
- &_electionFinishedEvent);
+ log() << "Standing for election";
+ const StatusWith<ReplicationExecutor::EventHandle> finishEvh = _replExecutor.makeEvent();
+ if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18680, finishEvh.getStatus());
+ _electionFinishedEvent = finishEvh.getValue();
+ LoseElectionGuard lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_freshnessChecker,
+ &_electCmdRunner,
+ &_electionFinishedEvent);
+
+
+ invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
+ OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+
+ if (lastOpTimeApplied.isNull()) {
+ log() << "not trying to elect self, "
+ "do not yet have a complete set of data from any point in time";
+ return;
+ }
+ _freshnessChecker.reset(new FreshnessChecker);
+
+ // This is necessary because the freshnessChecker may call directly into winning an
+ // election, if there are no other MaybeUp nodes. Winning an election attempts to lock
+ // _mutex again.
+ lk.unlock();
+
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _freshnessChecker->start(
+ &_replExecutor,
+ lastOpTimeApplied.getTimestamp(),
+ _rsConfig,
+ _selfIndex,
+ _topCoord->getMaybeUpHostAndPorts(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onFreshnessCheckComplete, this));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18681, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onFreshnessCheckComplete() {
+ invariant(_freshnessChecker);
+ invariant(!_electCmdRunner);
+ LoseElectionGuard lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_freshnessChecker,
+ &_electCmdRunner,
+ &_electionFinishedEvent);
+
+ if (_freshnessChecker->isCanceled()) {
+ LOG(2) << "Election canceled during freshness check phase";
+ return;
+ }
- invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
- OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+ const Date_t now(_replExecutor.now());
+ const FreshnessChecker::ElectionAbortReason abortReason =
+ _freshnessChecker->shouldAbortElection();
- if (lastOpTimeApplied.isNull()) {
- log() << "not trying to elect self, "
- "do not yet have a complete set of data from any point in time";
+    // If we already slept after the previous tie, do not sleep again; proceed with the election.
+ switch (abortReason) {
+ case FreshnessChecker::None:
+ break;
+ case FreshnessChecker::FreshnessTie:
+ if ((_selfIndex != 0) && !_sleptLastElection) {
+ const auto ms = Milliseconds(_replExecutor.nextRandomInt64(1000) + 50);
+ const Date_t nextCandidateTime = now + ms;
+ log() << "possible election tie; sleeping " << ms.count() << "ms until "
+ << dateToISOStringLocal(nextCandidateTime);
+ _topCoord->setElectionSleepUntil(nextCandidateTime);
+ _replExecutor.scheduleWorkAt(
+ nextCandidateTime,
+ stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
+ this,
+ stdx::placeholders::_1));
+ _sleptLastElection = true;
+ return;
+ }
+ _sleptLastElection = false;
+ break;
+ case FreshnessChecker::FresherNodeFound:
+ log() << "not electing self, we are not freshest";
return;
- }
-
- _freshnessChecker.reset(new FreshnessChecker);
-
- // This is necessary because the freshnessChecker may call directly into winning an
- // election, if there are no other MaybeUp nodes. Winning an election attempts to lock
- // _mutex again.
- lk.unlock();
-
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _freshnessChecker->start(
- &_replExecutor,
- lastOpTimeApplied.getTimestamp(),
- _rsConfig,
- _selfIndex,
- _topCoord->getMaybeUpHostAndPorts(),
- stdx::bind(&ReplicationCoordinatorImpl::_onFreshnessCheckComplete, this));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ case FreshnessChecker::QuorumUnreachable:
+ log() << "not electing self, we could not contact enough voting members";
+ return;
+ default:
+            log() << "not electing self due to election abort message: "
+ << static_cast<int>(abortReason);
return;
- }
- fassert(18681, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
}
- void ReplicationCoordinatorImpl::_onFreshnessCheckComplete() {
- invariant(_freshnessChecker);
- invariant(!_electCmdRunner);
- LoseElectionGuard lossGuard(_topCoord.get(),
- &_replExecutor,
- &_freshnessChecker,
- &_electCmdRunner,
- &_electionFinishedEvent);
-
- if (_freshnessChecker->isCanceled()) {
- LOG(2) << "Election canceled during freshness check phase";
- return;
- }
+ log() << "running for election";
+ // Secure our vote for ourself first
+ if (!_topCoord->voteForMyself(now)) {
+ return;
+ }
- const Date_t now(_replExecutor.now());
- const FreshnessChecker::ElectionAbortReason abortReason =
- _freshnessChecker->shouldAbortElection();
-
- // need to not sleep after last time sleeping,
- switch (abortReason) {
- case FreshnessChecker::None:
- break;
- case FreshnessChecker::FreshnessTie:
- if ((_selfIndex != 0) && !_sleptLastElection) {
- const auto ms = Milliseconds(_replExecutor.nextRandomInt64(1000) + 50);
- const Date_t nextCandidateTime = now + ms;
- log() << "possible election tie; sleeping " << ms.count() << "ms until " <<
- dateToISOStringLocal(nextCandidateTime);
- _topCoord->setElectionSleepUntil(nextCandidateTime);
- _replExecutor.scheduleWorkAt(
- nextCandidateTime,
- stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
- this,
- stdx::placeholders::_1));
- _sleptLastElection = true;
- return;
- }
- _sleptLastElection = false;
- break;
- case FreshnessChecker::FresherNodeFound:
- log() << "not electing self, we are not freshest";
- return;
- case FreshnessChecker::QuorumUnreachable:
- log() << "not electing self, we could not contact enough voting members";
- return;
- default:
- log() << "not electing self due to election abort message :"
- << static_cast<int>(abortReason);
- return;
- }
+ _electCmdRunner.reset(new ElectCmdRunner);
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electCmdRunner->start(
+ &_replExecutor,
+ _rsConfig,
+ _selfIndex,
+ _topCoord->getMaybeUpHostAndPorts(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onElectCmdRunnerComplete, this));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18685, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
+ LoseElectionGuard lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_freshnessChecker,
+ &_electCmdRunner,
+ &_electionFinishedEvent);
+
+ invariant(_freshnessChecker);
+ invariant(_electCmdRunner);
+ if (_electCmdRunner->isCanceled()) {
+ LOG(2) << "Election canceled during elect self phase";
+ return;
+ }
- log() << "running for election";
- // Secure our vote for ourself first
- if (!_topCoord->voteForMyself(now)) {
- return;
- }
+ const int receivedVotes = _electCmdRunner->getReceivedVotes();
- _electCmdRunner.reset(new ElectCmdRunner);
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electCmdRunner->start(
- &_replExecutor,
- _rsConfig,
- _selfIndex,
- _topCoord->getMaybeUpHostAndPorts(),
- stdx::bind(&ReplicationCoordinatorImpl::_onElectCmdRunnerComplete, this));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18685, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
+ if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
+ log() << "couldn't elect self, only received " << receivedVotes
+ << " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
+        // Hold off on standing for election again, giving other nodes a chance
+        // to win their elections.
+ const auto ms = Milliseconds(_replExecutor.nextRandomInt64(1000) + 50);
+ const Date_t now(_replExecutor.now());
+ const Date_t nextCandidateTime = now + ms;
+ log() << "waiting until " << nextCandidateTime << " before standing for election again";
+ _topCoord->setElectionSleepUntil(nextCandidateTime);
+ _replExecutor.scheduleWorkAt(
+ nextCandidateTime,
+ stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
+ this,
+ stdx::placeholders::_1));
+ return;
}
- void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
- LoseElectionGuard lossGuard(_topCoord.get(),
- &_replExecutor,
- &_freshnessChecker,
- &_electCmdRunner,
- &_electionFinishedEvent);
-
- invariant(_freshnessChecker);
- invariant(_electCmdRunner);
- if (_electCmdRunner->isCanceled()) {
- LOG(2) << "Election canceled during elect self phase";
- return;
- }
+ if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
+ log() << "config version changed during our election, ignoring result";
+ return;
+ }
- const int receivedVotes = _electCmdRunner->getReceivedVotes();
-
- if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
- log() << "couldn't elect self, only received " << receivedVotes <<
- " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
- // Suppress ourselves from standing for election again, giving other nodes a chance
- // to win their elections.
- const auto ms = Milliseconds(_replExecutor.nextRandomInt64(1000) + 50);
- const Date_t now(_replExecutor.now());
- const Date_t nextCandidateTime = now + ms;
- log() << "waiting until " << nextCandidateTime << " before standing for election again";
- _topCoord->setElectionSleepUntil(nextCandidateTime);
- _replExecutor.scheduleWorkAt(
- nextCandidateTime,
- stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
- this,
- stdx::placeholders::_1));
- return;
- }
+ log() << "election succeeded, assuming primary role";
- if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
- log() << "config version changed during our election, ignoring result";
- return;
- }
-
- log() << "election succeeded, assuming primary role";
+ lossGuard.dismiss();
+ _freshnessChecker.reset(NULL);
+ _electCmdRunner.reset(NULL);
+ _performPostMemberStateUpdateAction(kActionWinElection);
+ _replExecutor.signalEvent(_electionFinishedEvent);
+}
- lossGuard.dismiss();
- _freshnessChecker.reset(NULL);
- _electCmdRunner.reset(NULL);
- _performPostMemberStateUpdateAction(kActionWinElection);
- _replExecutor.signalEvent(_electionFinishedEvent);
+void ReplicationCoordinatorImpl::_recoverFromElectionTie(
+ const ReplicationExecutor::CallbackArgs& cbData) {
+ if (!cbData.status.isOK()) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_recoverFromElectionTie(
- const ReplicationExecutor::CallbackArgs& cbData) {
- if (!cbData.status.isOK()) {
- return;
- }
- if (_topCoord->checkShouldStandForElection(_replExecutor.now(), getMyLastOptime())) {
- _startElectSelf();
- }
+ if (_topCoord->checkShouldStandForElection(_replExecutor.now(), getMyLastOptime())) {
+ _startElectSelf();
}
+}
} // namespace repl
} // namespace mongo
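
LoseElectionGuard above is a dismissible RAII guard: every early return in the election
phases runs the loss cleanup unless dismiss() hands responsibility to the next asynchronous
phase. A generic sketch of the idiom follows; it is simplified, and the real guard resets
the freshness checker and elect runner and signals the election-finished event.

    #include <functional>
    #include <utility>

    class ScopeFailureGuard {
    public:
        explicit ScopeFailureGuard(std::function<void()> onFailure)
            : _onFailure(std::move(onFailure)) {}
        ScopeFailureGuard(const ScopeFailureGuard&) = delete;
        ScopeFailureGuard& operator=(const ScopeFailureGuard&) = delete;

        ~ScopeFailureGuard() {
            if (!_dismissed) {
                _onFailure();  // runs on every early-return path
            }
        }

        void dismiss() {
            _dismissed = true;  // the next async phase now owns cleanup
        }

    private:
        std::function<void()> _onFailure;
        bool _dismissed = false;
    };
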
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
index a0e4149de24..b187ccd74c2 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
@@ -48,367 +48,371 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- class ReplCoordElectTest : public ReplCoordTest {
- protected:
- void simulateEnoughHeartbeatsForElectability();
- void simulateFreshEnoughForElectability();
- };
-
- void ReplCoordElectTest::simulateEnoughHeartbeatsForElectability() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+using executor::NetworkInterfaceMock;
+
+class ReplCoordElectTest : public ReplCoordTest {
+protected:
+ void simulateEnoughHeartbeatsForElectability();
+ void simulateFreshEnoughForElectability();
+};
+
+void ReplCoordElectTest::simulateEnoughHeartbeatsForElectability() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+}
- void ReplCoordElectTest::simulateFreshEnoughForElectability() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "fresher" << false <<
- "opTime" << Date_t::fromMillisSinceEpoch(
- Timestamp(0, 0).asLL()) <<
- "veto" << false)));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+void ReplCoordElectTest::simulateFreshEnoughForElectability() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime"
+ << Date_t::fromMillisSinceEpoch(Timestamp(0, 0).asLL())
+ << "veto" << false)));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+}
- TEST_F(ReplCoordElectTest, ElectTooSoon) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- // Election never starts because we haven't set a lastOpTimeApplied value yet, via a
- // heartbeat.
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- simulateEnoughHeartbeatsForElectability();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
- }
+TEST_F(ReplCoordElectTest, ElectTooSoon) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+    // Election never starts because we have not yet set a lastOpTimeApplied
+    // value, which normally happens via a heartbeat.
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ simulateEnoughHeartbeatsForElectability();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
+}
- /**
- * This test checks that an election can happen when only one node is up, and it has the
- * vote(s) to win.
- */
- TEST_F(ReplCoordElectTest, ElectTwoNodesWithOneZeroVoter) {
- OperationContextReplMock txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345" <<
- "votes" << 0 << "hidden" << true <<
- "priority" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT(getReplCoord()->getMemberState().secondary()) <<
- getReplCoord()->getMemberState().toString();
-
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10,0), 0));
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- // blackhole heartbeat
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
- net->runReadyNetworkOperations();
- // blackhole freshness
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2,
- net->now(),
- ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
- net->runReadyNetworkOperations();
- net->exitNetwork();
-
- ASSERT(getReplCoord()->getMemberState().primary()) <<
- getReplCoord()->getMemberState().toString();
- ASSERT(getReplCoord()->isWaitingForApplierToDrain());
-
- // Since we're still in drain mode, expect that we report ismaster: false, issecondary:true.
- IsMasterResponse imResponse;
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- getReplCoord()->signalDrainComplete(&txn);
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- }
+/**
+ * This test checks that an election can happen when only one node is up, and it has the
+ * vote(s) to win.
+ */
+TEST_F(ReplCoordElectTest, ElectTwoNodesWithOneZeroVoter) {
+ OperationContextReplMock txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "votes" << 0 << "hidden" << true << "priority" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT(getReplCoord()->getMemberState().secondary())
+ << getReplCoord()->getMemberState().toString();
+
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+    // fail the heartbeat request with an error response
+ net->scheduleResponse(noi, net->now(), ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
+ net->runReadyNetworkOperations();
+    // fail the freshness request with an error response
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT(getReplCoord()->isWaitingForApplierToDrain());
+
+    // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
+ IsMasterResponse imResponse;
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ getReplCoord()->signalDrainComplete(&txn);
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+}
- TEST_F(ReplCoordElectTest, Elect1NodeSuccess) {
- OperationContextReplMock txn;
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT(getReplCoord()->getMemberState().primary()) <<
- getReplCoord()->getMemberState().toString();
- ASSERT(getReplCoord()->isWaitingForApplierToDrain());
-
- // Since we're still in drain mode, expect that we report ismaster: false, issecondary:true.
- IsMasterResponse imResponse;
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- getReplCoord()->signalDrainComplete(&txn);
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- }
+TEST_F(ReplCoordElectTest, Elect1NodeSuccess) {
+ OperationContextReplMock txn;
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT(getReplCoord()->isWaitingForApplierToDrain());
+
+    // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
+ IsMasterResponse imResponse;
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ getReplCoord()->signalDrainComplete(&txn);
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+}
- TEST_F(ReplCoordElectTest, ElectManyNodesSuccess) {
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- OperationContextNoop txn;
- getReplCoord()->setMyLastOptime(OpTime(Timestamp (100, 1), 0));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- startCapturingLogMessages();
- simulateSuccessfulElection();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
- }
+TEST_F(ReplCoordElectTest, ElectManyNodesSuccess) {
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ OperationContextNoop txn;
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ startCapturingLogMessages();
+ simulateSuccessfulElection();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
+}
- TEST_F(ReplCoordElectTest, ElectNotEnoughVotes) {
- // one responds with -10000 votes, and one doesn't respond, and we are not elected
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateFreshEnoughForElectability();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.target != HostAndPort("node2", 12345)) {
- net->blackHole(noi);
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "vote" << -10000 <<
- "round" << OID())));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectTest, ElectNotEnoughVotes) {
+    // One node responds with -10000 votes and another does not respond; we are not elected.
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateFreshEnoughForElectability();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.target != HostAndPort("node2", 12345)) {
+ net->blackHole(noi);
+ } else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "vote" << -10000 << "round" << OID())));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("couldn't elect self, only received -9999 votes"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("couldn't elect self, only received -9999 votes"));
+}
- TEST_F(ReplCoordElectTest, ElectWrongTypeForVote) {
- // one responds with a bad 'vote' field, and one doesn't respond, and we are not elected
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateFreshEnoughForElectability();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.target != HostAndPort("node2", 12345)) {
- net->blackHole(noi);
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "vote" << "yea" <<
- "round" << OID())));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectTest, ElectWrongTypeForVote) {
+    // One node responds with a bad 'vote' field and another does not respond; we are not elected.
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateFreshEnoughForElectability();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.target != HostAndPort("node2", 12345)) {
+ net->blackHole(noi);
+ } else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "vote"
+ << "yea"
+ << "round" << OID())));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("wrong type for vote argument in replSetElect command"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("wrong type for vote argument in replSetElect command"));
+}
- TEST_F(ReplCoordElectTest, ElectionDuringHBReconfigFails) {
- // start up, receive reconfig via heartbeat while at the same time, become candidate.
- // candidate state should be cleared.
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") <<
- BSON("_id" << 3 << "host" << "node3:12345") <<
- BSON("_id" << 4 << "host" << "node4:12345") <<
- BSON("_id" << 5 << "host" << "node5:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100,0), 0));
-
- // set hbreconfig to hang while in progress
- getExternalState()->setStoreLocalConfigDocumentToHang(true);
-
- // hb reconfig
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ReplSetHeartbeatResponse hbResp2;
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345"))));
- hbResp2.setConfig(config);
- hbResp2.setConfigVersion(3);
- hbResp2.setSetName("mySet");
- hbResp2.setState(MemberState::RS_SECONDARY);
- BSONObjBuilder respObj2;
- respObj2 << "ok" << 1;
- hbResp2.addToBSON(&respObj2, false);
- net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
-
- // prepare candidacy
- BSONObjBuilder result;
- ReplicationCoordinator::ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = config.toBSON();
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
- startCapturingLogMessages();
-
- // receive sufficient heartbeats to trigger an election
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- net->enterNetwork();
- for (int i = 0; i < 2; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectTest, ElectionDuringHBReconfigFails) {
+    // Start up, then receive a reconfig via heartbeat while simultaneously becoming a
+    // candidate; the candidate state should be cleared.
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345") << BSON("_id" << 3 << "host"
+ << "node3:12345")
+ << BSON("_id" << 4 << "host"
+ << "node4:12345") << BSON("_id" << 5 << "host"
+ << "node5:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+
+ // set hbreconfig to hang while in progress
+ getExternalState()->setStoreLocalConfigDocumentToHang(true);
+
+ // hb reconfig
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ReplSetHeartbeatResponse hbResp2;
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))));
+ hbResp2.setConfig(config);
+ hbResp2.setConfigVersion(3);
+ hbResp2.setSetName("mySet");
+ hbResp2.setState(MemberState::RS_SECONDARY);
+ BSONObjBuilder respObj2;
+ respObj2 << "ok" << 1;
+ hbResp2.addToBSON(&respObj2, false);
+ net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+
+ // prepare candidacy
+ BSONObjBuilder result;
+ ReplicationCoordinator::ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = config.toBSON();
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
+ startCapturingLogMessages();
+
+ // receive sufficient heartbeats to trigger an election
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ net->enterNetwork();
+ for (int i = 0; i < 2; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
-
- stopCapturingLogMessages();
- // ensure node does not stand for election
- ASSERT_EQUALS(1,
- countLogLinesContaining("Not standing for election; processing "
- "a configuration change"));
- getExternalState()->setStoreLocalConfigDocumentToHang(false);
+ net->runReadyNetworkOperations();
}
+ stopCapturingLogMessages();
+ // ensure node does not stand for election
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "Not standing for election; processing "
+ "a configuration change"));
+ getExternalState()->setStoreLocalConfigDocumentToHang(false);
+}
}
}
}
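
Both election-tie paths in replication_coordinator_impl_elect.cpp above back off for
Milliseconds(nextRandomInt64(1000) + 50) before standing again. Assuming nextRandomInt64(1000)
yields a value in [0, 999], the delay is uniform over [50, 1049] ms, which makes a repeat
collision between tied candidates unlikely. A standalone sketch of the same arithmetic,
using the standard library rather than the ReplicationExecutor API:

    #include <chrono>
    #include <cstdint>
    #include <random>

    // Uniformly random candidacy delay in [50, 1049] ms, mirroring
    // Milliseconds(nextRandomInt64(1000) + 50).
    std::chrono::milliseconds nextCandidateDelay(std::mt19937_64& rng) {
        std::uniform_int_distribution<std::int64_t> dist(0, 999);
        return std::chrono::milliseconds(dist(rng) + 50);
    }
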
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
index 686f7bbe5d3..36a28e24a37 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
@@ -44,55 +44,56 @@ namespace mongo {
namespace repl {
namespace {
- class LoseElectionGuardV1 {
- MONGO_DISALLOW_COPYING(LoseElectionGuardV1);
- public:
- LoseElectionGuardV1(
- TopologyCoordinator* topCoord,
- ReplicationExecutor* executor,
- std::unique_ptr<VoteRequester>* voteRequester,
- std::unique_ptr<ElectionWinnerDeclarer>* electionWinnerDeclarer,
- ReplicationExecutor::EventHandle* electionFinishedEvent)
- : _topCoord(topCoord),
- _executor(executor),
- _voteRequester(voteRequester),
- _electionWinnerDeclarer(electionWinnerDeclarer),
- _electionFinishedEvent(electionFinishedEvent),
- _dismissed(false) {
+class LoseElectionGuardV1 {
+ MONGO_DISALLOW_COPYING(LoseElectionGuardV1);
+
+public:
+ LoseElectionGuardV1(TopologyCoordinator* topCoord,
+ ReplicationExecutor* executor,
+ std::unique_ptr<VoteRequester>* voteRequester,
+ std::unique_ptr<ElectionWinnerDeclarer>* electionWinnerDeclarer,
+ ReplicationExecutor::EventHandle* electionFinishedEvent)
+ : _topCoord(topCoord),
+ _executor(executor),
+ _voteRequester(voteRequester),
+ _electionWinnerDeclarer(electionWinnerDeclarer),
+ _electionFinishedEvent(electionFinishedEvent),
+ _dismissed(false) {}
+
+ ~LoseElectionGuardV1() {
+ if (_dismissed) {
+ return;
}
-
- ~LoseElectionGuardV1() {
- if (_dismissed) {
- return;
- }
- _topCoord->processLoseElection();
- _electionWinnerDeclarer->reset(nullptr);
- _voteRequester->reset(nullptr);
- if (_electionFinishedEvent->isValid()) {
- _executor->signalEvent(*_electionFinishedEvent);
- }
+ _topCoord->processLoseElection();
+ _electionWinnerDeclarer->reset(nullptr);
+ _voteRequester->reset(nullptr);
+ if (_electionFinishedEvent->isValid()) {
+ _executor->signalEvent(*_electionFinishedEvent);
}
+ }
- void dismiss() { _dismissed = true; }
+ void dismiss() {
+ _dismissed = true;
+ }
- private:
- TopologyCoordinator* const _topCoord;
- ReplicationExecutor* const _executor;
- std::unique_ptr<VoteRequester>* const _voteRequester;
- std::unique_ptr<ElectionWinnerDeclarer>* const _electionWinnerDeclarer;
- const ReplicationExecutor::EventHandle* _electionFinishedEvent;
- bool _dismissed;
- };
+private:
+ TopologyCoordinator* const _topCoord;
+ ReplicationExecutor* const _executor;
+ std::unique_ptr<VoteRequester>* const _voteRequester;
+ std::unique_ptr<ElectionWinnerDeclarer>* const _electionWinnerDeclarer;
+ const ReplicationExecutor::EventHandle* _electionFinishedEvent;
+ bool _dismissed;
+};
} // namespace
- void ReplicationCoordinatorImpl::_startElectSelfV1() {
- invariant(!_electionWinnerDeclarer);
- invariant(!_voteRequester);
- invariant(!_freshnessChecker);
+void ReplicationCoordinatorImpl::_startElectSelfV1() {
+ invariant(!_electionWinnerDeclarer);
+ invariant(!_voteRequester);
+ invariant(!_freshnessChecker);
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- switch (_rsConfigState) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ switch (_rsConfigState) {
case kConfigSteady:
break;
case kConfigInitiating:
@@ -103,183 +104,175 @@ namespace {
_topCoord->processLoseElection();
return;
default:
- severe() << "Entered replica set election code while in illegal config state " <<
- int(_rsConfigState);
+ severe() << "Entered replica set election code while in illegal config state "
+ << int(_rsConfigState);
fassertFailed(28641);
- }
-
- const StatusWith<ReplicationExecutor::EventHandle> finishEvh = _replExecutor.makeEvent();
- if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28642, finishEvh.getStatus());
- _electionFinishedEvent = finishEvh.getValue();
- LoseElectionGuardV1 lossGuard(_topCoord.get(),
- &_replExecutor,
- &_voteRequester,
- &_electionWinnerDeclarer,
- &_electionFinishedEvent);
-
-
- invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
- OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
-
- if (lastOpTimeApplied == OpTime()) {
- log() << "not trying to elect self, "
- "do not yet have a complete set of data from any point in time";
- return;
- }
-
- log() << "conducting a dry run election to see if we could be elected";
- _voteRequester.reset(new VoteRequester);
-
- // This is necessary because the voteRequester may call directly into winning an
- // election, if there are no other MaybeUp nodes. Winning an election attempts to lock
- // _mutex again.
- lk.unlock();
-
- long long term = _topCoord->getTerm();
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _voteRequester->start(
- &_replExecutor,
- _rsConfig,
- _rsConfig.getMemberAt(_selfIndex).getId(),
- _topCoord->getTerm(),
- true, // dry run
- getMyLastOptime(),
- stdx::bind(&ReplicationCoordinatorImpl::_onDryRunComplete, this, term));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28685, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
}
- void ReplicationCoordinatorImpl::_onDryRunComplete(long long originalTerm) {
- invariant(_voteRequester);
- invariant(!_electionWinnerDeclarer);
- LoseElectionGuardV1 lossGuard(_topCoord.get(),
- &_replExecutor,
- &_voteRequester,
- &_electionWinnerDeclarer,
- &_electionFinishedEvent);
-
- if (_topCoord->getTerm() != originalTerm) {
- log() << "not running for primary, we have been superceded already";
- return;
- }
-
- const VoteRequester::VoteRequestResult endResult = _voteRequester->getResult();
-
- if (endResult == VoteRequester::InsufficientVotes) {
- log() << "not running for primary, we received insufficient votes";
- return;
- }
- else if (endResult == VoteRequester::StaleTerm) {
- log() << "not running for primary, we have been superceded already";
- return;
- }
- else if (endResult != VoteRequester::SuccessfullyElected) {
- log() << "not running for primary, we received an unexpected problem";
- return;
- }
-
- log() << "dry election run succeeded, running for election";
- _topCoord->incrementTerm();
- // Secure our vote for ourself first
- _topCoord->voteForMyselfV1();
-
- _voteRequester.reset(new VoteRequester);
-
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _voteRequester->start(
- &_replExecutor,
- _rsConfig,
- _rsConfig.getMemberAt(_selfIndex).getId(),
- _topCoord->getTerm(),
- false,
- getMyLastOptime(),
- stdx::bind(&ReplicationCoordinatorImpl::_onVoteRequestComplete,
- this,
- originalTerm + 1));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28643, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
+ const StatusWith<ReplicationExecutor::EventHandle> finishEvh = _replExecutor.makeEvent();
+ if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28642, finishEvh.getStatus());
+ _electionFinishedEvent = finishEvh.getValue();
+ LoseElectionGuardV1 lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_voteRequester,
+ &_electionWinnerDeclarer,
+ &_electionFinishedEvent);
+
+
+ invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
+ OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+
+ if (lastOpTimeApplied == OpTime()) {
+ log() << "not trying to elect self, "
+ "do not yet have a complete set of data from any point in time";
+ return;
}
- void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long originalTerm) {
- invariant(_voteRequester);
- invariant(!_electionWinnerDeclarer);
- LoseElectionGuardV1 lossGuard(_topCoord.get(),
- &_replExecutor,
- &_voteRequester,
- &_electionWinnerDeclarer,
- &_electionFinishedEvent);
-
- if (_topCoord->getTerm() != originalTerm) {
- log() << "not becoming primary, we have been superceded already";
- return;
- }
-
- const VoteRequester::VoteRequestResult endResult = _voteRequester->getResult();
+ log() << "conducting a dry run election to see if we could be elected";
+ _voteRequester.reset(new VoteRequester);
+
+ // This is necessary because the voteRequester may call directly into winning an
+ // election, if there are no other MaybeUp nodes. Winning an election attempts to lock
+ // _mutex again.
+ lk.unlock();
+
+ long long term = _topCoord->getTerm();
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _voteRequester->start(
+ &_replExecutor,
+ _rsConfig,
+ _rsConfig.getMemberAt(_selfIndex).getId(),
+ _topCoord->getTerm(),
+ true, // dry run
+ getMyLastOptime(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onDryRunComplete, this, term));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28685, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onDryRunComplete(long long originalTerm) {
+ invariant(_voteRequester);
+ invariant(!_electionWinnerDeclarer);
+ LoseElectionGuardV1 lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_voteRequester,
+ &_electionWinnerDeclarer,
+ &_electionFinishedEvent);
+
+ if (_topCoord->getTerm() != originalTerm) {
+        log() << "not running for primary, we have been superseded already";
+ return;
+ }
- if (endResult == VoteRequester::InsufficientVotes) {
- log() << "not becoming primary, we received insufficient votes";
- return;
- }
- else if (endResult == VoteRequester::StaleTerm) {
- log() << "not becoming primary, we have been superceded already";
- return;
- }
- else if (endResult != VoteRequester::SuccessfullyElected) {
- log() << "not becoming primary, we received an unexpected problem";
- return;
- }
+ const VoteRequester::VoteRequestResult endResult = _voteRequester->getResult();
+
+ if (endResult == VoteRequester::InsufficientVotes) {
+ log() << "not running for primary, we received insufficient votes";
+ return;
+ } else if (endResult == VoteRequester::StaleTerm) {
+        log() << "not running for primary, we have been superseded already";
+ return;
+ } else if (endResult != VoteRequester::SuccessfullyElected) {
+ log() << "not running for primary, we received an unexpected problem";
+ return;
+ }
- log() << "election succeeded, assuming primary role";
- _performPostMemberStateUpdateAction(kActionWinElection);
-
- _electionWinnerDeclarer.reset(new ElectionWinnerDeclarer);
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electionWinnerDeclarer->start(
- &_replExecutor,
- _rsConfig.getReplSetName(),
- _rsConfig.getMemberAt(_selfIndex).getId(),
- _topCoord->getTerm(),
- _topCoord->getMaybeUpHostAndPorts(),
- stdx::bind(&ReplicationCoordinatorImpl::_onElectionWinnerDeclarerComplete, this));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28644, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
+    log() << "dry run election succeeded, running for election";
+ _topCoord->incrementTerm();
+ // Secure our vote for ourself first
+ _topCoord->voteForMyselfV1();
+
+ _voteRequester.reset(new VoteRequester);
+
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _voteRequester->start(
+ &_replExecutor,
+ _rsConfig,
+ _rsConfig.getMemberAt(_selfIndex).getId(),
+ _topCoord->getTerm(),
+ false,
+ getMyLastOptime(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onVoteRequestComplete, this, originalTerm + 1));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28643, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long originalTerm) {
+ invariant(_voteRequester);
+ invariant(!_electionWinnerDeclarer);
+ LoseElectionGuardV1 lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_voteRequester,
+ &_electionWinnerDeclarer,
+ &_electionFinishedEvent);
+
+ if (_topCoord->getTerm() != originalTerm) {
+        log() << "not becoming primary, we have been superseded already";
+ return;
}
- void ReplicationCoordinatorImpl::_onElectionWinnerDeclarerComplete() {
- LoseElectionGuardV1 lossGuard(_topCoord.get(),
- &_replExecutor,
- &_voteRequester,
- &_electionWinnerDeclarer,
- &_electionFinishedEvent);
-
- invariant(_voteRequester);
- invariant(_electionWinnerDeclarer);
-
- const Status endResult = _electionWinnerDeclarer->getStatus();
-
- if (!endResult.isOK()) {
- log() << "stepping down from primary, because: " << endResult;
- _topCoord->prepareForStepDown();
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish,
- this,
- stdx::placeholders::_1));
- }
+ const VoteRequester::VoteRequestResult endResult = _voteRequester->getResult();
+
+ if (endResult == VoteRequester::InsufficientVotes) {
+ log() << "not becoming primary, we received insufficient votes";
+ return;
+ } else if (endResult == VoteRequester::StaleTerm) {
+        log() << "not becoming primary, we have been superseded already";
+ return;
+ } else if (endResult != VoteRequester::SuccessfullyElected) {
+ log() << "not becoming primary, we received an unexpected problem";
+ return;
+ }
- lossGuard.dismiss();
- _voteRequester.reset(nullptr);
- _electionWinnerDeclarer.reset(nullptr);
- _replExecutor.signalEvent(_electionFinishedEvent);
+ log() << "election succeeded, assuming primary role";
+ _performPostMemberStateUpdateAction(kActionWinElection);
+
+ _electionWinnerDeclarer.reset(new ElectionWinnerDeclarer);
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electionWinnerDeclarer->start(
+ &_replExecutor,
+ _rsConfig.getReplSetName(),
+ _rsConfig.getMemberAt(_selfIndex).getId(),
+ _topCoord->getTerm(),
+ _topCoord->getMaybeUpHostAndPorts(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onElectionWinnerDeclarerComplete, this));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28644, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onElectionWinnerDeclarerComplete() {
+ LoseElectionGuardV1 lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_voteRequester,
+ &_electionWinnerDeclarer,
+ &_electionFinishedEvent);
+
+ invariant(_voteRequester);
+ invariant(_electionWinnerDeclarer);
+
+ const Status endResult = _electionWinnerDeclarer->getStatus();
+
+ if (!endResult.isOK()) {
+ log() << "stepping down from primary, because: " << endResult;
+ _topCoord->prepareForStepDown();
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish, this, stdx::placeholders::_1));
}
+ lossGuard.dismiss();
+ _voteRequester.reset(nullptr);
+ _electionWinnerDeclarer.reset(nullptr);
+ _replExecutor.signalEvent(_electionFinishedEvent);
+}
+
} // namespace repl
} // namespace mongo
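
The V1 path above elects in two phases: a dry-run vote request at the current term, which
probes electability without disturbing any node's term, and only on success a real round at
term + 1 after incrementing the term and voting for itself. A compressed sketch of that flow;
the types are illustrative only, and requestVotes stands in for VoteRequester::start plus
getResult (the self-vote step is omitted for brevity).

    #include <cstdint>

    enum class VoteResult { kSuccessfullyElected, kInsufficientVotes, kStaleTerm };

    struct CandidateSketch {
        std::int64_t term = 0;

        // Returns true if the node may assume the primary role.
        bool runElection(VoteResult (*requestVotes)(std::int64_t term, bool dryRun)) {
            // Phase 1: dry run at the current term; losing it costs nothing.
            if (requestVotes(term, /*dryRun=*/true) != VoteResult::kSuccessfullyElected) {
                return false;
            }
            // Phase 2: bump the term (incrementTerm) and run for real.
            ++term;
            return requestVotes(term, /*dryRun=*/false) == VoteResult::kSuccessfullyElected;
        }
    };
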
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index c3c1deddf88..0e3300bf079 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -48,575 +48,590 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- class ReplCoordElectV1Test : public ReplCoordTest {
- protected:
- void simulateEnoughHeartbeatsForElectability();
- void simulateSuccessfulDryRun();
- };
-
- void ReplCoordElectV1Test::simulateEnoughHeartbeatsForElectability() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgsV1 hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+using executor::NetworkInterfaceMock;
+
+class ReplCoordElectV1Test : public ReplCoordTest {
+protected:
+ void simulateEnoughHeartbeatsForElectability();
+ void simulateSuccessfulDryRun();
+};
+
+void ReplCoordElectV1Test::simulateEnoughHeartbeatsForElectability() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgsV1 hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+}
- void ReplCoordElectV1Test::simulateSuccessfulDryRun() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- for (int i = 0; i < rsConfig.getNumMembers() / 2; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "reason" << "" <<
- "term" << request.cmdObj["term"].Long() <<
- "voteGranted" << true)));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+void ReplCoordElectV1Test::simulateSuccessfulDryRun() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
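+    // Grant dry-run votes from half of the members; together with this node's
+    // own vote that is a majority.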
+ for (int i = 0; i < rsConfig.getNumMembers() / 2; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "reason"
+ << ""
+ << "term" << request.cmdObj["term"].Long()
+ << "voteGranted" << true)));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
- }
-
- TEST_F(ReplCoordElectV1Test, ElectTooSoon) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- // Election never starts because we haven't set a lastOpTimeApplied value yet, via a
- // heartbeat.
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345")) <<
- "protocolVersion" << 1),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- simulateEnoughHeartbeatsForElectability();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+}
- TEST_F(ReplCoordElectV1Test, ElectTwoNodesWithOneZeroVoter) {
- OperationContextReplMock txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345" <<
- "votes" << 0 << "hidden" << true <<
- "priority" << 0)) <<
- "protocolVersion" << 1),
- HostAndPort("node1", 12345));
+TEST_F(ReplCoordElectV1Test, ElectTooSoon) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+    // The election never starts because we have not yet set a lastOpTimeApplied
+    // value via a heartbeat.
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")) << "protocolVersion"
+ << 1),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ simulateEnoughHeartbeatsForElectability();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
+}
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+TEST_F(ReplCoordElectV1Test, ElectTwoNodesWithOneZeroVoter) {
+ OperationContextReplMock txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "votes" << 0 << "hidden" << true << "priority" << 0))
+ << "protocolVersion" << 1),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT(getReplCoord()->getMemberState().secondary())
+ << getReplCoord()->getMemberState().toString();
+
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
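+    // Fail the one outstanding request; node2 carries zero votes, so this node's
+    // own vote already constitutes a majority of the voters.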
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi, net->now(), ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT(getReplCoord()->isWaitingForApplierToDrain());
+
+    // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
+ IsMasterResponse imResponse;
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ getReplCoord()->signalDrainComplete(&txn);
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+}
- ASSERT(getReplCoord()->getMemberState().secondary()) <<
- getReplCoord()->getMemberState().toString();
+TEST_F(ReplCoordElectV1Test, Elect1NodeSuccess) {
+ OperationContextReplMock txn;
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")) << "protocolVersion" << 1),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT(getReplCoord()->isWaitingForApplierToDrain());
+
+    // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
+ IsMasterResponse imResponse;
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ getReplCoord()->signalDrainComplete(&txn);
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+}
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10,0), 0));
+TEST_F(ReplCoordElectV1Test, ElectManyNodesSuccess) {
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ OperationContextNoop txn;
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ startCapturingLogMessages();
+ simulateSuccessfulV1Election();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
+}
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
+TEST_F(ReplCoordElectV1Test, ElectNotEnoughVotesInDryRun) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term" << 0 << "voteGranted"
+ << false << "reason"
+ << "don't like him much")));
+ }
net->runReadyNetworkOperations();
- net->exitNetwork();
-
- ASSERT(getReplCoord()->getMemberState().primary()) <<
- getReplCoord()->getMemberState().toString();
- ASSERT(getReplCoord()->isWaitingForApplierToDrain());
-
- // Since we're still in drain mode, expect that we report ismaster: false, issecondary:true.
- IsMasterResponse imResponse;
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- getReplCoord()->signalDrainComplete(&txn);
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- }
-
- TEST_F(ReplCoordElectV1Test, Elect1NodeSuccess) {
- OperationContextReplMock txn;
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")) <<
- "protocolVersion" << 1),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT(getReplCoord()->getMemberState().primary()) <<
- getReplCoord()->getMemberState().toString();
- ASSERT(getReplCoord()->isWaitingForApplierToDrain());
-
- // Since we're still in drain mode, expect that we report ismaster: false, issecondary:true.
- IsMasterResponse imResponse;
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- getReplCoord()->signalDrainComplete(&txn);
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- }
-
- TEST_F(ReplCoordElectV1Test, ElectManyNodesSuccess) {
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- OperationContextNoop txn;
- getReplCoord()->setMyLastOptime(OpTime(Timestamp (100, 1), 0));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- startCapturingLogMessages();
- simulateSuccessfulV1Election();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("not running for primary, we received insufficient votes"));
+}
- TEST_F(ReplCoordElectV1Test, ElectNotEnoughVotesInDryRun) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "term" << 0 <<
- "voteGranted" << false <<
- "reason" << "don't like him much")));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectV1Test, ElectStaleTermInDryRun) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long() + 1
+ << "voteGranted" << false << "reason"
+ << "quit living in the past")));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("not running for primary, we received insufficient votes"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("not running for primary, we have been superceded already"));
+}
- TEST_F(ReplCoordElectV1Test, ElectStaleTermInDryRun) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "term" << request.cmdObj["term"].Long() + 1 <<
- "voteGranted" << false <<
- "reason" << "quit living in the past")));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectV1Test, ElectionDuringHBReconfigFails) {
+    // start up, then receive a reconfig via heartbeat while simultaneously becoming a
+    // candidate; the candidate state should be cleared.
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345") << BSON("_id" << 3 << "host"
+ << "node3:12345")
+ << BSON("_id" << 4 << "host"
+ << "node4:12345") << BSON("_id" << 5 << "host"
+ << "node5:12345"))
+ << "protocolVersion" << 1),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+
+    // make the heartbeat reconfig hang while the new config document is being stored
+ getExternalState()->setStoreLocalConfigDocumentToHang(true);
+
+    // deliver a heartbeat response carrying a newer config (version 3) to start the reconfig
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ReplSetHeartbeatResponse hbResp2;
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")) << "protocolVersion"
+ << 1));
+ hbResp2.setConfig(config);
+ hbResp2.setConfigVersion(3);
+ hbResp2.setSetName("mySet");
+ hbResp2.setState(MemberState::RS_SECONDARY);
+ net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), makeResponseStatus(hbResp2.toBSON(true)));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+
+ // prepare candidacy
+ BSONObjBuilder result;
+ ReplicationCoordinator::ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = config.toBSON();
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
+ startCapturingLogMessages();
+
+ // receive sufficient heartbeats to trigger an election
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ net->enterNetwork();
+ for (int i = 0; i < 2; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgsV1 hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining(
- "not running for primary, we have been superceded already"));
- }
-
- TEST_F(ReplCoordElectV1Test, ElectionDuringHBReconfigFails) {
- // start up, receive reconfig via heartbeat while at the same time, become candidate.
- // candidate state should be cleared.
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") <<
- BSON("_id" << 3 << "host" << "node3:12345") <<
- BSON("_id" << 4 << "host" << "node4:12345") <<
- BSON("_id" << 5 << "host" << "node5:12345")) <<
- "protocolVersion" << 1),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100,0), 0));
-
- // set hbreconfig to hang while in progress
- getExternalState()->setStoreLocalConfigDocumentToHang(true);
-
- // hb reconfig
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ReplSetHeartbeatResponse hbResp2;
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")) <<
- "protocolVersion" << 1));
- hbResp2.setConfig(config);
- hbResp2.setConfigVersion(3);
- hbResp2.setSetName("mySet");
- hbResp2.setState(MemberState::RS_SECONDARY);
- net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2, net->now(), makeResponseStatus(hbResp2.toBSON(true)));
net->runReadyNetworkOperations();
- getNet()->exitNetwork();
-
- // prepare candidacy
- BSONObjBuilder result;
- ReplicationCoordinator::ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = config.toBSON();
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
- startCapturingLogMessages();
-
- // receive sufficient heartbeats to trigger an election
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- net->enterNetwork();
- for (int i = 0; i < 2; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgsV1 hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
- }
- net->exitNetwork();
-
- stopCapturingLogMessages();
- // ensure node does not stand for election
- ASSERT_EQUALS(1,
- countLogLinesContaining("Not standing for election; processing "
- "a configuration change"));
- getExternalState()->setStoreLocalConfigDocumentToHang(false);
}
+ net->exitNetwork();
+
+ stopCapturingLogMessages();
+ // ensure node does not stand for election
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "Not standing for election; processing "
+ "a configuration change"));
+ getExternalState()->setStoreLocalConfigDocumentToHang(false);
+}
- TEST_F(ReplCoordElectV1Test, ElectionSucceedsButDeclaringWinnerFails) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "term" << (request.cmdObj["dryRun"].Bool() ?
- request.cmdObj["term"].Long() - 1 :
- request.cmdObj["term"].Long()) <<
- "voteGranted" << true)));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() ==
- "replSetDeclareElectionWinner") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 0 <<
- "code" << ErrorCodes::BadValue <<
- "errmsg" << "term has already passed" <<
- "term" << request.cmdObj["term"].Long() + 1)));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectV1Test, ElectionSucceedsButDeclaringWinnerFails) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term"
+ << (request.cmdObj["dryRun"].Bool()
+ ? request.cmdObj["term"].Long() - 1
+ : request.cmdObj["term"].Long())
+ << "voteGranted" << true)));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() ==
+ "replSetDeclareElectionWinner") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 0 << "code" << ErrorCodes::BadValue << "errmsg"
+ << "term has already passed"
+ << "term" << request.cmdObj["term"].Long() + 1)));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("stepping down from primary, because:"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("stepping down from primary, because:"));
+}
- TEST_F(ReplCoordElectV1Test, ElectNotEnoughVotes) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateSuccessfulDryRun();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "term" << 1 <<
- "voteGranted" << false <<
- "reason" << "don't like him much")));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectV1Test, ElectNotEnoughVotes) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateSuccessfulDryRun();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term" << 1 << "voteGranted"
+ << false << "reason"
+ << "don't like him much")));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("not becoming primary, we received insufficient votes"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("not becoming primary, we received insufficient votes"));
+}
- TEST_F(ReplCoordElectV1Test, ElectStaleTerm) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateSuccessfulDryRun();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "term" << request.cmdObj["term"].Long() + 1 <<
- "voteGranted" << false <<
- "reason" << "quit living in the past")));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectV1Test, ElectStaleTerm) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateSuccessfulDryRun();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long() + 1
+ << "voteGranted" << false << "reason"
+ << "quit living in the past")));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("not becoming primary, we have been superceded already"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("not becoming primary, we have been superceded already"));
+}
- TEST_F(ReplCoordElectV1Test, ElectTermChangeDuringDryRun) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- // update to a future term before dry run completes
- getReplCoord()->updateTerm(1000);
- simulateSuccessfulDryRun();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining(
- "not running for primary, we have been superceded already"));
- }
+TEST_F(ReplCoordElectV1Test, ElectTermChangeDuringDryRun) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ // update to a future term before dry run completes
+ getReplCoord()->updateTerm(1000);
+ simulateSuccessfulDryRun();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("not running for primary, we have been superceded already"));
+}
- TEST_F(ReplCoordElectV1Test, ElectTermChangeDuringActualElection) {
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ) <<
- "protocolVersion" << 1);
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateSuccessfulDryRun();
- // update to a future term before the election completes
- getReplCoord()->updateTerm(1000);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "term" << request.cmdObj["term"].Long() <<
- "voteGranted" << true <<
- "reason" << "")));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectV1Test, ElectTermChangeDuringActualElection) {
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")) << "protocolVersion"
+ << 1);
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(Timestamp(100, 1), 0);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateSuccessfulDryRun();
+ // update to a future term before the election completes
+ getReplCoord()->updateTerm(1000);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() != "replSetRequestVotes") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long()
+ << "voteGranted" << true << "reason"
+ << "")));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("not becoming primary, we have been superceded already"));
+ net->runReadyNetworkOperations();
}
-
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("not becoming primary, we have been superceded already"));
+}
}
}
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 4afdb8594e8..61ae5ddecdc 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -57,155 +57,131 @@ namespace repl {
namespace {
- typedef ReplicationExecutor::CallbackHandle CBHandle;
+typedef ReplicationExecutor::CallbackHandle CBHandle;
-} //namespace
-
- void ReplicationCoordinatorImpl::_doMemberHeartbeat(ReplicationExecutor::CallbackArgs cbData,
- const HostAndPort& target,
- int targetIndex) {
-
- _untrackHeartbeatHandle(cbData.myHandle);
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
-
- const Date_t now = _replExecutor.now();
- BSONObj heartbeatObj;
- Milliseconds timeout(0);
- if (isV1ElectionProtocol()) {
- const std::pair<ReplSetHeartbeatArgsV1, Milliseconds> hbRequest =
- _topCoord->prepareHeartbeatRequestV1(
- now,
- _settings.ourSetName(),
- target);
- heartbeatObj = hbRequest.first.toBSON();
- timeout = hbRequest.second;
- }
- else {
- const std::pair<ReplSetHeartbeatArgs, Milliseconds> hbRequest =
- _topCoord->prepareHeartbeatRequest(
- now,
- _settings.ourSetName(),
- target);
- heartbeatObj = hbRequest.first.toBSON();
- timeout = hbRequest.second;
- }
-
- const RemoteCommandRequest request(target, "admin", heartbeatObj, timeout);
- const ReplicationExecutor::RemoteCommandCallbackFn callback = stdx::bind(
- &ReplicationCoordinatorImpl::_handleHeartbeatResponse,
- this,
- stdx::placeholders::_1,
- targetIndex);
+} // namespace
- _trackHeartbeatHandle(_replExecutor.scheduleRemoteCommand(request, callback));
+void ReplicationCoordinatorImpl::_doMemberHeartbeat(ReplicationExecutor::CallbackArgs cbData,
+ const HostAndPort& target,
+ int targetIndex) {
+ _untrackHeartbeatHandle(cbData.myHandle);
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ReplicationCoordinatorImpl::_scheduleHeartbeatToTarget(
- const HostAndPort& target,
- int targetIndex,
- Date_t when) {
-
- LOG(2) << "Scheduling heartbeat to " << target << " at " << dateToISOStringUTC(when);
- _trackHeartbeatHandle(
- _replExecutor.scheduleWorkAt(
- when,
- stdx::bind(&ReplicationCoordinatorImpl::_doMemberHeartbeat,
- this,
- stdx::placeholders::_1,
- target,
- targetIndex)));
+ const Date_t now = _replExecutor.now();
+ BSONObj heartbeatObj;
+ Milliseconds timeout(0);
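+    // Prepare the heartbeat command and timeout for whichever election protocol
+    // (V1 or legacy) is in effect.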
+ if (isV1ElectionProtocol()) {
+ const std::pair<ReplSetHeartbeatArgsV1, Milliseconds> hbRequest =
+ _topCoord->prepareHeartbeatRequestV1(now, _settings.ourSetName(), target);
+ heartbeatObj = hbRequest.first.toBSON();
+ timeout = hbRequest.second;
+ } else {
+ const std::pair<ReplSetHeartbeatArgs, Milliseconds> hbRequest =
+ _topCoord->prepareHeartbeatRequest(now, _settings.ourSetName(), target);
+ heartbeatObj = hbRequest.first.toBSON();
+ timeout = hbRequest.second;
}
- void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
- const ReplicationExecutor::RemoteCommandCallbackArgs& cbData, int targetIndex) {
+ const RemoteCommandRequest request(target, "admin", heartbeatObj, timeout);
+ const ReplicationExecutor::RemoteCommandCallbackFn callback =
+ stdx::bind(&ReplicationCoordinatorImpl::_handleHeartbeatResponse,
+ this,
+ stdx::placeholders::_1,
+ targetIndex);
+
+ _trackHeartbeatHandle(_replExecutor.scheduleRemoteCommand(request, callback));
+}
+
+void ReplicationCoordinatorImpl::_scheduleHeartbeatToTarget(const HostAndPort& target,
+ int targetIndex,
+ Date_t when) {
+ LOG(2) << "Scheduling heartbeat to " << target << " at " << dateToISOStringUTC(when);
+ _trackHeartbeatHandle(
+ _replExecutor.scheduleWorkAt(when,
+ stdx::bind(&ReplicationCoordinatorImpl::_doMemberHeartbeat,
+ this,
+ stdx::placeholders::_1,
+ target,
+ targetIndex)));
+}
+
+void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
+ const ReplicationExecutor::RemoteCommandCallbackArgs& cbData, int targetIndex) {
+ // remove handle from queued heartbeats
+ _untrackHeartbeatHandle(cbData.myHandle);
+
+ // Parse and validate the response. At the end of this step, if responseStatus is OK then
+ // hbResponse is valid.
+ Status responseStatus = cbData.response.getStatus();
+ if (responseStatus == ErrorCodes::CallbackCanceled) {
+ return;
+ }
- // remove handle from queued heartbeats
- _untrackHeartbeatHandle(cbData.myHandle);
+ const HostAndPort& target = cbData.request.target;
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj resp;
+ if (responseStatus.isOK()) {
+ resp = cbData.response.getValue().data;
+ responseStatus = hbResponse.initialize(resp, _topCoord->getTerm());
+ }
+ const Date_t now = _replExecutor.now();
+ const OpTime lastApplied = getMyLastOptime(); // Locks and unlocks _mutex.
+ Milliseconds networkTime(0);
+ StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse);
- // Parse and validate the response. At the end of this step, if responseStatus is OK then
- // hbResponse is valid.
- Status responseStatus = cbData.response.getStatus();
- if (responseStatus == ErrorCodes::CallbackCanceled) {
- return;
+ if (responseStatus.isOK()) {
+ networkTime = cbData.response.getValue().elapsedMillis;
+ _updateTerm_incallback(hbStatusResponse.getValue().getTerm(), nullptr);
+ } else {
+ log() << "Error in heartbeat request to " << target << "; " << responseStatus;
+ if (!resp.isEmpty()) {
+ LOG(3) << "heartbeat response: " << resp;
}
- const HostAndPort& target = cbData.request.target;
- ReplSetHeartbeatResponse hbResponse;
- BSONObj resp;
- if (responseStatus.isOK()) {
- resp = cbData.response.getValue().data;
- responseStatus = hbResponse.initialize(resp, _topCoord->getTerm());
- }
- const Date_t now = _replExecutor.now();
- const OpTime lastApplied = getMyLastOptime(); // Locks and unlocks _mutex.
- Milliseconds networkTime(0);
- StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse);
-
- if (responseStatus.isOK()) {
- networkTime = cbData.response.getValue().elapsedMillis;
- _updateTerm_incallback(hbStatusResponse.getValue().getTerm(), nullptr);
- }
- else {
- log() << "Error in heartbeat request to " << target << "; " << responseStatus;
- if (!resp.isEmpty()) {
- LOG(3) << "heartbeat response: " << resp;
- }
+ hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus);
+ }
- hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus);
- }
+ HeartbeatResponseAction action = _topCoord->processHeartbeatResponse(
+ now, networkTime, target, hbStatusResponse, lastApplied);
- HeartbeatResponseAction action =
- _topCoord->processHeartbeatResponse(
- now,
- networkTime,
- target,
- hbStatusResponse,
- lastApplied);
-
- if (action.getAction() == HeartbeatResponseAction::NoAction &&
- hbStatusResponse.isOK() &&
- hbStatusResponse.getValue().hasOpTime() &&
- targetIndex >= 0 &&
- hbStatusResponse.getValue().hasState() &&
- hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (hbStatusResponse.getValue().getConfigVersion() == _rsConfig.getConfigVersion()) {
- _updateOpTimeFromHeartbeat_inlock(targetIndex,
- hbStatusResponse.getValue().getOpTime());
- // TODO: Enable with Data Replicator
- //lk.unlock();
- //_dr.slavesHaveProgressed();
- }
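+    // For a routine (NoAction) response from a non-primary member, record the
+    // member's reported optime in our progress tracking.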
+ if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() &&
+ hbStatusResponse.getValue().hasOpTime() && targetIndex >= 0 &&
+ hbStatusResponse.getValue().hasState() &&
+ hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (hbStatusResponse.getValue().getConfigVersion() == _rsConfig.getConfigVersion()) {
+ _updateOpTimeFromHeartbeat_inlock(targetIndex, hbStatusResponse.getValue().getOpTime());
+ // TODO: Enable with Data Replicator
+ // lk.unlock();
+ //_dr.slavesHaveProgressed();
}
+ }
- _signalStepDownWaiters();
+ _signalStepDownWaiters();
- _scheduleHeartbeatToTarget(
- target,
- targetIndex,
- std::max(now, action.getNextHeartbeatStartDate()));
+ _scheduleHeartbeatToTarget(
+ target, targetIndex, std::max(now, action.getNextHeartbeatStartDate()));
- _handleHeartbeatResponseAction(action, hbStatusResponse);
- }
+ _handleHeartbeatResponseAction(action, hbStatusResponse);
+}
- void ReplicationCoordinatorImpl::_updateOpTimeFromHeartbeat_inlock(int targetIndex,
- const OpTime& optime) {
- invariant(_selfIndex >= 0);
- invariant(targetIndex >= 0);
+void ReplicationCoordinatorImpl::_updateOpTimeFromHeartbeat_inlock(int targetIndex,
+ const OpTime& optime) {
+ invariant(_selfIndex >= 0);
+ invariant(targetIndex >= 0);
- SlaveInfo& slaveInfo = _slaveInfo[targetIndex];
- if (optime > slaveInfo.opTime) {
- _updateSlaveInfoOptime_inlock(&slaveInfo, optime);
- }
+ SlaveInfo& slaveInfo = _slaveInfo[targetIndex];
+ if (optime > slaveInfo.opTime) {
+ _updateSlaveInfoOptime_inlock(&slaveInfo, optime);
}
+}
- void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
- const HeartbeatResponseAction& action,
- const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {
-
- switch (action.getAction()) {
+void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
+ const HeartbeatResponseAction& action,
+ const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {
+ switch (action.getAction()) {
case HeartbeatResponseAction::NoAction:
// Update the cached member state if different than the current topology member state
if (_memberState != _topCoord->getMemberState()) {
@@ -223,8 +199,7 @@ namespace {
case HeartbeatResponseAction::StartElection:
if (isV1ElectionProtocol()) {
_startElectSelfV1();
- }
- else {
+ } else {
_startElectSelf();
}
break;
@@ -235,312 +210,290 @@ namespace {
case HeartbeatResponseAction::StepDownRemotePrimary: {
invariant(action.getPrimaryConfigIndex() != _selfIndex);
_requestRemotePrimaryStepdown(
- _rsConfig.getMemberAt(action.getPrimaryConfigIndex()).getHostAndPort());
+ _rsConfig.getMemberAt(action.getPrimaryConfigIndex()).getHostAndPort());
break;
}
default:
severe() << "Illegal heartbeat response action code " << int(action.getAction());
invariant(false);
- }
}
+}
namespace {
- /**
- * This callback is purely for logging and has no effect on any other operations
- */
- void remoteStepdownCallback(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData) {
-
- const Status status = cbData.response.getStatus();
- if (status == ErrorCodes::CallbackCanceled) {
- return;
- }
+/**
+ * This callback is purely for logging and has no effect on any other operations
+ */
+void remoteStepdownCallback(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData) {
+ const Status status = cbData.response.getStatus();
+ if (status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
- if (status.isOK()) {
- LOG(1) << "stepdown of primary(" << cbData.request.target
- << ") succeeded with response -- "
- << cbData.response.getValue().data;
- }
- else {
- warning() << "stepdown of primary(" << cbData.request.target
- << ") failed due to " << cbData.response.getStatus();
- }
+ if (status.isOK()) {
+ LOG(1) << "stepdown of primary(" << cbData.request.target << ") succeeded with response -- "
+ << cbData.response.getValue().data;
+ } else {
+ warning() << "stepdown of primary(" << cbData.request.target << ") failed due to "
+ << cbData.response.getStatus();
}
+}
} // namespace
- void ReplicationCoordinatorImpl::_requestRemotePrimaryStepdown(const HostAndPort& target) {
- RemoteCommandRequest request(target, "admin", BSON("replSetStepDown" << 1));
-
- log() << "Requesting " << target << " step down from primary";
- CBHStatus cbh = _replExecutor.scheduleRemoteCommand(
- request, remoteStepdownCallback);
- if (cbh.getStatus() != ErrorCodes::ShutdownInProgress) {
- fassert(18808, cbh.getStatus());
- }
- }
+void ReplicationCoordinatorImpl::_requestRemotePrimaryStepdown(const HostAndPort& target) {
+ RemoteCommandRequest request(target, "admin", BSON("replSetStepDown" << 1));
- void ReplicationCoordinatorImpl::_heartbeatStepDownStart() {
- log() << "Stepping down from primary in response to heartbeat";
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish,
- this,
- stdx::placeholders::_1));
+ log() << "Requesting " << target << " step down from primary";
+ CBHStatus cbh = _replExecutor.scheduleRemoteCommand(request, remoteStepdownCallback);
+ if (cbh.getStatus() != ErrorCodes::ShutdownInProgress) {
+ fassert(18808, cbh.getStatus());
}
+}
- void ReplicationCoordinatorImpl::_stepDownFinish(
- const ReplicationExecutor::CallbackArgs& cbData) {
+void ReplicationCoordinatorImpl::_heartbeatStepDownStart() {
+ log() << "Stepping down from primary in response to heartbeat";
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish, this, stdx::placeholders::_1));
+}
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- invariant(cbData.txn);
- // TODO Add invariant that we've got global shared or global exclusive lock, when supported
- // by lock manager.
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- _topCoord->stepDownIfPending();
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+void ReplicationCoordinatorImpl::_stepDownFinish(const ReplicationExecutor::CallbackArgs& cbData) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ invariant(cbData.txn);
+ // TODO Add invariant that we've got global shared or global exclusive lock, when supported
+ // by lock manager.
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ _topCoord->stepDownIfPending();
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_inShutdown) {
+ return;
}
- void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_inShutdown) {
- return;
- }
-
- switch (_rsConfigState) {
+ switch (_rsConfigState) {
case kConfigStartingUp:
- LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
- " because still attempting to load local configuration information";
+ LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion()
+ << " because still attempting to load local configuration information";
return;
case kConfigUninitialized:
case kConfigSteady:
- LOG(1) << "Received new config via heartbeat with version " <<
- newConfig.getConfigVersion();
+ LOG(1) << "Received new config via heartbeat with version "
+ << newConfig.getConfigVersion();
break;
case kConfigInitiating:
case kConfigReconfiguring:
case kConfigHBReconfiguring:
- LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
- " because already in the midst of a configuration process";
+ LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion()
+ << " because already in the midst of a configuration process";
return;
default:
- severe() << "Reconfiguration request occurred while _rsConfigState == " <<
- int(_rsConfigState) << "; aborting.";
+ severe() << "Reconfiguration request occurred while _rsConfigState == "
+ << int(_rsConfigState) << "; aborting.";
fassertFailed(18807);
- }
- _setConfigState_inlock(kConfigHBReconfiguring);
- invariant(!_rsConfig.isInitialized() ||
- _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
- if (_freshnessChecker) {
- _freshnessChecker->cancel(&_replExecutor);
- if (_electCmdRunner) {
- _electCmdRunner->cancel(&_replExecutor);
- }
- _replExecutor.onEvent(
- _electionFinishedEvent,
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
- this,
- stdx::placeholders::_1,
- newConfig));
- return;
- }
- _replExecutor.scheduleDBWork(stdx::bind(
- &ReplicationCoordinatorImpl::_heartbeatReconfigStore,
- this,
- stdx::placeholders::_1,
- newConfig));
+ }
+ _setConfigState_inlock(kConfigHBReconfiguring);
+ invariant(!_rsConfig.isInitialized() ||
+ _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
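+    // If an election is in flight, cancel it and resume this reconfig only after
+    // the election-finished event fires.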
+ if (_freshnessChecker) {
+ _freshnessChecker->cancel(&_replExecutor);
+ if (_electCmdRunner) {
+ _electCmdRunner->cancel(&_replExecutor);
+ }
+ _replExecutor.onEvent(
+ _electionFinishedEvent,
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
+ this,
+ stdx::placeholders::_1,
+ newConfig));
+ return;
+ }
+ _replExecutor.scheduleDBWork(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigStore,
+ this,
+ stdx::placeholders::_1,
+ newConfig));
+}
+
+void ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled(
+ const ReplicationExecutor::CallbackArgs& cbData, const ReplicaSetConfig& newConfig) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ fassert(18911, cbData.status);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_inShutdown) {
+ return;
}
- void ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- fassert(18911, cbData.status);
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_inShutdown) {
- return;
- }
-
- _replExecutor.scheduleDBWork(stdx::bind(
- &ReplicationCoordinatorImpl::_heartbeatReconfigStore,
- this,
- stdx::placeholders::_1,
- newConfig));
+ _replExecutor.scheduleDBWork(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigStore,
+ this,
+ stdx::placeholders::_1,
+ newConfig));
+}
+
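If an election is in flight, _scheduleHeartbeatReconfig cancels it and parks the reconfig on _electionFinishedEvent; the handler above then re-checks for cancellation and shutdown before scheduling the store step. The deferral pattern, reduced to a sketch (the lambda and scheduleStore are illustrative stand-ins; the real code uses stdx::bind and scheduleDBWork):

    // Sketch only: park work on an event, re-validate state when it fires.
    exec.onEvent(electionFinishedEvent, [newConfig](const CallbackArgs& cbData) {
        if (cbData.status == ErrorCodes::CallbackCanceled) {
            return;  // executor is shutting down; drop the deferred reconfig
        }
        scheduleStore(newConfig);  // hypothetical stand-in for the store step
    });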
+void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
+ const ReplicationExecutor::CallbackArgs& cbd, const ReplicaSetConfig& newConfig) {
+ if (cbd.status.code() == ErrorCodes::CallbackCanceled) {
+ log() << "The callback to persist the replica set configuration was canceled - "
+              << "the configuration was not persisted but is in use: " << newConfig.toBSON();
+ return;
}
- void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
- const ReplicationExecutor::CallbackArgs& cbd,
- const ReplicaSetConfig& newConfig) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex, stdx::defer_lock);
- if (cbd.status.code() == ErrorCodes::CallbackCanceled) {
- log() << "The callback to persist the replica set configuration was canceled - "
- << "the configuration was not persisted but was used: " << newConfig.toBSON();
+ const StatusWith<int> myIndex =
+ validateConfigForHeartbeatReconfig(_externalState.get(), newConfig);
+
+ if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
+ lk.lock();
+        // If this node is absent in newConfig and was not previously initialized,
+ // return to kConfigUninitialized immediately, rather than storing the config and
+ // transitioning into the RS_REMOVED state. See SERVER-15740.
+ if (!_rsConfig.isInitialized()) {
+ invariant(_rsConfigState == kConfigHBReconfiguring);
+ LOG(1) << "Ignoring new configuration in heartbeat response because we are "
+ "uninitialized and not a member of the new configuration";
+ _setConfigState_inlock(kConfigUninitialized);
return;
}
+ lk.unlock();
+ }
- stdx::unique_lock<stdx::mutex> lk(_mutex, stdx::defer_lock);
-
- const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
- _externalState.get(),
- newConfig);
-
- if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
- lk.lock();
- // If this node absent in newConfig, and this node was not previously initialized,
- // return to kConfigUninitialized immediately, rather than storing the config and
- // transitioning into the RS_REMOVED state. See SERVER-15740.
- if (!_rsConfig.isInitialized()) {
- invariant(_rsConfigState == kConfigHBReconfiguring);
- LOG(1) << "Ignoring new configuration in heartbeat response because we are "
- "uninitialized and not a member of the new configuration";
- _setConfigState_inlock(kConfigUninitialized);
- return;
- }
- lk.unlock();
- }
+ if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
+ warning() << "Not persisting new configuration in heartbeat response to disk because "
+ "it is invalid: " << myIndex.getStatus();
+ } else {
+ Status status = _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON());
- if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
- warning() << "Not persisting new configuration in heartbeat response to disk because "
- "it is invalid: "<< myIndex.getStatus();
- }
- else {
- Status status = _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON());
-
- lk.lock();
- if (!status.isOK()) {
- error() << "Ignoring new configuration in heartbeat response because we failed to"
- " write it to stable storage; " << status;
- invariant(_rsConfigState == kConfigHBReconfiguring);
- if (_rsConfig.isInitialized()) {
- _setConfigState_inlock(kConfigSteady);
- }
- else {
- _setConfigState_inlock(kConfigUninitialized);
- }
- return;
+ lk.lock();
+ if (!status.isOK()) {
+ error() << "Ignoring new configuration in heartbeat response because we failed to"
+ " write it to stable storage; " << status;
+ invariant(_rsConfigState == kConfigHBReconfiguring);
+ if (_rsConfig.isInitialized()) {
+ _setConfigState_inlock(kConfigSteady);
+ } else {
+ _setConfigState_inlock(kConfigUninitialized);
}
-
- lk.unlock();
-
- _externalState->startThreads();
+ return;
}
- const stdx::function<void (const ReplicationExecutor::CallbackArgs&)> reconfigFinishFn(
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex));
+ lk.unlock();
- // Make sure that the reconfigFinishFn doesn't finish until we've reset
- // _heartbeatReconfigThread.
- lk.lock();
- if (_memberState.primary()) {
- // If the primary is receiving a heartbeat reconfig, that strongly suggests
- // that there has been a force reconfiguration. In any event, it might lead
- // to this node stepping down as primary, so we'd better do it with the global
- // lock.
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
- }
- else {
- _replExecutor.scheduleWork(reconfigFinishFn);
- }
+ _externalState->startThreads();
}
- void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicaSetConfig& newConfig,
- StatusWith<int> myIndex) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
+ const stdx::function<void(const ReplicationExecutor::CallbackArgs&)> reconfigFinishFn(
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex));
+
+ // Make sure that the reconfigFinishFn doesn't finish until we've reset
+ // _heartbeatReconfigThread.
+ lk.lock();
+ if (_memberState.primary()) {
+ // If the primary is receiving a heartbeat reconfig, that strongly suggests
+ // that there has been a force reconfiguration. In any event, it might lead
+ // to this node stepping down as primary, so we'd better do it with the global
+ // lock.
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
+ } else {
+ _replExecutor.scheduleWork(reconfigFinishFn);
+ }
+}
+
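The scheduling split at the end of _heartbeatReconfigStore is the safety-critical part: an incoming config can demote a primary, so the finishing work must run under the global exclusive lock to make the step-down atomic with respect to client writes. Just that decision, sketched with hypothetical Executor/Task stand-ins for the real types:

    // Sketch only: Executor and Task are illustrative stand-ins.
    template <typename Executor, typename Task>
    void scheduleReconfigFinish(Executor& exec, bool isPrimary, Task finishFn) {
        if (isPrimary) {
            exec.scheduleWorkWithGlobalExclusiveLock(finishFn);  // step-down safe
        } else {
            exec.scheduleWork(finishFn);  // no demotion possible here
        }
    }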
+void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
+ const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicaSetConfig& newConfig,
+ StatusWith<int> myIndex) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigHBReconfiguring);
- invariant(!_rsConfig.isInitialized() ||
- _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
-
- if (_getMemberState_inlock().primary() && !cbData.txn) {
- // Not having an OperationContext in the CallbackData means we definitely aren't holding
- // the global lock. Since we're primary and this reconfig could cause us to stepdown,
- // reschedule this work with the global exclusive lock so the stepdown is safe.
- // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that
- // we are indeed holding the global lock.
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex));
- return;
- }
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigHBReconfiguring);
+ invariant(!_rsConfig.isInitialized() ||
+ _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
+
+ if (_getMemberState_inlock().primary() && !cbData.txn) {
+ // Not having an OperationContext in the CallbackData means we definitely aren't holding
+        // the global lock. Since we're primary and this reconfig could cause us to step down,
+ // reschedule this work with the global exclusive lock so the stepdown is safe.
+ // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that
+ // we are indeed holding the global lock.
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex));
+ return;
+ }
- if (!myIndex.isOK()) {
- switch (myIndex.getStatus().code()) {
+ if (!myIndex.isOK()) {
+ switch (myIndex.getStatus().code()) {
case ErrorCodes::NodeNotFound:
- log() << "Cannot find self in new replica set configuration; I must be removed; " <<
- myIndex.getStatus();
+ log() << "Cannot find self in new replica set configuration; I must be removed; "
+ << myIndex.getStatus();
break;
case ErrorCodes::DuplicateKey:
error() << "Several entries in new config represent this node; "
- "Removing self until an acceptable configuration arrives; " <<
- myIndex.getStatus();
+ "Removing self until an acceptable configuration arrives; "
+ << myIndex.getStatus();
break;
default:
error() << "Could not validate configuration received from remote node; "
- "Removing self until an acceptable configuration arrives; " <<
- myIndex.getStatus();
+ "Removing self until an acceptable configuration arrives; "
+ << myIndex.getStatus();
break;
- }
- myIndex = StatusWith<int>(-1);
}
- const PostMemberStateUpdateAction action =
- _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
- }
-
- void ReplicationCoordinatorImpl::_trackHeartbeatHandle(const StatusWith<CBHandle>& handle) {
- if (handle.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18912, handle.getStatus());
- _heartbeatHandles.push_back(handle.getValue());
+ myIndex = StatusWith<int>(-1);
}
-
- void ReplicationCoordinatorImpl::_untrackHeartbeatHandle(const CBHandle& handle) {
- const HeartbeatHandles::iterator newEnd = std::remove(
- _heartbeatHandles.begin(),
- _heartbeatHandles.end(),
- handle);
- invariant(newEnd != _heartbeatHandles.end());
- _heartbeatHandles.erase(newEnd, _heartbeatHandles.end());
+ const PostMemberStateUpdateAction action =
+ _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
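Note that a validation failure does not abort the install: _heartbeatReconfigFinish still adopts newConfig and merely normalizes myIndex to -1, so the node treats itself as removed until an acceptable configuration arrives. That normalization in isolation (a sketch; the helper name is hypothetical):

    // Sketch only: collapse any validation failure to 'removed self' (-1).
    StatusWith<int> normalizeSelfIndex(StatusWith<int> myIndex) {
        if (!myIndex.isOK()) {
            return StatusWith<int>(-1);  // NodeNotFound, DuplicateKey, ...
        }
        return myIndex;
    }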
+void ReplicationCoordinatorImpl::_trackHeartbeatHandle(const StatusWith<CBHandle>& handle) {
+ if (handle.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_cancelHeartbeats() {
- std::for_each(_heartbeatHandles.begin(),
- _heartbeatHandles.end(),
- stdx::bind(&ReplicationExecutor::cancel,
- &_replExecutor,
- stdx::placeholders::_1));
- // Heartbeat callbacks will remove themselves from _heartbeatHandles when they execute with
- // CallbackCanceled status, so it's better to leave the handles in the list, for now.
- }
-
- void ReplicationCoordinatorImpl::_startHeartbeats() {
- const Date_t now = _replExecutor.now();
- _seedList.clear();
- for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
- if (i == _selfIndex) {
- continue;
- }
- _scheduleHeartbeatToTarget(_rsConfig.getMemberAt(i).getHostAndPort(), i, now);
- }
+ fassert(18912, handle.getStatus());
+ _heartbeatHandles.push_back(handle.getValue());
+}
+
+void ReplicationCoordinatorImpl::_untrackHeartbeatHandle(const CBHandle& handle) {
+ const HeartbeatHandles::iterator newEnd =
+ std::remove(_heartbeatHandles.begin(), _heartbeatHandles.end(), handle);
+ invariant(newEnd != _heartbeatHandles.end());
+ _heartbeatHandles.erase(newEnd, _heartbeatHandles.end());
+}
+
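_untrackHeartbeatHandle is the standard erase-remove idiom: std::remove compacts the surviving handles to the front and returns the new logical end, then erase drops the tail. The same idiom in a self-contained form (illustrative element type):

    #include <algorithm>
    #include <vector>

    void untrack(std::vector<int>& handles, int h) {
        auto newEnd = std::remove(handles.begin(), handles.end(), h);
        handles.erase(newEnd, handles.end());  // discard the moved-from tail
    }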
+void ReplicationCoordinatorImpl::_cancelHeartbeats() {
+ std::for_each(_heartbeatHandles.begin(),
+ _heartbeatHandles.end(),
+ stdx::bind(&ReplicationExecutor::cancel, &_replExecutor, stdx::placeholders::_1));
+ // Heartbeat callbacks will remove themselves from _heartbeatHandles when they execute with
+ // CallbackCanceled status, so it's better to leave the handles in the list, for now.
+}
+
+void ReplicationCoordinatorImpl::_startHeartbeats() {
+ const Date_t now = _replExecutor.now();
+ _seedList.clear();
+ for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
+ if (i == _selfIndex) {
+ continue;
+ }
+ _scheduleHeartbeatToTarget(_rsConfig.getMemberAt(i).getHostAndPort(), i, now);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp
index 2afcad55859..f992bf8e94b 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp
@@ -47,206 +47,212 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- class ReplCoordHBTest : public ReplCoordTest {
- protected:
- void assertMemberState(MemberState expected, std::string msg = "");
- ReplSetHeartbeatResponse receiveHeartbeatFrom(
- const ReplicaSetConfig& rsConfig,
- int sourceId,
- const HostAndPort& source);
- };
-
- void ReplCoordHBTest::assertMemberState(const MemberState expected, std::string msg) {
- const MemberState actual = getReplCoord()->getMemberState();
- ASSERT(expected == actual) << "Expected coordinator to report state " <<
- expected.toString() << " but found " << actual.toString() << " - " << msg;
- }
+using executor::NetworkInterfaceMock;
- ReplSetHeartbeatResponse ReplCoordHBTest::receiveHeartbeatFrom(
- const ReplicaSetConfig& rsConfig,
- int sourceId,
- const HostAndPort& source) {
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(rsConfig.getConfigVersion());
- hbArgs.setSetName(rsConfig.getReplSetName());
- hbArgs.setSenderHost(source);
- hbArgs.setSenderId(sourceId);
- ASSERT(hbArgs.isInitialized());
-
- ReplSetHeartbeatResponse response;
- ASSERT_OK(getReplCoord()->processHeartbeat(hbArgs, &response));
- return response;
- }
+class ReplCoordHBTest : public ReplCoordTest {
+protected:
+ void assertMemberState(MemberState expected, std::string msg = "");
+ ReplSetHeartbeatResponse receiveHeartbeatFrom(const ReplicaSetConfig& rsConfig,
+ int sourceId,
+ const HostAndPort& source);
+};
- TEST_F(ReplCoordHBTest, JoinExistingReplSet) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- init("mySet");
- addSelf(HostAndPort("h2", 1));
- const Date_t startDate = getNet()->now();
- start();
- enterNetwork();
- assertMemberState(MemberState::RS_STARTUP);
- NetworkInterfaceMock* net = getNet();
- ASSERT_FALSE(net->hasReadyRequests());
- exitNetwork();
- receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
-
- enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
- ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- ASSERT_EQUALS("mySet", hbArgs.getSetName());
- ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_PRIMARY);
- hbResp.noteReplSet();
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- hbResp.setConfig(rsConfig);
- BSONObjBuilder responseBuilder;
- responseBuilder << "ok" << 1;
- hbResp.addToBSON(&responseBuilder, false);
- net->scheduleResponse(noi,
- startDate + Milliseconds(200),
- makeResponseStatus(responseBuilder.obj()));
- assertRunUntil(startDate + Milliseconds(200));
-
- // Because the new config is stored using an out-of-band thread, we need to perform some
- // extra synchronization to let the executor finish the heartbeat reconfig. We know that
- // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
- // other network operations get scheduled during or before the reconfig, though this may
- // cease to be true in the future.
- noi = net->getNextReadyRequest();
-
- assertMemberState(MemberState::RS_STARTUP2);
- OperationContextNoop txn;
- ReplicaSetConfig storedConfig;
- ASSERT_OK(storedConfig.initialize(
- unittest::assertGet(getExternalState()->loadLocalConfigDocument(&txn))));
- ASSERT_OK(storedConfig.validate());
- ASSERT_EQUALS(3, storedConfig.getConfigVersion());
- ASSERT_EQUALS(3, storedConfig.getNumMembers());
- exitNetwork();
- }
+void ReplCoordHBTest::assertMemberState(const MemberState expected, std::string msg) {
+ const MemberState actual = getReplCoord()->getMemberState();
+ ASSERT(expected == actual) << "Expected coordinator to report state " << expected.toString()
+ << " but found " << actual.toString() << " - " << msg;
+}
- TEST_F(ReplCoordHBTest, DoNotJoinReplSetIfNotAMember) {
- // Tests that a node in RS_STARTUP will not transition to RS_REMOVED if it receives a
- // configuration that does not contain it.
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- init("mySet");
- addSelf(HostAndPort("h4", 1));
- const Date_t startDate = getNet()->now();
- start();
- enterNetwork();
- assertMemberState(MemberState::RS_STARTUP, "1");
- NetworkInterfaceMock* net = getNet();
- ASSERT_FALSE(net->hasReadyRequests());
- exitNetwork();
- receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
-
- enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
- ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- ASSERT_EQUALS("mySet", hbArgs.getSetName());
- ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_PRIMARY);
- hbResp.noteReplSet();
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- hbResp.setConfig(rsConfig);
- BSONObjBuilder responseBuilder;
- responseBuilder << "ok" << 1;
- hbResp.addToBSON(&responseBuilder, false);
- net->scheduleResponse(noi,
- startDate + Milliseconds(200),
- makeResponseStatus(responseBuilder.obj()));
- assertRunUntil(startDate + Milliseconds(2200));
-
- // Because the new config is stored using an out-of-band thread, we need to perform some
- // extra synchronization to let the executor finish the heartbeat reconfig. We know that
- // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
- // other network operations get scheduled during or before the reconfig, though this may
- // cease to be true in the future.
- noi = net->getNextReadyRequest();
-
- assertMemberState(MemberState::RS_STARTUP, "2");
- OperationContextNoop txn;
-
- StatusWith<BSONObj> loadedConfig(getExternalState()->loadLocalConfigDocument(&txn));
- ASSERT_NOT_OK(loadedConfig.getStatus()) << loadedConfig.getValue();
- exitNetwork();
- }
+ReplSetHeartbeatResponse ReplCoordHBTest::receiveHeartbeatFrom(const ReplicaSetConfig& rsConfig,
+ int sourceId,
+ const HostAndPort& source) {
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(rsConfig.getConfigVersion());
+ hbArgs.setSetName(rsConfig.getReplSetName());
+ hbArgs.setSenderHost(source);
+ hbArgs.setSenderId(sourceId);
+ ASSERT(hbArgs.isInitialized());
- TEST_F(ReplCoordHBTest, NotYetInitializedConfigStateEarlyReturn) {
- // ensure that if we've yet to receive an initial config, we return NotYetInitialized
- init("mySet");
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(3);
- hbArgs.setSetName("mySet");
- hbArgs.setSenderHost(HostAndPort("h1:1"));
- hbArgs.setSenderId(1);
- ASSERT(hbArgs.isInitialized());
-
- ReplSetHeartbeatResponse response;
- Status status = getReplCoord()->processHeartbeat(hbArgs, &response);
- ASSERT_EQUALS(ErrorCodes::NotYetInitialized, status.code());
- }
+ ReplSetHeartbeatResponse response;
+ ASSERT_OK(getReplCoord()->processHeartbeat(hbArgs, &response));
+ return response;
+}
+
+TEST_F(ReplCoordHBTest, JoinExistingReplSet) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ init("mySet");
+ addSelf(HostAndPort("h2", 1));
+ const Date_t startDate = getNet()->now();
+ start();
+ enterNetwork();
+ assertMemberState(MemberState::RS_STARTUP);
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_FALSE(net->hasReadyRequests());
+ exitNetwork();
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
+
+ enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.noteReplSet();
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ hbResp.setConfig(rsConfig);
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder, false);
+ net->scheduleResponse(
+ noi, startDate + Milliseconds(200), makeResponseStatus(responseBuilder.obj()));
+ assertRunUntil(startDate + Milliseconds(200));
+
+ // Because the new config is stored using an out-of-band thread, we need to perform some
+ // extra synchronization to let the executor finish the heartbeat reconfig. We know that
+ // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
+ // other network operations get scheduled during or before the reconfig, though this may
+ // cease to be true in the future.
+ noi = net->getNextReadyRequest();
+
+ assertMemberState(MemberState::RS_STARTUP2);
+ OperationContextNoop txn;
+ ReplicaSetConfig storedConfig;
+ ASSERT_OK(storedConfig.initialize(
+ unittest::assertGet(getExternalState()->loadLocalConfigDocument(&txn))));
+ ASSERT_OK(storedConfig.validate());
+ ASSERT_EQUALS(3, storedConfig.getConfigVersion());
+ ASSERT_EQUALS(3, storedConfig.getNumMembers());
+ exitNetwork();
+}
+
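As the comment inside the test explains, the second getNextReadyRequest() doubles as a barrier: the out-of-band thread that persists the config finishes by scheduling fresh heartbeats, so waiting for the next network operation proves the reconfig completed before the assertions run. The pattern in miniature, using the same mock-network call:

    // After triggering the reconfig, wait for its trailing heartbeat; only
    // then is it safe to assert on state the reconfig thread mutates.
    noi = net->getNextReadyRequest();  // acts as a completion fence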
+TEST_F(ReplCoordHBTest, DoNotJoinReplSetIfNotAMember) {
+ // Tests that a node in RS_STARTUP will not transition to RS_REMOVED if it receives a
+ // configuration that does not contain it.
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ init("mySet");
+ addSelf(HostAndPort("h4", 1));
+ const Date_t startDate = getNet()->now();
+ start();
+ enterNetwork();
+ assertMemberState(MemberState::RS_STARTUP, "1");
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_FALSE(net->hasReadyRequests());
+ exitNetwork();
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
- TEST_F(ReplCoordHBTest, OnlyUnauthorizedUpCausesRecovering) {
- // Tests that a node that only has auth error heartbeats is recovering
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- // process heartbeat
- enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(
- BSON("ok" << 0.0 <<
- "errmsg" << "unauth'd" <<
- "code" << ErrorCodes::Unauthorized)));
-
- if (request.target != HostAndPort("node2", 12345)
- && request.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
- error() << "Black holing unexpected request to "
- << request.target << ": " << request.cmdObj;
- getNet()->blackHole(noi);
- }
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
- ASSERT_TRUE(getTopoCoord().getMemberState().recovering());
- assertMemberState(MemberState::RS_RECOVERING, "0");
+ enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.noteReplSet();
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ hbResp.setConfig(rsConfig);
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder, false);
+ net->scheduleResponse(
+ noi, startDate + Milliseconds(200), makeResponseStatus(responseBuilder.obj()));
+ assertRunUntil(startDate + Milliseconds(2200));
+
+ // Because the new config is stored using an out-of-band thread, we need to perform some
+ // extra synchronization to let the executor finish the heartbeat reconfig. We know that
+ // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
+ // other network operations get scheduled during or before the reconfig, though this may
+ // cease to be true in the future.
+ noi = net->getNextReadyRequest();
+
+ assertMemberState(MemberState::RS_STARTUP, "2");
+ OperationContextNoop txn;
+
+ StatusWith<BSONObj> loadedConfig(getExternalState()->loadLocalConfigDocument(&txn));
+ ASSERT_NOT_OK(loadedConfig.getStatus()) << loadedConfig.getValue();
+ exitNetwork();
+}
+
+TEST_F(ReplCoordHBTest, NotYetInitializedConfigStateEarlyReturn) {
+    // Ensure that if we've yet to receive an initial config, we return NotYetInitialized.
+ init("mySet");
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(3);
+ hbArgs.setSetName("mySet");
+ hbArgs.setSenderHost(HostAndPort("h1:1"));
+ hbArgs.setSenderId(1);
+ ASSERT(hbArgs.isInitialized());
+
+ ReplSetHeartbeatResponse response;
+ Status status = getReplCoord()->processHeartbeat(hbArgs, &response);
+ ASSERT_EQUALS(ErrorCodes::NotYetInitialized, status.code());
+}
+
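The clang-formatted BSON builders above fragment the config document badly; for reference, each of these tests builds a document of this shape (same BSON/BSON_ARRAY macros, compacted):

    BSONObj config = BSON("_id" << "mySet" << "version" << 3 << "members"
                          << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1")
                                        << BSON("_id" << 2 << "host" << "h2:1")
                                        << BSON("_id" << 3 << "host" << "h3:1")));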
+TEST_F(ReplCoordHBTest, OnlyUnauthorizedUpCausesRecovering) {
+    // Tests that a node that sees only auth-error heartbeats goes into RECOVERING.
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ // process heartbeat
+ enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ getNet()->scheduleResponse(
+ noi,
+ getNet()->now(),
+ makeResponseStatus(BSON("ok" << 0.0 << "errmsg"
+ << "unauth'd"
+ << "code" << ErrorCodes::Unauthorized)));
+
+ if (request.target != HostAndPort("node2", 12345) &&
+ request.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ getNet()->blackHole(noi);
}
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+ ASSERT_TRUE(getTopoCoord().getMemberState().recovering());
+ assertMemberState(MemberState::RS_RECOVERING, "0");
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 4ae6a358e53..c51e8e48929 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -47,206 +47,212 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- class ReplCoordHBV1Test : public ReplCoordTest {
- protected:
- void assertMemberState(MemberState expected, std::string msg = "");
- ReplSetHeartbeatResponse receiveHeartbeatFrom(
- const ReplicaSetConfig& rsConfig,
- int sourceId,
- const HostAndPort& source);
- };
-
- void ReplCoordHBV1Test::assertMemberState(const MemberState expected, std::string msg) {
- const MemberState actual = getReplCoord()->getMemberState();
- ASSERT(expected == actual) << "Expected coordinator to report state " <<
- expected.toString() << " but found " << actual.toString() << " - " << msg;
- }
+using executor::NetworkInterfaceMock;
- ReplSetHeartbeatResponse ReplCoordHBV1Test::receiveHeartbeatFrom(
- const ReplicaSetConfig& rsConfig,
- int sourceId,
- const HostAndPort& source) {
- ReplSetHeartbeatArgsV1 hbArgs;
- hbArgs.setConfigVersion(rsConfig.getConfigVersion());
- hbArgs.setSetName(rsConfig.getReplSetName());
- hbArgs.setSenderHost(source);
- hbArgs.setSenderId(sourceId);
- hbArgs.setTerm(1);
- ASSERT(hbArgs.isInitialized());
-
- ReplSetHeartbeatResponse response;
- ASSERT_OK(getReplCoord()->processHeartbeatV1(hbArgs, &response));
- return response;
- }
+class ReplCoordHBV1Test : public ReplCoordTest {
+protected:
+ void assertMemberState(MemberState expected, std::string msg = "");
+ ReplSetHeartbeatResponse receiveHeartbeatFrom(const ReplicaSetConfig& rsConfig,
+ int sourceId,
+ const HostAndPort& source);
+};
- TEST_F(ReplCoordHBV1Test, JoinExistingReplSet) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1")) <<
- "protocolVersion" << 1));
- init("mySet");
- addSelf(HostAndPort("h2", 1));
- const Date_t startDate = getNet()->now();
- start();
- enterNetwork();
- assertMemberState(MemberState::RS_STARTUP);
- NetworkInterfaceMock* net = getNet();
- ASSERT_FALSE(net->hasReadyRequests());
- exitNetwork();
- receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
-
- enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
- ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- ASSERT_EQUALS("mySet", hbArgs.getSetName());
- ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_PRIMARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- hbResp.setConfig(rsConfig);
- BSONObjBuilder responseBuilder;
- responseBuilder << "ok" << 1;
- hbResp.addToBSON(&responseBuilder, true);
- net->scheduleResponse(noi,
- startDate + Milliseconds(200),
- makeResponseStatus(responseBuilder.obj()));
- assertRunUntil(startDate + Milliseconds(200));
-
- // Because the new config is stored using an out-of-band thread, we need to perform some
- // extra synchronization to let the executor finish the heartbeat reconfig. We know that
- // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
- // other network operations get scheduled during or before the reconfig, though this may
- // cease to be true in the future.
- noi = net->getNextReadyRequest();
-
- assertMemberState(MemberState::RS_STARTUP2);
- OperationContextNoop txn;
- ReplicaSetConfig storedConfig;
- ASSERT_OK(storedConfig.initialize(
- unittest::assertGet(getExternalState()->loadLocalConfigDocument(&txn))));
- ASSERT_OK(storedConfig.validate());
- ASSERT_EQUALS(3, storedConfig.getConfigVersion());
- ASSERT_EQUALS(3, storedConfig.getNumMembers());
- exitNetwork();
- }
+void ReplCoordHBV1Test::assertMemberState(const MemberState expected, std::string msg) {
+ const MemberState actual = getReplCoord()->getMemberState();
+ ASSERT(expected == actual) << "Expected coordinator to report state " << expected.toString()
+ << " but found " << actual.toString() << " - " << msg;
+}
- TEST_F(ReplCoordHBV1Test, DoNotJoinReplSetIfNotAMember) {
- // Tests that a node in RS_STARTUP will not transition to RS_REMOVED if it receives a
- // configuration that does not contain it.
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1")) <<
- "protocolVersion" << 1));
- init("mySet");
- addSelf(HostAndPort("h4", 1));
- const Date_t startDate = getNet()->now();
- start();
- enterNetwork();
- assertMemberState(MemberState::RS_STARTUP, "1");
- NetworkInterfaceMock* net = getNet();
- ASSERT_FALSE(net->hasReadyRequests());
- exitNetwork();
- receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
-
- enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
- ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- ASSERT_EQUALS("mySet", hbArgs.getSetName());
- ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_PRIMARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- hbResp.setConfig(rsConfig);
- BSONObjBuilder responseBuilder;
- responseBuilder << "ok" << 1;
- hbResp.addToBSON(&responseBuilder, true);
- net->scheduleResponse(noi,
- startDate + Milliseconds(200),
- makeResponseStatus(responseBuilder.obj()));
- assertRunUntil(startDate + Milliseconds(2200));
-
- // Because the new config is stored using an out-of-band thread, we need to perform some
- // extra synchronization to let the executor finish the heartbeat reconfig. We know that
- // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
- // other network operations get scheduled during or before the reconfig, though this may
- // cease to be true in the future.
- noi = net->getNextReadyRequest();
-
- assertMemberState(MemberState::RS_STARTUP, "2");
- OperationContextNoop txn;
-
- StatusWith<BSONObj> loadedConfig(getExternalState()->loadLocalConfigDocument(&txn));
- ASSERT_NOT_OK(loadedConfig.getStatus()) << loadedConfig.getValue();
- exitNetwork();
- }
+ReplSetHeartbeatResponse ReplCoordHBV1Test::receiveHeartbeatFrom(const ReplicaSetConfig& rsConfig,
+ int sourceId,
+ const HostAndPort& source) {
+ ReplSetHeartbeatArgsV1 hbArgs;
+ hbArgs.setConfigVersion(rsConfig.getConfigVersion());
+ hbArgs.setSetName(rsConfig.getReplSetName());
+ hbArgs.setSenderHost(source);
+ hbArgs.setSenderId(sourceId);
+ hbArgs.setTerm(1);
+ ASSERT(hbArgs.isInitialized());
- TEST_F(ReplCoordHBV1Test, NotYetInitializedConfigStateEarlyReturn) {
- // ensure that if we've yet to receive an initial config, we return NotYetInitialized
- init("mySet");
- ReplSetHeartbeatArgsV1 hbArgs;
- hbArgs.setConfigVersion(3);
- hbArgs.setSetName("mySet");
- hbArgs.setSenderHost(HostAndPort("h1:1"));
- hbArgs.setSenderId(1);
- hbArgs.setTerm(1);
- ASSERT(hbArgs.isInitialized());
-
- ReplSetHeartbeatResponse response;
- Status status = getReplCoord()->processHeartbeatV1(hbArgs, &response);
- ASSERT_EQUALS(ErrorCodes::NotYetInitialized, status.code());
- }
+ ReplSetHeartbeatResponse response;
+ ASSERT_OK(getReplCoord()->processHeartbeatV1(hbArgs, &response));
+ return response;
+}
+
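The only material difference from the v0 fixture earlier in this diff is the argument type: the v0 helper sets an explicit protocol version on ReplSetHeartbeatArgs, while this v1 helper sets a term on ReplSetHeartbeatArgsV1. Side by side, as used in these fixtures:

    ReplSetHeartbeatArgs v0args;      // protocol-version-0 style heartbeat
    v0args.setProtocolVersion(1);

    ReplSetHeartbeatArgsV1 v1args;    // v1 heartbeat carries a term instead
    v1args.setTerm(1);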
+TEST_F(ReplCoordHBV1Test, JoinExistingReplSet) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))
+ << "protocolVersion" << 1));
+ init("mySet");
+ addSelf(HostAndPort("h2", 1));
+ const Date_t startDate = getNet()->now();
+ start();
+ enterNetwork();
+ assertMemberState(MemberState::RS_STARTUP);
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_FALSE(net->hasReadyRequests());
+ exitNetwork();
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
+
+ enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ hbResp.setConfig(rsConfig);
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder, true);
+ net->scheduleResponse(
+ noi, startDate + Milliseconds(200), makeResponseStatus(responseBuilder.obj()));
+ assertRunUntil(startDate + Milliseconds(200));
+
+ // Because the new config is stored using an out-of-band thread, we need to perform some
+ // extra synchronization to let the executor finish the heartbeat reconfig. We know that
+ // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
+ // other network operations get scheduled during or before the reconfig, though this may
+ // cease to be true in the future.
+ noi = net->getNextReadyRequest();
+
+ assertMemberState(MemberState::RS_STARTUP2);
+ OperationContextNoop txn;
+ ReplicaSetConfig storedConfig;
+ ASSERT_OK(storedConfig.initialize(
+ unittest::assertGet(getExternalState()->loadLocalConfigDocument(&txn))));
+ ASSERT_OK(storedConfig.validate());
+ ASSERT_EQUALS(3, storedConfig.getConfigVersion());
+ ASSERT_EQUALS(3, storedConfig.getNumMembers());
+ exitNetwork();
+}
+
+TEST_F(ReplCoordHBV1Test, DoNotJoinReplSetIfNotAMember) {
+ // Tests that a node in RS_STARTUP will not transition to RS_REMOVED if it receives a
+ // configuration that does not contain it.
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))
+ << "protocolVersion" << 1));
+ init("mySet");
+ addSelf(HostAndPort("h4", 1));
+ const Date_t startDate = getNet()->now();
+ start();
+ enterNetwork();
+ assertMemberState(MemberState::RS_STARTUP, "1");
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_FALSE(net->hasReadyRequests());
+ exitNetwork();
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
- TEST_F(ReplCoordHBV1Test, OnlyUnauthorizedUpCausesRecovering) {
- // Tests that a node that only has auth error heartbeats is recovering
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- // process heartbeat
- enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(
- BSON("ok" << 0.0 <<
- "errmsg" << "unauth'd" <<
- "code" << ErrorCodes::Unauthorized)));
-
- if (request.target != HostAndPort("node2", 12345)
- && request.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
- error() << "Black holing unexpected request to "
- << request.target << ": " << request.cmdObj;
- getNet()->blackHole(noi);
- }
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
- ASSERT_TRUE(getTopoCoord().getMemberState().recovering());
- assertMemberState(MemberState::RS_RECOVERING, "0");
+ enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ hbResp.setConfig(rsConfig);
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder, true);
+ net->scheduleResponse(
+ noi, startDate + Milliseconds(200), makeResponseStatus(responseBuilder.obj()));
+ assertRunUntil(startDate + Milliseconds(2200));
+
+ // Because the new config is stored using an out-of-band thread, we need to perform some
+ // extra synchronization to let the executor finish the heartbeat reconfig. We know that
+ // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
+ // other network operations get scheduled during or before the reconfig, though this may
+ // cease to be true in the future.
+ noi = net->getNextReadyRequest();
+
+ assertMemberState(MemberState::RS_STARTUP, "2");
+ OperationContextNoop txn;
+
+ StatusWith<BSONObj> loadedConfig(getExternalState()->loadLocalConfigDocument(&txn));
+ ASSERT_NOT_OK(loadedConfig.getStatus()) << loadedConfig.getValue();
+ exitNetwork();
+}
+
+TEST_F(ReplCoordHBV1Test, NotYetInitializedConfigStateEarlyReturn) {
+    // Ensure that if we've yet to receive an initial config, we return NotYetInitialized.
+ init("mySet");
+ ReplSetHeartbeatArgsV1 hbArgs;
+ hbArgs.setConfigVersion(3);
+ hbArgs.setSetName("mySet");
+ hbArgs.setSenderHost(HostAndPort("h1:1"));
+ hbArgs.setSenderId(1);
+ hbArgs.setTerm(1);
+ ASSERT(hbArgs.isInitialized());
+
+ ReplSetHeartbeatResponse response;
+ Status status = getReplCoord()->processHeartbeatV1(hbArgs, &response);
+ ASSERT_EQUALS(ErrorCodes::NotYetInitialized, status.code());
+}
+
+TEST_F(ReplCoordHBV1Test, OnlyUnauthorizedUpCausesRecovering) {
+    // Tests that a node that sees only auth-error heartbeats goes into RECOVERING.
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ // process heartbeat
+ enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ getNet()->scheduleResponse(
+ noi,
+ getNet()->now(),
+ makeResponseStatus(BSON("ok" << 0.0 << "errmsg"
+ << "unauth'd"
+ << "code" << ErrorCodes::Unauthorized)));
+
+ if (request.target != HostAndPort("node2", 12345) &&
+ request.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ getNet()->blackHole(noi);
}
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+ ASSERT_TRUE(getTopoCoord().getMemberState().recovering());
+ assertMemberState(MemberState::RS_RECOVERING, "0");
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 5a9eb6ef965..6255e799d67 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -38,7 +38,7 @@
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
-#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
+#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
#include "mongo/executor/network_interface_mock.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/log.h"
@@ -47,485 +47,518 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
- typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
-
- TEST_F(ReplCoordTest, ReconfigBeforeInitialized) {
- // start up but do not initiate
- OperationContextNoop txn;
- init();
- start();
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
-
- ASSERT_EQUALS(ErrorCodes::NotYetInitialized,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigWhileNotPrimary) {
- // start up, become secondary, receive reconfig
- OperationContextNoop txn;
- init();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- ASSERT_EQUALS(ErrorCodes::NotMaster,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigWithUninitializableConfig) {
- // start up, become primary, receive uninitializable config
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 2 <<
- "invalidlyNamedField" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 2 <<
- "host" << "node2:12345" <<
- "arbiterOnly" << true)));
- // ErrorCodes::BadValue should be propagated from ReplicaSetConfig::initialize()
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigWithWrongReplSetName) {
- // start up, become primary, receive config with incorrect replset name
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "notMySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigValidateFails) {
- // start up, become primary, validate fails
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << -3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- *status = replCoord->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- &garbage);
- }
-
- void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345" <<
- "priority" << 3)));
- *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
- }
-
- TEST_F(ReplCoordTest, ReconfigQuorumCheckFails) {
- // start up, become primary, fail during quorum check due to a heartbeat
- // containing a higher config version
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(5);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- }
-
- TEST_F(ReplCoordTest, ReconfigStoreLocalConfigDocumentFails) {
- // start up, become primary, saving the config fails
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- getExternalState()->setStoreLocalConfigDocumentStatus(Status(ErrorCodes::OutOfDiskSpace,
- "The test set this"));
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_EQUALS(ErrorCodes::OutOfDiskSpace, status);
- }
-
- TEST_F(ReplCoordTest, ReconfigWhileReconfiggingFails) {
- // start up, become primary, reconfig, then before that reconfig concludes, reconfig again
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- // first reconfig
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
- getNet()->enterNetwork();
- getNet()->blackHole(getNet()->getNextReadyRequest());
- getNet()->exitNetwork();
-
- // second reconfig
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
-
- shutdown();
- reconfigThread.join();
- }
-
- TEST_F(ReplCoordTest, ReconfigWhileInitializingFails) {
- // start up, initiate, then before that initiate concludes, reconfig
- OperationContextNoop txn;
- init();
- start(HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
-
- // initiate
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread initateThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
- getNet()->enterNetwork();
- getNet()->blackHole(getNet()->getNextReadyRequest());
- getNet()->exitNetwork();
-
- // reconfig
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
-
- shutdown();
- initateThread.join();
- }
-
- TEST_F(ReplCoordTest, ReconfigSuccessful) {
- // start up, become primary, reconfig successfully
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
- }
-
- TEST_F(ReplCoordTest, ReconfigDuringHBReconfigFails) {
- // start up, become primary, receive reconfig via heartbeat, then a second one
- // from reconfig
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100,0), 0));
- simulateSuccessfulElection();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // set hbreconfig to hang while in progress
- getExternalState()->setStoreLocalConfigDocumentToHang(true);
-
- // hb reconfig
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ReplSetHeartbeatResponse hbResp2;
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345"))));
- hbResp2.setConfig(config);
- hbResp2.setConfigVersion(3);
- hbResp2.setSetName("mySet");
- hbResp2.setState(MemberState::RS_SECONDARY);
- BSONObjBuilder respObj2;
- respObj2 << "ok" << 1;
- hbResp2.addToBSON(&respObj2, false);
- net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
-
- // reconfig
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = config.toBSON();
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- getExternalState()->setStoreLocalConfigDocumentToHang(false);
- }
-
- TEST_F(ReplCoordTest, HBReconfigDuringReconfigFails) {
- // start up, become primary, reconfig, while reconfigging receive reconfig via heartbeat
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100,0), 0));
- simulateSuccessfulElection();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // start reconfigThread
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- // wait for reconfigThread to create network requests to ensure the replication coordinator
- // is in state kConfigReconfiguring
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- net->blackHole(net->getNextReadyRequest());
-
- // schedule hb reconfig
- net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- ReplSetHeartbeatResponse hbResp;
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "mySet" <<
- "version" << 4 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345"))));
- hbResp.setConfig(config);
- hbResp.setConfigVersion(4);
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- BSONObjBuilder respObj2;
- respObj2 << "ok" << 1;
- hbResp.addToBSON(&respObj2, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj2.obj()));
-
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(1));
- startCapturingLogMessages();
- // execute hb reconfig, which should fail with a log message; confirmed at end of test
- net->runReadyNetworkOperations();
- // respond to reconfig's quorum check so that we can join that thread and exit cleanly
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("because already in the midst of a configuration process"));
- shutdown();
- reconfigThread.join();
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
- }
-
- TEST_F(ReplCoordTest, ForceReconfigWhileNotPrimarySuccessful) {
- // start up, become a secondary, receive a forced reconfig
- OperationContextNoop txn;
- init();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
-
- // fail before forced
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
- ASSERT_EQUALS(ErrorCodes::NotMaster,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- // forced should succeed
- args.force = true;
- ASSERT_OK(getReplCoord()->processReplSetReconfig(&txn, args, &result));
- getReplCoord()->processReplSetGetConfig(&result);
-
- // ensure forced reconfig results in a random larger version
- ASSERT_GREATER_THAN(result.obj()["config"].Obj()["version"].numberInt(), 3);
- }
-
-} // anonymous namespace
-} // namespace repl
-} // namespace mongo
+using executor::NetworkInterfaceMock;
+typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
+
+TEST_F(ReplCoordTest, ReconfigBeforeInitialized) {
+ // start up but do not initiate
+ OperationContextNoop txn;
+ init();
+ start();
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+
+ ASSERT_EQUALS(ErrorCodes::NotYetInitialized,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigWhileNotPrimary) {
+ // start up, become secondary, receive reconfig
+ OperationContextNoop txn;
+ init();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ ASSERT_EQUALS(ErrorCodes::NotMaster,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigWithUninitializableConfig) {
+ // start up, become primary, receive uninitializable config
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 2 << "invalidlyNamedField" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "arbiterOnly" << true)));
+    // The parse failure from ReplicaSetConfig::initialize() is surfaced as
+    // ErrorCodes::InvalidReplicaSetConfig
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigWithWrongReplSetName) {
+ // start up, become primary, receive config with incorrect replset name
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
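+    // The config below names "notMySet", which cannot match the running set.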
+ args.newConfigObj = BSON("_id"
+ << "notMySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigValidateFails) {
+ // start up, become primary, validate fails
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
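+    // The negative version below fails validation against the current config.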
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << -3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
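+// Runs on a worker thread: processReplSetInitiate blocks on the quorum check,
+// so the test drives the mock network while this helper waits; the outcome is
+// reported through the Status out-parameter.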
+void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ *status =
+ replCoord->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ &garbage);
+}
+
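+// Worker-thread helper mirroring doReplSetInitiate: submits a version-3 config
+// that also raises node2's priority, reporting the result through *status.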
+void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "priority" << 3)));
+ *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, ReconfigQuorumCheckFails) {
+ // start up, become primary, fail during quorum check due to a heartbeat
+ // containing a higher config version
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
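+    // Reply with config version 5, newer than the proposed version 3, so the
+    // quorum check must reject the reconfig as incompatible.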
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(5);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+}
+
+TEST_F(ReplCoordTest, ReconfigStoreLocalConfigDocumentFails) {
+ // start up, become primary, saving the config fails
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
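+    // Make the mocked external state fail the write of the new config document.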
+ getExternalState()->setStoreLocalConfigDocumentStatus(
+ Status(ErrorCodes::OutOfDiskSpace, "The test set this"));
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_EQUALS(ErrorCodes::OutOfDiskSpace, status);
+}
+
+TEST_F(ReplCoordTest, ReconfigWhileReconfiggingFails) {
+ // start up, become primary, reconfig, then before that reconfig concludes, reconfig again
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ // first reconfig
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+ getNet()->enterNetwork();
+ getNet()->blackHole(getNet()->getNextReadyRequest());
+ getNet()->exitNetwork();
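+    // The black-holed heartbeat leaves the first reconfig stuck in its quorum
+    // check, so the coordinator is still mid-reconfig when the second one runs.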
+
+ // second reconfig
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+
+ shutdown();
+ reconfigThread.join();
+}
+
+TEST_F(ReplCoordTest, ReconfigWhileInitializingFails) {
+ // start up, initiate, then before that initiate concludes, reconfig
+ OperationContextNoop txn;
+ init();
+ start(HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+
+ // initiate
+ Status status(ErrorCodes::InternalError, "Not Set");
+    stdx::thread initiateThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
+ getNet()->enterNetwork();
+ getNet()->blackHole(getNet()->getNextReadyRequest());
+ getNet()->exitNetwork();
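+    // With the initiate's heartbeat black-holed, the coordinator is still
+    // initializing when the reconfig arrives.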
+
+ // reconfig
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+
+ shutdown();
+    initiateThread.join();
+}
+
+TEST_F(ReplCoordTest, ReconfigSuccessful) {
+ // start up, become primary, reconfig successfully
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
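+    // Answer the quorum-check heartbeat with a matching config version so the
+    // reconfig can run to completion.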
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+}
+
+TEST_F(ReplCoordTest, ReconfigDuringHBReconfigFails) {
+    // start up, become primary, receive a new config via heartbeat, then fail a
+    // second reconfig issued through the reconfig command
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+    // make the heartbeat-driven reconfig hang while storing the new config
+ getExternalState()->setStoreLocalConfigDocumentToHang(true);
+
+ // hb reconfig
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ReplSetHeartbeatResponse hbResp2;
+ ReplicaSetConfig config;
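+    // Build a version-3 config to deliver via heartbeat; storing it hangs (see
+    // above), leaving the heartbeat-driven reconfig in progress.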
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))));
+ hbResp2.setConfig(config);
+ hbResp2.setConfigVersion(3);
+ hbResp2.setSetName("mySet");
+ hbResp2.setState(MemberState::RS_SECONDARY);
+ BSONObjBuilder respObj2;
+ respObj2 << "ok" << 1;
+ hbResp2.addToBSON(&respObj2, false);
+ net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+
+ // reconfig
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = config.toBSON();
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ getExternalState()->setStoreLocalConfigDocumentToHang(false);
+}
+
+TEST_F(ReplCoordTest, HBReconfigDuringReconfigFails) {
+ // start up, become primary, reconfig, while reconfigging receive reconfig via heartbeat
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulElection();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // start reconfigThread
+ Status status(ErrorCodes::InternalError, "Not Set");
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ // wait for reconfigThread to create network requests to ensure the replication coordinator
+ // is in state kConfigReconfiguring
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ net->blackHole(net->getNextReadyRequest());
+
+ // schedule hb reconfig
+ net->runUntil(net->now() + Seconds(10)); // run until we've sent a heartbeat request
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ ReplSetHeartbeatResponse hbResp;
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 4 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))));
+ hbResp.setConfig(config);
+ hbResp.setConfigVersion(4);
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ BSONObjBuilder respObj2;
+ respObj2 << "ok" << 1;
+ hbResp.addToBSON(&respObj2, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj2.obj()));
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(1));
+ startCapturingLogMessages();
+ // execute hb reconfig, which should fail with a log message; confirmed at end of test
+ net->runReadyNetworkOperations();
+    // exiting the network and shutting down below unblocks the reconfig's quorum
+    // check so that we can join that thread and exit cleanly
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("because already in the midst of a configuration process"));
+ shutdown();
+ reconfigThread.join();
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+}
+
+TEST_F(ReplCoordTest, ForceReconfigWhileNotPrimarySuccessful) {
+ // start up, become a secondary, receive a forced reconfig
+ OperationContextNoop txn;
+ init();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+
+ // fail before forced
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+ ASSERT_EQUALS(ErrorCodes::NotMaster,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ // forced should succeed
+ args.force = true;
+ ASSERT_OK(getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ getReplCoord()->processReplSetGetConfig(&result);
+
+    // a forced reconfig bumps the version by a random amount; verify it exceeds 3
+ ASSERT_GREATER_THAN(result.obj()["config"].Obj()["version"].numberInt(), 3);
+}
+
+} // anonymous namespace
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index b625f45fe07..452c07519e1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -46,7 +46,7 @@
#include "mongo/db/repl/repl_set_heartbeat_args.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replica_set_config.h"
-#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
+#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
@@ -67,2039 +67,2141 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
- typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
- Status kInterruptedStatus(ErrorCodes::Interrupted, "operation was interrupted");
+using executor::NetworkInterfaceMock;
+typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
+Status kInterruptedStatus(ErrorCodes::Interrupted, "operation was interrupted");
+
+// Helper class to wrap Timestamp as an OpTime with term 0.
+struct OpTimeWithTermZero {
+ OpTimeWithTermZero(unsigned int sec, unsigned int i) : timestamp(sec, i) {}
+ operator OpTime() const {
+ return OpTime(timestamp, 0);
+ }
+
+ Timestamp timestamp;
+};
+
+TEST_F(ReplCoordTest, StartupWithValidLocalConfig) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+}
+
+TEST_F(ReplCoordTest, StartupWithConfigMissingSelf) {
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:54321"))),
+ HostAndPort("node3", 12345));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("NodeNotFound"));
+}
+
+TEST_F(ReplCoordTest, StartupWithLocalConfigSetNameMismatch) {
+ init("mySet");
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "notMySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("reports set name of notMySet,"));
+}
+
+TEST_F(ReplCoordTest, StartupWithNoLocalConfig) {
+ startCapturingLogMessages();
+ start();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(2, countLogLinesContaining("Did not find local "));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWithEmptyConfig) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ BSONObjBuilder result;
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateSucceedsWithOneNodeConfig) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ // Starting uninitialized, show that we can perform the initiate behavior.
+ BSONObjBuilder result1;
+ ASSERT_OK(
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+
+ // Show that initiate fails after it has already succeeded.
+ BSONObjBuilder result2;
+ ASSERT_EQUALS(
+ ErrorCodes::AlreadyInitialized,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result2));
+
+ // Still in repl set mode, even after failed reinitiate.
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, InitiateSucceedsAfterFailing) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ BSONObjBuilder result;
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ // Having failed to initiate once, show that we can now initiate.
+ BSONObjBuilder result1;
+ ASSERT_OK(
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, InitiateFailsIfAlreadyInitialized) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+ BSONObjBuilder result;
+ ASSERT_EQUALS(
+ ErrorCodes::AlreadyInitialized,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ &result));
+}
+
+TEST_F(ReplCoordTest, InitiateFailsIfSelfMissing) {
+ OperationContextNoop txn;
+ BSONObjBuilder result;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(
+ ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node4"))),
+ &result));
+}
+
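+// Worker-thread initiate helper for the quorum tests below; the result comes
+// back through the Status out-parameter.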
+void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ *status =
+ replCoord->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345")
+ << BSON("_id" << 1 << "host"
+ << "node2:54321"))),
+ &garbage);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsIfQuorumNotMet) {
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("mySet");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(1);
+ hbArgs.setCheckEmpty(true);
+ hbArgs.setSenderHost(HostAndPort("node1", 12345));
+ hbArgs.setSenderId(0);
+
+ Status status(ErrorCodes::InternalError, "Not set");
+ stdx::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
+ const Date_t startDate = getNet()->now();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
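+    // Fail the lone heartbeat: with node2 unreachable the quorum cannot be met,
+    // so initiate reports NodeNotFound.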
+ getNet()->scheduleResponse(
+ noi, startDate + Milliseconds(10), ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
+ getNet()->runUntil(startDate + Milliseconds(10));
+ getNet()->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), getNet()->now());
+ prsiThread.join();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiatePassesIfQuorumMet) {
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("mySet");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(1);
+ hbArgs.setCheckEmpty(true);
+ hbArgs.setSenderHost(HostAndPort("node1", 12345));
+ hbArgs.setSenderId(0);
+
+ Status status(ErrorCodes::InternalError, "Not set");
+ stdx::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
+ const Date_t startDate = getNet()->now();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setConfigVersion(0);
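+    // A well-formed heartbeat response from node2 satisfies the quorum check.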
+ getNet()->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(hbResp.toBSON(false), Milliseconds(8))));
+ getNet()->runUntil(startDate + Milliseconds(10));
+ getNet()->exitNetwork();
+ ASSERT_EQUALS(startDate + Milliseconds(10), getNet()->now());
+ prsiThread.join();
+ ASSERT_OK(status);
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWithSetNameMismatch) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ BSONObjBuilder result1;
+ ASSERT_EQUALS(
+ ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "wrongSet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWithoutReplSetFlag) {
+ OperationContextNoop txn;
+ init("");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ BSONObjBuilder result1;
+ ASSERT_EQUALS(
+ ErrorCodes::NoReplicationEnabled,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWhileStoringLocalConfigDocument) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ BSONObjBuilder result1;
+ getExternalState()->setStoreLocalConfigDocumentStatus(
+ Status(ErrorCodes::OutOfDiskSpace, "The test set this"));
+ ASSERT_EQUALS(
+ ErrorCodes::OutOfDiskSpace,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, CheckReplEnabledForCommandNotRepl) {
+ // pass in settings to avoid having a replSet
+ ReplSettings settings;
+ init(settings);
+ start();
+
+ // check status NoReplicationEnabled and empty result
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, checkReplEnabledForCommandConfigSvr) {
+ ReplSettings settings;
+ serverGlobalParams.configsvr = true;
+ init(settings);
+ start();
+
+    // check status NoReplicationEnabled and result mentions configsvr
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
+ ASSERT_EQUALS(result.obj()["info"].String(), "configsvr");
+ serverGlobalParams.configsvr = false;
+}
+
+TEST_F(ReplCoordTest, checkReplEnabledForCommandNoConfig) {
+ start();
+
+ // check status NotYetInitialized and result mentions rs.initiate
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, ErrorCodes::NotYetInitialized);
+ ASSERT_TRUE(result.obj()["info"].String().find("rs.initiate") != std::string::npos);
+}
+
+TEST_F(ReplCoordTest, checkReplEnabledForCommandWorking) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ // check status OK and result is empty
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, Status::OK());
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, BasicRBIDUsage) {
+ start();
+ BSONObjBuilder result;
+ getReplCoord()->processReplSetGetRBID(&result);
+ long long initialValue = result.obj()["rbid"].Int();
+ getReplCoord()->incrementRollbackID();
+
+ BSONObjBuilder result2;
+ getReplCoord()->processReplSetGetRBID(&result2);
+ long long incrementedValue = result2.obj()["rbid"].Int();
+ ASSERT_EQUALS(incrementedValue, initialValue + 1);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationNoReplEnabled) {
+ init("");
+ OperationContextNoop txn;
+ OpTimeWithTermZero time(100, 1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 2;
+
+ // Because we didn't set ReplSettings.replSet, it will think we're a standalone so
+ // awaitReplication will always work.
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationMasterSlaveMajorityBaseCase) {
+ ReplSettings settings;
+ settings.master = true;
+ init(settings);
+ OperationContextNoop txn;
+ OpTimeWithTermZero time(100, 1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 2;
+
+
+ writeConcern.wNumNodes = 0;
+ writeConcern.wMode = WriteConcernOptions::kMajority;
+ // w:majority always works on master/slave
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReplSetBaseCases) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+
+ OperationContextNoop txn;
+ OpTimeWithTermZero time(100, 1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 0; // Waiting for 0 nodes always works
+ writeConcern.wMode = "";
+
+ // Should fail when not primary
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
+
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesNonBlocking) {
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2) << BSON("host"
+ << "node4:12345"
+ << "_id" << 3))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ // 1 node waiting for time 1
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ getReplCoord()->setMyLastOptime(time1);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 2 nodes waiting for time1
+ writeConcern.wNumNodes = 2;
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 2 nodes waiting for time2
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ getReplCoord()->setMyLastOptime(time2);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time2));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 3 nodes waiting for time2
+ writeConcern.wNumNodes = 3;
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationNamedModesNonBlocking) {
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA1"))
+ << BSON("_id" << 1 << "host"
+ << "node1"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA2"))
+ << BSON("_id" << 2 << "host"
+ << "node2"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA3"))
+ << BSON("_id" << 3 << "host"
+ << "node3"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU1"))
+ << BSON("_id" << 4 << "host"
+ << "node4"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU2"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("multiDC" << BSON("dc" << 2) << "multiDCAndRack"
+ << BSON("dc" << 2 << "rack" << 3)))),
+ HostAndPort("node0"));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
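+    // Tags above: nodes 0-2 are in dc NA, nodes 3-4 in dc EU. "multiDC" needs
+    // the write on two distinct dc values; "multiDCAndRack" needs two dcs and
+    // three distinct racks.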
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ // Test invalid write concern
+ WriteConcernOptions invalidWriteConcern;
+ invalidWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ invalidWriteConcern.wMode = "fakemode";
+
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time1, invalidWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern, statusAndDur.status);
+
+
+ // Set up valid write concerns for the rest of the test
+ WriteConcernOptions majorityWriteConcern;
+ majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
+
+ WriteConcernOptions multiDCWriteConcern;
+ multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ multiDCWriteConcern.wMode = "multiDC";
+
+ WriteConcernOptions multiRackWriteConcern;
+ multiRackWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ multiRackWriteConcern.wMode = "multiDCAndRack";
+
+
+ // Nothing satisfied
+ getReplCoord()->setMyLastOptime(time1);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+
+ // Majority satisfied but not either custom mode
+ getReplCoord()->setLastOptime_forTest(2, 1, time1);
+ getReplCoord()->setLastOptime_forTest(2, 2, time1);
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+
+ // All modes satisfied
+ getReplCoord()->setLastOptime_forTest(2, 3, time1);
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // multiDC satisfied but not majority or multiRack
+ getReplCoord()->setMyLastOptime(time2);
+ getReplCoord()->setLastOptime_forTest(2, 3, time2);
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, majorityWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiDCWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiRackWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+}
- // Helper class to wrap Timestamp as an OpTime with term 0.
- struct OpTimeWithTermZero {
- OpTimeWithTermZero(unsigned int sec, unsigned int i) : timestamp(sec, i) { }
- operator OpTime() const { return OpTime(timestamp, 0); }
-
- Timestamp timestamp;
- };
-
- TEST_F(ReplCoordTest, StartupWithValidLocalConfig) {
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
- }
-
- TEST_F(ReplCoordTest, StartupWithConfigMissingSelf) {
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:54321"))),
- HostAndPort("node3", 12345));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("NodeNotFound"));
- }
-
- TEST_F(ReplCoordTest, StartupWithLocalConfigSetNameMismatch) {
- init("mySet");
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "notMySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("reports set name of notMySet,"));
- }
-
- TEST_F(ReplCoordTest, StartupWithNoLocalConfig) {
- startCapturingLogMessages();
- start();
- stopCapturingLogMessages();
- ASSERT_EQUALS(2, countLogLinesContaining("Did not find local "));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiateFailsWithEmptyConfig) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- BSONObjBuilder result;
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiateSucceedsWithOneNodeConfig) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- // Starting uninitialized, show that we can perform the initiate behavior.
- BSONObjBuilder result1;
- ASSERT_OK(getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
-
- // Show that initiate fails after it has already succeeded.
- BSONObjBuilder result2;
- ASSERT_EQUALS(ErrorCodes::AlreadyInitialized,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result2));
-
- // Still in repl set mode, even after failed reinitiate.
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- }
-
- TEST_F(ReplCoordTest, InitiateSucceedsAfterFailing) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- BSONObjBuilder result;
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- // Having failed to initiate once, show that we can now initiate.
- BSONObjBuilder result1;
- ASSERT_OK(getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- }
-
- TEST_F(ReplCoordTest, InitiateFailsIfAlreadyInitialized) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
- BSONObjBuilder result;
- ASSERT_EQUALS(ErrorCodes::AlreadyInitialized,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345"))),
- &result));
- }
-
- TEST_F(ReplCoordTest, InitiateFailsIfSelfMissing) {
- OperationContextNoop txn;
- BSONObjBuilder result;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node4"))),
- &result));
- }
-
- void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- *status = replCoord->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345") <<
- BSON("_id" << 1 << "host" << "node2:54321"))),
- &garbage);
- }
-
- TEST_F(ReplCoordTest, InitiateFailsIfQuorumNotMet) {
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName("mySet");
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(1);
- hbArgs.setCheckEmpty(true);
- hbArgs.setSenderHost(HostAndPort("node1", 12345));
- hbArgs.setSenderId(0);
-
- Status status(ErrorCodes::InternalError, "Not set");
- stdx::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
- const Date_t startDate = getNet()->now();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
- getNet()->scheduleResponse(noi, startDate + Milliseconds(10),
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- getNet()->runUntil(startDate + Milliseconds(10));
- getNet()->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), getNet()->now());
- prsiThread.join();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiatePassesIfQuorumMet) {
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName("mySet");
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(1);
- hbArgs.setCheckEmpty(true);
- hbArgs.setSenderHost(HostAndPort("node1", 12345));
- hbArgs.setSenderId(0);
-
- Status status(ErrorCodes::InternalError, "Not set");
- stdx::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
- const Date_t startDate = getNet()->now();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
- ReplSetHeartbeatResponse hbResp;
- hbResp.setConfigVersion(0);
- getNet()->scheduleResponse(
- noi,
- startDate + Milliseconds(10),
- ResponseStatus(RemoteCommandResponse(hbResp.toBSON(false), Milliseconds(8))));
- getNet()->runUntil(startDate + Milliseconds(10));
- getNet()->exitNetwork();
- ASSERT_EQUALS(startDate + Milliseconds(10), getNet()->now());
- prsiThread.join();
- ASSERT_OK(status);
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- }
-
- TEST_F(ReplCoordTest, InitiateFailsWithSetNameMismatch) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- BSONObjBuilder result1;
- ASSERT_EQUALS(
- ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "wrongSet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiateFailsWithoutReplSetFlag) {
- OperationContextNoop txn;
- init("");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- BSONObjBuilder result1;
- ASSERT_EQUALS(
- ErrorCodes::NoReplicationEnabled,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiateFailsWhileStoringLocalConfigDocument) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- BSONObjBuilder result1;
- getExternalState()->setStoreLocalConfigDocumentStatus(Status(ErrorCodes::OutOfDiskSpace,
- "The test set this"));
- ASSERT_EQUALS(
- ErrorCodes::OutOfDiskSpace,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, CheckReplEnabledForCommandNotRepl) {
- // pass in settings to avoid having a replSet
- ReplSettings settings;
- init(settings);
- start();
-
- // check status NoReplicationEnabled and empty result
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, checkReplEnabledForCommandConfigSvr) {
- ReplSettings settings;
- serverGlobalParams.configsvr = true;
- init(settings);
- start();
-
- // check status NoReplicationEnabled and result mentions configsrv
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
- ASSERT_EQUALS(result.obj()["info"].String(), "configsvr");
- serverGlobalParams.configsvr = false;
- }
-
- TEST_F(ReplCoordTest, checkReplEnabledForCommandNoConfig) {
- start();
-
- // check status NotYetInitialized and result mentions rs.initiate
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, ErrorCodes::NotYetInitialized);
- ASSERT_TRUE(result.obj()["info"].String().find("rs.initiate") != std::string::npos);
- }
-
- TEST_F(ReplCoordTest, checkReplEnabledForCommandWorking) {
- assertStartSuccess(BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" <<
- "_id" << 0 ))),
- HostAndPort("node1", 12345));
-
- // check status OK and result is empty
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, Status::OK());
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, BasicRBIDUsage) {
- start();
- BSONObjBuilder result;
- getReplCoord()->processReplSetGetRBID(&result);
- long long initialValue = result.obj()["rbid"].Int();
- getReplCoord()->incrementRollbackID();
-
- BSONObjBuilder result2;
- getReplCoord()->processReplSetGetRBID(&result2);
- long long incrementedValue = result2.obj()["rbid"].Int();
- ASSERT_EQUALS(incrementedValue, initialValue + 1);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationNoReplEnabled) {
- init("");
- OperationContextNoop txn;
- OpTimeWithTermZero time(100, 1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 2;
-
-        // Because we didn't set ReplSettings.replSet, it will think we're a standalone node,
-        // so awaitReplication will always work.
- ReplicationCoordinator::StatusAndDuration statusAndDur =
- getReplCoord()->awaitReplication(&txn, time, writeConcern);
- ASSERT_OK(statusAndDur.status);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationMasterSlaveMajorityBaseCase) {
- ReplSettings settings;
- settings.master = true;
- init(settings);
- OperationContextNoop txn;
- OpTimeWithTermZero time(100, 1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
-
-        writeConcern.wNumNodes = 0;
- writeConcern.wMode = WriteConcernOptions::kMajority;
- // w:majority always works on master/slave
- ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(
- &txn, time, writeConcern);
- ASSERT_OK(statusAndDur.status);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationReplSetBaseCases) {
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
-
- OperationContextNoop txn;
- OpTimeWithTermZero time(100, 1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 0; // Waiting for 0 nodes always works
- writeConcern.wMode = "";
-
- // Should fail when not primary
- ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(
- &txn, time, writeConcern);
- ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
-
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time, writeConcern);
- ASSERT_OK(statusAndDur.status);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesNonBlocking) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 1;
-
- // 1 node waiting for time 1
- ReplicationCoordinator::StatusAndDuration statusAndDur =
- getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- getReplCoord()->setMyLastOptime(time1);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_OK(statusAndDur.status);
-
- // 2 nodes waiting for time1
- writeConcern.wNumNodes = 2;
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_OK(statusAndDur.status);
-
- // 2 nodes waiting for time2
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- getReplCoord()->setMyLastOptime(time2);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time2));
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_OK(statusAndDur.status);
-
- // 3 nodes waiting for time2
- writeConcern.wNumNodes = 3;
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_OK(statusAndDur.status);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationNamedModesNonBlocking) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "node0" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA1")) <<
- BSON("_id" << 1 <<
- "host" << "node1" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA2")) <<
- BSON("_id" << 2 <<
- "host" << "node2" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA3")) <<
- BSON("_id" << 3 <<
- "host" << "node3" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU1")) <<
- BSON("_id" << 4 <<
- "host" << "node4" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU2"))) <<
- "settings" << BSON("getLastErrorModes" <<
- BSON("multiDC" << BSON("dc" << 2) <<
- "multiDCAndRack" << BSON("dc" << 2 << "rack" << 3)))),
- HostAndPort("node0"));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- // Test invalid write concern
- WriteConcernOptions invalidWriteConcern;
- invalidWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- invalidWriteConcern.wMode = "fakemode";
-
- ReplicationCoordinator::StatusAndDuration statusAndDur =
- getReplCoord()->awaitReplication(&txn, time1, invalidWriteConcern);
- ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern, statusAndDur.status);
-
-
- // Set up valid write concerns for the rest of the test
- WriteConcernOptions majorityWriteConcern;
- majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
-
- WriteConcernOptions multiDCWriteConcern;
- multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- multiDCWriteConcern.wMode = "multiDC";
-
- WriteConcernOptions multiRackWriteConcern;
- multiRackWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- multiRackWriteConcern.wMode = "multiDCAndRack";
-
-
- // Nothing satisfied
- getReplCoord()->setMyLastOptime(time1);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
-
- // Majority satisfied but not either custom mode
- getReplCoord()->setLastOptime_forTest(2, 1, time1);
- getReplCoord()->setLastOptime_forTest(2, 2, time1);
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
-
- // All modes satisfied
- getReplCoord()->setLastOptime_forTest(2, 3, time1);
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
- ASSERT_OK(statusAndDur.status);
-
- // multiDC satisfied but not majority or multiRack
- getReplCoord()->setMyLastOptime(time2);
- getReplCoord()->setLastOptime_forTest(2, 3, time2);
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, majorityWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiDCWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiRackWriteConcern);
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- }
-
- /**
- * Used to wait for replication in a separate thread without blocking execution of the test.
- * To use, set the optime and write concern to be passed to awaitReplication and then call
- * start(), which will spawn a thread that calls awaitReplication. No calls may be made
- * on the ReplicationAwaiter instance between calling start and getResult(). After returning
- * from getResult(), you can call reset() to allow the awaiter to be reused for another
- * awaitReplication call.
- */
- class ReplicationAwaiter {
- public:
-
- ReplicationAwaiter(ReplicationCoordinatorImpl* replCoord, OperationContext* txn) :
- _replCoord(replCoord), _finished(false),
- _result(ReplicationCoordinator::StatusAndDuration(
- Status::OK(), Milliseconds(0))) {}
-
- void setOpTime(const OpTime& ot) {
- _optime = ot;
- }
-
- void setWriteConcern(const WriteConcernOptions& wc) {
- _writeConcern = wc;
- }
-
- // may block
- ReplicationCoordinator::StatusAndDuration getResult() {
- _thread->join();
- ASSERT(_finished);
- return _result;
- }
-
- void start(OperationContext* txn) {
- ASSERT(!_finished);
- _thread.reset(new stdx::thread(stdx::bind(&ReplicationAwaiter::_awaitReplication,
- this,
- txn)));
- }
-
- void reset() {
- ASSERT(_finished);
- _finished = false;
- _result = ReplicationCoordinator::StatusAndDuration(
- Status::OK(), Milliseconds(0));
- }
-
- private:
-
- void _awaitReplication(OperationContext* txn) {
- _result = _replCoord->awaitReplication(txn, _optime, _writeConcern);
- _finished = true;
- }
-
- ReplicationCoordinatorImpl* _replCoord;
- bool _finished;
- OpTime _optime;
- WriteConcernOptions _writeConcern;
- ReplicationCoordinator::StatusAndDuration _result;
- std::unique_ptr<stdx::thread> _thread;
- };
-
- TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesBlocking) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time1
- awaiter.setOpTime(time1);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time2));
- statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
-
- // 3 nodes waiting for time2
- writeConcern.wNumNodes = 3;
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
- statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationTimeout) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = 50;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationShutdown) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
- shutdown();
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationStepDown) {
- // Test that a thread blocked in awaitReplication will be woken up and return NotMaster
- // if the node steps down while it is waiting.
- OperationContextReplMock txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
- getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationInterrupt) {
- // Tests that a thread blocked in awaitReplication can be killed by a killOp operation
- const unsigned int opID = 100;
- OperationContextReplMock txn{opID};
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "node1") <<
- BSON("_id" << 1 << "host" << "node2") <<
- BSON("_id" << 2 << "host" << "node3"))),
- HostAndPort("node1"));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
-
- txn.setCheckForInterruptStatus(kInterruptedStatus);
- getReplCoord()->interrupt(opID);
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::Interrupted, statusAndDur.status);
- awaiter.reset();
- }
-
- class StepDownTest : public ReplCoordTest {
- protected:
- OID myRid;
- OID rid2;
- OID rid3;
-
- private:
- virtual void setUp() {
- ReplCoordTest::setUp();
- init("mySet/test1:1234,test2:1234,test3:1234");
-
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test1", 1234));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- myRid = getReplCoord()->getMyRID();
- }
- };
-
- TEST_F(ReplCoordTest, UpdateTerm) {
+/**
+ * Used to wait for replication in a separate thread without blocking execution of the test.
+ * To use, set the optime and write concern to be passed to awaitReplication and then call
+ * start(), which will spawn a thread that calls awaitReplication. No calls may be made
+ * on the ReplicationAwaiter instance between calling start and getResult(). After returning
+ * from getResult(), you can call reset() to allow the awaiter to be reused for another
+ * awaitReplication call.
+ */
+class ReplicationAwaiter {
+public:
+ ReplicationAwaiter(ReplicationCoordinatorImpl* replCoord, OperationContext* txn)
+ : _replCoord(replCoord),
+ _finished(false),
+ _result(ReplicationCoordinator::StatusAndDuration(Status::OK(), Milliseconds(0))) {}
+
+ void setOpTime(const OpTime& ot) {
+ _optime = ot;
+ }
+
+ void setWriteConcern(const WriteConcernOptions& wc) {
+ _writeConcern = wc;
+ }
+
+ // may block
+ ReplicationCoordinator::StatusAndDuration getResult() {
+ _thread->join();
+ ASSERT(_finished);
+ return _result;
+ }
+
+ void start(OperationContext* txn) {
+ ASSERT(!_finished);
+ _thread.reset(
+ new stdx::thread(stdx::bind(&ReplicationAwaiter::_awaitReplication, this, txn)));
+ }
+
+ void reset() {
+ ASSERT(_finished);
+ _finished = false;
+ _result = ReplicationCoordinator::StatusAndDuration(Status::OK(), Milliseconds(0));
+ }
+
+private:
+ void _awaitReplication(OperationContext* txn) {
+ _result = _replCoord->awaitReplication(txn, _optime, _writeConcern);
+ _finished = true;
+ }
+
+ ReplicationCoordinatorImpl* _replCoord;
+ bool _finished;
+ OpTime _optime;
+ WriteConcernOptions _writeConcern;
+ ReplicationCoordinator::StatusAndDuration _result;
+ std::unique_ptr<stdx::thread> _thread;
+};
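For orientation, here is a minimal sketch of the start()/getResult()/reset() protocol the
class comment describes, in the shape the tests below use. It assumes the fixture has
already started a replica set and won an election, as in those tests; the optime and
write concern values are illustrative, not fixture members.

    OperationContextNoop txn;
    ReplicationAwaiter awaiter(getReplCoord(), &txn);

    OpTimeWithTermZero time1(100, 1);
    WriteConcernOptions writeConcern;
    writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
    writeConcern.wNumNodes = 2;

    awaiter.setOpTime(time1);               // optime awaitReplication will wait on
    awaiter.setWriteConcern(writeConcern);  // write concern it must satisfy
    awaiter.start(&txn);                    // spawns the thread that calls awaitReplication

    // Satisfy w:2 from the test thread: self plus one secondary reach time1.
    getReplCoord()->setMyLastOptime(time1);
    ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));

    ReplicationCoordinator::StatusAndDuration res = awaiter.getResult();  // joins the thread
    ASSERT_OK(res.status);
    awaiter.reset();  // required before any further start() call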
+
+TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesBlocking) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time1
+ awaiter.setOpTime(time1);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.start(&txn);
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time2));
+ statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+
+ // 3 nodes waiting for time2
+ writeConcern.wNumNodes = 3;
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
+ statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationTimeout) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = 50;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationShutdown) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
+ shutdown();
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationStepDown) {
+ // Test that a thread blocked in awaitReplication will be woken up and return NotMaster
+ // if the node steps down while it is waiting.
+ OperationContextReplMock txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
+ getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationInterrupt) {
+ // Tests that a thread blocked in awaitReplication can be killed by a killOp operation
+ const unsigned int opID = 100;
+ OperationContextReplMock txn{opID};
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1")
+ << BSON("_id" << 1 << "host"
+ << "node2") << BSON("_id" << 2 << "host"
+ << "node3"))),
+ HostAndPort("node1"));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
+
+ txn.setCheckForInterruptStatus(kInterruptedStatus);
+ getReplCoord()->interrupt(opID);
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::Interrupted, statusAndDur.status);
+ awaiter.reset();
+}
+
+class StepDownTest : public ReplCoordTest {
+protected:
+ OID myRid;
+ OID rid2;
+ OID rid3;
+
+private:
+ virtual void setUp() {
ReplCoordTest::setUp();
init("mySet/test1:1234,test2:1234,test3:1234");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234")) <<
- "protocolVersion" << 1),
- HostAndPort("test1", 1234));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp (100, 1), 0));
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234")
+ << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test1", 1234));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- simulateSuccessfulV1Election();
-
- ASSERT_EQUALS(1, getReplCoord()->getTerm());
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // lower term, no change
- getReplCoord()->updateTerm(0);
- ASSERT_EQUALS(1, getReplCoord()->getTerm());
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // same term, no change
- getReplCoord()->updateTerm(1);
- ASSERT_EQUALS(1, getReplCoord()->getTerm());
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // higher term, step down and change term
- Handle cbHandle;
- getReplCoord()->updateTerm_forTest(2);
- ASSERT_EQUALS(2, getReplCoord()->getTerm());
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- }
-
- TEST_F(StepDownTest, StepDownNotPrimary) {
- OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- // All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
-
- Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(0));
- ASSERT_EQUALS(ErrorCodes::NotMaster, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
- }
-
- TEST_F(StepDownTest, StepDownTimeoutAcquiringGlobalLock) {
- OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- // All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
-
- simulateSuccessfulElection();
-
- // Make sure stepDown cannot grab the global shared lock
- Lock::GlobalWrite lk(txn.lockState());
-
- Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- }
-
- TEST_F(StepDownTest, StepDownNoWaiting) {
- OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- // All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
-
- simulateSuccessfulElection();
-
- enterNetwork();
- getNet()->runUntil(getNet()->now() + Seconds(2));
- ASSERT(getNet()->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- RemoteCommandRequest request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(hbArgs.getSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime1);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
- }
- while (getNet()->hasReadyRequests()) {
- getNet()->blackHole(getNet()->getNextReadyRequest());
- }
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
-
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- ASSERT_OK(getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)));
- enterNetwork(); // So we can safely inspect the topology coordinator
- ASSERT_EQUALS(getNet()->now() + Seconds(1), getTopoCoord().getStepDownTime());
- ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
- exitNetwork();
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
- }
-
- TEST_F(ReplCoordTest, StepDownAndBackUpSingleNode) {
- init("mySet");
-
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234"))),
- HostAndPort("test1", 1234));
- OperationContextReplMock txn;
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- ASSERT_OK(getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000)));
- getNet()->enterNetwork(); // Must do this before inspecting the topocoord
- Date_t stepdownUntil = getNet()->now() + Seconds(1);
- ASSERT_EQUALS(stepdownUntil, getTopoCoord().getStepDownTime());
- ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // Now run time forward and make sure that the node becomes primary again when the stepdown
- // period ends.
- getNet()->runUntil(stepdownUntil);
- ASSERT_EQUALS(stepdownUntil, getNet()->now());
- ASSERT_TRUE(getTopoCoord().getMemberState().primary());
- getNet()->exitNetwork();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- }
-
- /**
-     * Used to wait for stepDown() to finish in a separate thread without blocking execution of
- * the test. To use, set the values of "force", "waitTime", and "stepDownTime", which will be
- * used as the arguments passed to stepDown, and then call
- * start(), which will spawn a thread that calls stepDown. No calls may be made
- * on the StepDownRunner instance between calling start and getResult(). After returning
- * from getResult(), you can call reset() to allow the StepDownRunner to be reused for another
- * stepDown call.
- */
- class StepDownRunner {
- public:
-
- StepDownRunner(ReplicationCoordinatorImpl* replCoord) :
- _replCoord(replCoord), _finished(false), _result(Status::OK()), _force(false),
- _waitTime(0), _stepDownTime(0) {}
-
- // may block
- Status getResult() {
- _thread->join();
- ASSERT(_finished);
- return _result;
- }
-
- void start(OperationContext* txn) {
- ASSERT(!_finished);
- _thread.reset(new stdx::thread(stdx::bind(&StepDownRunner::_stepDown,
- this,
- txn)));
- }
-
- void reset() {
- ASSERT(_finished);
- _finished = false;
- _result = Status(ErrorCodes::InternalError, "Result Status never set");
- }
-
- void setForce(bool force) {
- _force = force;
- }
-
- void setWaitTime(const Milliseconds& waitTime) {
- _waitTime = waitTime;
- }
-
- void setStepDownTime(const Milliseconds& stepDownTime) {
- _stepDownTime = stepDownTime;
- }
-
- private:
-
- void _stepDown(OperationContext* txn) {
- _result = _replCoord->stepDown(txn, _force, _waitTime, _stepDownTime);
- _finished = true;
- }
-
- ReplicationCoordinatorImpl* _replCoord;
- bool _finished;
- Status _result;
- std::unique_ptr<stdx::thread> _thread;
- bool _force;
- Milliseconds _waitTime;
- Milliseconds _stepDownTime;
- };
-
- TEST_F(StepDownTest, StepDownNotCaughtUp) {
- OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
- // No secondary is caught up
- getReplCoord()->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
-
- // Try to stepDown but time out because no secondaries are caught up
- StepDownRunner runner(getReplCoord());
- runner.setForce(false);
- runner.setWaitTime(Milliseconds(0));
- runner.setStepDownTime(Milliseconds(1000));
-
- simulateSuccessfulElection();
-
- runner.start(&txn);
- Status status = runner.getResult();
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // Now use "force" to force it to step down even though no one is caught up
- runner.reset();
- getNet()->enterNetwork();
- const Date_t startDate = getNet()->now();
-        while (getNet()->now() < startDate + Milliseconds(1000)) {
- while (getNet()->hasReadyRequests()) {
- getNet()->blackHole(getNet()->getNextReadyRequest());
- }
- getNet()->runUntil(startDate + Milliseconds(1000));
- }
- getNet()->exitNetwork();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- runner.setForce(true);
- runner.start(&txn);
- status = runner.getResult();
- ASSERT_OK(status);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- }
-
- TEST_F(StepDownTest, StepDownCatchUp) {
- OperationContextReplMock txn;
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
- // No secondary is caught up
- getReplCoord()->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
-
- // stepDown where the secondary actually has to catch up before the stepDown can succeed
- StepDownRunner runner(getReplCoord());
- runner.setForce(false);
- runner.setWaitTime(Milliseconds(10000));
- runner.setStepDownTime(Milliseconds(60000));
-
- simulateSuccessfulElection();
-
- runner.start(&txn);
-
- // Make a secondary actually catch up
- enterNetwork();
- getNet()->runUntil(getNet()->now() + Milliseconds(2000));
- ASSERT(getNet()->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- RemoteCommandRequest request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(hbArgs.getSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
- }
- while (getNet()->hasReadyRequests()) {
- getNet()->blackHole(getNet()->getNextReadyRequest());
- }
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
- ASSERT_OK(runner.getResult());
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
- }
-
- TEST_F(StepDownTest, InterruptStepDown) {
- const unsigned int opID = 100;
- OperationContextReplMock txn{opID};
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
- // No secondary is caught up
- getReplCoord()->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
-
- // stepDown where the secondary actually has to catch up before the stepDown can succeed
- StepDownRunner runner(getReplCoord());
- runner.setForce(false);
- runner.setWaitTime(Milliseconds(10000));
- runner.setStepDownTime(Milliseconds(60000));
-
- simulateSuccessfulElection();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- runner.start(&txn);
-
- txn.setCheckForInterruptStatus(kInterruptedStatus);
- getReplCoord()->interrupt(opID);
-
- ASSERT_EQUALS(ErrorCodes::Interrupted, runner.getResult());
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- }
+ myRid = getReplCoord()->getMyRID();
+ }
+};
+
+TEST_F(ReplCoordTest, UpdateTerm) {
+ ReplCoordTest::setUp();
+ init("mySet/test1:1234,test2:1234,test3:1234");
+
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234"))
+ << "protocolVersion" << 1),
+ HostAndPort("test1", 1234));
+ getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ simulateSuccessfulV1Election();
+
+ ASSERT_EQUALS(1, getReplCoord()->getTerm());
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // lower term, no change
+ getReplCoord()->updateTerm(0);
+ ASSERT_EQUALS(1, getReplCoord()->getTerm());
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // same term, no change
+ getReplCoord()->updateTerm(1);
+ ASSERT_EQUALS(1, getReplCoord()->getTerm());
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // higher term, step down and change term
+ Handle cbHandle;
+ getReplCoord()->updateTerm_forTest(2);
+ ASSERT_EQUALS(2, getReplCoord()->getTerm());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(StepDownTest, StepDownNotPrimary) {
+ OperationContextReplMock txn;
+ OpTimeWithTermZero optime1(100, 1);
+ // All nodes are caught up
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+
+ Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(0));
+ ASSERT_EQUALS(ErrorCodes::NotMaster, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(StepDownTest, StepDownTimeoutAcquiringGlobalLock) {
+ OperationContextReplMock txn;
+ OpTimeWithTermZero optime1(100, 1);
+ // All nodes are caught up
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+
+ simulateSuccessfulElection();
+
+ // Make sure stepDown cannot grab the global shared lock
+ Lock::GlobalWrite lk(txn.lockState());
+
+ Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
+TEST_F(StepDownTest, StepDownNoWaiting) {
+ OperationContextReplMock txn;
+ OpTimeWithTermZero optime1(100, 1);
+ // All nodes are caught up
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+
+ simulateSuccessfulElection();
+
+ enterNetwork();
+ getNet()->runUntil(getNet()->now() + Seconds(2));
+ ASSERT(getNet()->hasReadyRequests());
+ NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ RemoteCommandRequest request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(hbArgs.getSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(hbArgs.getConfigVersion());
+ hbResp.setOpTime(optime1);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
+ }
+ while (getNet()->hasReadyRequests()) {
+ getNet()->blackHole(getNet()->getNextReadyRequest());
+ }
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ ASSERT_OK(getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)));
+ enterNetwork(); // So we can safely inspect the topology coordinator
+ ASSERT_EQUALS(getNet()->now() + Seconds(1), getTopoCoord().getStepDownTime());
+ ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
+ exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(ReplCoordTest, StepDownAndBackUpSingleNode) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))),
+ HostAndPort("test1", 1234));
+ OperationContextReplMock txn;
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ ASSERT_OK(getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000)));
+ getNet()->enterNetwork(); // Must do this before inspecting the topocoord
+ Date_t stepdownUntil = getNet()->now() + Seconds(1);
+ ASSERT_EQUALS(stepdownUntil, getTopoCoord().getStepDownTime());
+ ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // Now run time forward and make sure that the node becomes primary again when the stepdown
+ // period ends.
+ getNet()->runUntil(stepdownUntil);
+ ASSERT_EQUALS(stepdownUntil, getNet()->now());
+ ASSERT_TRUE(getTopoCoord().getMemberState().primary());
+ getNet()->exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
- TEST_F(ReplCoordTest, GetReplicationModeNone) {
- init();
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
+/**
+ * Used to wait for stepDown() to finish in a separate thread without blocking execution of
+ * the test. To use, set the values of "force", "waitTime", and "stepDownTime", which will be
+ * used as the arguments passed to stepDown, and then call
+ * start(), which will spawn a thread that calls stepDown. No calls may be made
+ * on the StepDownRunner instance between calling start and getResult(). After returning
+ * from getResult(), you can call reset() to allow the StepDownRunner to be reused for another
+ * stepDown call.
+ */
+class StepDownRunner {
+public:
+ StepDownRunner(ReplicationCoordinatorImpl* replCoord)
+ : _replCoord(replCoord),
+ _finished(false),
+ _result(Status::OK()),
+ _force(false),
+ _waitTime(0),
+ _stepDownTime(0) {}
- TEST_F(ReplCoordTest, GetReplicationModeMaster) {
- // modeMasterSlave if master set
- ReplSettings settings;
- settings.master = true;
- init(settings);
- ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave,
- getReplCoord()->getReplicationMode());
+ // may block
+ Status getResult() {
+ _thread->join();
+ ASSERT(_finished);
+ return _result;
}
- TEST_F(ReplCoordTest, GetReplicationModeSlave) {
- // modeMasterSlave if the slave flag was set
- ReplSettings settings;
- settings.slave = SimpleSlave;
- init(settings);
- ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave,
- getReplCoord()->getReplicationMode());
+ void start(OperationContext* txn) {
+ ASSERT(!_finished);
+ _thread.reset(new stdx::thread(stdx::bind(&StepDownRunner::_stepDown, this, txn)));
}
- TEST_F(ReplCoordTest, GetReplicationModeRepl) {
- // modeReplSet if the set name was supplied.
- ReplSettings settings;
- settings.replSet = "mySet/node1:12345";
- init(settings);
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0 ))),
- HostAndPort("node1", 12345));
+ void reset() {
+ ASSERT(_finished);
+ _finished = false;
+ _result = Status(ErrorCodes::InternalError, "Result Status never set");
}
- TEST_F(ReplCoordTest, TestPrepareReplSetUpdatePositionCommand) {
- OperationContextNoop txn;
- init("mySet/test1:1234,test2:1234,test3:1234");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test1", 1234));
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
- OpTimeWithTermZero optime3(2, 1);
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime2));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime3));
-
- // Check that the proper BSON is generated for the replSetUpdatePositionCommand
- BSONObjBuilder cmdBuilder;
- getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder);
- BSONObj cmd = cmdBuilder.done();
-
- ASSERT_EQUALS(2, cmd.nFields());
- ASSERT_EQUALS("replSetUpdatePosition", cmd.firstElement().fieldNameStringData());
-
- std::set<long long> memberIds;
- BSONForEach(entryElement, cmd["optimes"].Obj()) {
- BSONObj entry = entryElement.Obj();
- long long memberId = entry["memberId"].Number();
- memberIds.insert(memberId);
- if (memberId == 0) {
- // TODO(siyuan) Update when we change replSetUpdatePosition format
- ASSERT_EQUALS(optime1.timestamp, entry["optime"].timestamp());
- } else if (memberId == 1) {
- ASSERT_EQUALS(optime2.timestamp, entry["optime"].timestamp());
- } else {
- ASSERT_EQUALS(2, memberId);
- ASSERT_EQUALS(optime3.timestamp, entry["optime"].timestamp());
- }
- }
- ASSERT_EQUALS(3U, memberIds.size()); // Make sure we saw all 3 nodes
+ void setForce(bool force) {
+ _force = force;
}
- TEST_F(ReplCoordTest, SetMaintenanceMode) {
- init("mySet/test1:1234,test2:1234,test3:1234");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test2", 1234));
- OperationContextNoop txn;
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
-
- // Can't unset maintenance mode if it was never set to begin with.
- Status status = getReplCoord()->setMaintenanceMode(false);
- ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // valid set
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
-
- // If we go into rollback while in maintenance mode, our state changes to RS_ROLLBACK.
- getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
-
- // When we go back to SECONDARY, we still observe RECOVERING because of maintenance mode.
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
-
- // Can set multiple times
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
-
-        // Need to unset as many times as it was set
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- status = getReplCoord()->setMaintenanceMode(false);
- // fourth one fails b/c we only set three times
- ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
- // Unsetting maintenance mode changes our state to secondary if maintenance mode was
-        // the only thing keeping us out of it.
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // From rollback, entering and exiting maintenance mode doesn't change perceived
- // state.
- getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
-
- // Rollback is sticky even if entered while in maintenance mode.
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
- getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // Can't modify maintenance mode when PRIMARY
- simulateSuccessfulElection();
-
- status = getReplCoord()->setMaintenanceMode(true);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- simulateStepDownOnIsolation();
-
- status = getReplCoord()->setMaintenanceMode(false);
- ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ void setWaitTime(const Milliseconds& waitTime) {
+ _waitTime = waitTime;
}
- TEST_F(ReplCoordTest, GetHostsWrittenToReplSet) {
- HostAndPort myHost("node1:12345");
- HostAndPort client1Host("node2:12345");
-        HostAndPort client2Host("node3:12345");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << myHost.toString()) <<
- BSON("_id" << 1 << "host" << client1Host.toString()) <<
- BSON("_id" << 2 << "host" << client2Host.toString()))),
- HostAndPort("node1", 12345));
- OperationContextNoop txn;
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
-
- std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(1U, caughtUpHosts.size());
- ASSERT_EQUALS(myHost, caughtUpHosts[0]);
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
- caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(2U, caughtUpHosts.size());
- if (myHost == caughtUpHosts[0]) {
- ASSERT_EQUALS(client2Host, caughtUpHosts[1]);
- }
- else {
- ASSERT_EQUALS(client2Host, caughtUpHosts[0]);
- ASSERT_EQUALS(myHost, caughtUpHosts[1]);
- }
+ void setStepDownTime(const Milliseconds& stepDownTime) {
+ _stepDownTime = stepDownTime;
}
- TEST_F(ReplCoordTest, GetHostsWrittenToMasterSlave) {
- ReplSettings settings;
- settings.master = true;
- init(settings);
- HostAndPort clientHost("node2:12345");
- OperationContextNoop txn;
-
- OID client = OID::gen();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
-
- getExternalState()->setClientHostAndPort(clientHost);
- HandshakeArgs handshake;
- ASSERT_OK(handshake.initialize(BSON("handshake" << client)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
-
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time1.timestamp));
-
- std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(0U, caughtUpHosts.size()); // self doesn't get included in master-slave
-
- ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time2.timestamp));
- caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(1U, caughtUpHosts.size());
- ASSERT_EQUALS(clientHost, caughtUpHosts[0]);
+private:
+ void _stepDown(OperationContext* txn) {
+ _result = _replCoord->stepDown(txn, _force, _waitTime, _stepDownTime);
+ _finished = true;
}
- TEST_F(ReplCoordTest, GetOtherNodesInReplSetNoConfig) {
- start();
- ASSERT_EQUALS(0U, getReplCoord()->getOtherNodesInReplSet().size());
- }
+ ReplicationCoordinatorImpl* _replCoord;
+ bool _finished;
+ Status _result;
+ std::unique_ptr<stdx::thread> _thread;
+ bool _force;
+ Milliseconds _waitTime;
+ Milliseconds _stepDownTime;
+};
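Similarly, a minimal sketch of how the tests below drive StepDownRunner, with
illustrative argument values (the same ones StepDownNotCaughtUp uses):

    OperationContextReplMock txn;
    StepDownRunner runner(getReplCoord());

    runner.setForce(false);                      // don't force; require a caught-up secondary
    runner.setWaitTime(Milliseconds(0));         // how long stepDown waits for catch-up
    runner.setStepDownTime(Milliseconds(1000));  // how long the node stays stepped down

    runner.start(&txn);                 // runs stepDown on a separate thread
    Status status = runner.getResult(); // joins; may block until stepDown returns
    runner.reset();                     // required before reusing the runner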
- TEST_F(ReplCoordTest, GetOtherNodesInReplSet) {
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "h1") <<
- BSON("_id" << 1 << "host" << "h2") <<
- BSON("_id" << 2 <<
- "host" << "h3" <<
- "priority" << 0 <<
- "hidden" << true))),
- HostAndPort("h1"));
-
- std::vector<HostAndPort> otherNodes = getReplCoord()->getOtherNodesInReplSet();
- ASSERT_EQUALS(2U, otherNodes.size());
- if (otherNodes[0] == HostAndPort("h2")) {
- ASSERT_EQUALS(HostAndPort("h3"), otherNodes[1]);
- }
- else {
- ASSERT_EQUALS(HostAndPort("h3"), otherNodes[0]);
-            ASSERT_EQUALS(HostAndPort("h2"), otherNodes[1]);
- }
- }
+TEST_F(StepDownTest, StepDownNotCaughtUp) {
+ OperationContextReplMock txn;
+ OpTimeWithTermZero optime1(100, 1);
+ OpTimeWithTermZero optime2(100, 2);
+ // No secondary is caught up
+ getReplCoord()->setMyLastOptime(optime2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
- TEST_F(ReplCoordTest, IsMasterNoConfig) {
- start();
- IsMasterResponse response;
+ // Try to stepDown but time out because no secondaries are caught up
+ StepDownRunner runner(getReplCoord());
+ runner.setForce(false);
+ runner.setWaitTime(Milliseconds(0));
+ runner.setStepDownTime(Milliseconds(1000));
- getReplCoord()->fillIsMasterForReplSet(&response);
- ASSERT_FALSE(response.isConfigSet());
- BSONObj responseObj = response.toBSON();
- ASSERT_FALSE(responseObj["ismaster"].Bool());
- ASSERT_FALSE(responseObj["secondary"].Bool());
- ASSERT_TRUE(responseObj["isreplicaset"].Bool());
- ASSERT_EQUALS("Does not have a valid replica set config", responseObj["info"].String());
+ simulateSuccessfulElection();
- IsMasterResponse roundTripped;
- ASSERT_OK(roundTripped.initialize(response.toBSON()));
- }
+ runner.start(&txn);
+ Status status = runner.getResult();
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- TEST_F(ReplCoordTest, IsMaster) {
- HostAndPort h1("h1");
- HostAndPort h2("h2");
- HostAndPort h3("h3");
- HostAndPort h4("h4");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << h1.toString()) <<
- BSON("_id" << 1 << "host" << h2.toString()) <<
- BSON("_id" << 2 <<
- "host" << h3.toString() <<
- "arbiterOnly" << true) <<
- BSON("_id" << 3 <<
- "host" << h4.toString() <<
- "priority" << 0 <<
- "tags" << BSON("key1" << "value1" <<
- "key2" << "value2")))),
- h4);
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- IsMasterResponse response;
- getReplCoord()->fillIsMasterForReplSet(&response);
-
- ASSERT_EQUALS("mySet", response.getReplSetName());
- ASSERT_EQUALS(2, response.getReplSetVersion());
- ASSERT_FALSE(response.isMaster());
- ASSERT_TRUE(response.isSecondary());
- // TODO(spencer): test that response includes current primary when there is one.
- ASSERT_FALSE(response.isArbiterOnly());
- ASSERT_TRUE(response.isPassive());
- ASSERT_FALSE(response.isHidden());
- ASSERT_TRUE(response.shouldBuildIndexes());
- ASSERT_EQUALS(Seconds(0), response.getSlaveDelay());
- ASSERT_EQUALS(h4, response.getMe());
-
- std::vector<HostAndPort> hosts = response.getHosts();
- ASSERT_EQUALS(2U, hosts.size());
- if (hosts[0] == h1) {
- ASSERT_EQUALS(h2, hosts[1]);
- }
- else {
- ASSERT_EQUALS(h2, hosts[0]);
- ASSERT_EQUALS(h1, hosts[1]);
+ // Now use "force" to force it to step down even though no one is caught up
+ runner.reset();
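+    // Advance the mock clock one second, black-holing any heartbeat requests
+    // that become ready along the way.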
+ getNet()->enterNetwork();
+ const Date_t startDate = getNet()->now();
+    while (getNet()->now() < startDate + Milliseconds(1000)) {
+ while (getNet()->hasReadyRequests()) {
+ getNet()->blackHole(getNet()->getNextReadyRequest());
}
- std::vector<HostAndPort> passives = response.getPassives();
- ASSERT_EQUALS(1U, passives.size());
- ASSERT_EQUALS(h4, passives[0]);
- std::vector<HostAndPort> arbiters = response.getArbiters();
- ASSERT_EQUALS(1U, arbiters.size());
- ASSERT_EQUALS(h3, arbiters[0]);
-
- unordered_map<std::string, std::string> tags = response.getTags();
- ASSERT_EQUALS(2U, tags.size());
- ASSERT_EQUALS("value1", tags["key1"]);
- ASSERT_EQUALS("value2", tags["key2"]);
-
- IsMasterResponse roundTripped;
- ASSERT_OK(roundTripped.initialize(response.toBSON()));
- }
-
- TEST_F(ReplCoordTest, ShutDownBeforeStartUpFinished) {
- init();
- startCapturingLogMessages();
- getReplCoord()->shutdown();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("shutdown() called before startReplication() finished"));
- }
-
- TEST_F(ReplCoordTest, UpdatePositionWithConfigVersionAndMemberIdTest) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- simulateSuccessfulElection();
-
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
- OpTimeWithTermZero staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 1;
-
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive updatePosition containing ourself, should not process the update for self
- UpdatePositionArgs args;
- ASSERT_OK(args.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 2 <<
- "memberId" << 0 <<
- "optime" << time2.timestamp)))));
-
- ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args, 0));
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive updatePosition with incorrect config version
- UpdatePositionArgs args2;
- ASSERT_OK(args2.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 3 <<
- "memberId" << 1 <<
- "optime" << time2.timestamp)))));
-
- long long cfgver;
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetUpdatePosition(args2, &cfgver));
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive updatePosition with nonexistent member id
- UpdatePositionArgs args3;
- ASSERT_OK(args3.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 2 <<
- "memberId" << 9 <<
- "optime" << time2.timestamp)))));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- getReplCoord()->processReplSetUpdatePosition(args3, 0));
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive a good update position
- getReplCoord()->setMyLastOptime(time2);
- UpdatePositionArgs args4;
- ASSERT_OK(args4.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 2 <<
- "memberId" << 1 <<
- "optime" << time2.timestamp) <<
- BSON("cfgver" << 2 <<
- "memberId" << 2 <<
- "optime" << time2.timestamp)))));
-
- ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args4, 0));
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- writeConcern.wNumNodes = 3;
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
- }
-
- void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 <<
- "host" << "node1:12345" <<
- "priority" << 3) <<
- BSON("_id" << 1 << "host" << "node2:12345") <<
- BSON("_id" << 2 << "host" << "node3:12345")));
- *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationReconfigSimple) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2));
- simulateSuccessfulElection();
-
- OpTimeWithTermZero time(100, 2);
-
- // 3 nodes waiting for time
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 3;
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
- awaiter.setOpTime(time);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
-
- // reconfig
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
-
- // satisfy write concern
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 0, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 1, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 2, time));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
- }
-
- void doReplSetReconfigToFewer(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node3:12345")));
- *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationReconfigNodeCountExceedsNumberOfNodes) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2));
- simulateSuccessfulElection();
-
- OpTimeWithTermZero time(100, 2);
-
- // 3 nodes waiting for time
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 3;
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
- awaiter.setOpTime(time);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
-
- // reconfig to fewer nodes
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfigToFewer, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
- std::cout << "asdf" << std::endl;
-
- // writeconcern feasability should be reevaluated and an error should be returned
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationReconfigToSmallerMajority) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3) <<
- BSON("host" << "node5:12345" << "_id" << 4))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 1));
- simulateSuccessfulElection();
-
- OpTimeWithTermZero time(100, 2);
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
-
-
- // majority nodes waiting for time
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wMode = WriteConcernOptions::kMajority;
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
- awaiter.setOpTime(time);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
-
- // demonstrate that majority cannot currently be satisfied
- WriteConcernOptions writeConcern2;
- writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern2.wMode = WriteConcernOptions::kMajority;
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time, writeConcern2).status);
-
- // reconfig to three nodes
- Status status(ErrorCodes::InternalError, "Not Set");
- stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
+ getNet()->runUntil(startDate + Milliseconds(1000));
+ }
+ getNet()->exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ runner.setForce(true);
+ runner.start(&txn);
+ status = runner.getResult();
+ ASSERT_OK(status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(StepDownTest, StepDownCatchUp) {
+ OperationContextReplMock txn;
+ OpTimeWithTermZero optime1(100, 1);
+ OpTimeWithTermZero optime2(100, 2);
+ // No secondary is caught up
+ getReplCoord()->setMyLastOptime(optime2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+
+ // stepDown where the secondary actually has to catch up before the stepDown can succeed
+ StepDownRunner runner(getReplCoord());
+ runner.setForce(false);
+ runner.setWaitTime(Milliseconds(10000));
+ runner.setStepDownTime(Milliseconds(60000));
+
+ simulateSuccessfulElection();
+
+ runner.start(&txn);
+
+ // Make a secondary actually catch up
+ enterNetwork();
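+    // Let the mock clock advance far enough for the next heartbeat request to
+    // become ready.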
+ getNet()->runUntil(getNet()->now() + Milliseconds(2000));
+ ASSERT(getNet()->hasReadyRequests());
+ NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ RemoteCommandRequest request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(hbArgs.getSetName());
hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(2);
+ hbResp.setConfigVersion(hbArgs.getConfigVersion());
+ hbResp.setOpTime(optime2);
BSONObjBuilder respObj;
respObj << "ok" << 1;
hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
-
- // writeconcern feasability should be reevaluated and be satisfied
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationMajority) {
- // Test that we can satisfy majority write concern can only be
- // statisfied by voting data-bearing members.
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "votes" << 0) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- OpTimeWithTermZero time(100, 0);
- getReplCoord()->setMyLastOptime(time);
- simulateSuccessfulElection();
-
- WriteConcernOptions majorityWriteConcern;
- majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
-
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
-
- // this member does not vote and as a result should not count towards write concern
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time));
- ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
- getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
- }
-
- TEST_F(ReplCoordTest, LastCommittedOpTime) {
- // Test that the commit level advances properly.
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "votes" << 0) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- OpTimeWithTermZero zero(0, 0);
- OpTimeWithTermZero time(100, 0);
- getReplCoord()->setMyLastOptime(time);
- simulateSuccessfulElection();
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
- ASSERT_EQUALS((OpTime)zero, getReplCoord()->getLastCommittedOpTime());
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time));
- ASSERT_EQUALS((OpTime)zero, getReplCoord()->getLastCommittedOpTime());
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
- ASSERT_EQUALS((OpTime)time, getReplCoord()->getLastCommittedOpTime());
-
-
- // Set a new, later OpTime.
- OpTimeWithTermZero newTime = OpTimeWithTermZero(100, 1);
- getReplCoord()->setMyLastOptime(newTime);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, newTime));
- ASSERT_EQUALS((OpTime)time, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, newTime));
- // Reached majority of voting nodes with newTime.
- ASSERT_EQUALS((OpTime)newTime, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, newTime));
- ASSERT_EQUALS((OpTime)newTime, getReplCoord()->getLastCommittedOpTime());
- }
-
- TEST_F(ReplCoordTest, CantUseReadAfterIfNotReplSet) {
- init(ReplSettings());
- OperationContextNoop txn;
- auto result = getReplCoord()->waitUntilOpTime(&txn,
- ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
-
- ASSERT_FALSE(result.didWait());
- ASSERT_EQUALS(ErrorCodes::NotAReplicaSet, result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterWhileShutdown) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0));
-
- shutdown();
-
- auto result = getReplCoord()->waitUntilOpTime(&txn,
- ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
-
- ASSERT_TRUE(result.didWait());
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterInterrupted) {
- OperationContextReplMock txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0));
-
- txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "test"));
-
- auto result = getReplCoord()->waitUntilOpTime(&txn,
- ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
-
- ASSERT_TRUE(result.didWait());
- ASSERT_EQUALS(ErrorCodes::Interrupted, result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterNoOpTime) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
- auto result = getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs());
-
- ASSERT_FALSE(result.didWait());
- ASSERT_OK(result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterGreaterOpTime) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
- auto result = getReplCoord()->waitUntilOpTime(&txn,
- ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
-
- ASSERT_TRUE(result.didWait());
- ASSERT_OK(result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterEqualOpTime) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
-
- OpTimeWithTermZero time(100, 0);
- getReplCoord()->setMyLastOptime(time);
- auto result = getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(time));
-
- ASSERT_TRUE(result.didWait());
- ASSERT_OK(result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterDeferredGreaterOpTime) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(0, 0));
-
- auto pseudoLogOp = std::async(std::launch::async, [this]() {
- // Not guaranteed to be scheduled after waitUnitl blocks...
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(200, 0));
- });
-
- auto result = getReplCoord()->waitUntilOpTime(&txn,
- ReadAfterOpTimeArgs(OpTimeWithTermZero(100, 0)));
- pseudoLogOp.get();
-
- ASSERT_TRUE(result.didWait());
- ASSERT_OK(result.getStatus());
- }
-
- TEST_F(ReplCoordTest, ReadAfterDeferredEqualOpTime) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(0, 0));
-
- OpTimeWithTermZero opTimeToWait(100, 0);
-
- auto pseudoLogOp = std::async(std::launch::async, [this, &opTimeToWait]() {
- // Not guaranteed to be scheduled after waitUnitl blocks...
- getReplCoord()->setMyLastOptime(opTimeToWait);
- });
-
- auto result = getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(opTimeToWait));
- pseudoLogOp.get();
-
- ASSERT_TRUE(result.didWait());
- ASSERT_OK(result.getStatus());
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
+ }
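+    // Black-hole any other outstanding requests so only the scheduled
+    // heartbeat response gets delivered.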
+ while (getNet()->hasReadyRequests()) {
+ getNet()->blackHole(getNet()->getNextReadyRequest());
+ }
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+ ASSERT_OK(runner.getResult());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(StepDownTest, InterruptStepDown) {
+ const unsigned int opID = 100;
+ OperationContextReplMock txn{opID};
+ OpTimeWithTermZero optime1(100, 1);
+ OpTimeWithTermZero optime2(100, 2);
+ // No secondary is caught up
+ getReplCoord()->setMyLastOptime(optime2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+
+    // Set up a stepDown that cannot succeed until a secondary catches up (none will here)
+ StepDownRunner runner(getReplCoord());
+ runner.setForce(false);
+ runner.setWaitTime(Milliseconds(10000));
+ runner.setStepDownTime(Milliseconds(60000));
+
+ simulateSuccessfulElection();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ runner.start(&txn);
+
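+    // Flag the operation as interrupted, then deliver the interrupt so the
+    // blocked stepDown call wakes up and returns ErrorCodes::Interrupted.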
+ txn.setCheckForInterruptStatus(kInterruptedStatus);
+ getReplCoord()->interrupt(opID);
+
+ ASSERT_EQUALS(ErrorCodes::Interrupted, runner.getResult());
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeNone) {
+ init();
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeMaster) {
+    // modeMasterSlave if the master flag was set
+ ReplSettings settings;
+ settings.master = true;
+ init(settings);
+ ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeSlave) {
+ // modeMasterSlave if the slave flag was set
+ ReplSettings settings;
+ settings.slave = SimpleSlave;
+ init(settings);
+ ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeRepl) {
+ // modeReplSet if the set name was supplied.
+ ReplSettings settings;
+ settings.replSet = "mySet/node1:12345";
+ init(settings);
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+}
+
+TEST_F(ReplCoordTest, TestPrepareReplSetUpdatePositionCommand) {
+ OperationContextNoop txn;
+ init("mySet/test1:1234,test2:1234,test3:1234");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test1", 1234));
+ OpTimeWithTermZero optime1(100, 1);
+ OpTimeWithTermZero optime2(100, 2);
+ OpTimeWithTermZero optime3(2, 1);
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime2));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime3));
+
+ // Check that the proper BSON is generated for the replSetUpdatePositionCommand
+ BSONObjBuilder cmdBuilder;
+ getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder);
+ BSONObj cmd = cmdBuilder.done();
+
+ ASSERT_EQUALS(2, cmd.nFields());
+ ASSERT_EQUALS("replSetUpdatePosition", cmd.firstElement().fieldNameStringData());
+
+ std::set<long long> memberIds;
+ BSONForEach(entryElement, cmd["optimes"].Obj()) {
+ BSONObj entry = entryElement.Obj();
+ long long memberId = entry["memberId"].Number();
+ memberIds.insert(memberId);
+ if (memberId == 0) {
+ // TODO(siyuan) Update when we change replSetUpdatePosition format
+ ASSERT_EQUALS(optime1.timestamp, entry["optime"].timestamp());
+ } else if (memberId == 1) {
+ ASSERT_EQUALS(optime2.timestamp, entry["optime"].timestamp());
+ } else {
+ ASSERT_EQUALS(2, memberId);
+ ASSERT_EQUALS(optime3.timestamp, entry["optime"].timestamp());
+ }
}
-
- // TODO(schwerin): Unit test election id updating
+ ASSERT_EQUALS(3U, memberIds.size()); // Make sure we saw all 3 nodes
+}
+
+TEST_F(ReplCoordTest, SetMaintenanceMode) {
+ init("mySet/test1:1234,test2:1234,test3:1234");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test2", 1234));
+ OperationContextNoop txn;
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+
+ // Can't unset maintenance mode if it was never set to begin with.
+ Status status = getReplCoord()->setMaintenanceMode(false);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // valid set
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
+
+ // If we go into rollback while in maintenance mode, our state changes to RS_ROLLBACK.
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+
+ // When we go back to SECONDARY, we still observe RECOVERING because of maintenance mode.
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
+
+ // Can set multiple times
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+
+    // Must unset as many times as it was set
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ status = getReplCoord()->setMaintenanceMode(false);
+    // the fourth unset fails because we only set maintenance mode three times
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ // Unsetting maintenance mode changes our state to secondary if maintenance mode was
+    // the only thing keeping us out of it.
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // From rollback, entering and exiting maintenance mode doesn't change perceived
+ // state.
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+
+ // Rollback is sticky even if entered while in maintenance mode.
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // Can't modify maintenance mode when PRIMARY
+ simulateSuccessfulElection();
+
+ status = getReplCoord()->setMaintenanceMode(true);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ simulateStepDownOnIsolation();
+
+ status = getReplCoord()->setMaintenanceMode(false);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+}
+
+TEST_F(ReplCoordTest, GetHostsWrittenToReplSet) {
+ HostAndPort myHost("node1:12345");
+ HostAndPort client1Host("node2:12345");
+ HostAndPort client2Host("node3:12345");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host" << myHost.toString())
+ << BSON("_id" << 1 << "host" << client1Host.toString())
+ << BSON("_id" << 2 << "host" << client2Host.toString()))),
+ HostAndPort("node1", 12345));
+ OperationContextNoop txn;
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(1U, caughtUpHosts.size());
+ ASSERT_EQUALS(myHost, caughtUpHosts[0]);
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(2U, caughtUpHosts.size());
+ if (myHost == caughtUpHosts[0]) {
+ ASSERT_EQUALS(client2Host, caughtUpHosts[1]);
+ } else {
+ ASSERT_EQUALS(client2Host, caughtUpHosts[0]);
+ ASSERT_EQUALS(myHost, caughtUpHosts[1]);
+ }
+}
+
+TEST_F(ReplCoordTest, GetHostsWrittenToMasterSlave) {
+ ReplSettings settings;
+ settings.master = true;
+ init(settings);
+ HostAndPort clientHost("node2:12345");
+ OperationContextNoop txn;
+
+ OID client = OID::gen();
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ getExternalState()->setClientHostAndPort(clientHost);
+ HandshakeArgs handshake;
+ ASSERT_OK(handshake.initialize(BSON("handshake" << client)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time1.timestamp));
+
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(0U, caughtUpHosts.size()); // self doesn't get included in master-slave
+
+ ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time2.timestamp));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(1U, caughtUpHosts.size());
+ ASSERT_EQUALS(clientHost, caughtUpHosts[0]);
+}
+
+TEST_F(ReplCoordTest, GetOtherNodesInReplSetNoConfig) {
+ start();
+ ASSERT_EQUALS(0U, getReplCoord()->getOtherNodesInReplSet().size());
+}
+
+TEST_F(ReplCoordTest, GetOtherNodesInReplSet) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h1")
+ << BSON("_id" << 1 << "host"
+ << "h2")
+ << BSON("_id" << 2 << "host"
+ << "h3"
+ << "priority" << 0 << "hidden" << true))),
+ HostAndPort("h1"));
+
+ std::vector<HostAndPort> otherNodes = getReplCoord()->getOtherNodesInReplSet();
+ ASSERT_EQUALS(2U, otherNodes.size());
+ if (otherNodes[0] == HostAndPort("h2")) {
+ ASSERT_EQUALS(HostAndPort("h3"), otherNodes[1]);
+ } else {
+ ASSERT_EQUALS(HostAndPort("h3"), otherNodes[0]);
+ ASSERT_EQUALS(HostAndPort("h2"), otherNodes[0]);
+ }
+}
+
+TEST_F(ReplCoordTest, IsMasterNoConfig) {
+ start();
+ IsMasterResponse response;
+
+ getReplCoord()->fillIsMasterForReplSet(&response);
+ ASSERT_FALSE(response.isConfigSet());
+ BSONObj responseObj = response.toBSON();
+ ASSERT_FALSE(responseObj["ismaster"].Bool());
+ ASSERT_FALSE(responseObj["secondary"].Bool());
+ ASSERT_TRUE(responseObj["isreplicaset"].Bool());
+ ASSERT_EQUALS("Does not have a valid replica set config", responseObj["info"].String());
+
+ IsMasterResponse roundTripped;
+ ASSERT_OK(roundTripped.initialize(response.toBSON()));
+}
+
+TEST_F(ReplCoordTest, IsMaster) {
+ HostAndPort h1("h1");
+ HostAndPort h2("h2");
+ HostAndPort h3("h3");
+ HostAndPort h4("h4");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host" << h1.toString())
+ << BSON("_id" << 1 << "host" << h2.toString())
+ << BSON("_id" << 2 << "host" << h3.toString() << "arbiterOnly" << true)
+ << BSON("_id" << 3 << "host" << h4.toString() << "priority" << 0
+ << "tags" << BSON("key1"
+ << "value1"
+ << "key2"
+ << "value2")))),
+ h4);
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ IsMasterResponse response;
+ getReplCoord()->fillIsMasterForReplSet(&response);
+
+ ASSERT_EQUALS("mySet", response.getReplSetName());
+ ASSERT_EQUALS(2, response.getReplSetVersion());
+ ASSERT_FALSE(response.isMaster());
+ ASSERT_TRUE(response.isSecondary());
+ // TODO(spencer): test that response includes current primary when there is one.
+ ASSERT_FALSE(response.isArbiterOnly());
+ ASSERT_TRUE(response.isPassive());
+ ASSERT_FALSE(response.isHidden());
+ ASSERT_TRUE(response.shouldBuildIndexes());
+ ASSERT_EQUALS(Seconds(0), response.getSlaveDelay());
+ ASSERT_EQUALS(h4, response.getMe());
+
+ std::vector<HostAndPort> hosts = response.getHosts();
+ ASSERT_EQUALS(2U, hosts.size());
+ if (hosts[0] == h1) {
+ ASSERT_EQUALS(h2, hosts[1]);
+ } else {
+ ASSERT_EQUALS(h2, hosts[0]);
+ ASSERT_EQUALS(h1, hosts[1]);
+ }
+ std::vector<HostAndPort> passives = response.getPassives();
+ ASSERT_EQUALS(1U, passives.size());
+ ASSERT_EQUALS(h4, passives[0]);
+ std::vector<HostAndPort> arbiters = response.getArbiters();
+ ASSERT_EQUALS(1U, arbiters.size());
+ ASSERT_EQUALS(h3, arbiters[0]);
+
+ unordered_map<std::string, std::string> tags = response.getTags();
+ ASSERT_EQUALS(2U, tags.size());
+ ASSERT_EQUALS("value1", tags["key1"]);
+ ASSERT_EQUALS("value2", tags["key2"]);
+
+ IsMasterResponse roundTripped;
+ ASSERT_OK(roundTripped.initialize(response.toBSON()));
+}
+
+TEST_F(ReplCoordTest, ShutDownBeforeStartUpFinished) {
+ init();
+ startCapturingLogMessages();
+ getReplCoord()->shutdown();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("shutdown() called before startReplication() finished"));
+}
+
+TEST_F(ReplCoordTest, UpdatePositionWithConfigVersionAndMemberIdTest) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulElection();
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermZero staleTime(10, 0);
+ getReplCoord()->setMyLastOptime(time1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+    // receive an updatePosition containing ourself; the update for self should not be processed
+ UpdatePositionArgs args;
+ ASSERT_OK(args.initialize(BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 0 << "optime"
+ << time2.timestamp)))));
+
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args, 0));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive updatePosition with incorrect config version
+ UpdatePositionArgs args2;
+ ASSERT_OK(args2.initialize(BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 3 << "memberId" << 1 << "optime"
+ << time2.timestamp)))));
+
+ long long cfgver;
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetUpdatePosition(args2, &cfgver));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive updatePosition with nonexistent member id
+ UpdatePositionArgs args3;
+ ASSERT_OK(args3.initialize(BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 9 << "optime"
+ << time2.timestamp)))));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args3, 0));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive a good update position
+ getReplCoord()->setMyLastOptime(time2);
+ UpdatePositionArgs args4;
+ ASSERT_OK(args4.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(
+ BSON("cfgver" << 2 << "memberId" << 1 << "optime" << time2.timestamp)
+ << BSON("cfgver" << 2 << "memberId" << 2 << "optime" << time2.timestamp)))));
+
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args4, 0));
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ writeConcern.wNumNodes = 3;
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+}
+
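+// Helper run from a separate thread: submits a version-3 reconfig (which also
+// raises node1's priority) while the test thread services the heartbeats that
+// the reconfig triggers.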
+void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"
+ << "priority" << 3)
+ << BSON("_id" << 1 << "host"
+ << "node2:12345")
+ << BSON("_id" << 2 << "host"
+ << "node3:12345")));
+ *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReconfigSimple) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2));
+ simulateSuccessfulElection();
+
+ OpTimeWithTermZero time(100, 2);
+
+ // 3 nodes waiting for time
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 3;
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+ awaiter.setOpTime(time);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+
+ // reconfig
+ Status status(ErrorCodes::InternalError, "Not Set");
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
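+    // The reconfig's quorum check sends a heartbeat to the new config's
+    // members; answer it so processReplSetReconfig can complete.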
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+
+ // satisfy write concern
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 0, time));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 1, time));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 2, time));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+}
+
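+// Like doReplSetReconfig, but the new config drops node2, shrinking the set
+// from three members to two.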
+void doReplSetReconfigToFewer(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node3:12345")));
+ *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReconfigNodeCountExceedsNumberOfNodes) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2));
+ simulateSuccessfulElection();
+
+ OpTimeWithTermZero time(100, 2);
+
+ // 3 nodes waiting for time
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 3;
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+ awaiter.setOpTime(time);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+
+ // reconfig to fewer nodes
+ Status status(ErrorCodes::InternalError, "Not Set");
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfigToFewer, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+ std::cout << "asdf" << std::endl;
+
+ // writeconcern feasability should be reevaluated and an error should be returned
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReconfigToSmallerMajority) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3) << BSON("host"
+ << "node5:12345"
+ << "_id" << 4))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 1));
+ simulateSuccessfulElection();
+
+ OpTimeWithTermZero time(100, 2);
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
+
+
+ // majority nodes waiting for time
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wMode = WriteConcernOptions::kMajority;
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+ awaiter.setOpTime(time);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+
+ // demonstrate that majority cannot currently be satisfied
+ WriteConcernOptions writeConcern2;
+ writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern2.wMode = WriteConcernOptions::kMajority;
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time, writeConcern2).status);
+
+ // reconfig to three nodes
+ Status status(ErrorCodes::InternalError, "Not Set");
+ stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+
+    // write concern feasibility should be reevaluated and found to be satisfiable
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationMajority) {
+    // Test that majority write concern can only be satisfied by voting,
+    // data-bearing members.
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "votes" << 0)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ OpTimeWithTermZero time(100, 0);
+ getReplCoord()->setMyLastOptime(time);
+ simulateSuccessfulElection();
+
+ WriteConcernOptions majorityWriteConcern;
+ majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
+
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+
+ // this member does not vote and as a result should not count towards write concern
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+}
+
+TEST_F(ReplCoordTest, LastCommittedOpTime) {
+ // Test that the commit level advances properly.
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "votes" << 0)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ OpTimeWithTermZero zero(0, 0);
+ OpTimeWithTermZero time(100, 0);
+ getReplCoord()->setMyLastOptime(time);
+ simulateSuccessfulElection();
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
+ ASSERT_EQUALS((OpTime)zero, getReplCoord()->getLastCommittedOpTime());
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time));
+ ASSERT_EQUALS((OpTime)zero, getReplCoord()->getLastCommittedOpTime());
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
+ ASSERT_EQUALS((OpTime)time, getReplCoord()->getLastCommittedOpTime());
+
+
+ // Set a new, later OpTime.
+ OpTimeWithTermZero newTime = OpTimeWithTermZero(100, 1);
+ getReplCoord()->setMyLastOptime(newTime);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, newTime));
+ ASSERT_EQUALS((OpTime)time, getReplCoord()->getLastCommittedOpTime());
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, newTime));
+ // Reached majority of voting nodes with newTime.
+ ASSERT_EQUALS((OpTime)newTime, getReplCoord()->getLastCommittedOpTime());
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, newTime));
+ ASSERT_EQUALS((OpTime)newTime, getReplCoord()->getLastCommittedOpTime());
+}
+
+TEST_F(ReplCoordTest, CantUseReadAfterIfNotReplSet) {
+ init(ReplSettings());
+ OperationContextNoop txn;
+ auto result =
+ getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
+
+ ASSERT_FALSE(result.didWait());
+ ASSERT_EQUALS(ErrorCodes::NotAReplicaSet, result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterWhileShutdown) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0));
+
+ shutdown();
+
+ auto result =
+ getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
+
+ ASSERT_TRUE(result.didWait());
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterInterrupted) {
+ OperationContextReplMock txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0));
+
+ txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "test"));
+
+ auto result =
+ getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
+
+ ASSERT_TRUE(result.didWait());
+ ASSERT_EQUALS(ErrorCodes::Interrupted, result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterNoOpTime) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ auto result = getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs());
+
+ ASSERT_FALSE(result.didWait());
+ ASSERT_OK(result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterGreaterOpTime) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ auto result =
+ getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(OpTimeWithTermZero(50, 0)));
+
+ ASSERT_TRUE(result.didWait());
+ ASSERT_OK(result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterEqualOpTime) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTimeWithTermZero time(100, 0);
+ getReplCoord()->setMyLastOptime(time);
+ auto result = getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(time));
+
+ ASSERT_TRUE(result.didWait());
+ ASSERT_OK(result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterDeferredGreaterOpTime) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(0, 0));
+
+ auto pseudoLogOp = std::async(std::launch::async,
+ [this]() {
+                                      // Not guaranteed to be scheduled after waitUntilOpTime blocks...
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(200, 0));
+ });
+
+ auto result =
+ getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(OpTimeWithTermZero(100, 0)));
+ pseudoLogOp.get();
+
+ ASSERT_TRUE(result.didWait());
+ ASSERT_OK(result.getStatus());
+}
+
+TEST_F(ReplCoordTest, ReadAfterDeferredEqualOpTime) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setMyLastOptime(OpTimeWithTermZero(0, 0));
+
+ OpTimeWithTermZero opTimeToWait(100, 0);
+
+ auto pseudoLogOp = std::async(std::launch::async,
+ [this, &opTimeToWait]() {
+                                      // Not guaranteed to be scheduled after waitUntilOpTime blocks...
+ getReplCoord()->setMyLastOptime(opTimeToWait);
+ });
+
+ auto result = getReplCoord()->waitUntilOpTime(&txn, ReadAfterOpTimeArgs(opTimeToWait));
+ pseudoLogOp.get();
+
+ ASSERT_TRUE(result.didWait());
+ ASSERT_OK(result.getStatus());
+}
+
+// TODO(schwerin): Unit test election id updating
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index 9d739707925..19ee9730cae 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -41,315 +41,312 @@
namespace mongo {
namespace repl {
- using std::vector;
+using std::vector;
+
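+// A stub ReplicationCoordinator for unit tests: most methods are no-ops or
+// return canned success values.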
+ReplicationCoordinatorMock::ReplicationCoordinatorMock(const ReplSettings& settings)
+ : _settings(settings) {}
+ReplicationCoordinatorMock::~ReplicationCoordinatorMock() {}
+
+void ReplicationCoordinatorMock::startReplication(OperationContext* txn) {
+ // TODO
+}
+
+void ReplicationCoordinatorMock::shutdown() {
+ // TODO
+}
+
+const ReplSettings& ReplicationCoordinatorMock::getSettings() const {
+ return _settings;
+}
+
+bool ReplicationCoordinatorMock::isReplEnabled() const {
+ return _settings.usingReplSets() || _settings.master || _settings.slave;
+}
+
+ReplicationCoordinator::Mode ReplicationCoordinatorMock::getReplicationMode() const {
+ if (_settings.usingReplSets()) {
+ return modeReplSet;
+ }
+ if (_settings.master || _settings.slave) {
+ return modeMasterSlave;
+ }
+ return modeNone;
+}
+
+MemberState ReplicationCoordinatorMock::getMemberState() const {
+ return _memberState;
+}
+
+bool ReplicationCoordinatorMock::isInPrimaryOrSecondaryState() const {
+ invariant(false);
+}
+
+Seconds ReplicationCoordinatorMock::getSlaveDelaySecs() const {
+ return Seconds(0);
+}
+
+void ReplicationCoordinatorMock::clearSyncSourceBlacklist() {}
+
+ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorMock::awaitReplication(
+ OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern) {
+ // TODO
+ return StatusAndDuration(Status::OK(), Milliseconds(0));
+}
+
+ReplicationCoordinator::StatusAndDuration
+ReplicationCoordinatorMock::awaitReplicationOfLastOpForClient(
+ OperationContext* txn, const WriteConcernOptions& writeConcern) {
+ return StatusAndDuration(Status::OK(), Milliseconds(0));
+}
+
+Status ReplicationCoordinatorMock::stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime) {
+ return Status::OK();
+}
- ReplicationCoordinatorMock::ReplicationCoordinatorMock(const ReplSettings& settings) :
- _settings(settings) {}
- ReplicationCoordinatorMock::~ReplicationCoordinatorMock() {}
+bool ReplicationCoordinatorMock::isMasterForReportingPurposes() {
+ // TODO
+ return true;
+}
- void ReplicationCoordinatorMock::startReplication(OperationContext* txn) {
- // TODO
- }
-
- void ReplicationCoordinatorMock::shutdown() {
- // TODO
- }
-
- const ReplSettings& ReplicationCoordinatorMock::getSettings() const {
- return _settings;
- }
-
- bool ReplicationCoordinatorMock::isReplEnabled() const {
- return _settings.usingReplSets() || _settings.master || _settings.slave;
- }
-
- ReplicationCoordinator::Mode ReplicationCoordinatorMock::getReplicationMode() const {
- if (_settings.usingReplSets()) {
- return modeReplSet;
- }
- if (_settings.master || _settings.slave) {
- return modeMasterSlave;
- }
- return modeNone;
- }
-
- MemberState ReplicationCoordinatorMock::getMemberState() const {
- return _memberState;
- }
-
- bool ReplicationCoordinatorMock::isInPrimaryOrSecondaryState() const {
- invariant(false);
- }
-
- Seconds ReplicationCoordinatorMock::getSlaveDelaySecs() const {
- return Seconds(0);
- }
-
- void ReplicationCoordinatorMock::clearSyncSourceBlacklist() {}
-
- ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorMock::awaitReplication(
- OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern) {
- // TODO
- return StatusAndDuration(Status::OK(), Milliseconds(0));
- }
-
- ReplicationCoordinator::StatusAndDuration
- ReplicationCoordinatorMock::awaitReplicationOfLastOpForClient(
- OperationContext* txn,
- const WriteConcernOptions& writeConcern) {
- return StatusAndDuration(Status::OK(), Milliseconds(0));
- }
-
- Status ReplicationCoordinatorMock::stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::isMasterForReportingPurposes() {
- // TODO
- return true;
- }
-
- bool ReplicationCoordinatorMock::canAcceptWritesForDatabase(StringData dbName) {
- // TODO
- return true;
- }
-
- bool ReplicationCoordinatorMock::canAcceptWritesFor(const NamespaceString& ns) {
- // TODO
- return canAcceptWritesForDatabase(ns.db());
- }
-
- Status ReplicationCoordinatorMock::checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk) {
- // TODO
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
- // TODO
- return false;
- }
-
- Status ReplicationCoordinatorMock::setLastOptimeForSlave(const OID& rid, const Timestamp& ts) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
- // TODO
- }
-
- void ReplicationCoordinatorMock::setMyLastOptime(const OpTime& opTime) {
- _myLastOpTime = opTime;
- }
-
- void ReplicationCoordinatorMock::resetMyLastOptime() {
- _myLastOpTime = OpTime();
- }
-
- OpTime ReplicationCoordinatorMock::getMyLastOptime() const {
- return _myLastOpTime;
- }
-
- ReadAfterOpTimeResponse ReplicationCoordinatorMock::waitUntilOpTime(
- OperationContext* txn,
- const ReadAfterOpTimeArgs& settings) {
- return ReadAfterOpTimeResponse();
- }
-
-
- OID ReplicationCoordinatorMock::getElectionId() {
- // TODO
- return OID();
- }
-
- OID ReplicationCoordinatorMock::getMyRID() const {
- return OID();
- }
-
- int ReplicationCoordinatorMock::getMyId() const {
- return 0;
- }
-
- bool ReplicationCoordinatorMock::setFollowerMode(const MemberState& newState) {
- _memberState = newState;
- return true;
- }
-
- bool ReplicationCoordinatorMock::isWaitingForApplierToDrain() {
- return false;
- }
-
- void ReplicationCoordinatorMock::signalDrainComplete(OperationContext*) {}
-
- void ReplicationCoordinatorMock::signalUpstreamUpdater() {}
-
- bool ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommand(
- BSONObjBuilder* cmdBuilder) {
- cmdBuilder->append("replSetUpdatePosition", 1);
- return true;
- }
-
- ReplicaSetConfig ReplicationCoordinatorMock::getConfig() const {
- return ReplicaSetConfig();
- }
-
- void ReplicationCoordinatorMock::processReplSetGetConfig(BSONObjBuilder* result) {
- // TODO
- }
-
- Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder* result) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::fillIsMasterForReplSet(IsMasterResponse* result) {}
-
- void ReplicationCoordinatorMock::appendSlaveInfoData(BSONObjBuilder* result) {}
-
- Status ReplicationCoordinatorMock::setMaintenanceMode(bool activate) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::getMaintenanceMode() {
- return false;
- }
-
- Status ReplicationCoordinatorMock::processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetGetRBID(BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::incrementRollbackID() {}
-
- Status ReplicationCoordinatorMock::processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* resultObj) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetUpdatePosition(
- const UpdatePositionArgs& updates, long long* configVersion) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processHandshake(OperationContext* txn,
- const HandshakeArgs& handshake) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::buildsIndexes() {
- // TODO
- return true;
- }
-
- std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op) {
- return std::vector<HostAndPort>();
- }
-
- vector<HostAndPort> ReplicationCoordinatorMock::getOtherNodesInReplSet() const {
- return std::vector<HostAndPort>();
- }
-
- Status ReplicationCoordinatorMock::checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const {
- return Status::OK();
- }
-
- WriteConcernOptions ReplicationCoordinatorMock::getGetLastErrorDefault() {
- return WriteConcernOptions();
- }
-
- Status ReplicationCoordinatorMock::checkReplEnabledForCommand(BSONObjBuilder* result) {
- // TODO
- return Status::OK();
- }
-
- HostAndPort ReplicationCoordinatorMock::chooseNewSyncSource() {
- return HostAndPort();
- }
-
- void ReplicationCoordinatorMock::blacklistSyncSource(const HostAndPort& host, Date_t until) {
- }
-
- void ReplicationCoordinatorMock::resetLastOpTimeFromOplog(OperationContext* txn) {
- invariant(false);
- }
-
- bool ReplicationCoordinatorMock::shouldChangeSyncSource(const HostAndPort& currentSource) {
- invariant(false);
- }
-
- OpTime ReplicationCoordinatorMock::getLastCommittedOpTime() const {
- return OpTime();
- }
-
- Status ReplicationCoordinatorMock::processReplSetRequestVotes(
- OperationContext* txn,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::prepareCursorResponseInfo(BSONObjBuilder* objBuilder) {}
-
- Status ReplicationCoordinatorMock::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::isV1ElectionProtocol() {
- return true;
- }
-
- void ReplicationCoordinatorMock::summarizeAsHtml(ReplSetHtmlSummary* output) {}
-
- long long ReplicationCoordinatorMock::getTerm() { return OpTime::kDefaultTerm; }
-
- bool ReplicationCoordinatorMock::updateTerm(long long term) { return false; }
+bool ReplicationCoordinatorMock::canAcceptWritesForDatabase(StringData dbName) {
+ // TODO
+ return true;
+}
-} // namespace repl
-} // namespace mongo
+bool ReplicationCoordinatorMock::canAcceptWritesFor(const NamespaceString& ns) {
+ // TODO
+ return canAcceptWritesForDatabase(ns.db());
+}
+
+Status ReplicationCoordinatorMock::checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk) {
+ // TODO
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
+ // TODO
+ return false;
+}
+
+Status ReplicationCoordinatorMock::setLastOptimeForSlave(const OID& rid, const Timestamp& ts) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
+ // TODO
+}
+
+void ReplicationCoordinatorMock::setMyLastOptime(const OpTime& opTime) {
+ _myLastOpTime = opTime;
+}
+
+void ReplicationCoordinatorMock::resetMyLastOptime() {
+ _myLastOpTime = OpTime();
+}
+
+OpTime ReplicationCoordinatorMock::getMyLastOptime() const {
+ return _myLastOpTime;
+}
+
+ReadAfterOpTimeResponse ReplicationCoordinatorMock::waitUntilOpTime(
+ OperationContext* txn, const ReadAfterOpTimeArgs& settings) {
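+    // A default-constructed response reports that no waiting occurred; tests that
+    // exercise real read-after-optime behavior use ReplicationCoordinatorImpl instead.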
+ return ReadAfterOpTimeResponse();
+}
+
+OID ReplicationCoordinatorMock::getElectionId() {
+ // TODO
+ return OID();
+}
+
+OID ReplicationCoordinatorMock::getMyRID() const {
+ return OID();
+}
+
+int ReplicationCoordinatorMock::getMyId() const {
+ return 0;
+}
+
+bool ReplicationCoordinatorMock::setFollowerMode(const MemberState& newState) {
+ _memberState = newState;
+ return true;
+}
+
+bool ReplicationCoordinatorMock::isWaitingForApplierToDrain() {
+ return false;
+}
+
+void ReplicationCoordinatorMock::signalDrainComplete(OperationContext*) {}
+
+void ReplicationCoordinatorMock::signalUpstreamUpdater() {}
+
+bool ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
+ cmdBuilder->append("replSetUpdatePosition", 1);
+ return true;
+}
+
+ReplicaSetConfig ReplicationCoordinatorMock::getConfig() const {
+ return ReplicaSetConfig();
+}
+
+void ReplicationCoordinatorMock::processReplSetGetConfig(BSONObjBuilder* result) {
+ // TODO
+}
+
+Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder* result) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::fillIsMasterForReplSet(IsMasterResponse* result) {}
+
+void ReplicationCoordinatorMock::appendSlaveInfoData(BSONObjBuilder* result) {}
+
+Status ReplicationCoordinatorMock::setMaintenanceMode(bool activate) {
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::getMaintenanceMode() {
+ return false;
+}
+
+Status ReplicationCoordinatorMock::processReplSetSyncFrom(const HostAndPort& target,
+ BSONObjBuilder* resultObj) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetGetRBID(BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::incrementRollbackID() {}
+
+Status ReplicationCoordinatorMock::processReplSetFresh(const ReplSetFreshArgs& args,
+ BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetElect(const ReplSetElectArgs& args,
+ BSONObjBuilder* resultObj) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetUpdatePosition(const UpdatePositionArgs& updates,
+ long long* configVersion) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processHandshake(OperationContext* txn,
+ const HandshakeArgs& handshake) {
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::buildsIndexes() {
+ // TODO
+ return true;
+}
+
+std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op) {
+ return std::vector<HostAndPort>();
+}
+
+vector<HostAndPort> ReplicationCoordinatorMock::getOtherNodesInReplSet() const {
+ return std::vector<HostAndPort>();
+}
+
+Status ReplicationCoordinatorMock::checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const {
+ return Status::OK();
+}
+
+WriteConcernOptions ReplicationCoordinatorMock::getGetLastErrorDefault() {
+ return WriteConcernOptions();
+}
+
+Status ReplicationCoordinatorMock::checkReplEnabledForCommand(BSONObjBuilder* result) {
+ // TODO
+ return Status::OK();
+}
+
+HostAndPort ReplicationCoordinatorMock::chooseNewSyncSource() {
+ return HostAndPort();
+}
+
+void ReplicationCoordinatorMock::blacklistSyncSource(const HostAndPort& host, Date_t until) {}
+
+void ReplicationCoordinatorMock::resetLastOpTimeFromOplog(OperationContext* txn) {
+ invariant(false);
+}
+
+bool ReplicationCoordinatorMock::shouldChangeSyncSource(const HostAndPort& currentSource) {
+ invariant(false);
+}
+
+OpTime ReplicationCoordinatorMock::getLastCommittedOpTime() const {
+ return OpTime();
+}
+
+Status ReplicationCoordinatorMock::processReplSetRequestVotes(
+ OperationContext* txn,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetDeclareElectionWinner(
+ const ReplSetDeclareElectionWinnerArgs& args, long long* responseTerm) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::prepareCursorResponseInfo(BSONObjBuilder* objBuilder) {}
+
+Status ReplicationCoordinatorMock::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response) {
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::isV1ElectionProtocol() {
+ return true;
+}
+
+void ReplicationCoordinatorMock::summarizeAsHtml(ReplSetHtmlSummary* output) {}
+
+long long ReplicationCoordinatorMock::getTerm() {
+ return OpTime::kDefaultTerm;
+}
+
+bool ReplicationCoordinatorMock::updateTerm(long long term) {
+ return false;
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 2f404b3d157..2856878bd6f 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -35,187 +35,176 @@
namespace mongo {
namespace repl {
- /**
- * A mock ReplicationCoordinator. Currently it is extremely simple and exists solely to link
- * into dbtests.
- */
- class ReplicationCoordinatorMock : public ReplicationCoordinator {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorMock);
-
- public:
-
- ReplicationCoordinatorMock(const ReplSettings& settings);
- virtual ~ReplicationCoordinatorMock();
+/**
+ * A mock ReplicationCoordinator. Currently it is extremely simple and exists solely to link
+ * into dbtests.
+ */
+class ReplicationCoordinatorMock : public ReplicationCoordinator {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorMock);
- virtual void startReplication(OperationContext* txn);
+public:
+ ReplicationCoordinatorMock(const ReplSettings& settings);
+ virtual ~ReplicationCoordinatorMock();
- virtual void shutdown();
+ virtual void startReplication(OperationContext* txn);
- virtual const ReplSettings& getSettings() const;
+ virtual void shutdown();
- virtual bool isReplEnabled() const;
+ virtual const ReplSettings& getSettings() const;
- virtual Mode getReplicationMode() const;
+ virtual bool isReplEnabled() const;
- virtual MemberState getMemberState() const;
+ virtual Mode getReplicationMode() const;
- virtual bool isInPrimaryOrSecondaryState() const;
+ virtual MemberState getMemberState() const;
- virtual Seconds getSlaveDelaySecs() const;
+ virtual bool isInPrimaryOrSecondaryState() const;
- virtual void clearSyncSourceBlacklist();
+ virtual Seconds getSlaveDelaySecs() const;
- virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
- OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern);
+ virtual void clearSyncSourceBlacklist();
- virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
- OperationContext* txn,
- const WriteConcernOptions& writeConcern);
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
+ OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern);
- virtual Status stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime);
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
+ OperationContext* txn, const WriteConcernOptions& writeConcern);
- virtual bool isMasterForReportingPurposes();
+ virtual Status stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime);
- virtual bool canAcceptWritesForDatabase(StringData dbName);
+ virtual bool isMasterForReportingPurposes();
- bool canAcceptWritesFor(const NamespaceString& ns) override;
+ virtual bool canAcceptWritesForDatabase(StringData dbName);
- virtual Status checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const;
+ bool canAcceptWritesFor(const NamespaceString& ns) override;
- virtual Status checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk);
+ virtual Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
- virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
+ virtual Status checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk);
- virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts);
+ virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
- virtual void setMyLastOptime(const OpTime& opTime);
+ virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts);
- virtual void resetMyLastOptime();
+ virtual void setMyLastOptime(const OpTime& opTime);
- virtual void setMyHeartbeatMessage(const std::string& msg);
+ virtual void resetMyLastOptime();
- virtual OpTime getMyLastOptime() const;
+ virtual void setMyHeartbeatMessage(const std::string& msg);
- virtual ReadAfterOpTimeResponse waitUntilOpTime(
- OperationContext* txn,
- const ReadAfterOpTimeArgs& settings) override;
+ virtual OpTime getMyLastOptime() const;
- virtual OID getElectionId();
+ virtual ReadAfterOpTimeResponse waitUntilOpTime(OperationContext* txn,
+ const ReadAfterOpTimeArgs& settings) override;
- virtual OID getMyRID() const;
+ virtual OID getElectionId();
- virtual int getMyId() const;
+ virtual OID getMyRID() const;
- virtual bool setFollowerMode(const MemberState& newState);
+ virtual int getMyId() const;
- virtual bool isWaitingForApplierToDrain();
+ virtual bool setFollowerMode(const MemberState& newState);
- virtual void signalDrainComplete(OperationContext*);
+ virtual bool isWaitingForApplierToDrain();
- virtual void signalUpstreamUpdater();
+ virtual void signalDrainComplete(OperationContext*);
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
+ virtual void signalUpstreamUpdater();
- virtual Status processReplSetGetStatus(BSONObjBuilder* result);
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
- virtual void fillIsMasterForReplSet(IsMasterResponse* result);
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result);
- virtual void appendSlaveInfoData(BSONObjBuilder* result);
+ virtual void fillIsMasterForReplSet(IsMasterResponse* result);
- virtual ReplicaSetConfig getConfig() const;
+ virtual void appendSlaveInfoData(BSONObjBuilder* result);
- virtual void processReplSetGetConfig(BSONObjBuilder* result);
+ virtual ReplicaSetConfig getConfig() const;
- virtual Status setMaintenanceMode(bool activate);
+ virtual void processReplSetGetConfig(BSONObjBuilder* result);
- virtual bool getMaintenanceMode();
+ virtual Status setMaintenanceMode(bool activate);
- virtual Status processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj);
+ virtual bool getMaintenanceMode();
- virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj);
+ virtual Status processReplSetSyncFrom(const HostAndPort& target, BSONObjBuilder* resultObj);
- virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response);
+ virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj);
- virtual Status processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj);
+ virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response);
- virtual Status processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj);
- virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj);
+ virtual Status processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj);
- virtual void incrementRollbackID();
+ virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj);
- virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj);
+ virtual void incrementRollbackID();
- virtual Status processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetFresh(const ReplSetFreshArgs& args, BSONObjBuilder* resultObj);
- virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
- long long* configVersion);
+ virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* resultObj);
- virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake);
+ virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
+ long long* configVersion);
- virtual bool buildsIndexes();
+ virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake);
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
+ virtual bool buildsIndexes();
- virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
- virtual WriteConcernOptions getGetLastErrorDefault();
+ virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
- virtual Status checkReplEnabledForCommand(BSONObjBuilder* result);
+ virtual WriteConcernOptions getGetLastErrorDefault();
- virtual HostAndPort chooseNewSyncSource();
+ virtual Status checkReplEnabledForCommand(BSONObjBuilder* result);
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
+ virtual HostAndPort chooseNewSyncSource();
- virtual void resetLastOpTimeFromOplog(OperationContext* txn);
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource);
+ virtual void resetLastOpTimeFromOplog(OperationContext* txn);
- virtual OpTime getLastCommittedOpTime() const;
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource);
- virtual Status processReplSetRequestVotes(OperationContext* txn,
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response);
+ virtual OpTime getLastCommittedOpTime() const;
- virtual Status processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm);
+ virtual Status processReplSetRequestVotes(OperationContext* txn,
+ const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response);
- virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder);
+ virtual Status processReplSetDeclareElectionWinner(const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm);
- virtual Status processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
- ReplSetHeartbeatResponse* response);
+ virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder);
- virtual bool isV1ElectionProtocol();
+ virtual Status processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
+ ReplSetHeartbeatResponse* response);
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
+ virtual bool isV1ElectionProtocol();
- virtual long long getTerm();
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
- virtual bool updateTerm(long long term);
+ virtual long long getTerm();
- private:
+ virtual bool updateTerm(long long term);
- const ReplSettings _settings;
- MemberState _memberState;
- OpTime _myLastOpTime;
- };
+private:
+ const ReplSettings _settings;
+ MemberState _memberState;
+ OpTime _myLastOpTime;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
index 2359de1f751..47f25a6bbdd 100644
--- a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
+++ b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
@@ -51,290 +51,269 @@ namespace mongo {
namespace repl {
namespace {
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
- }
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
} // namespace
- using executor::NetworkInterfaceMock;
+using executor::NetworkInterfaceMock;
- ReplicaSetConfig ReplCoordTest::assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
- }
+ReplicaSetConfig ReplCoordTest::assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
- ReplCoordTest::ReplCoordTest() : _callShutdown(false) {}
- ReplCoordTest::~ReplCoordTest() {}
+ReplCoordTest::ReplCoordTest() : _callShutdown(false) {}
+ReplCoordTest::~ReplCoordTest() {}
- void ReplCoordTest::setUp() {
- _settings.replSet = "mySet/node1:12345,node2:54321";
- }
+void ReplCoordTest::setUp() {
+ _settings.replSet = "mySet/node1:12345,node2:54321";
+}
- void ReplCoordTest::tearDown() {
- if (_externalState) {
- _externalState->setStoreLocalConfigDocumentToHang(false);
- }
- if (_callShutdown) {
- shutdown();
- }
+void ReplCoordTest::tearDown() {
+ if (_externalState) {
+ _externalState->setStoreLocalConfigDocumentToHang(false);
}
-
- void ReplCoordTest::assertRunUntil(Date_t newTime) {
- this->_net->runUntil(newTime);
- ASSERT_EQUALS(newTime, getNet()->now());
+ if (_callShutdown) {
+ shutdown();
}
-
- void ReplCoordTest::enterNetwork() {
- getNet()->enterNetwork();
- }
-
- void ReplCoordTest::exitNetwork() {
- getNet()->exitNetwork();
+}
+
+void ReplCoordTest::assertRunUntil(Date_t newTime) {
+ this->_net->runUntil(newTime);
+ ASSERT_EQUALS(newTime, getNet()->now());
+}
+
+void ReplCoordTest::enterNetwork() {
+ getNet()->enterNetwork();
+}
+
+void ReplCoordTest::exitNetwork() {
+ getNet()->exitNetwork();
+}
+
+void ReplCoordTest::addSelf(const HostAndPort& selfHost) {
+ getExternalState()->addSelf(selfHost);
+}
+
+void ReplCoordTest::init() {
+ invariant(!_repl);
+ invariant(!_callShutdown);
+
+ // PRNG seed for tests.
+ const int64_t seed = 0;
+
+ _topo = new TopologyCoordinatorImpl(Seconds(0));
+ _net = new NetworkInterfaceMock;
+ _storage = new StorageInterfaceMock;
+ _externalState = new ReplicationCoordinatorExternalStateMock;
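+    // Ownership of the raw pointers created above passes to ReplicationCoordinatorImpl
+    // below (see the ownership notes in the fixture header).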
+ _repl.reset(
+ new ReplicationCoordinatorImpl(_settings, _externalState, _net, _storage, _topo, seed));
+}
+
+void ReplCoordTest::init(const ReplSettings& settings) {
+ _settings = settings;
+ init();
+}
+
+void ReplCoordTest::init(const std::string& replSet) {
+ _settings.replSet = replSet;
+ init();
+}
+
+void ReplCoordTest::start() {
+ invariant(!_callShutdown);
+ // if we haven't initialized yet, do that first.
+ if (!_repl) {
+ init();
}
- void ReplCoordTest::addSelf(const HostAndPort& selfHost) {
- getExternalState()->addSelf(selfHost);
- }
+ OperationContextNoop txn;
+ _repl->startReplication(&txn);
+ _repl->waitForStartUpComplete();
+ _callShutdown = true;
+}
- void ReplCoordTest::init() {
- invariant(!_repl);
- invariant(!_callShutdown);
-
- // PRNG seed for tests.
- const int64_t seed = 0;
-
- _topo = new TopologyCoordinatorImpl(Seconds(0));
- _net = new NetworkInterfaceMock;
- _storage = new StorageInterfaceMock;
- _externalState = new ReplicationCoordinatorExternalStateMock;
- _repl.reset(new ReplicationCoordinatorImpl(_settings,
- _externalState,
- _net,
- _storage,
- _topo,
- seed));
- }
-
- void ReplCoordTest::init(const ReplSettings& settings) {
- _settings = settings;
+void ReplCoordTest::start(const BSONObj& configDoc, const HostAndPort& selfHost) {
+ if (!_repl) {
init();
}
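+    // Seed the mock external state so startup reads configDoc as the stored local config.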
+ _externalState->setLocalConfigDocument(StatusWith<BSONObj>(configDoc));
+ _externalState->addSelf(selfHost);
+ start();
+}
- void ReplCoordTest::init(const std::string& replSet) {
- _settings.replSet = replSet;
+void ReplCoordTest::start(const HostAndPort& selfHost) {
+ if (!_repl) {
init();
}
-
- void ReplCoordTest::start() {
- invariant(!_callShutdown);
- // if we haven't initialized yet, do that first.
- if (!_repl) {
- init();
- }
-
- OperationContextNoop txn;
- _repl->startReplication(&txn);
- _repl->waitForStartUpComplete();
- _callShutdown = true;
- }
-
- void ReplCoordTest::start(const BSONObj& configDoc, const HostAndPort& selfHost) {
- if (!_repl) {
- init();
- }
- _externalState->setLocalConfigDocument(StatusWith<BSONObj>(configDoc));
- _externalState->addSelf(selfHost);
- start();
- }
-
- void ReplCoordTest::start(const HostAndPort& selfHost) {
- if (!_repl) {
- init();
- }
- _externalState->addSelf(selfHost);
- start();
- }
-
- void ReplCoordTest::assertStartSuccess(
- const BSONObj& configDoc,
- const HostAndPort& selfHost) {
- start(configDoc, selfHost);
- ASSERT_NE(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- ResponseStatus ReplCoordTest::makeResponseStatus(const BSONObj& doc, Milliseconds millis) {
- log() << "Responding with " << doc;
- return ResponseStatus(RemoteCommandResponse(doc, millis));
- }
-
- void ReplCoordTest::simulateSuccessfulV1Election() {
- OperationContextReplMock txn;
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- NetworkInterfaceMock* net = getNet();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- ASSERT(replCoord->getMemberState().secondary()) <<
- replCoord->getMemberState().toString();
- while (!replCoord->getMemberState().primary()) {
- log() << "Waiting on network in state " << replCoord->getMemberState();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgsV1 hbArgs;
- Status status = hbArgs.initialize(request.cmdObj);
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
-
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "reason" << "" <<
- "term" << request.cmdObj["term"].Long() <<
- "voteGranted" << true)));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() ==
- "replSetDeclareElectionWinner") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "term" << request.cmdObj["term"].Long())));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
+ _externalState->addSelf(selfHost);
+ start();
+}
+
+void ReplCoordTest::assertStartSuccess(const BSONObj& configDoc, const HostAndPort& selfHost) {
+ start(configDoc, selfHost);
+ ASSERT_NE(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+ResponseStatus ReplCoordTest::makeResponseStatus(const BSONObj& doc, Milliseconds millis) {
+ log() << "Responding with " << doc;
+ return ResponseStatus(RemoteCommandResponse(doc, millis));
+}
+
+void ReplCoordTest::simulateSuccessfulV1Election() {
+ OperationContextReplMock txn;
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ NetworkInterfaceMock* net = getNet();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString();
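+    // Answer each outbound request with a canned success until the election completes:
+    // heartbeats, replSetRequestVotes, and replSetDeclareElectionWinner.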
+ while (!replCoord->getMemberState().primary()) {
+ log() << "Waiting on network in state " << replCoord->getMemberState();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgsV1 hbArgs;
+        Status status = hbArgs.initialize(request.cmdObj);
+        if (status.isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "reason"
+ << ""
+ << "term" << request.cmdObj["term"].Long()
+ << "voteGranted" << true)));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() ==
+ "replSetDeclareElectionWinner") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long())));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- ASSERT(replCoord->isWaitingForApplierToDrain());
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
-
- IsMasterResponse imResponse;
- replCoord->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- replCoord->signalDrainComplete(&txn);
- replCoord->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
-
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
}
-
- void ReplCoordTest::simulateSuccessfulElection() {
- OperationContextReplMock txn;
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- NetworkInterfaceMock* net = getNet();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- ASSERT(replCoord->getMemberState().secondary()) <<
- replCoord->getMemberState().toString();
- while (!replCoord->getMemberState().primary()) {
- log() << "Waiting on network in state " << replCoord->getMemberState();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setConfigVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj, false);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "fresher" << false <<
- "opTime" << Date_t() <<
- "veto" << false)));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "vote" << 1 <<
- "round" << request.cmdObj["round"].OID())));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
+ ASSERT(replCoord->isWaitingForApplierToDrain());
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+
+ IsMasterResponse imResponse;
+ replCoord->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ replCoord->signalDrainComplete(&txn);
+ replCoord->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+}
+
+void ReplCoordTest::simulateSuccessfulElection() {
+ OperationContextReplMock txn;
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ NetworkInterfaceMock* net = getNet();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString();
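+    // Same approach as the V1 variant above, but for the protocol-version-0 election
+    // commands replSetFresh and replSetElect.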
+ while (!replCoord->getMemberState().primary()) {
+ log() << "Waiting on network in state " << replCoord->getMemberState();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj, false);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime" << Date_t()
+ << "veto" << false)));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") {
+ net->scheduleResponse(noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "vote" << 1 << "round"
+ << request.cmdObj["round"].OID())));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- ASSERT(replCoord->isWaitingForApplierToDrain());
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
-
- IsMasterResponse imResponse;
- replCoord->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- replCoord->signalDrainComplete(&txn);
- replCoord->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
-
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
}
-
- void ReplCoordTest::simulateStepDownOnIsolation() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- NetworkInterfaceMock* net = getNet();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
- while (replCoord->getMemberState().primary()) {
- log() << "Waiting on network in state " << replCoord->getMemberState();
- getNet()->enterNetwork();
- net->runUntil(net->now() + Seconds(10));
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::NetworkTimeout, "Nobody's home"));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
+ ASSERT(replCoord->isWaitingForApplierToDrain());
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+
+ IsMasterResponse imResponse;
+ replCoord->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ replCoord->signalDrainComplete(&txn);
+ replCoord->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+}
+
+void ReplCoordTest::simulateStepDownOnIsolation() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ NetworkInterfaceMock* net = getNet();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
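+    // Time out every heartbeat until the node concludes it is isolated and steps down.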
+ while (replCoord->getMemberState().primary()) {
+ log() << "Waiting on network in state " << replCoord->getMemberState();
+ getNet()->enterNetwork();
+ net->runUntil(net->now() + Seconds(10));
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ net->scheduleResponse(
+ noi, net->now(), ResponseStatus(ErrorCodes::NetworkTimeout, "Nobody's home"));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
}
-
- void ReplCoordTest::shutdown() {
- invariant(_callShutdown);
- _net->exitNetwork();
- _repl->shutdown();
- _callShutdown = false;
- }
-
- int64_t ReplCoordTest::countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
+}
+
+void ReplCoordTest::shutdown() {
+ invariant(_callShutdown);
+ _net->exitNetwork();
+ _repl->shutdown();
+ _callShutdown = false;
+}
+
+int64_t ReplCoordTest::countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.h b/src/mongo/db/repl/replication_coordinator_test_fixture.h
index 22ab10a1e25..d38ef060eb7 100644
--- a/src/mongo/db/repl/replication_coordinator_test_fixture.h
+++ b/src/mongo/db/repl/replication_coordinator_test_fixture.h
@@ -37,162 +37,170 @@
namespace mongo {
- class BSONObj;
- struct HostAndPort;
+class BSONObj;
+struct HostAndPort;
namespace executor {
- class NetworkInterfaceMock;
-} // namespace executor
+class NetworkInterfaceMock;
+} // namespace executor
namespace repl {
- class ReplicaSetConfig;
- class ReplicationCoordinatorExternalStateMock;
- class ReplicationCoordinatorImpl;
- class StorageInterfaceMock;
- class TopologyCoordinatorImpl;
-
- /**
- * Fixture for testing ReplicationCoordinatorImpl behaviors.
- */
- class ReplCoordTest : public mongo::unittest::Test {
- public:
- /**
- * Makes a ResponseStatus with the given "doc" response and optional elapsed time "millis".
- */
- static ResponseStatus makeResponseStatus(const BSONObj& doc,
- Milliseconds millis = Milliseconds(0));
-
- /**
- * Constructs a ReplicaSetConfig from the given BSON, or raises a test failure exception.
- */
- static ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBSON);
-
- ReplCoordTest();
- virtual ~ReplCoordTest();
-
- protected:
- virtual void setUp();
- virtual void tearDown();
-
- /**
- * Gets the network mock.
- */
- executor::NetworkInterfaceMock* getNet() { return _net; }
-
- /**
- * Gets the replication coordinator under test.
- */
- ReplicationCoordinatorImpl* getReplCoord() { return _repl.get();}
-
- /**
- * Gets the topology coordinator used by the replication coordinator under test.
- */
- TopologyCoordinatorImpl& getTopoCoord() { return *_topo;}
-
- /**
- * Gets the external state used by the replication coordinator under test.
- */
- ReplicationCoordinatorExternalStateMock* getExternalState() { return _externalState; }
-
- /**
- * Adds "selfHost" to the list of hosts that identify as "this" host.
- */
- void addSelf(const HostAndPort& selfHost);
-
- /**
- * Moves time forward in the network until the new time, and asserts if now!=newTime after
- */
- void assertRunUntil(Date_t newTime);
-
- /**
- * Shorthand for getNet()->enterNetwork()
- */
- void enterNetwork();
-
- /**
- * Shorthand for getNet()->exitNetwork()
- */
- void exitNetwork();
-
- /**
- * Initializes the objects under test; this behavior is optional, in case you need to call
- * any methods on the network or coordinator objects before calling start.
- */
- void init();
-
- /**
- * Initializes the objects under test, using the given "settings".
- */
- void init(const ReplSettings& settings);
-
- /**
- * Initializes the objects under test, using "replSet" as the name of the replica set under
- * test.
- */
- void init(const std::string& replSet);
-
- /**
- * Starts the replication coordinator under test, with no local config document and
- * no notion of what host or hosts are represented by the network interface.
- */
- void start();
-
- /**
- * Starts the replication coordinator under test, with the given configuration in
- * local storage and the given host name.
- */
- void start(const BSONObj& configDoc, const HostAndPort& selfHost);
-
- /**
- * Starts the replication coordinator under test with the given host name.
- */
- void start(const HostAndPort& selfHost);
-
- /**
- * Brings the repl coord from SECONDARY to PRIMARY by simulating the messages required to
- * elect it.
- *
- * Behavior is unspecified if node does not have a clean config, is not in SECONDARY, etc.
- */
- void simulateSuccessfulElection();
- void simulateSuccessfulV1Election();
-
- /**
- * Brings the repl coord from PRIMARY to SECONDARY by simulating a period of time in which
- * all heartbeats respond with an error condition, such as time out.
- */
- void simulateStepDownOnIsolation();
-
- /**
- * Asserts that calling start(configDoc, selfHost) successfully initiates the
- * ReplicationCoordinator under test.
- */
- void assertStartSuccess(const BSONObj& configDoc, const HostAndPort& selfHost);
-
- /**
- * Shuts down the objects under test.
- */
- void shutdown();
-
- /**
- * Returns the number of collected log lines containing "needle".
- */
- int64_t countLogLinesContaining(const std::string& needle);
-
- private:
- std::unique_ptr<ReplicationCoordinatorImpl> _repl;
- // Owned by ReplicationCoordinatorImpl
- TopologyCoordinatorImpl* _topo;
- // Owned by ReplicationCoordinatorImpl
- executor::NetworkInterfaceMock* _net;
- // Owned by ReplicationCoordinatorImpl
- StorageInterfaceMock* _storage;
- // Owned by ReplicationCoordinatorImpl
- ReplicationCoordinatorExternalStateMock* _externalState;
- ReplSettings _settings;
- bool _callShutdown;
- };
+class ReplicaSetConfig;
+class ReplicationCoordinatorExternalStateMock;
+class ReplicationCoordinatorImpl;
+class StorageInterfaceMock;
+class TopologyCoordinatorImpl;
+
+/**
+ * Fixture for testing ReplicationCoordinatorImpl behaviors.
+ */
+class ReplCoordTest : public mongo::unittest::Test {
+public:
+ /**
+ * Makes a ResponseStatus with the given "doc" response and optional elapsed time "millis".
+ */
+ static ResponseStatus makeResponseStatus(const BSONObj& doc,
+ Milliseconds millis = Milliseconds(0));
+
+ /**
+ * Constructs a ReplicaSetConfig from the given BSON, or raises a test failure exception.
+ */
+ static ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBSON);
+
+ ReplCoordTest();
+ virtual ~ReplCoordTest();
+
+protected:
+ virtual void setUp();
+ virtual void tearDown();
+
+ /**
+ * Gets the network mock.
+ */
+ executor::NetworkInterfaceMock* getNet() {
+ return _net;
+ }
+
+ /**
+ * Gets the replication coordinator under test.
+ */
+ ReplicationCoordinatorImpl* getReplCoord() {
+ return _repl.get();
+ }
+
+ /**
+ * Gets the topology coordinator used by the replication coordinator under test.
+ */
+ TopologyCoordinatorImpl& getTopoCoord() {
+ return *_topo;
+ }
+
+ /**
+ * Gets the external state used by the replication coordinator under test.
+ */
+ ReplicationCoordinatorExternalStateMock* getExternalState() {
+ return _externalState;
+ }
+
+ /**
+ * Adds "selfHost" to the list of hosts that identify as "this" host.
+ */
+ void addSelf(const HostAndPort& selfHost);
+
+ /**
+     * Moves time forward in the network to newTime and asserts that the network's clock
+     * then equals newTime.
+ */
+ void assertRunUntil(Date_t newTime);
+
+ /**
+ * Shorthand for getNet()->enterNetwork()
+ */
+ void enterNetwork();
+
+ /**
+ * Shorthand for getNet()->exitNetwork()
+ */
+ void exitNetwork();
+
+ /**
+     * Initializes the objects under test; calling this explicitly is optional, since
+     * start() calls it if needed, but doing so lets you call methods on the network or
+     * coordinator objects before calling start.
+ */
+ void init();
+
+ /**
+ * Initializes the objects under test, using the given "settings".
+ */
+ void init(const ReplSettings& settings);
+
+ /**
+ * Initializes the objects under test, using "replSet" as the name of the replica set under
+ * test.
+ */
+ void init(const std::string& replSet);
+
+ /**
+ * Starts the replication coordinator under test, with no local config document and
+ * no notion of what host or hosts are represented by the network interface.
+ */
+ void start();
+
+ /**
+ * Starts the replication coordinator under test, with the given configuration in
+ * local storage and the given host name.
+ */
+ void start(const BSONObj& configDoc, const HostAndPort& selfHost);
+
+ /**
+ * Starts the replication coordinator under test with the given host name.
+ */
+ void start(const HostAndPort& selfHost);
+
+ /**
+ * Brings the repl coord from SECONDARY to PRIMARY by simulating the messages required to
+ * elect it.
+ *
+     * Behavior is unspecified if the node does not have a clean config, is not in
+     * SECONDARY state, etc.
+ */
+ void simulateSuccessfulElection();
+ void simulateSuccessfulV1Election();
+
+ /**
+ * Brings the repl coord from PRIMARY to SECONDARY by simulating a period of time in which
+ * all heartbeats respond with an error condition, such as time out.
+ */
+ void simulateStepDownOnIsolation();
+
+ /**
+ * Asserts that calling start(configDoc, selfHost) successfully initiates the
+ * ReplicationCoordinator under test.
+ */
+ void assertStartSuccess(const BSONObj& configDoc, const HostAndPort& selfHost);
+
+ /**
+ * Shuts down the objects under test.
+ */
+ void shutdown();
+
+ /**
+ * Returns the number of collected log lines containing "needle".
+ */
+ int64_t countLogLinesContaining(const std::string& needle);
+
+private:
+ std::unique_ptr<ReplicationCoordinatorImpl> _repl;
+ // Owned by ReplicationCoordinatorImpl
+ TopologyCoordinatorImpl* _topo;
+ // Owned by ReplicationCoordinatorImpl
+ executor::NetworkInterfaceMock* _net;
+ // Owned by ReplicationCoordinatorImpl
+ StorageInterfaceMock* _storage;
+ // Owned by ReplicationCoordinatorImpl
+ ReplicationCoordinatorExternalStateMock* _externalState;
+ ReplSettings _settings;
+ bool _callShutdown;
+};
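+
+// Typical usage: derive a test from ReplCoordTest, call assertStartSuccess() with a config
+// document and self host, then drive behavior with the simulate*() helpers and inspect
+// state through getReplCoord(); see the ReadAfter* tests earlier in this change for the
+// basic startup pattern.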
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_executor.cpp b/src/mongo/db/repl/replication_executor.cpp
index a944e76751f..f6f04e51a8d 100644
--- a/src/mongo/db/repl/replication_executor.cpp
+++ b/src/mongo/db/repl/replication_executor.cpp
@@ -46,563 +46,515 @@ namespace mongo {
namespace repl {
namespace {
- stdx::function<void ()> makeNoExcept(const stdx::function<void ()> &fn);
+stdx::function<void()> makeNoExcept(const stdx::function<void()>& fn);
} // namespace
- using executor::NetworkInterface;
-
- ReplicationExecutor::ReplicationExecutor(NetworkInterface* netInterface,
- StorageInterface* storageInterface,
- int64_t prngSeed) :
- _random(prngSeed),
- _networkInterface(netInterface),
- _storageInterface(storageInterface),
- _totalEventWaiters(0),
- _inShutdown(false),
- _dblockWorkers(OldThreadPool::DoNotStartThreadsTag(),
- 3,
- "replExecDBWorker-"),
- _dblockTaskRunner(
- &_dblockWorkers,
- stdx::bind(&StorageInterface::createOperationContext, storageInterface)),
- _dblockExclusiveLockTaskRunner(
- &_dblockWorkers,
- stdx::bind(&StorageInterface::createOperationContext, storageInterface)),
- _nextId(0) {
- }
-
- ReplicationExecutor::~ReplicationExecutor() {}
-
- std::string ReplicationExecutor::getDiagnosticString() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _getDiagnosticString_inlock();
- }
-
- std::string ReplicationExecutor::_getDiagnosticString_inlock() const {
- str::stream output;
- output << "ReplicationExecutor";
- output << " networkInProgress:" << _networkInProgressQueue.size();
- output << " dbWorkInProgress:" << _dbWorkInProgressQueue.size();
- output << " exclusiveInProgress:" << _exclusiveLockInProgressQueue.size();
- output << " sleeperQueue:" << _sleepersQueue.size();
- output << " ready:" << _readyQueue.size();
- output << " free:" << _freeQueue.size();
- output << " unsignaledEvents:" << _unsignaledEvents.size();
- output << " eventWaiters:" << _totalEventWaiters;
- output << " shuttingDown:" << _inShutdown;
- output << " networkInterface:" << _networkInterface->getDiagnosticString();
- return output;
- }
-
- Date_t ReplicationExecutor::now() {
- return _networkInterface->now();
- }
-
- void ReplicationExecutor::run() {
- setThreadName("ReplicationExecutor");
- _networkInterface->startup();
- _dblockWorkers.startThreads();
- std::pair<WorkItem, CallbackHandle> work;
- while ((work = getWork()).first.callback.isValid()) {
- {
- stdx::lock_guard<stdx::mutex> lk(_terribleExLockSyncMutex);
- const Callback* callback = _getCallbackFromHandle(work.first.callback);
- const Status inStatus = callback->_isCanceled ?
- Status(ErrorCodes::CallbackCanceled, "Callback canceled") :
- Status::OK();
- makeNoExcept(stdx::bind(callback->_callbackFn,
- CallbackArgs(this, work.second, inStatus)))();
- }
- signalEvent(work.first.finishedEvent);
- }
- finishShutdown();
- _networkInterface->shutdown();
- }
-
- void ReplicationExecutor::shutdown() {
- // Correct shutdown needs to:
- // * Disable future work queueing.
- // * drain all of the unsignaled events, sleepers, and ready queue, by running those
- // callbacks with a "shutdown" or "canceled" status.
- // * Signal all threads blocked in waitForEvent, and wait for them to return from that method.
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _inShutdown = true;
-
- _readyQueue.splice(_readyQueue.end(), _dbWorkInProgressQueue);
- _readyQueue.splice(_readyQueue.end(), _exclusiveLockInProgressQueue);
- _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue);
- _readyQueue.splice(_readyQueue.end(), _sleepersQueue);
- for (auto event : _unsignaledEvents) {
- _readyQueue.splice(_readyQueue.end(), _getEventFromHandle(event)->_waiters);
- }
- for (auto readyWork : _readyQueue) {
- _getCallbackFromHandle(readyWork.callback)->_isCanceled = true;
- }
- _networkInterface->signalWorkAvailable();
- }
-
- void ReplicationExecutor::finishShutdown() {
- _dblockExclusiveLockTaskRunner.cancel();
- _dblockTaskRunner.cancel();
- _dblockWorkers.join();
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- invariant(_inShutdown);
- invariant(_dbWorkInProgressQueue.empty());
- invariant(_exclusiveLockInProgressQueue.empty());
- invariant(_readyQueue.empty());
- invariant(_sleepersQueue.empty());
-
- while (!_unsignaledEvents.empty()) {
- EventList::iterator eventIter = _unsignaledEvents.begin();
- invariant(_getEventFromHandle(*eventIter)->_waiters.empty());
- signalEvent_inlock(*eventIter);
- }
-
- while (_totalEventWaiters > 0)
- _noMoreWaitingThreads.wait(lk);
-
- invariant(_dbWorkInProgressQueue.empty());
- invariant(_exclusiveLockInProgressQueue.empty());
- invariant(_readyQueue.empty());
- invariant(_sleepersQueue.empty());
- invariant(_unsignaledEvents.empty());
- }
-
- void ReplicationExecutor::maybeNotifyShutdownComplete_inlock() {
- if (_totalEventWaiters == 0)
- _noMoreWaitingThreads.notify_all();
- }
-
- StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return makeEvent_inlock();
- }
-
- StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent_inlock() {
- if (_inShutdown)
- return StatusWith<EventHandle>(ErrorCodes::ShutdownInProgress, "Shutdown in progress");
-
- _unsignaledEvents.emplace_back();
- auto event = std::make_shared<Event>(this, --_unsignaledEvents.end());
- setEventForHandle(&_unsignaledEvents.back(), std::move(event));
- return _unsignaledEvents.back();
- }
-
- void ReplicationExecutor::signalEvent(const EventHandle& eventHandle) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- signalEvent_inlock(eventHandle);
- }
-
- void ReplicationExecutor::signalEvent_inlock(const EventHandle& eventHandle) {
- Event* event = _getEventFromHandle(eventHandle);
- event->_signal_inlock();
- _unsignaledEvents.erase(event->_iter);
- }
-
- void ReplicationExecutor::waitForEvent(const EventHandle& event) {
- _getEventFromHandle(event)->waitUntilSignaled();
- }
-
- void ReplicationExecutor::cancel(const CallbackHandle& cbHandle) {
- _getCallbackFromHandle(cbHandle)->cancel();
- };
-
- void ReplicationExecutor::wait(const CallbackHandle& cbHandle) {
- _getCallbackFromHandle(cbHandle)->waitForCompletion();
- };
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::onEvent(
- const EventHandle& eventHandle,
- const CallbackFn& work) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- WorkQueue* queue = &_readyQueue;
- Event* event = _getEventFromHandle(eventHandle);
- if (!event->_isSignaled) {
- queue = &event->_waiters;
- }
- else {
- queue = &_readyQueue;
- }
- return enqueueWork_inlock(queue, work);
- }
-
- static void remoteCommandFinished(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicationExecutor::RemoteCommandCallbackFn& cb,
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
-
- if (cbData.status.isOK()) {
- cb(ReplicationExecutor::RemoteCommandCallbackArgs(
- cbData.executor, cbData.myHandle, request, response));
- }
- else {
- cb(ReplicationExecutor::RemoteCommandCallbackArgs(
- cbData.executor,
- cbData.myHandle,
- request,
- ResponseStatus(cbData.status)));
+using executor::NetworkInterface;
+
+ReplicationExecutor::ReplicationExecutor(NetworkInterface* netInterface,
+ StorageInterface* storageInterface,
+ int64_t prngSeed)
+ : _random(prngSeed),
+ _networkInterface(netInterface),
+ _storageInterface(storageInterface),
+ _totalEventWaiters(0),
+ _inShutdown(false),
+ _dblockWorkers(OldThreadPool::DoNotStartThreadsTag(), 3, "replExecDBWorker-"),
+ _dblockTaskRunner(&_dblockWorkers,
+ stdx::bind(&StorageInterface::createOperationContext, storageInterface)),
+ _dblockExclusiveLockTaskRunner(
+ &_dblockWorkers, stdx::bind(&StorageInterface::createOperationContext, storageInterface)),
+ _nextId(0) {}
+
+ReplicationExecutor::~ReplicationExecutor() {}
+
+std::string ReplicationExecutor::getDiagnosticString() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _getDiagnosticString_inlock();
+}
+
+std::string ReplicationExecutor::_getDiagnosticString_inlock() const {
+ str::stream output;
+ output << "ReplicationExecutor";
+ output << " networkInProgress:" << _networkInProgressQueue.size();
+ output << " dbWorkInProgress:" << _dbWorkInProgressQueue.size();
+ output << " exclusiveInProgress:" << _exclusiveLockInProgressQueue.size();
+ output << " sleeperQueue:" << _sleepersQueue.size();
+ output << " ready:" << _readyQueue.size();
+ output << " free:" << _freeQueue.size();
+ output << " unsignaledEvents:" << _unsignaledEvents.size();
+ output << " eventWaiters:" << _totalEventWaiters;
+ output << " shuttingDown:" << _inShutdown;
+ output << " networkInterface:" << _networkInterface->getDiagnosticString();
+ return output;
+}
+
+Date_t ReplicationExecutor::now() {
+ return _networkInterface->now();
+}
+
+void ReplicationExecutor::run() {
+ setThreadName("ReplicationExecutor");
+ _networkInterface->startup();
+ _dblockWorkers.startThreads();
+ std::pair<WorkItem, CallbackHandle> work;
+ while ((work = getWork()).first.callback.isValid()) {
+ {
+ stdx::lock_guard<stdx::mutex> lk(_terribleExLockSyncMutex);
+ const Callback* callback = _getCallbackFromHandle(work.first.callback);
+ const Status inStatus = callback->_isCanceled
+ ? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
+ : Status::OK();
+ makeNoExcept(
+ stdx::bind(callback->_callbackFn, CallbackArgs(this, work.second, inStatus)))();
}
- }
-
- static void remoteCommandFailedEarly(
- const ReplicationExecutor::CallbackArgs& cbData,
- const ReplicationExecutor::RemoteCommandCallbackFn& cb,
- const RemoteCommandRequest& request) {
-
- invariant(!cbData.status.isOK());
+ signalEvent(work.first.finishedEvent);
+ }
+ finishShutdown();
+ _networkInterface->shutdown();
+}
+
+void ReplicationExecutor::shutdown() {
+ // Correct shutdown needs to:
+ // * Disable future work queueing.
+ // * Drain all of the unsignaled events, sleepers, and ready queue, by running those
+ // callbacks with a "shutdown" or "canceled" status.
+ // * Signal all threads blocked in waitForEvent, and wait for them to return from that method.
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _inShutdown = true;
+
+ _readyQueue.splice(_readyQueue.end(), _dbWorkInProgressQueue);
+ _readyQueue.splice(_readyQueue.end(), _exclusiveLockInProgressQueue);
+ _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue);
+ _readyQueue.splice(_readyQueue.end(), _sleepersQueue);
+ for (auto event : _unsignaledEvents) {
+ _readyQueue.splice(_readyQueue.end(), _getEventFromHandle(event)->_waiters);
+ }
+ for (auto readyWork : _readyQueue) {
+ _getCallbackFromHandle(readyWork.callback)->_isCanceled = true;
+ }
+ _networkInterface->signalWorkAvailable();
+}
+
+void ReplicationExecutor::finishShutdown() {
+ _dblockExclusiveLockTaskRunner.cancel();
+ _dblockTaskRunner.cancel();
+ _dblockWorkers.join();
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ invariant(_inShutdown);
+ invariant(_dbWorkInProgressQueue.empty());
+ invariant(_exclusiveLockInProgressQueue.empty());
+ invariant(_readyQueue.empty());
+ invariant(_sleepersQueue.empty());
+
+ while (!_unsignaledEvents.empty()) {
+ EventList::iterator eventIter = _unsignaledEvents.begin();
+ invariant(_getEventFromHandle(*eventIter)->_waiters.empty());
+ signalEvent_inlock(*eventIter);
+ }
+
+ while (_totalEventWaiters > 0)
+ _noMoreWaitingThreads.wait(lk);
+
+ invariant(_dbWorkInProgressQueue.empty());
+ invariant(_exclusiveLockInProgressQueue.empty());
+ invariant(_readyQueue.empty());
+ invariant(_sleepersQueue.empty());
+ invariant(_unsignaledEvents.empty());
+}
+
+void ReplicationExecutor::maybeNotifyShutdownComplete_inlock() {
+ if (_totalEventWaiters == 0)
+ _noMoreWaitingThreads.notify_all();
+}
+
+StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return makeEvent_inlock();
+}
+
+StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent_inlock() {
+ if (_inShutdown)
+ return StatusWith<EventHandle>(ErrorCodes::ShutdownInProgress, "Shutdown in progress");
+
+ _unsignaledEvents.emplace_back();
+ auto event = std::make_shared<Event>(this, --_unsignaledEvents.end());
+ setEventForHandle(&_unsignaledEvents.back(), std::move(event));
+ return _unsignaledEvents.back();
+}
+
+void ReplicationExecutor::signalEvent(const EventHandle& eventHandle) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ signalEvent_inlock(eventHandle);
+}
+
+void ReplicationExecutor::signalEvent_inlock(const EventHandle& eventHandle) {
+ Event* event = _getEventFromHandle(eventHandle);
+ event->_signal_inlock();
+ _unsignaledEvents.erase(event->_iter);
+}
+
+void ReplicationExecutor::waitForEvent(const EventHandle& event) {
+ _getEventFromHandle(event)->waitUntilSignaled();
+}
+
+void ReplicationExecutor::cancel(const CallbackHandle& cbHandle) {
+ _getCallbackFromHandle(cbHandle)->cancel();
+};
+
+void ReplicationExecutor::wait(const CallbackHandle& cbHandle) {
+ _getCallbackFromHandle(cbHandle)->waitForCompletion();
+};
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::onEvent(
+ const EventHandle& eventHandle, const CallbackFn& work) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ WorkQueue* queue = &_readyQueue;
+ Event* event = _getEventFromHandle(eventHandle);
+ if (!event->_isSignaled) {
+ queue = &event->_waiters;
+ } else {
+ queue = &_readyQueue;
+ }
+ return enqueueWork_inlock(queue, work);
+}
+
+static void remoteCommandFinished(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicationExecutor::RemoteCommandCallbackFn& cb,
+ const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ if (cbData.status.isOK()) {
cb(ReplicationExecutor::RemoteCommandCallbackArgs(
- cbData.executor,
- cbData.myHandle,
- request,
- ResponseStatus(cbData.status)));
- }
-
- void ReplicationExecutor::_finishRemoteCommand(
- const RemoteCommandRequest& request,
- const ResponseStatus& response,
- const CallbackHandle& cbHandle,
- const uint64_t expectedHandleGeneration,
- const RemoteCommandCallbackFn& cb) {
-
- Callback* callback = _getCallbackFromHandle(cbHandle);
- const WorkQueue::iterator iter = callback->_iter;
-
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_inShutdown) {
- return;
- }
-
- if (expectedHandleGeneration != iter->generation) {
- return;
- }
-
- LOG(4) << "Received remote response: "
- << (response.isOK() ? response.getValue().toString() :
- response.getStatus().toString());
-
- callback->_callbackFn = stdx::bind(remoteCommandFinished,
- stdx::placeholders::_1,
- cb,
- request,
- response);
- _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue, iter);
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleRemoteCommand(
- const RemoteCommandRequest& request,
- const RemoteCommandCallbackFn& cb) {
- RemoteCommandRequest scheduledRequest = request;
- if (request.timeout == RemoteCommandRequest::kNoTimeout) {
- scheduledRequest.expirationDate = RemoteCommandRequest::kNoExpirationDate;
- }
- else {
- scheduledRequest.expirationDate = _networkInterface->now() + scheduledRequest.timeout;
- }
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- StatusWith<CallbackHandle> handle = enqueueWork_inlock(
- &_networkInProgressQueue,
- stdx::bind(remoteCommandFailedEarly,
- stdx::placeholders::_1,
- cb,
- scheduledRequest));
- if (handle.isOK()) {
- _getCallbackFromHandle(handle.getValue())->_iter->isNetworkOperation = true;
-
- LOG(4) << "Scheduling remote request: " << request.toString();
-
- _networkInterface->startCommand(
- handle.getValue(),
- scheduledRequest,
- stdx::bind(&ReplicationExecutor::_finishRemoteCommand,
+ cbData.executor, cbData.myHandle, request, response));
+ } else {
+ cb(ReplicationExecutor::RemoteCommandCallbackArgs(
+ cbData.executor, cbData.myHandle, request, ResponseStatus(cbData.status)));
+ }
+}
+
+static void remoteCommandFailedEarly(const ReplicationExecutor::CallbackArgs& cbData,
+ const ReplicationExecutor::RemoteCommandCallbackFn& cb,
+ const RemoteCommandRequest& request) {
+ invariant(!cbData.status.isOK());
+ cb(ReplicationExecutor::RemoteCommandCallbackArgs(
+ cbData.executor, cbData.myHandle, request, ResponseStatus(cbData.status)));
+}
+
+void ReplicationExecutor::_finishRemoteCommand(const RemoteCommandRequest& request,
+ const ResponseStatus& response,
+ const CallbackHandle& cbHandle,
+ const uint64_t expectedHandleGeneration,
+ const RemoteCommandCallbackFn& cb) {
+ Callback* callback = _getCallbackFromHandle(cbHandle);
+ const WorkQueue::iterator iter = callback->_iter;
+
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_inShutdown) {
+ return;
+ }
+
+ if (expectedHandleGeneration != iter->generation) {
+ return;
+ }
+
+ LOG(4) << "Received remote response: " << (response.isOK() ? response.getValue().toString()
+ : response.getStatus().toString());
+
+ callback->_callbackFn =
+ stdx::bind(remoteCommandFinished, stdx::placeholders::_1, cb, request, response);
+ _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue, iter);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleRemoteCommand(
+ const RemoteCommandRequest& request, const RemoteCommandCallbackFn& cb) {
+ RemoteCommandRequest scheduledRequest = request;
+ if (request.timeout == RemoteCommandRequest::kNoTimeout) {
+ scheduledRequest.expirationDate = RemoteCommandRequest::kNoExpirationDate;
+ } else {
+ scheduledRequest.expirationDate = _networkInterface->now() + scheduledRequest.timeout;
+ }
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ StatusWith<CallbackHandle> handle = enqueueWork_inlock(
+ &_networkInProgressQueue,
+ stdx::bind(remoteCommandFailedEarly, stdx::placeholders::_1, cb, scheduledRequest));
+ if (handle.isOK()) {
+ _getCallbackFromHandle(handle.getValue())->_iter->isNetworkOperation = true;
+
+ LOG(4) << "Scheduling remote request: " << request.toString();
+
+ _networkInterface->startCommand(
+ handle.getValue(),
+ scheduledRequest,
+ stdx::bind(&ReplicationExecutor::_finishRemoteCommand,
+ this,
+ scheduledRequest,
+ stdx::placeholders::_1,
+ handle.getValue(),
+ _getCallbackFromHandle(handle.getValue())->_iter->generation,
+ cb));
+ }
+ return handle;
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWork(
+ const CallbackFn& work) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _networkInterface->signalWorkAvailable();
+ return enqueueWork_inlock(&_readyQueue, work);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWorkAt(
+ Date_t when, const CallbackFn& work) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ WorkQueue temp;
+ StatusWith<CallbackHandle> cbHandle = enqueueWork_inlock(&temp, work);
+ if (!cbHandle.isOK())
+ return cbHandle;
+ _getCallbackFromHandle(cbHandle.getValue())->_iter->readyDate = when;
+ WorkQueue::iterator insertBefore = _sleepersQueue.begin();
+ while (insertBefore != _sleepersQueue.end() && insertBefore->readyDate <= when)
+ ++insertBefore;
+ _sleepersQueue.splice(insertBefore, temp, temp.begin());
+ return cbHandle;
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleDBWork(
+ const CallbackFn& work) {
+ return scheduleDBWork(work, NamespaceString(), MODE_NONE);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleDBWork(
+ const CallbackFn& work, const NamespaceString& nss, LockMode mode) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_dbWorkInProgressQueue, work);
+ if (handle.isOK()) {
+ auto doOp = stdx::bind(&ReplicationExecutor::_doOperation,
this,
- scheduledRequest,
stdx::placeholders::_1,
+ stdx::placeholders::_2,
handle.getValue(),
- _getCallbackFromHandle(handle.getValue())->_iter->generation,
- cb));
- }
- return handle;
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWork(
- const CallbackFn& work) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _networkInterface->signalWorkAvailable();
- return enqueueWork_inlock(&_readyQueue, work);
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWorkAt(
- Date_t when,
- const CallbackFn& work) {
-
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- WorkQueue temp;
- StatusWith<CallbackHandle> cbHandle = enqueueWork_inlock(&temp, work);
- if (!cbHandle.isOK())
- return cbHandle;
- _getCallbackFromHandle(cbHandle.getValue())->_iter->readyDate = when;
- WorkQueue::iterator insertBefore = _sleepersQueue.begin();
- while (insertBefore != _sleepersQueue.end() && insertBefore->readyDate <= when)
- ++insertBefore;
- _sleepersQueue.splice(insertBefore, temp, temp.begin());
- return cbHandle;
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle>
- ReplicationExecutor::scheduleDBWork(const CallbackFn& work) {
- return scheduleDBWork(work, NamespaceString(), MODE_NONE);
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle>
- ReplicationExecutor::scheduleDBWork(const CallbackFn& work,
- const NamespaceString& nss,
- LockMode mode) {
-
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_dbWorkInProgressQueue,
- work);
- if (handle.isOK()) {
- auto doOp = stdx::bind(
- &ReplicationExecutor::_doOperation,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- handle.getValue(),
- &_dbWorkInProgressQueue,
- nullptr);
- auto task = [doOp](OperationContext* txn, const Status& status) {
- makeNoExcept(stdx::bind(doOp, txn, status))();
- return TaskRunner::NextAction::kDisposeOperationContext;
- };
- if (mode == MODE_NONE && nss.ns().empty()) {
- _dblockTaskRunner.schedule(task);
- }
- else {
- _dblockTaskRunner.schedule(DatabaseTask::makeCollectionLockTask(task, nss, mode));
- }
- }
- return handle;
- }
-
- void ReplicationExecutor::_doOperation(OperationContext* txn,
- const Status& taskRunnerStatus,
- const CallbackHandle& cbHandle,
- WorkQueue* workQueue,
- stdx::mutex* terribleExLockSyncMutex) {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (_inShutdown)
- return;
- Callback* callback = _getCallbackFromHandle(cbHandle);
- const WorkQueue::iterator iter = callback->_iter;
- iter->callback = CallbackHandle();
- _freeQueue.splice(_freeQueue.begin(), *workQueue, iter);
- lk.unlock();
- {
- std::unique_ptr<stdx::lock_guard<stdx::mutex> > terribleLock(
- terribleExLockSyncMutex ?
- new stdx::lock_guard<stdx::mutex>(*terribleExLockSyncMutex) :
- nullptr);
- // Only possible task runner error status is CallbackCanceled.
- callback->_callbackFn(CallbackArgs(this,
- cbHandle,
- (callback->_isCanceled || !taskRunnerStatus.isOK() ?
- Status(ErrorCodes::CallbackCanceled,
- "Callback canceled") :
- Status::OK()),
- txn));
+ &_dbWorkInProgressQueue,
+ nullptr);
+ auto task = [doOp](OperationContext* txn, const Status& status) {
+ makeNoExcept(stdx::bind(doOp, txn, status))();
+ return TaskRunner::NextAction::kDisposeOperationContext;
+ };
+ if (mode == MODE_NONE && nss.ns().empty()) {
+ _dblockTaskRunner.schedule(task);
+ } else {
+ _dblockTaskRunner.schedule(DatabaseTask::makeCollectionLockTask(task, nss, mode));
}
- lk.lock();
- signalEvent_inlock(callback->_finishedEvent);
}
-
- StatusWith<ReplicationExecutor::CallbackHandle>
- ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(
- const CallbackFn& work) {
-
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_exclusiveLockInProgressQueue,
- work);
- if (handle.isOK()) {
- auto doOp = stdx::bind(
- &ReplicationExecutor::_doOperation,
- this,
- stdx::placeholders::_1,
- stdx::placeholders::_2,
- handle.getValue(),
- &_exclusiveLockInProgressQueue,
- &_terribleExLockSyncMutex);
- _dblockExclusiveLockTaskRunner.schedule(
- DatabaseTask::makeGlobalExclusiveLockTask(
- [doOp](OperationContext* txn, const Status& status) {
+ return handle;
+}
+
+void ReplicationExecutor::_doOperation(OperationContext* txn,
+ const Status& taskRunnerStatus,
+ const CallbackHandle& cbHandle,
+ WorkQueue* workQueue,
+ stdx::mutex* terribleExLockSyncMutex) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (_inShutdown)
+ return;
+ Callback* callback = _getCallbackFromHandle(cbHandle);
+ const WorkQueue::iterator iter = callback->_iter;
+ iter->callback = CallbackHandle();
+ _freeQueue.splice(_freeQueue.begin(), *workQueue, iter);
+ lk.unlock();
+ {
+ std::unique_ptr<stdx::lock_guard<stdx::mutex>> terribleLock(
+ terribleExLockSyncMutex ? new stdx::lock_guard<stdx::mutex>(*terribleExLockSyncMutex)
+ : nullptr);
+ // Only possible task runner error status is CallbackCanceled.
+ callback->_callbackFn(
+ CallbackArgs(this,
+ cbHandle,
+ (callback->_isCanceled || !taskRunnerStatus.isOK()
+ ? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
+ : Status::OK()),
+ txn));
+ }
+ lk.lock();
+ signalEvent_inlock(callback->_finishedEvent);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle>
+ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(const CallbackFn& work) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_exclusiveLockInProgressQueue, work);
+ if (handle.isOK()) {
+ auto doOp = stdx::bind(&ReplicationExecutor::_doOperation,
+ this,
+ stdx::placeholders::_1,
+ stdx::placeholders::_2,
+ handle.getValue(),
+ &_exclusiveLockInProgressQueue,
+ &_terribleExLockSyncMutex);
+ _dblockExclusiveLockTaskRunner.schedule(DatabaseTask::makeGlobalExclusiveLockTask(
+ [doOp](OperationContext* txn, const Status& status) {
makeNoExcept(stdx::bind(doOp, txn, status))();
return TaskRunner::NextAction::kDisposeOperationContext;
}));
- }
- return handle;
- }
-
- std::pair<ReplicationExecutor::WorkItem, ReplicationExecutor::CallbackHandle>
- ReplicationExecutor::getWork() {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- while (true) {
- const Date_t now = _networkInterface->now();
- Date_t nextWakeupDate = scheduleReadySleepers_inlock(now);
- if (!_readyQueue.empty()) {
- break;
- }
- else if (_inShutdown) {
- return std::make_pair(WorkItem(), CallbackHandle());
- }
- lk.unlock();
- if (nextWakeupDate == Date_t::max()) {
- _networkInterface->waitForWork();
- }
- else {
- _networkInterface->waitForWorkUntil(nextWakeupDate);
- }
- lk.lock();
- }
- const WorkItem work = *_readyQueue.begin();
- const CallbackHandle cbHandle = work.callback;
- _readyQueue.begin()->callback = CallbackHandle();
- _freeQueue.splice(_freeQueue.begin(), _readyQueue, _readyQueue.begin());
- return std::make_pair(work, cbHandle);
- }
-
- int64_t ReplicationExecutor::nextRandomInt64(int64_t limit) {
- return _random.nextInt64(limit);
- }
-
- Date_t ReplicationExecutor::scheduleReadySleepers_inlock(const Date_t now) {
- WorkQueue::iterator iter = _sleepersQueue.begin();
- while ((iter != _sleepersQueue.end()) && (iter->readyDate <= now)) {
- ++iter;
- }
- _readyQueue.splice(_readyQueue.end(), _sleepersQueue, _sleepersQueue.begin(), iter);
- if (iter == _sleepersQueue.end()) {
- // indicate no sleeper to wait for
- return Date_t::max();
- }
- return iter->readyDate;
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::enqueueWork_inlock(
- WorkQueue* queue, const CallbackFn& callbackFn) {
-
- invariant(callbackFn);
- StatusWith<EventHandle> event = makeEvent_inlock();
- if (!event.isOK())
- return StatusWith<CallbackHandle>(event.getStatus());
-
- if (_freeQueue.empty())
- _freeQueue.push_front(WorkItem());
- const WorkQueue::iterator iter = _freeQueue.begin();
- WorkItem& work = *iter;
-
- invariant(!work.callback.isValid());
- setCallbackForHandle(&work.callback, std::shared_ptr<executor::TaskExecutor::CallbackState>(
- new Callback(this, callbackFn, iter, event.getValue())));
-
- work.generation++;
- work.finishedEvent = event.getValue();
- work.readyDate = Date_t();
- queue->splice(queue->end(), _freeQueue, iter);
- return StatusWith<CallbackHandle>(work.callback);
}
-
- ReplicationExecutor::WorkItem::WorkItem() : generation(0U),
- isNetworkOperation(false) {}
-
- ReplicationExecutor::Event::Event(ReplicationExecutor* executor,
- const EventList::iterator& iter) :
- executor::TaskExecutor::EventState(), _executor(executor), _isSignaled(false), _iter(iter) {}
-
- ReplicationExecutor::Event::~Event() {}
-
- void ReplicationExecutor::Event::signal() {
- // Must go through executor to signal so that this can be removed from the _unsignaledEvents
- // EventList.
- _executor->signalEvent(*_iter);
- }
-
- void ReplicationExecutor::Event::_signal_inlock() {
- invariant(!_isSignaled);
- _isSignaled = true;
-
- if (!_waiters.empty()) {
- _executor->_readyQueue.splice(_executor->_readyQueue.end(), _waiters);
- _executor->_networkInterface->signalWorkAvailable();
+ return handle;
+}
+
+std::pair<ReplicationExecutor::WorkItem, ReplicationExecutor::CallbackHandle>
+ReplicationExecutor::getWork() {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ while (true) {
+ const Date_t now = _networkInterface->now();
+ Date_t nextWakeupDate = scheduleReadySleepers_inlock(now);
+ if (!_readyQueue.empty()) {
+ break;
+ } else if (_inShutdown) {
+ return std::make_pair(WorkItem(), CallbackHandle());
}
-
- _isSignaledCondition.notify_all();
- }
-
- void ReplicationExecutor::Event::waitUntilSignaled() {
- stdx::unique_lock<stdx::mutex> lk(_executor->_mutex);
- ++_executor->_totalEventWaiters;
- while (!_isSignaled) {
- _isSignaledCondition.wait(lk);
+ lk.unlock();
+ if (nextWakeupDate == Date_t::max()) {
+ _networkInterface->waitForWork();
+ } else {
+ _networkInterface->waitForWorkUntil(nextWakeupDate);
}
- --_executor->_totalEventWaiters;
- _executor->maybeNotifyShutdownComplete_inlock();
- }
-
- bool ReplicationExecutor::Event::isSignaled() {
- stdx::lock_guard<stdx::mutex> lk(_executor->_mutex);
- return _isSignaled;
+ lk.lock();
}
-
- ReplicationExecutor::Callback::Callback(ReplicationExecutor* executor,
- const CallbackFn callbackFn,
- const WorkQueue::iterator& iter,
- const EventHandle& finishedEvent) :
- executor::TaskExecutor::CallbackState(),
- _executor(executor),
- _callbackFn(callbackFn),
- _isCanceled(false),
- _iter(iter),
- _finishedEvent(finishedEvent) {}
-
- ReplicationExecutor::Callback::~Callback() {}
-
- void ReplicationExecutor::Callback::cancel() {
- stdx::unique_lock<stdx::mutex> lk(_executor->_mutex);
- _isCanceled = true;
- if (_iter->isNetworkOperation) {
- lk.unlock();
- _executor->_networkInterface->cancelCommand(_iter->callback);
- }
+ const WorkItem work = *_readyQueue.begin();
+ const CallbackHandle cbHandle = work.callback;
+ _readyQueue.begin()->callback = CallbackHandle();
+ _freeQueue.splice(_freeQueue.begin(), _readyQueue, _readyQueue.begin());
+ return std::make_pair(work, cbHandle);
+}
+
+int64_t ReplicationExecutor::nextRandomInt64(int64_t limit) {
+ return _random.nextInt64(limit);
+}
+
+Date_t ReplicationExecutor::scheduleReadySleepers_inlock(const Date_t now) {
+ WorkQueue::iterator iter = _sleepersQueue.begin();
+ while ((iter != _sleepersQueue.end()) && (iter->readyDate <= now)) {
+ ++iter;
+ }
+ _readyQueue.splice(_readyQueue.end(), _sleepersQueue, _sleepersQueue.begin(), iter);
+ if (iter == _sleepersQueue.end()) {
+ // Indicates there is no sleeper to wait for.
+ return Date_t::max();
+ }
+ return iter->readyDate;
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::enqueueWork_inlock(
+ WorkQueue* queue, const CallbackFn& callbackFn) {
+ invariant(callbackFn);
+ StatusWith<EventHandle> event = makeEvent_inlock();
+ if (!event.isOK())
+ return StatusWith<CallbackHandle>(event.getStatus());
+
+ if (_freeQueue.empty())
+ _freeQueue.push_front(WorkItem());
+ const WorkQueue::iterator iter = _freeQueue.begin();
+ WorkItem& work = *iter;
+
+ invariant(!work.callback.isValid());
+ setCallbackForHandle(&work.callback,
+ std::shared_ptr<executor::TaskExecutor::CallbackState>(
+ new Callback(this, callbackFn, iter, event.getValue())));
+
+ work.generation++;
+ work.finishedEvent = event.getValue();
+ work.readyDate = Date_t();
+ queue->splice(queue->end(), _freeQueue, iter);
+ return StatusWith<CallbackHandle>(work.callback);
+}
+
+ReplicationExecutor::WorkItem::WorkItem() : generation(0U), isNetworkOperation(false) {}
+
+ReplicationExecutor::Event::Event(ReplicationExecutor* executor, const EventList::iterator& iter)
+ : executor::TaskExecutor::EventState(), _executor(executor), _isSignaled(false), _iter(iter) {}
+
+ReplicationExecutor::Event::~Event() {}
+
+void ReplicationExecutor::Event::signal() {
+ // Must go through executor to signal so that this can be removed from the _unsignaledEvents
+ // EventList.
+ _executor->signalEvent(*_iter);
+}
+
+void ReplicationExecutor::Event::_signal_inlock() {
+ invariant(!_isSignaled);
+ _isSignaled = true;
+
+ if (!_waiters.empty()) {
+ _executor->_readyQueue.splice(_executor->_readyQueue.end(), _waiters);
+ _executor->_networkInterface->signalWorkAvailable();
+ }
+
+ _isSignaledCondition.notify_all();
+}
+
+void ReplicationExecutor::Event::waitUntilSignaled() {
+ stdx::unique_lock<stdx::mutex> lk(_executor->_mutex);
+ ++_executor->_totalEventWaiters;
+ while (!_isSignaled) {
+ _isSignaledCondition.wait(lk);
+ }
+ --_executor->_totalEventWaiters;
+ _executor->maybeNotifyShutdownComplete_inlock();
+}
+
+bool ReplicationExecutor::Event::isSignaled() {
+ stdx::lock_guard<stdx::mutex> lk(_executor->_mutex);
+ return _isSignaled;
+}
+
+ReplicationExecutor::Callback::Callback(ReplicationExecutor* executor,
+ const CallbackFn callbackFn,
+ const WorkQueue::iterator& iter,
+ const EventHandle& finishedEvent)
+ : executor::TaskExecutor::CallbackState(),
+ _executor(executor),
+ _callbackFn(callbackFn),
+ _isCanceled(false),
+ _iter(iter),
+ _finishedEvent(finishedEvent) {}
+
+ReplicationExecutor::Callback::~Callback() {}
+
+void ReplicationExecutor::Callback::cancel() {
+ stdx::unique_lock<stdx::mutex> lk(_executor->_mutex);
+ _isCanceled = true;
+ if (_iter->isNetworkOperation) {
+ lk.unlock();
+ _executor->_networkInterface->cancelCommand(_iter->callback);
}
+}
- void ReplicationExecutor::Callback::waitForCompletion() {
- _executor->waitForEvent(_finishedEvent);
- }
+void ReplicationExecutor::Callback::waitForCompletion() {
+ _executor->waitForEvent(_finishedEvent);
+}
- ReplicationExecutor::Event* ReplicationExecutor::_getEventFromHandle(
- const EventHandle& eventHandle) {
- return static_cast<Event*>(getEventFromHandle(eventHandle));
- }
+ReplicationExecutor::Event* ReplicationExecutor::_getEventFromHandle(
+ const EventHandle& eventHandle) {
+ return static_cast<Event*>(getEventFromHandle(eventHandle));
+}
- ReplicationExecutor::Callback* ReplicationExecutor::_getCallbackFromHandle(
- const CallbackHandle& callbackHandle) {
- return static_cast<Callback*>(getCallbackFromHandle(callbackHandle));
- }
+ReplicationExecutor::Callback* ReplicationExecutor::_getCallbackFromHandle(
+ const CallbackHandle& callbackHandle) {
+ return static_cast<Callback*>(getCallbackFromHandle(callbackHandle));
+}
namespace {
- void callNoExcept(const stdx::function<void ()>& fn) {
- try {
- fn();
- }
- catch (...) {
- std::terminate();
- }
+void callNoExcept(const stdx::function<void()>& fn) {
+ try {
+ fn();
+ } catch (...) {
+ std::terminate();
}
+}
- stdx::function<void ()> makeNoExcept(const stdx::function<void ()> &fn) {
- return stdx::bind(callNoExcept, fn);
- }
+stdx::function<void()> makeNoExcept(const stdx::function<void()>& fn) {
+ return stdx::bind(callNoExcept, fn);
+}
} // namespace
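
Taken together, the functions above implement the event-loop contract. A minimal driver, as a hedged sketch: netInterface and storageInterface are assumed to be valid pointers (the executor takes ownership of both), and only APIs defined in this file and its header are used.

    // Illustrative only: chain an event callback behind ordinary work, then run.
    ReplicationExecutor executor(netInterface, storageInterface, 1 /* prngSeed */);
    ReplicationExecutor::EventHandle event = uassertStatusOK(executor.makeEvent());
    // Parked in the event's waiter queue until the event is signaled.
    uassertStatusOK(executor.onEvent(event, [](const ReplicationExecutor::CallbackArgs& cbData) {
        cbData.executor->shutdown();  // Last link in the chain: stop the run loop.
    }));
    // Goes straight to the ready queue; signaling the event moves the waiter there too.
    uassertStatusOK(executor.scheduleWork([event](const ReplicationExecutor::CallbackArgs& cbData) {
        cbData.executor->signalEvent(event);
    }));
    executor.run();  // Executes both callbacks, then returns once shutdown completes.
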
diff --git a/src/mongo/db/repl/replication_executor.h b/src/mongo/db/repl/replication_executor.h
index a6749bb26a4..0dfa97f9cf0 100644
--- a/src/mongo/db/repl/replication_executor.h
+++ b/src/mongo/db/repl/replication_executor.h
@@ -51,357 +51,349 @@
namespace mongo {
- class NamespaceString;
- class OperationContext;
+class NamespaceString;
+class OperationContext;
-namespace executor{
- class NetworkInterface;
-} // namespace executor
+namespace executor {
+class NetworkInterface;
+} // namespace executor
namespace repl {
- class StorageInterface;
+class StorageInterface;
+
+/**
+ * Implementation of the TaskExecutor interface for providing an event loop for driving state
+ * machines in replication.
+ *
+ * Usage: Instantiate an executor, schedule a work item, call run().
+ *
+ * Implementation details:
+ *
+ * The executor is composed of several WorkQueues, which are queues of WorkItems. WorkItems
+ * describe units of work -- a callback and state needed to track its lifecycle. The iterators
+ * pointing to WorkItems are spliced between the WorkQueues, rather than copying WorkItems
+ * themselves. Further, those WorkQueue::iterators are never invalidated during the life of an
+ * executor. They may be recycled to represent new work items, but when that happens, a counter
+ * on the WorkItem is incremented, to disambiguate.
+ *
+ * All work executed by the run() method of the executor is popped off the front of the
+ * _readyQueue. Remote commands blocked on the network can be found in the
+ * _networkInProgressQueue. Callbacks waiting for a timer to expire are in the _sleepersQueue.
+ * When the network returns or the timer expires, items from these two queues are transferred to
+ * the back of the _readyQueue.
+ *
+ * The _exclusiveLockInProgressQueue, which represents work items to execute while holding the
+ * GlobalWrite lock, is exceptional. WorkItems in that queue execute in unspecified order with
+ * respect to work in the _readyQueue or other WorkItems in the _exclusiveLockInProgressQueue,
+ * but they are executed in a single serial order with respect to those other WorkItems. The
+ * _terribleExLockSyncMutex is used to provide this serialization, until such time as the global
+ * lock may be passed from one thread to another.
+ */
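
The splice discipline described above is the load-bearing detail: std::list::splice relinks a node between lists without copying it or invalidating iterators that point to it. A standalone illustration, not part of this patch:

    #include <cassert>
    #include <list>

    int main() {
        std::list<int> ready;
        std::list<int> sleepers{42};
        auto it = sleepers.begin();               // Points at the node holding 42.
        ready.splice(ready.end(), sleepers, it);  // Relink the node; nothing is copied.
        assert(*it == 42);                        // "it" is still valid, now inside "ready".
        assert(sleepers.empty());
        return 0;
    }
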
+class ReplicationExecutor final : public executor::TaskExecutor {
+ MONGO_DISALLOW_COPYING(ReplicationExecutor);
+
+public:
+ /**
+ * Constructs a new executor.
+ *
+ * Takes ownership of the passed NetworkInterface object.
+ */
+ ReplicationExecutor(executor::NetworkInterface* netInterface,
+ StorageInterface* storageInterface,
+ int64_t prngSeed);
+
+ /**
+ * Destroys an executor.
+ */
+ virtual ~ReplicationExecutor();
+
+ std::string getDiagnosticString() override;
+ Date_t now() override;
+ void shutdown() override;
+ void signalEvent(const EventHandle& event) override;
+ StatusWith<EventHandle> makeEvent() override;
+ StatusWith<CallbackHandle> onEvent(const EventHandle& event, const CallbackFn& work) override;
+ void waitForEvent(const EventHandle& event) override;
+ StatusWith<CallbackHandle> scheduleWork(const CallbackFn& work) override;
+ StatusWith<CallbackHandle> scheduleWorkAt(Date_t when, const CallbackFn& work) override;
+ StatusWith<CallbackHandle> scheduleRemoteCommand(const RemoteCommandRequest& request,
+ const RemoteCommandCallbackFn& cb) override;
+ void cancel(const CallbackHandle& cbHandle) override;
+ void wait(const CallbackHandle& cbHandle) override;
+
+
+ /**
+ * Executes the run loop. May be called up to one time.
+ *
+ * Returns after the executor has been shut down and is safe to delete.
+ */
+ void run();
/**
- * Implementation of the TaskExecutor interface for providing an event loop for driving state
- * machines in replication.
+ * Schedules DB "work" to be run by the executor..
*
- * Usage: Instantiate an executor, schedule a work item, call run().
+ * Takes no locks on behalf of the caller: global, database, or collection.
*
- * Implementation details:
+ * The "work" will run exclusively with other DB work items. All DB work items
+ * are run in the order they are scheduled.
*
- * The executor is composed of several WorkQueues, which are queues of WorkItems. WorkItems
- * describe units of work -- a callback and state needed to track its lifecycle. The iterators
- * pointing to WorkItems are spliced between the WorkQueues, rather than copying WorkItems
- * themselves. Further, those WorkQueue::iterators are never invalidated during the life of an
- * executor. They may be recycled to represent new work items, but when that happens, a counter
- * on the WorkItem is incremented, to disambiguate.
+ * The "work" may run concurrently with other non-DB work items,
+ * but there are no ordering guarantees provided with respect to
+ * any other work item.
*
- * All work executed by the run() method of the executor is popped off the front of the
- * _readyQueue. Remote commands blocked on the network can be found in the
- * _networkInProgressQueue. Callbacks waiting for a timer to expire are in the _sleepersQueue.
- * When the network returns or the timer expires, items from these two queues are transferred to
- * the back of the _readyQueue.
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
*
- * The _exclusiveLockInProgressQueue, which represents work items to execute while holding the
- * GlobalWrite lock, is exceptional. WorkItems in that queue execute in unspecified order with
- * respect to work in the _readyQueue or other WorkItems in the _exclusiveLockInProgressQueue,
- * but they are executed in a single serial order with respect to those other WorkItems. The
- * _terribleExLockSyncMutex is used to provide this serialization, until such time as the global
- * lock may be passed from one thread to another.
+ * May be called by client threads or callbacks running in the executor.
*/
- class ReplicationExecutor final : public executor::TaskExecutor {
- MONGO_DISALLOW_COPYING(ReplicationExecutor);
- public:
-
- /**
- * Constructs a new executor.
- *
- * Takes ownership of the passed NetworkInterface object.
- */
- ReplicationExecutor(executor::NetworkInterface* netInterface,
- StorageInterface* storageInterface,
- int64_t pnrgSeed);
-
- /**
- * Destroys an executor.
- */
- virtual ~ReplicationExecutor();
-
- std::string getDiagnosticString() override;
- Date_t now() override;
- void shutdown() override;
- void signalEvent(const EventHandle& event) override;
- StatusWith<EventHandle> makeEvent() override;
- StatusWith<CallbackHandle> onEvent(const EventHandle& event,
- const CallbackFn& work) override;
- void waitForEvent(const EventHandle& event) override;
- StatusWith<CallbackHandle> scheduleWork(const CallbackFn& work) override;
- StatusWith<CallbackHandle> scheduleWorkAt(Date_t when, const CallbackFn& work) override;
- StatusWith<CallbackHandle> scheduleRemoteCommand(
- const RemoteCommandRequest& request,
- const RemoteCommandCallbackFn& cb) override;
- void cancel(const CallbackHandle& cbHandle) override;
- void wait(const CallbackHandle& cbHandle) override;
-
-
- /**
- * Executes the run loop. May be called up to one time.
- *
- * Returns after the executor has been shutdown and is safe to delete.
- */
- void run();
-
- /**
- * Schedules DB "work" to be run by the executor..
- *
- * Takes no locks for caller - global, database or collection.
- *
- * The "work" will run exclusively with other DB work items. All DB work items
- * are run the in order they are scheduled.
- *
- * The "work" may run concurrently with other non-DB work items,
- * but there are no ordering guarantees provided with respect to
- * any other work item.
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleDBWork(const CallbackFn& work);
-
- /**
- * Schedules DB "work" to be run by the executor while holding the collection lock.
- *
- * Takes collection lock in specified mode (and slightly more permissive lock for the
- * database lock) but not the global exclusive lock.
- *
- * The "work" will run exclusively with other DB work items. All DB work items
- * are run the in order they are scheduled.
- *
- * The "work" may run concurrently with other non-DB work items,
- * but there are no ordering guarantees provided with respect to
- * any other work item.
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleDBWork(const CallbackFn& work,
- const NamespaceString& nss,
- LockMode mode);
-
- /**
- * Schedules "work" to be run by the executor while holding the global exclusive lock.
- *
- * Takes collection lock in specified mode (and slightly more permissive lock for the
- * database lock) but not the global exclusive lock.
- *
- * The "work" will run exclusively, as though it were executed by the main
- * run loop, but there are no ordering guarantees provided with respect to
- * any other work item.
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleWorkWithGlobalExclusiveLock(
- const CallbackFn& work);
-
- /**
- * Returns an int64_t generated by the prng with a max value of "limit".
- */
- int64_t nextRandomInt64(int64_t limit);
-
- private:
- class Callback;
- class Event;
- struct WorkItem;
- friend class Callback;
- friend class Event;
-
-
- /**
- * A linked list of WorkItem objects.
- *
- * WorkItems get moved among lists by splicing iterators of work lists together,
- * not by copying underlying WorkItem objects.
- */
- typedef stdx::list<WorkItem> WorkQueue;
-
- /**
- * A linked list of EventHandles.
- */
- typedef stdx::list<EventHandle> EventList;
-
- /**
- * Returns diagnostic info
- */
- std::string _getDiagnosticString_inlock() const;
-
- /**
- * Implementation of makeEvent() for use when _mutex is already held.
- */
- StatusWith<EventHandle> makeEvent_inlock();
-
- /**
- * Implementation of signalEvent() for use when _mutex is already held.
- */
- void signalEvent_inlock(const EventHandle&);
-
- /**
- * Gets a single piece of work to execute.
- *
- * If the "callback" member of the returned WorkItem is falsey, that is a signal
- * to the run loop to wait for shutdown.
- */
- std::pair<WorkItem, CallbackHandle> getWork();
-
- /**
- * Marks as runnable any sleepers whose ready date has passed as of "now".
- * Returns the date when the next sleeper will be ready, or Date_t(~0ULL) if there are no
- * remaining sleepers.
- */
- Date_t scheduleReadySleepers_inlock(Date_t now);
-
- /**
- * Enqueues "callback" into "queue".
- */
- StatusWith<CallbackHandle> enqueueWork_inlock(WorkQueue* queue, const CallbackFn& callback);
-
- /**
- * Notifies interested parties that shutdown has completed, if it has.
- */
- void maybeNotifyShutdownComplete_inlock();
-
- /**
- * Completes the shutdown process. Called by run().
- */
- void finishShutdown();
-
- void _finishRemoteCommand(
- const RemoteCommandRequest& request,
- const StatusWith<RemoteCommandResponse>& response,
- const CallbackHandle& cbHandle,
- const uint64_t expectedHandleGeneration,
- const RemoteCommandCallbackFn& cb);
-
- /**
- * Executes the callback referenced by "cbHandle", and moves the underlying
- * WorkQueue::iterator from "workQueue" into the _freeQueue.
- *
- * "txn" is a pointer to the OperationContext.
- *
- * "status" is the callback status from the task runner. Only possible values are
- * Status::OK and ErrorCodes::CallbackCanceled (when task runner is canceled).
- *
- * If "terribleExLockSyncMutex" is not null, serializes execution of "cbHandle" with the
- * execution of other callbacks.
- */
- void _doOperation(OperationContext* txn,
- const Status& taskRunnerStatus,
- const CallbackHandle& cbHandle,
- WorkQueue* workQueue,
- stdx::mutex* terribleExLockSyncMutex);
-
- /**
- * Wrapper around TaskExecutor::getCallbackFromHandle that return an Event* instead of
- * a generic EventState*.
- */
- Event* _getEventFromHandle(const EventHandle& eventHandle);
-
- /**
- * Wrapper around TaskExecutor::getCallbackFromHandle that return an Event* instead of
- * a generic EventState*.
- */
- Callback* _getCallbackFromHandle(const CallbackHandle& callbackHandle);
-
- // PRNG; seeded at class construction time.
- PseudoRandom _random;
-
- std::unique_ptr<executor::NetworkInterface> _networkInterface;
- std::unique_ptr<StorageInterface> _storageInterface;
- stdx::mutex _mutex;
- stdx::mutex _terribleExLockSyncMutex;
- stdx::condition_variable _noMoreWaitingThreads;
- WorkQueue _freeQueue;
- WorkQueue _readyQueue;
- WorkQueue _dbWorkInProgressQueue;
- WorkQueue _exclusiveLockInProgressQueue;
- WorkQueue _networkInProgressQueue;
- WorkQueue _sleepersQueue;
- EventList _unsignaledEvents;
- int64_t _totalEventWaiters;
- bool _inShutdown;
- OldThreadPool _dblockWorkers;
- TaskRunner _dblockTaskRunner;
- TaskRunner _dblockExclusiveLockTaskRunner;
- uint64_t _nextId;
- };
-
- class ReplicationExecutor::Callback : public executor::TaskExecutor::CallbackState {
- friend class ReplicationExecutor;
-
- public:
-
- Callback(ReplicationExecutor* executor,
- const CallbackFn callbackFn,
- const WorkQueue::iterator& iter,
- const EventHandle& finishedEvent);
- virtual ~Callback();
-
- void cancel() override;
- void waitForCompletion() override;
-
- private:
-
- ReplicationExecutor* _executor;
-
- // All members other than _executor are protected by the executor's _mutex.
- CallbackFn _callbackFn;
- bool _isCanceled;
- WorkQueue::iterator _iter;
- EventHandle _finishedEvent;
- };
-
- typedef ReplicationExecutor::ResponseStatus ResponseStatus;
+ StatusWith<CallbackHandle> scheduleDBWork(const CallbackFn& work);
/**
- * Description of a scheduled but not-yet-run work item.
+ * Schedules DB "work" to be run by the executor while holding the collection lock.
*
- * Once created, WorkItem objects remain in scope until the executor is destroyed.
- * However, over their lifetime, they may represent many different work items. This
- * divorces the lifetime of CallbackHandles from the lifetime of WorkItem objects, but
- * requires a unique generation identifier in CallbackHandles and WorkItem objects.
+ * Takes collection lock in specified mode (and slightly more permissive lock for the
+ * database lock) but not the global exclusive lock.
+ *
+ * The "work" will run exclusively with other DB work items. All DB work items
+ * are run in the order they are scheduled.
+ *
+ * The "work" may run concurrently with other non-DB work items,
+ * but there are no ordering guarantees provided with respect to
+ * any other work item.
+ *
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> scheduleDBWork(const CallbackFn& work,
+ const NamespaceString& nss,
+ LockMode mode);
+
+ /**
+ * Schedules "work" to be run by the executor while holding the global exclusive lock.
+ *
+ * Takes the global exclusive lock for the duration of "work"; no collection or
+ * database locks are separately acquired.
+ *
+ * The "work" will run exclusively, as though it were executed by the main
+ * run loop, but there are no ordering guarantees provided with respect to
+ * any other work item.
+ *
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> scheduleWorkWithGlobalExclusiveLock(const CallbackFn& work);
+
+ /**
+ * Returns a pseudorandom int64_t in the range [0, "limit").
+ */
+ int64_t nextRandomInt64(int64_t limit);
+
+private:
+ class Callback;
+ class Event;
+ struct WorkItem;
+ friend class Callback;
+ friend class Event;
+
+
+ /**
+ * A linked list of WorkItem objects.
*
- * WorkItem is copyable so that it may be stored in a list. However, in practice they
- * should only be copied by getWork() and when allocating new entries into a WorkQueue (not
- * when moving entries between work lists).
+ * WorkItems get moved among lists by splicing iterators of work lists together,
+ * not by copying underlying WorkItem objects.
*/
- struct ReplicationExecutor::WorkItem {
- WorkItem();
- uint64_t generation;
- CallbackHandle callback;
- EventHandle finishedEvent;
- Date_t readyDate;
- bool isNetworkOperation;
- };
+ typedef stdx::list<WorkItem> WorkQueue;
/**
- * Description of an event.
+ * A linked list of EventHandles.
+ */
+ typedef stdx::list<EventHandle> EventList;
+
+ /**
+ * Returns diagnostic info.
+ */
+ std::string _getDiagnosticString_inlock() const;
+
+ /**
+ * Implementation of makeEvent() for use when _mutex is already held.
+ */
+ StatusWith<EventHandle> makeEvent_inlock();
+
+ /**
+ * Implementation of signalEvent() for use when _mutex is already held.
+ */
+ void signalEvent_inlock(const EventHandle&);
+
+ /**
+ * Gets a single piece of work to execute.
*
- * Like WorkItem, above, but for events. On signaling, the executor removes the event from the
- * "unsignaled" EventList and schedules all work items in the _waiters list.
+ * If the "callback" member of the returned WorkItem is falsey, that is a signal
+ * to the run loop to wait for shutdown.
*/
- class ReplicationExecutor::Event : public executor::TaskExecutor::EventState {
- friend class ReplicationExecutor;
+ std::pair<WorkItem, CallbackHandle> getWork();
- public:
+ /**
+ * Marks as runnable any sleepers whose ready date has passed as of "now".
+ * Returns the date when the next sleeper will be ready, or Date_t::max() if there are no
+ * remaining sleepers.
+ */
+ Date_t scheduleReadySleepers_inlock(Date_t now);
- Event(ReplicationExecutor* executor, const EventList::iterator& iter);
- virtual ~Event();
+ /**
+ * Enqueues "callback" into "queue".
+ */
+ StatusWith<CallbackHandle> enqueueWork_inlock(WorkQueue* queue, const CallbackFn& callback);
- void signal() override;
- void waitUntilSignaled() override;
- bool isSignaled() override;
+ /**
+ * Notifies interested parties that shutdown has completed, if it has.
+ */
+ void maybeNotifyShutdownComplete_inlock();
- private:
+ /**
+ * Completes the shutdown process. Called by run().
+ */
+ void finishShutdown();
- // Note that the caller is responsible for removing any references to any EventHandles
- // pointing to this event.
- void _signal_inlock();
+ void _finishRemoteCommand(const RemoteCommandRequest& request,
+ const StatusWith<RemoteCommandResponse>& response,
+ const CallbackHandle& cbHandle,
+ const uint64_t expectedHandleGeneration,
+ const RemoteCommandCallbackFn& cb);
- ReplicationExecutor* _executor;
+ /**
+ * Executes the callback referenced by "cbHandle", and moves the underlying
+ * WorkQueue::iterator from "workQueue" into the _freeQueue.
+ *
+ * "txn" is a pointer to the OperationContext.
+ *
+ * "status" is the callback status from the task runner. Only possible values are
+ * Status::OK and ErrorCodes::CallbackCanceled (when task runner is canceled).
+ *
+ * If "terribleExLockSyncMutex" is not null, serializes execution of "cbHandle" with the
+ * execution of other callbacks.
+ */
+ void _doOperation(OperationContext* txn,
+ const Status& taskRunnerStatus,
+ const CallbackHandle& cbHandle,
+ WorkQueue* workQueue,
+ stdx::mutex* terribleExLockSyncMutex);
- // All members other than _executor are protected by the executor's _mutex.
- bool _isSignaled;
- stdx::condition_variable _isSignaledCondition;
- EventList::iterator _iter;
- WorkQueue _waiters;
- };
+ /**
+ * Wrapper around TaskExecutor::getEventFromHandle that returns an Event* instead of
+ * a generic EventState*.
+ */
+ Event* _getEventFromHandle(const EventHandle& eventHandle);
+
+ /**
+ * Wrapper around TaskExecutor::getCallbackFromHandle that returns a Callback* instead of
+ * a generic CallbackState*.
+ */
+ Callback* _getCallbackFromHandle(const CallbackHandle& callbackHandle);
+
+ // PRNG; seeded at class construction time.
+ PseudoRandom _random;
+
+ std::unique_ptr<executor::NetworkInterface> _networkInterface;
+ std::unique_ptr<StorageInterface> _storageInterface;
+ stdx::mutex _mutex;
+ stdx::mutex _terribleExLockSyncMutex;
+ stdx::condition_variable _noMoreWaitingThreads;
+ WorkQueue _freeQueue;
+ WorkQueue _readyQueue;
+ WorkQueue _dbWorkInProgressQueue;
+ WorkQueue _exclusiveLockInProgressQueue;
+ WorkQueue _networkInProgressQueue;
+ WorkQueue _sleepersQueue;
+ EventList _unsignaledEvents;
+ int64_t _totalEventWaiters;
+ bool _inShutdown;
+ OldThreadPool _dblockWorkers;
+ TaskRunner _dblockTaskRunner;
+ TaskRunner _dblockExclusiveLockTaskRunner;
+ uint64_t _nextId;
+};
+
+class ReplicationExecutor::Callback : public executor::TaskExecutor::CallbackState {
+ friend class ReplicationExecutor;
+
+public:
+ Callback(ReplicationExecutor* executor,
+ const CallbackFn callbackFn,
+ const WorkQueue::iterator& iter,
+ const EventHandle& finishedEvent);
+ virtual ~Callback();
+
+ void cancel() override;
+ void waitForCompletion() override;
+
+private:
+ ReplicationExecutor* _executor;
+
+ // All members other than _executor are protected by the executor's _mutex.
+ CallbackFn _callbackFn;
+ bool _isCanceled;
+ WorkQueue::iterator _iter;
+ EventHandle _finishedEvent;
+};
+
+typedef ReplicationExecutor::ResponseStatus ResponseStatus;
+
+/**
+ * Description of a scheduled but not-yet-run work item.
+ *
+ * Once created, WorkItem objects remain in scope until the executor is destroyed.
+ * However, over their lifetime, they may represent many different work items. This
+ * divorces the lifetime of CallbackHandles from the lifetime of WorkItem objects, but
+ * requires a unique generation identifier in CallbackHandles and WorkItem objects.
+ *
+ * WorkItem is copyable so that it may be stored in a list. In practice, WorkItems
+ * should only be copied by getWork() and when allocating new entries into a WorkQueue (not
+ * when moving entries between work lists).
+ */
+struct ReplicationExecutor::WorkItem {
+ WorkItem();
+ uint64_t generation;
+ CallbackHandle callback;
+ EventHandle finishedEvent;
+ Date_t readyDate;
+ bool isNetworkOperation;
+};
+
+/**
+ * Description of an event.
+ *
+ * Like WorkItem, above, but for events. On signaling, the executor removes the event from the
+ * "unsignaled" EventList and schedules all work items in the _waiters list.
+ */
+class ReplicationExecutor::Event : public executor::TaskExecutor::EventState {
+ friend class ReplicationExecutor;
+
+public:
+ Event(ReplicationExecutor* executor, const EventList::iterator& iter);
+ virtual ~Event();
+
+ void signal() override;
+ void waitUntilSignaled() override;
+ bool isSignaled() override;
+
+private:
+ // Note that the caller is responsible for removing any references to any EventHandles
+ // pointing to this event.
+ void _signal_inlock();
+
+ ReplicationExecutor* _executor;
+
+ // All members other than _executor are protected by the executor's _mutex.
+ bool _isSignaled;
+ stdx::condition_variable _isSignaledCondition;
+ EventList::iterator _iter;
+ WorkQueue _waiters;
+};
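+
+// A rough sketch (invented for illustration, not the executor's actual code) of
+// the signaling flow described above, assuming WorkQueue is a std::list:
+//
+//     void signalSketch(ReplicationExecutor::Event* event, WorkQueue* readyQueue) {
+//         event->_isSignaled = true;                   // mark the event signaled
+//         event->_isSignaledCondition.notify_all();    // wake waitUntilSignaled() callers
+//         // Move every queued waiter to the ready queue in one splice.
+//         readyQueue->splice(readyQueue->end(), event->_waiters);
+//     }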
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_executor_test.cpp b/src/mongo/db/repl/replication_executor_test.cpp
index d299706f75b..c421397701c 100644
--- a/src/mongo/db/repl/replication_executor_test.cpp
+++ b/src/mongo/db/repl/replication_executor_test.cpp
@@ -48,513 +48,462 @@ namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- bool operator==(const RemoteCommandRequest lhs,
- const RemoteCommandRequest rhs) {
- return lhs.target == rhs.target &&
- lhs.dbname == rhs.dbname &&
- lhs.cmdObj == rhs.cmdObj;
- }
-
- bool operator!=(const RemoteCommandRequest lhs,
- const RemoteCommandRequest rhs) {
- return !(lhs == rhs);
- }
-
- void setStatus(const ReplicationExecutor::CallbackArgs& cbData, Status* target) {
- *target = cbData.status;
- }
-
- void setStatusAndShutdown(const ReplicationExecutor::CallbackArgs& cbData,
- Status* target) {
- setStatus(cbData, target);
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }
-
- void setStatusAndTriggerEvent(const ReplicationExecutor::CallbackArgs& cbData,
- Status* outStatus,
- ReplicationExecutor::EventHandle event) {
- *outStatus = cbData.status;
- if (!cbData.status.isOK())
- return;
- cbData.executor->signalEvent(event);
- }
-
- void scheduleSetStatusAndShutdown(const ReplicationExecutor::CallbackArgs& cbData,
- Status* outStatus1,
- Status* outStatus2) {
- if (!cbData.status.isOK()) {
- *outStatus1 = cbData.status;
- return;
- }
- *outStatus1= cbData.executor->scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- outStatus2)).getStatus();
- }
-
- const int64_t prngSeed = 1;
-
- TEST_F(ReplicationExecutorTest, RunOne) {
- ReplicationExecutor& executor = getExecutor();
- Status status = getDetectableErrorStatus();
- ASSERT_OK(executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status)).getStatus());
- executor.run();
- ASSERT_OK(status);
- }
-
- TEST_F(ReplicationExecutorTest, Schedule1ButShutdown) {
- ReplicationExecutor& executor = getExecutor();
- Status status = getDetectableErrorStatus();
- ASSERT_OK(executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status)).getStatus());
- executor.shutdown();
- executor.run();
- ASSERT_EQUALS(status, ErrorCodes::CallbackCanceled);
- }
-
- TEST_F(ReplicationExecutorTest, Schedule2Cancel1) {
- ReplicationExecutor& executor = getExecutor();
- Status status1 = getDetectableErrorStatus();
- Status status2 = getDetectableErrorStatus();
- ReplicationExecutor::CallbackHandle cb = unittest::assertGet(
- executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status1)));
- executor.cancel(cb);
- ASSERT_OK(executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status2)).getStatus());
- executor.run();
- ASSERT_EQUALS(status1, ErrorCodes::CallbackCanceled);
- ASSERT_OK(status2);
- }
-
- TEST_F(ReplicationExecutorTest, OneSchedulesAnother) {
- ReplicationExecutor& executor = getExecutor();
- Status status1 = getDetectableErrorStatus();
- Status status2 = getDetectableErrorStatus();
- ASSERT_OK(executor.scheduleWork(stdx::bind(scheduleSetStatusAndShutdown,
- stdx::placeholders::_1,
- &status1,
- &status2)).getStatus());
- executor.run();
- ASSERT_OK(status1);
- ASSERT_OK(status2);
+using executor::NetworkInterfaceMock;
+
+bool operator==(const RemoteCommandRequest& lhs, const RemoteCommandRequest& rhs) {
+ return lhs.target == rhs.target && lhs.dbname == rhs.dbname && lhs.cmdObj == rhs.cmdObj;
+}
+
+bool operator!=(const RemoteCommandRequest& lhs, const RemoteCommandRequest& rhs) {
+ return !(lhs == rhs);
+}
+
+void setStatus(const ReplicationExecutor::CallbackArgs& cbData, Status* target) {
+ *target = cbData.status;
+}
+
+void setStatusAndShutdown(const ReplicationExecutor::CallbackArgs& cbData, Status* target) {
+ setStatus(cbData, target);
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+}
+
+void setStatusAndTriggerEvent(const ReplicationExecutor::CallbackArgs& cbData,
+ Status* outStatus,
+ ReplicationExecutor::EventHandle event) {
+ *outStatus = cbData.status;
+ if (!cbData.status.isOK())
+ return;
+ cbData.executor->signalEvent(event);
+}
+
+void scheduleSetStatusAndShutdown(const ReplicationExecutor::CallbackArgs& cbData,
+ Status* outStatus1,
+ Status* outStatus2) {
+ if (!cbData.status.isOK()) {
+ *outStatus1 = cbData.status;
+ return;
}
-
- class EventChainAndWaitingTest {
- MONGO_DISALLOW_COPYING(EventChainAndWaitingTest);
- public:
- EventChainAndWaitingTest();
- void run();
- private:
- void onGo(const ReplicationExecutor::CallbackArgs& cbData);
- void onGoAfterTriggered(const ReplicationExecutor::CallbackArgs& cbData);
-
- NetworkInterfaceMock* net;
- StorageInterfaceMock* storage;
- ReplicationExecutor executor;
- stdx::thread executorThread;
- const ReplicationExecutor::EventHandle goEvent;
- const ReplicationExecutor::EventHandle event2;
- const ReplicationExecutor::EventHandle event3;
- ReplicationExecutor::EventHandle triggerEvent;
- ReplicationExecutor::CallbackFn triggered2;
- ReplicationExecutor::CallbackFn triggered3;
- Status status1;
- Status status2;
- Status status3;
- Status status4;
- Status status5;
- };
-
- TEST(ReplicationExecutorTest, EventChainAndWaiting) {
- EventChainAndWaitingTest().run();
+ *outStatus1 = cbData.executor->scheduleWork(stdx::bind(setStatusAndShutdown,
+ stdx::placeholders::_1,
+ outStatus2)).getStatus();
+}
+
+const int64_t prngSeed = 1;
+
+TEST_F(ReplicationExecutorTest, RunOne) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status = getDetectableErrorStatus();
+ ASSERT_OK(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status))
+ .getStatus());
+ executor.run();
+ ASSERT_OK(status);
+}
+
+TEST_F(ReplicationExecutorTest, Schedule1ButShutdown) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status = getDetectableErrorStatus();
+ ASSERT_OK(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status))
+ .getStatus());
+ executor.shutdown();
+ executor.run();
+ ASSERT_EQUALS(status, ErrorCodes::CallbackCanceled);
+}
+
+TEST_F(ReplicationExecutorTest, Schedule2Cancel1) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1 = getDetectableErrorStatus();
+ Status status2 = getDetectableErrorStatus();
+ ReplicationExecutor::CallbackHandle cb = unittest::assertGet(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status1)));
+ executor.cancel(cb);
+ ASSERT_OK(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status2))
+ .getStatus());
+ executor.run();
+ ASSERT_EQUALS(status1, ErrorCodes::CallbackCanceled);
+ ASSERT_OK(status2);
+}
+
+TEST_F(ReplicationExecutorTest, OneSchedulesAnother) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1 = getDetectableErrorStatus();
+ Status status2 = getDetectableErrorStatus();
+ ASSERT_OK(executor.scheduleWork(stdx::bind(scheduleSetStatusAndShutdown,
+ stdx::placeholders::_1,
+ &status1,
+ &status2)).getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+ ASSERT_OK(status2);
+}
+
+class EventChainAndWaitingTest {
+ MONGO_DISALLOW_COPYING(EventChainAndWaitingTest);
+
+public:
+ EventChainAndWaitingTest();
+ void run();
+
+private:
+ void onGo(const ReplicationExecutor::CallbackArgs& cbData);
+ void onGoAfterTriggered(const ReplicationExecutor::CallbackArgs& cbData);
+
+ NetworkInterfaceMock* net;
+ StorageInterfaceMock* storage;
+ ReplicationExecutor executor;
+ stdx::thread executorThread;
+ const ReplicationExecutor::EventHandle goEvent;
+ const ReplicationExecutor::EventHandle event2;
+ const ReplicationExecutor::EventHandle event3;
+ ReplicationExecutor::EventHandle triggerEvent;
+ ReplicationExecutor::CallbackFn triggered2;
+ ReplicationExecutor::CallbackFn triggered3;
+ Status status1;
+ Status status2;
+ Status status3;
+ Status status4;
+ Status status5;
+};
+
+TEST(ReplicationExecutorTest, EventChainAndWaiting) {
+ EventChainAndWaitingTest().run();
+}
+
+EventChainAndWaitingTest::EventChainAndWaitingTest()
+ : net(new NetworkInterfaceMock),
+ storage(new StorageInterfaceMock),
+ executor(net, storage, prngSeed),
+ executorThread(stdx::bind(&ReplicationExecutor::run, &executor)),
+ goEvent(unittest::assertGet(executor.makeEvent())),
+ event2(unittest::assertGet(executor.makeEvent())),
+ event3(unittest::assertGet(executor.makeEvent())),
+ status1(ErrorCodes::InternalError, "Not mutated"),
+ status2(ErrorCodes::InternalError, "Not mutated"),
+ status3(ErrorCodes::InternalError, "Not mutated"),
+ status4(ErrorCodes::InternalError, "Not mutated"),
+ status5(ErrorCodes::InternalError, "Not mutated") {
+ triggered2 = stdx::bind(setStatusAndTriggerEvent, stdx::placeholders::_1, &status2, event2);
+ triggered3 = stdx::bind(setStatusAndTriggerEvent, stdx::placeholders::_1, &status3, event3);
+}
+
+void EventChainAndWaitingTest::run() {
+ executor.onEvent(goEvent,
+ stdx::bind(&EventChainAndWaitingTest::onGo, this, stdx::placeholders::_1));
+ executor.signalEvent(goEvent);
+ executor.waitForEvent(goEvent);
+ executor.waitForEvent(event2);
+ executor.waitForEvent(event3);
+
+ ReplicationExecutor::EventHandle neverSignaledEvent = unittest::assertGet(executor.makeEvent());
+ stdx::thread neverSignaledWaiter(
+ stdx::bind(&ReplicationExecutor::waitForEvent, &executor, neverSignaledEvent));
+ ReplicationExecutor::CallbackHandle shutdownCallback = unittest::assertGet(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status5)));
+ executor.wait(shutdownCallback);
+ neverSignaledWaiter.join();
+ executorThread.join();
+ ASSERT_OK(status1);
+ ASSERT_OK(status2);
+ ASSERT_OK(status3);
+ ASSERT_OK(status4);
+ ASSERT_OK(status5);
+}
+
+void EventChainAndWaitingTest::onGo(const ReplicationExecutor::CallbackArgs& cbData) {
+ if (!cbData.status.isOK()) {
+ status1 = cbData.status;
+ return;
}
-
- EventChainAndWaitingTest::EventChainAndWaitingTest() :
- net(new NetworkInterfaceMock),
- storage(new StorageInterfaceMock),
- executor(net, storage, prngSeed),
- executorThread(stdx::bind(&ReplicationExecutor::run, &executor)),
- goEvent(unittest::assertGet(executor.makeEvent())),
- event2(unittest::assertGet(executor.makeEvent())),
- event3(unittest::assertGet(executor.makeEvent())),
- status1(ErrorCodes::InternalError, "Not mutated"),
- status2(ErrorCodes::InternalError, "Not mutated"),
- status3(ErrorCodes::InternalError, "Not mutated"),
- status4(ErrorCodes::InternalError, "Not mutated"),
- status5(ErrorCodes::InternalError, "Not mutated") {
-
- triggered2 = stdx::bind(setStatusAndTriggerEvent,
- stdx::placeholders::_1,
- &status2,
- event2);
- triggered3 = stdx::bind(setStatusAndTriggerEvent,
- stdx::placeholders::_1,
- &status3,
- event3);
+ executor::TaskExecutor* executor = cbData.executor;
+ StatusWith<ReplicationExecutor::EventHandle> errorOrTriggerEvent = executor->makeEvent();
+ if (!errorOrTriggerEvent.isOK()) {
+ status1 = errorOrTriggerEvent.getStatus();
+ executor->shutdown();
+ return;
}
-
- void EventChainAndWaitingTest::run() {
- executor.onEvent(goEvent,
- stdx::bind(&EventChainAndWaitingTest::onGo,
- this,
- stdx::placeholders::_1));
- executor.signalEvent(goEvent);
- executor.waitForEvent(goEvent);
- executor.waitForEvent(event2);
- executor.waitForEvent(event3);
-
- ReplicationExecutor::EventHandle neverSignaledEvent =
- unittest::assertGet(executor.makeEvent());
- stdx::thread neverSignaledWaiter(stdx::bind(&ReplicationExecutor::waitForEvent,
- &executor,
- neverSignaledEvent));
- ReplicationExecutor::CallbackHandle shutdownCallback = unittest::assertGet(
- executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status5)));
- executor.wait(shutdownCallback);
- neverSignaledWaiter.join();
- executorThread.join();
- ASSERT_OK(status1);
- ASSERT_OK(status2);
- ASSERT_OK(status3);
- ASSERT_OK(status4);
- ASSERT_OK(status5);
+ triggerEvent = errorOrTriggerEvent.getValue();
+ StatusWith<ReplicationExecutor::CallbackHandle> cbHandle =
+ executor->onEvent(triggerEvent, triggered2);
+ if (!cbHandle.isOK()) {
+ status1 = cbHandle.getStatus();
+ executor->shutdown();
+ return;
}
-
- void EventChainAndWaitingTest::onGo(const ReplicationExecutor::CallbackArgs& cbData) {
- if (!cbData.status.isOK()) {
- status1 = cbData.status;
- return;
- }
- executor::TaskExecutor* executor = cbData.executor;
- StatusWith<ReplicationExecutor::EventHandle> errorOrTriggerEvent = executor->makeEvent();
- if (!errorOrTriggerEvent.isOK()) {
- status1 = errorOrTriggerEvent.getStatus();
- executor->shutdown();
- return;
- }
- triggerEvent = errorOrTriggerEvent.getValue();
- StatusWith<ReplicationExecutor::CallbackHandle> cbHandle = executor->onEvent(
- triggerEvent, triggered2);
- if (!cbHandle.isOK()) {
- status1 = cbHandle.getStatus();
- executor->shutdown();
- return;
- }
- cbHandle = executor->onEvent(triggerEvent, triggered3);
- if (!cbHandle.isOK()) {
- status1 = cbHandle.getStatus();
- executor->shutdown();
- return;
- }
-
- cbHandle = executor->onEvent(
- goEvent,
- stdx::bind(&EventChainAndWaitingTest::onGoAfterTriggered,
- this,
- stdx::placeholders::_1));
- if (!cbHandle.isOK()) {
- status1 = cbHandle.getStatus();
- executor->shutdown();
- return;
- }
- status1 = Status::OK();
+ cbHandle = executor->onEvent(triggerEvent, triggered3);
+ if (!cbHandle.isOK()) {
+ status1 = cbHandle.getStatus();
+ executor->shutdown();
+ return;
}
- void EventChainAndWaitingTest::onGoAfterTriggered(
- const ReplicationExecutor::CallbackArgs& cbData) {
- status4 = cbData.status;
- if (!cbData.status.isOK()) {
- return;
- }
- cbData.executor->signalEvent(triggerEvent);
+ cbHandle = executor->onEvent(
+ goEvent,
+ stdx::bind(&EventChainAndWaitingTest::onGoAfterTriggered, this, stdx::placeholders::_1));
+ if (!cbHandle.isOK()) {
+ status1 = cbHandle.getStatus();
+ executor->shutdown();
+ return;
}
+ status1 = Status::OK();
+}
- TEST_F(ReplicationExecutorTest, ScheduleWorkAt) {
- NetworkInterfaceMock* net = getNet();
- ReplicationExecutor& executor = getExecutor();
- launchExecutorThread();
- Status status1 = getDetectableErrorStatus();
- Status status2 = getDetectableErrorStatus();
- Status status3 = getDetectableErrorStatus();
- const Date_t now = net->now();
- const ReplicationExecutor::CallbackHandle cb1 =
- unittest::assertGet(executor.scheduleWorkAt(now + Milliseconds(100),
- stdx::bind(setStatus,
- stdx::placeholders::_1,
- &status1)));
- unittest::assertGet(executor.scheduleWorkAt(now + Milliseconds(5000),
- stdx::bind(setStatus,
- stdx::placeholders::_1,
- &status3)));
- const ReplicationExecutor::CallbackHandle cb2 =
- unittest::assertGet(executor.scheduleWorkAt(now + Milliseconds(200),
- stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status2)));
- const Date_t startTime = net->now();
- net->runUntil(startTime + Milliseconds(200));
- ASSERT_EQUALS(startTime + Milliseconds(200), net->now());
- executor.wait(cb1);
- executor.wait(cb2);
- ASSERT_OK(status1);
- ASSERT_OK(status2);
- executor.shutdown();
- joinExecutorThread();
- ASSERT_EQUALS(status3, ErrorCodes::CallbackCanceled);
+void EventChainAndWaitingTest::onGoAfterTriggered(const ReplicationExecutor::CallbackArgs& cbData) {
+ status4 = cbData.status;
+ if (!cbData.status.isOK()) {
+ return;
}
-
- std::string getRequestDescription(const RemoteCommandRequest& request) {
- return mongoutils::str::stream() << "Request(" << request.target.toString() << ", " <<
- request.dbname << ", " << request.cmdObj << ')';
- }
-
- static void setStatusOnRemoteCommandCompletion(
- const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
- const RemoteCommandRequest& expectedRequest,
- Status* outStatus) {
-
- if (cbData.request != expectedRequest) {
- *outStatus = Status(
- ErrorCodes::BadValue,
- mongoutils::str::stream() << "Actual request: " <<
- getRequestDescription(cbData.request) << "; expected: " <<
- getRequestDescription(expectedRequest));
- return;
- }
- *outStatus = cbData.response.getStatus();
- }
-
- TEST_F(ReplicationExecutorTest, ScheduleRemoteCommand) {
- NetworkInterfaceMock* net = getNet();
- ReplicationExecutor& executor = getExecutor();
- launchExecutorThread();
- Status status1 = getDetectableErrorStatus();
- const RemoteCommandRequest request(
- HostAndPort("localhost", 27017),
- "mydb",
- BSON("whatsUp" << "doc"));
- ReplicationExecutor::CallbackHandle cbHandle = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status1)));
- ASSERT(net->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::NoSuchKey, "I'm missing"));
- net->runReadyNetworkOperations();
- ASSERT(!net->hasReadyRequests());
- executor.wait(cbHandle);
- executor.shutdown();
- joinExecutorThread();
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, status1);
- }
-
- TEST_F(ReplicationExecutorTest, ScheduleAndCancelRemoteCommand) {
- ReplicationExecutor& executor = getExecutor();
- Status status1 = getDetectableErrorStatus();
- const RemoteCommandRequest request(
- HostAndPort("localhost", 27017),
- "mydb",
- BSON("whatsUp" << "doc"));
- ReplicationExecutor::CallbackHandle cbHandle = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status1)));
- executor.cancel(cbHandle);
- launchExecutorThread();
- getNet()->runReadyNetworkOperations();
- executor.wait(cbHandle);
- executor.shutdown();
- joinExecutorThread();
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status1);
- }
-
- TEST_F(ReplicationExecutorTest, ScheduleDBWorkAndExclusiveWorkConcurrently) {
- unittest::Barrier barrier(2U);
- NamespaceString nss("mydb", "mycoll");
- ReplicationExecutor& executor = getExecutor();
- Status status1 = getDetectableErrorStatus();
- OperationContext* txn = nullptr;
- using CallbackData = ReplicationExecutor::CallbackArgs;
- ASSERT_OK(executor.scheduleDBWork([&](const CallbackData& cbData) {
- status1 = cbData.status;
- txn = cbData.txn;
- barrier.countDownAndWait();
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }).getStatus());
- ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
- barrier.countDownAndWait();
- }).getStatus());
- executor.run();
- ASSERT_OK(status1);
- ASSERT(txn);
- }
-
- TEST_F(ReplicationExecutorTest, ScheduleDBWorkWithCollectionLock) {
- NamespaceString nss("mydb", "mycoll");
- ReplicationExecutor& executor = getExecutor();
- Status status1 = getDetectableErrorStatus();
- OperationContext* txn = nullptr;
- bool collectionIsLocked = false;
- using CallbackData = ReplicationExecutor::CallbackArgs;
- ASSERT_OK(executor.scheduleDBWork([&](const CallbackData& cbData) {
- status1 = cbData.status;
- txn = cbData.txn;
- collectionIsLocked = txn ?
- txn->lockState()->isCollectionLockedForMode(nss.ns(), MODE_X) :
- false;
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }, nss, MODE_X).getStatus());
- executor.run();
- ASSERT_OK(status1);
- ASSERT(txn);
- ASSERT_TRUE(collectionIsLocked);
- }
-
- TEST_F(ReplicationExecutorTest, ScheduleExclusiveLockOperation) {
- ReplicationExecutor& executor = getExecutor();
- Status status1 = getDetectableErrorStatus();
- OperationContext* txn = nullptr;
- bool lockIsW = false;
- using CallbackData = ReplicationExecutor::CallbackArgs;
- ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
- status1 = cbData.status;
- txn = cbData.txn;
- lockIsW = txn ? txn->lockState()->isW() : false;
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }).getStatus());
- executor.run();
- ASSERT_OK(status1);
- ASSERT(txn);
- ASSERT_TRUE(lockIsW);
- }
-
- TEST_F(ReplicationExecutorTest, ShutdownBeforeRunningSecondExclusiveLockOperation) {
- ReplicationExecutor& executor = getExecutor();
- using CallbackData = ReplicationExecutor::CallbackArgs;
- Status status1 = getDetectableErrorStatus();
- ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
- status1 = cbData.status;
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }).getStatus());
- // Second db work item is invoked by the main executor thread because the work item is
- // moved from the exclusive lock queue to the ready work item queue when the first callback
- // cancels the executor.
- Status status2 = getDetectableErrorStatus();
- ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
- status2 = cbData.status;
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }).getStatus());
- executor.run();
- ASSERT_OK(status1);
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status2.code());
- }
-
- TEST_F(ReplicationExecutorTest, RemoteCommandWithTimeout) {
- NetworkInterfaceMock* net = getNet();
- ReplicationExecutor& executor = getExecutor();
- Status status(ErrorCodes::InternalError, "");
- launchExecutorThread();
- const RemoteCommandRequest request(
- HostAndPort("lazy", 27017),
- "admin",
- BSON("sleep" << 1),
- Milliseconds(1));
- ReplicationExecutor::CallbackHandle cbHandle = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status)));
- ASSERT(net->hasReadyRequests());
- const Date_t startTime = net->now();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- startTime + Milliseconds(2),
- ResponseStatus(ErrorCodes::ExceededTimeLimit, "I took too long"));
- net->runUntil(startTime + Milliseconds(2));
- ASSERT_EQUALS(startTime + Milliseconds(2), net->now());
- executor.wait(cbHandle);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
- }
-
- TEST_F(ReplicationExecutorTest, CallbackHandleComparison) {
- ReplicationExecutor& executor = getExecutor();
- Status status(ErrorCodes::InternalError, "");
- const RemoteCommandRequest request(
- HostAndPort("lazy", 27017),
- "admin",
- BSON("cmd" << 1));
- ReplicationExecutor::CallbackHandle cbHandle1 = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status)));
- ReplicationExecutor::CallbackHandle cbHandle2 = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status)));
-
- // test equality
- ASSERT_TRUE(cbHandle1 == cbHandle1);
- ASSERT_TRUE(cbHandle2 == cbHandle2);
- ASSERT_FALSE(cbHandle1 != cbHandle1);
- ASSERT_FALSE(cbHandle2 != cbHandle2);
-
- // test inequality
- ASSERT_TRUE(cbHandle1 != cbHandle2);
- ASSERT_TRUE(cbHandle2 != cbHandle1);
- ASSERT_FALSE(cbHandle1 == cbHandle2);
- ASSERT_FALSE(cbHandle2 == cbHandle1);
-
- ReplicationExecutor::CallbackHandle cbHandle1Copy = cbHandle1;
- ASSERT_TRUE(cbHandle1 == cbHandle1Copy);
- ASSERT_TRUE(cbHandle1Copy == cbHandle1);
- ASSERT_FALSE(cbHandle1Copy != cbHandle1);
- ASSERT_FALSE(cbHandle1 != cbHandle1Copy);
-
- std::vector<ReplicationExecutor::CallbackHandle> cbs;
- cbs.push_back(cbHandle1);
- cbs.push_back(cbHandle2);
- ASSERT(cbHandle1 != cbHandle2);
- std::vector<ReplicationExecutor::CallbackHandle>::iterator foundHandle =
- std::find(cbs.begin(),
- cbs.end(),
- cbHandle1);
- ASSERT_TRUE(cbs.end() != foundHandle);
- ASSERT_TRUE(cbHandle1 == *foundHandle);
- launchExecutorThread();
- executor.shutdown();
- joinExecutorThread();
+ cbData.executor->signalEvent(triggerEvent);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleWorkAt) {
+ NetworkInterfaceMock* net = getNet();
+ ReplicationExecutor& executor = getExecutor();
+ launchExecutorThread();
+ Status status1 = getDetectableErrorStatus();
+ Status status2 = getDetectableErrorStatus();
+ Status status3 = getDetectableErrorStatus();
+ const Date_t now = net->now();
+ const ReplicationExecutor::CallbackHandle cb1 = unittest::assertGet(executor.scheduleWorkAt(
+ now + Milliseconds(100), stdx::bind(setStatus, stdx::placeholders::_1, &status1)));
+ unittest::assertGet(executor.scheduleWorkAt(
+ now + Milliseconds(5000), stdx::bind(setStatus, stdx::placeholders::_1, &status3)));
+ const ReplicationExecutor::CallbackHandle cb2 = unittest::assertGet(executor.scheduleWorkAt(
+ now + Milliseconds(200),
+ stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status2)));
+ const Date_t startTime = net->now();
+ net->runUntil(startTime + Milliseconds(200));
+ ASSERT_EQUALS(startTime + Milliseconds(200), net->now());
+ executor.wait(cb1);
+ executor.wait(cb2);
+ ASSERT_OK(status1);
+ ASSERT_OK(status2);
+ executor.shutdown();
+ joinExecutorThread();
+ ASSERT_EQUALS(status3, ErrorCodes::CallbackCanceled);
+}
+
+std::string getRequestDescription(const RemoteCommandRequest& request) {
+ return mongoutils::str::stream() << "Request(" << request.target.toString() << ", "
+ << request.dbname << ", " << request.cmdObj << ')';
+}
+
+static void setStatusOnRemoteCommandCompletion(
+ const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
+ const RemoteCommandRequest& expectedRequest,
+ Status* outStatus) {
+ if (cbData.request != expectedRequest) {
+ *outStatus = Status(ErrorCodes::BadValue,
+ mongoutils::str::stream()
+ << "Actual request: " << getRequestDescription(cbData.request)
+ << "; expected: " << getRequestDescription(expectedRequest));
+ return;
}
+ *outStatus = cbData.response.getStatus();
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleRemoteCommand) {
+ NetworkInterfaceMock* net = getNet();
+ ReplicationExecutor& executor = getExecutor();
+ launchExecutorThread();
+ Status status1 = getDetectableErrorStatus();
+ const RemoteCommandRequest request(HostAndPort("localhost", 27017),
+ "mydb",
+ BSON("whatsUp"
+ << "doc"));
+ ReplicationExecutor::CallbackHandle cbHandle =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status1)));
+ ASSERT(net->hasReadyRequests());
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi, net->now(), ResponseStatus(ErrorCodes::NoSuchKey, "I'm missing"));
+ net->runReadyNetworkOperations();
+ ASSERT(!net->hasReadyRequests());
+ executor.wait(cbHandle);
+ executor.shutdown();
+ joinExecutorThread();
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, status1);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleAndCancelRemoteCommand) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1 = getDetectableErrorStatus();
+ const RemoteCommandRequest request(HostAndPort("localhost", 27017),
+ "mydb",
+ BSON("whatsUp"
+ << "doc"));
+ ReplicationExecutor::CallbackHandle cbHandle =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status1)));
+ executor.cancel(cbHandle);
+ launchExecutorThread();
+ getNet()->runReadyNetworkOperations();
+ executor.wait(cbHandle);
+ executor.shutdown();
+ joinExecutorThread();
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status1);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleDBWorkAndExclusiveWorkConcurrently) {
+ unittest::Barrier barrier(2U);
+ NamespaceString nss("mydb", "mycoll");
+ ReplicationExecutor& executor = getExecutor();
+ Status status1 = getDetectableErrorStatus();
+ OperationContext* txn = nullptr;
+ using CallbackData = ReplicationExecutor::CallbackArgs;
+ ASSERT_OK(executor.scheduleDBWork([&](const CallbackData& cbData) {
+ status1 = cbData.status;
+ txn = cbData.txn;
+ barrier.countDownAndWait();
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+ }).getStatus());
+ ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
+ barrier.countDownAndWait();
+ }).getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+ ASSERT(txn);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleDBWorkWithCollectionLock) {
+ NamespaceString nss("mydb", "mycoll");
+ ReplicationExecutor& executor = getExecutor();
+ Status status1 = getDetectableErrorStatus();
+ OperationContext* txn = nullptr;
+ bool collectionIsLocked = false;
+ using CallbackData = ReplicationExecutor::CallbackArgs;
+ ASSERT_OK(executor.scheduleDBWork([&](const CallbackData& cbData) {
+ status1 = cbData.status;
+ txn = cbData.txn;
+ collectionIsLocked =
+ txn ? txn->lockState()->isCollectionLockedForMode(nss.ns(), MODE_X) : false;
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+ }, nss, MODE_X).getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+ ASSERT(txn);
+ ASSERT_TRUE(collectionIsLocked);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleExclusiveLockOperation) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1 = getDetectableErrorStatus();
+ OperationContext* txn = nullptr;
+ bool lockIsW = false;
+ using CallbackData = ReplicationExecutor::CallbackArgs;
+ ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
+ status1 = cbData.status;
+ txn = cbData.txn;
+ lockIsW = txn ? txn->lockState()->isW() : false;
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+ }).getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+ ASSERT(txn);
+ ASSERT_TRUE(lockIsW);
+}
+
+TEST_F(ReplicationExecutorTest, ShutdownBeforeRunningSecondExclusiveLockOperation) {
+ ReplicationExecutor& executor = getExecutor();
+ using CallbackData = ReplicationExecutor::CallbackArgs;
+ Status status1 = getDetectableErrorStatus();
+ ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
+ status1 = cbData.status;
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+ }).getStatus());
+    // The second db work item is invoked by the main executor thread because the work item is
+    // moved from the exclusive lock queue to the ready work item queue when the first callback
+    // shuts down the executor.
+ Status status2 = getDetectableErrorStatus();
+ ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock([&](const CallbackData& cbData) {
+ status2 = cbData.status;
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+ }).getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status2.code());
+}
+
+TEST_F(ReplicationExecutorTest, RemoteCommandWithTimeout) {
+ NetworkInterfaceMock* net = getNet();
+ ReplicationExecutor& executor = getExecutor();
+ Status status(ErrorCodes::InternalError, "");
+ launchExecutorThread();
+ const RemoteCommandRequest request(
+ HostAndPort("lazy", 27017), "admin", BSON("sleep" << 1), Milliseconds(1));
+ ReplicationExecutor::CallbackHandle cbHandle =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status)));
+ ASSERT(net->hasReadyRequests());
+ const Date_t startTime = net->now();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ startTime + Milliseconds(2),
+ ResponseStatus(ErrorCodes::ExceededTimeLimit, "I took too long"));
+ net->runUntil(startTime + Milliseconds(2));
+ ASSERT_EQUALS(startTime + Milliseconds(2), net->now());
+ executor.wait(cbHandle);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+}
+
+TEST_F(ReplicationExecutorTest, CallbackHandleComparison) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status(ErrorCodes::InternalError, "");
+ const RemoteCommandRequest request(HostAndPort("lazy", 27017), "admin", BSON("cmd" << 1));
+ ReplicationExecutor::CallbackHandle cbHandle1 =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status)));
+ ReplicationExecutor::CallbackHandle cbHandle2 =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status)));
+
+ // test equality
+ ASSERT_TRUE(cbHandle1 == cbHandle1);
+ ASSERT_TRUE(cbHandle2 == cbHandle2);
+ ASSERT_FALSE(cbHandle1 != cbHandle1);
+ ASSERT_FALSE(cbHandle2 != cbHandle2);
+
+ // test inequality
+ ASSERT_TRUE(cbHandle1 != cbHandle2);
+ ASSERT_TRUE(cbHandle2 != cbHandle1);
+ ASSERT_FALSE(cbHandle1 == cbHandle2);
+ ASSERT_FALSE(cbHandle2 == cbHandle1);
+
+ ReplicationExecutor::CallbackHandle cbHandle1Copy = cbHandle1;
+ ASSERT_TRUE(cbHandle1 == cbHandle1Copy);
+ ASSERT_TRUE(cbHandle1Copy == cbHandle1);
+ ASSERT_FALSE(cbHandle1Copy != cbHandle1);
+ ASSERT_FALSE(cbHandle1 != cbHandle1Copy);
+
+ std::vector<ReplicationExecutor::CallbackHandle> cbs;
+ cbs.push_back(cbHandle1);
+ cbs.push_back(cbHandle2);
+ ASSERT(cbHandle1 != cbHandle2);
+ std::vector<ReplicationExecutor::CallbackHandle>::iterator foundHandle =
+ std::find(cbs.begin(), cbs.end(), cbHandle1);
+ ASSERT_TRUE(cbs.end() != foundHandle);
+ ASSERT_TRUE(cbHandle1 == *foundHandle);
+ launchExecutorThread();
+ executor.shutdown();
+ joinExecutorThread();
+}
} // namespace
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_executor_test_fixture.cpp b/src/mongo/db/repl/replication_executor_test_fixture.cpp
index bcd07d0db68..6172ca01a33 100644
--- a/src/mongo/db/repl/replication_executor_test_fixture.cpp
+++ b/src/mongo/db/repl/replication_executor_test_fixture.cpp
@@ -39,47 +39,46 @@ namespace repl {
namespace {
- const int64_t prngSeed = 1;
+const int64_t prngSeed = 1;
-} // namespace
+} // namespace
- // static
- Status ReplicationExecutorTest::getDetectableErrorStatus() {
- return Status(ErrorCodes::InternalError, "Not mutated");
- }
+// static
+Status ReplicationExecutorTest::getDetectableErrorStatus() {
+ return Status(ErrorCodes::InternalError, "Not mutated");
+}
- void ReplicationExecutorTest::launchExecutorThread() {
- ASSERT(!_executorThread);
- _executorThread.reset(
- new stdx::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
- postExecutorThreadLaunch();
- }
+void ReplicationExecutorTest::launchExecutorThread() {
+ ASSERT(!_executorThread);
+ _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+ postExecutorThreadLaunch();
+}
- void ReplicationExecutorTest::postExecutorThreadLaunch() {
- _net->enterNetwork();
- }
+void ReplicationExecutorTest::postExecutorThreadLaunch() {
+ _net->enterNetwork();
+}
- void ReplicationExecutorTest::joinExecutorThread() {
- ASSERT(_executorThread);
- getNet()->exitNetwork();
- _executorThread->join();
- _executorThread.reset();
- }
+void ReplicationExecutorTest::joinExecutorThread() {
+ ASSERT(_executorThread);
+ getNet()->exitNetwork();
+ _executorThread->join();
+ _executorThread.reset();
+}
- void ReplicationExecutorTest::setUp() {
- _net = new executor::NetworkInterfaceMock;
- _storage = new StorageInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, _storage, prngSeed));
- }
+void ReplicationExecutorTest::setUp() {
+ _net = new executor::NetworkInterfaceMock;
+ _storage = new StorageInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, _storage, prngSeed));
+}
- void ReplicationExecutorTest::tearDown() {
- if (_executorThread) {
- _executor->shutdown();
- joinExecutorThread();
- }
- _executor.reset();
- _net = nullptr;
+void ReplicationExecutorTest::tearDown() {
+ if (_executorThread) {
+ _executor->shutdown();
+ joinExecutorThread();
}
+ _executor.reset();
+ _net = nullptr;
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_executor_test_fixture.h b/src/mongo/db/repl/replication_executor_test_fixture.h
index a6fec40ebd2..e89dd99ecb6 100644
--- a/src/mongo/db/repl/replication_executor_test_fixture.h
+++ b/src/mongo/db/repl/replication_executor_test_fixture.h
@@ -34,73 +34,76 @@
namespace mongo {
namespace executor {
- class NetworkInterfaceMock;
-} // namespace executor
+class NetworkInterfaceMock;
+} // namespace executor
namespace repl {
- using std::unique_ptr;
+using std::unique_ptr;
- class ReplicationExecutor;
- class StorageInterfaceMock;
+class ReplicationExecutor;
+class StorageInterfaceMock;
+
+/**
+ * Test fixture for tests that require a ReplicationExecutor backed by
+ * a NetworkInterfaceMock.
+ */
+class ReplicationExecutorTest : public unittest::Test {
+public:
+ /**
+     * Creates an initial error status suitable for checking whether a
+     * component under test has modified the 'status' field of the fixture.
+ */
+ static Status getDetectableErrorStatus();
+
+protected:
+ executor::NetworkInterfaceMock* getNet() {
+ return _net;
+ }
+ ReplicationExecutor& getExecutor() {
+ return *_executor;
+ }
+ /**
+     * Runs the ReplicationExecutor in a background thread.
+ */
+ void launchExecutorThread();
+
+ /**
+ * Anything that needs to be done after launchExecutorThread should go in here.
+ */
+ virtual void postExecutorThreadLaunch();
/**
- * Test fixture for tests that require a ReplicationExecutor backed by
- * a NetworkInterfaceMock.
+ * Waits for background ReplicationExecutor to stop running.
+ *
+     * The executor should be shut down prior to calling this function
+ * or the test may block indefinitely.
*/
- class ReplicationExecutorTest : public unittest::Test {
- public:
-
- /**
- * Creates an initial error status suitable for checking if
- * component has modified the 'status' field in test fixture.
- */
- static Status getDetectableErrorStatus();
-
- protected:
- executor::NetworkInterfaceMock* getNet() { return _net; }
- ReplicationExecutor& getExecutor() { return *_executor; }
- /**
- * Runs ReplicationExecutor in background.
- */
- void launchExecutorThread();
-
- /**
- * Anything that needs to be done after launchExecutorThread should go in here.
- */
- virtual void postExecutorThreadLaunch();
-
- /**
- * Waits for background ReplicationExecutor to stop running.
- *
- * The executor should be shutdown prior to calling this function
- * or the test may block indefinitely.
- */
- void joinExecutorThread();
-
- /**
- * Initializes both the NetworkInterfaceMock and ReplicationExecutor but
- * does not run the executor in the background.
- *
- * To run the executor in the background, tests should invoke launchExecutorThread() or
- * override this function() to achieve the same effect.
- */
- void setUp() override;
-
- /**
- * Destroys the replication executor.
- *
- * Shuts down running background executor.
- */
- void tearDown() override;
-
-
- private:
- executor::NetworkInterfaceMock* _net;
- StorageInterfaceMock* _storage;
- unique_ptr<ReplicationExecutor> _executor;
- unique_ptr<stdx::thread> _executorThread;
- };
+ void joinExecutorThread();
+
+ /**
+ * Initializes both the NetworkInterfaceMock and ReplicationExecutor but
+ * does not run the executor in the background.
+ *
+ * To run the executor in the background, tests should invoke launchExecutorThread() or
+     * override this function to achieve the same effect.
+ */
+ void setUp() override;
+
+ /**
+ * Destroys the replication executor.
+ *
+     * Shuts down the background executor if it is still running.
+ */
+ void tearDown() override;
+
+
+private:
+ executor::NetworkInterfaceMock* _net;
+ StorageInterfaceMock* _storage;
+ unique_ptr<ReplicationExecutor> _executor;
+ unique_ptr<stdx::thread> _executorThread;
+};
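+
+// A hypothetical test (the test name is invented) sketching the intended fixture
+// lifecycle: setUp() wires the mock network into a fresh executor,
+// launchExecutorThread() runs it in the background and enters the network mock,
+// and joinExecutorThread() exits the mock and joins the thread.
+//
+//     TEST_F(ReplicationExecutorTest, SketchOfTypicalUsage) {
+//         launchExecutorThread();
+//         executor::NetworkInterfaceMock* net = getNet();
+//         net->runUntil(net->now() + Milliseconds(100));  // advance simulated time
+//         getExecutor().shutdown();
+//         joinExecutorThread();
+//     }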
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_info.cpp b/src/mongo/db/repl/replication_info.cpp
index 343a939fc95..57b403aa434 100644
--- a/src/mongo/db/repl/replication_info.cpp
+++ b/src/mongo/db/repl/replication_info.cpp
@@ -50,189 +50,193 @@
namespace mongo {
- using std::unique_ptr;
- using std::list;
- using std::string;
- using std::stringstream;
+using std::unique_ptr;
+using std::list;
+using std::string;
+using std::stringstream;
namespace repl {
- void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getSettings().usingReplSets()) {
- IsMasterResponse isMasterResponse;
- replCoord->fillIsMasterForReplSet(&isMasterResponse);
- result.appendElements(isMasterResponse.toBSON());
- if (level) {
- replCoord->appendSlaveInfoData(&result);
- }
- return;
- }
-
- // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
- if (replAllDead) {
- result.append("ismaster", 0);
- string s = string("dead: ") + replAllDead;
- result.append("info", s);
+void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getSettings().usingReplSets()) {
+ IsMasterResponse isMasterResponse;
+ replCoord->fillIsMasterForReplSet(&isMasterResponse);
+ result.appendElements(isMasterResponse.toBSON());
+ if (level) {
+ replCoord->appendSlaveInfoData(&result);
}
- else {
- result.appendBool("ismaster",
- getGlobalReplicationCoordinator()->isMasterForReportingPurposes());
+ return;
+ }
+
+ // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
+ if (replAllDead) {
+ result.append("ismaster", 0);
+ string s = string("dead: ") + replAllDead;
+ result.append("info", s);
+ } else {
+ result.appendBool("ismaster",
+ getGlobalReplicationCoordinator()->isMasterForReportingPurposes());
+ }
+
+ if (level) {
+ BSONObjBuilder sources(result.subarrayStart("sources"));
+
+ int n = 0;
+ list<BSONObj> src;
+ {
+ const char* localSources = "local.sources";
+ AutoGetCollectionForRead ctx(txn, localSources);
+ unique_ptr<PlanExecutor> exec(
+ InternalPlanner::collectionScan(txn, localSources, ctx.getCollection()));
+ BSONObj obj;
+ PlanExecutor::ExecState state;
+ while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+ src.push_back(obj);
+ }
}
-
- if (level) {
- BSONObjBuilder sources( result.subarrayStart( "sources" ) );
-
- int n = 0;
- list<BSONObj> src;
+
+ for (list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++) {
+ BSONObj s = *i;
+ BSONObjBuilder bb;
+ bb.append(s["host"]);
+ string sourcename = s["source"].valuestr();
+ if (sourcename != "main")
+ bb.append(s["source"]);
{
- const char* localSources = "local.sources";
- AutoGetCollectionForRead ctx(txn, localSources);
- unique_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn, localSources, ctx.getCollection()));
- BSONObj obj;
- PlanExecutor::ExecState state;
- while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
- src.push_back(obj);
- }
+ BSONElement e = s["syncedTo"];
+ BSONObjBuilder t(bb.subobjStart("syncedTo"));
+ t.appendDate("time", e.timestampTime());
+ t.append("inc", e.timestampInc());
+ t.done();
}
-
- for( list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++ ) {
- BSONObj s = *i;
- BSONObjBuilder bb;
- bb.append( s["host"] );
- string sourcename = s["source"].valuestr();
- if ( sourcename != "main" )
- bb.append( s["source"] );
- {
- BSONElement e = s["syncedTo"];
- BSONObjBuilder t( bb.subobjStart( "syncedTo" ) );
- t.appendDate( "time" , e.timestampTime() );
- t.append( "inc" , e.timestampInc() );
- t.done();
- }
-
- if ( level > 1 ) {
- wassert(!txn->lockState()->isLocked());
- // note: there is no so-style timeout on this connection; perhaps we should have one.
- ScopedDbConnection conn(s["host"].valuestr());
-
- DBClientConnection *cliConn = dynamic_cast< DBClientConnection* >( &conn.conn() );
- if ( cliConn && replAuthenticate(cliConn) ) {
- BSONObj first = conn->findOne( (string)"local.oplog.$" + sourcename,
- Query().sort( BSON( "$natural" << 1 ) ) );
- BSONObj last = conn->findOne( (string)"local.oplog.$" + sourcename,
- Query().sort( BSON( "$natural" << -1 ) ) );
- bb.appendDate( "masterFirst" , first["ts"].timestampTime() );
- bb.appendDate( "masterLast" , last["ts"].timestampTime() );
- const auto lag =
- (last["ts"].timestampTime() - s["syncedTo"].timestampTime());
- bb.append("lagSeconds", durationCount<Milliseconds>(lag) / 1000.0);
- }
- conn.done();
+
+ if (level > 1) {
+ wassert(!txn->lockState()->isLocked());
+            // note: there is no socket-level (SO_*-style) timeout on this connection; perhaps we
+            // should have one.
+ ScopedDbConnection conn(s["host"].valuestr());
+
+ DBClientConnection* cliConn = dynamic_cast<DBClientConnection*>(&conn.conn());
+ if (cliConn && replAuthenticate(cliConn)) {
+ BSONObj first = conn->findOne((string) "local.oplog.$" + sourcename,
+ Query().sort(BSON("$natural" << 1)));
+ BSONObj last = conn->findOne((string) "local.oplog.$" + sourcename,
+ Query().sort(BSON("$natural" << -1)));
+ bb.appendDate("masterFirst", first["ts"].timestampTime());
+ bb.appendDate("masterLast", last["ts"].timestampTime());
+ const auto lag = (last["ts"].timestampTime() - s["syncedTo"].timestampTime());
+ bb.append("lagSeconds", durationCount<Milliseconds>(lag) / 1000.0);
}
-
- sources.append( BSONObjBuilder::numStr( n++ ) , bb.obj() );
+ conn.done();
}
-
- sources.done();
- replCoord->appendSlaveInfoData(&result);
+ sources.append(BSONObjBuilder::numStr(n++), bb.obj());
}
+
+ sources.done();
+
+ replCoord->appendSlaveInfoData(&result);
}
-
- class ReplicationInfoServerStatus : public ServerStatusSection {
- public:
- ReplicationInfoServerStatus() : ServerStatusSection( "repl" ){}
- bool includeByDefault() const { return true; }
-
- BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
-
- if (!getGlobalReplicationCoordinator()->isReplEnabled()) {
- return BSONObj();
- }
-
- int level = configElement.numberInt();
-
- BSONObjBuilder result;
- appendReplicationInfo(txn, result, level);
- getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
-
- return result.obj();
+}
+
+class ReplicationInfoServerStatus : public ServerStatusSection {
+public:
+ ReplicationInfoServerStatus() : ServerStatusSection("repl") {}
+ bool includeByDefault() const {
+ return true;
+ }
+
+ BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
+ if (!getGlobalReplicationCoordinator()->isReplEnabled()) {
+ return BSONObj();
}
- } replicationInfoServerStatus;
+ int level = configElement.numberInt();
- class OplogInfoServerStatus : public ServerStatusSection {
- public:
- OplogInfoServerStatus() : ServerStatusSection( "oplog" ){}
- bool includeByDefault() const { return false; }
+ BSONObjBuilder result;
+ appendReplicationInfo(txn, result, level);
+ getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
- BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
+ return result.obj();
+ }
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (!replCoord->isReplEnabled()) {
- return BSONObj();
- }
+} replicationInfoServerStatus;
- BSONObjBuilder result;
- // TODO(siyuan) Output term of OpTime
- result.append("latestOptime", replCoord->getMyLastOptime().getTimestamp());
-
- const std::string& oplogNS =
- replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet ?
- rsOplogName : masterSlaveOplogName;
- BSONObj o;
- uassert(17347,
- "Problem reading earliest entry from oplog",
- Helpers::getSingleton(txn, oplogNS.c_str(), o));
- result.append("earliestOptime", o["ts"].timestamp());
- return result.obj();
- }
- } oplogInfoServerStatus;
+class OplogInfoServerStatus : public ServerStatusSection {
+public:
+ OplogInfoServerStatus() : ServerStatusSection("oplog") {}
+ bool includeByDefault() const {
+ return false;
+ }
- class CmdIsMaster : public Command {
- public:
- virtual bool requiresAuth() { return false; }
- virtual bool slaveOk() const {
- return true;
- }
- virtual void help( stringstream &help ) const {
- help << "Check if this server is primary for a replica pair/set; also if it is --master or --slave in simple master/slave setups.\n";
- help << "{ isMaster : 1 }";
- }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {} // No auth required
- CmdIsMaster() : Command("isMaster", true, "ismaster") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not
- authenticated.
- */
- if ( cmdObj["forShell"].trueValue() )
- LastError::get(txn->getClient()).disable();
-
- appendReplicationInfo(txn, result, 0);
-
- result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
- result.appendNumber("maxMessageSizeBytes", MaxMessageSizeBytes);
- result.appendNumber("maxWriteBatchSize", BatchedCommandRequest::kMaxWriteBatchSize);
- result.appendDate("localTime", jsTime());
- result.append("maxWireVersion", maxWireVersion);
- result.append("minWireVersion", minWireVersion);
- return true;
+ BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (!replCoord->isReplEnabled()) {
+ return BSONObj();
}
- } cmdismaster;
- OpCounterServerStatusSection replOpCounterServerStatusSection( "opcountersRepl", &replOpCounters );
+ BSONObjBuilder result;
+ // TODO(siyuan) Output term of OpTime
+ result.append("latestOptime", replCoord->getMyLastOptime().getTimestamp());
+
+ const std::string& oplogNS =
+ replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet
+ ? rsOplogName
+ : masterSlaveOplogName;
+ BSONObj o;
+ uassert(17347,
+ "Problem reading earliest entry from oplog",
+ Helpers::getSingleton(txn, oplogNS.c_str(), o));
+ result.append("earliestOptime", o["ts"].timestamp());
+ return result.obj();
+ }
+} oplogInfoServerStatus;
+
+class CmdIsMaster : public Command {
+public:
+ virtual bool requiresAuth() {
+ return false;
+ }
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual void help(stringstream& help) const {
+ help << "Check if this server is primary for a replica pair/set; also if it is --master or "
+ "--slave in simple master/slave setups.\n";
+ help << "{ isMaster : 1 }";
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {} // No auth required
+ CmdIsMaster() : Command("isMaster", true, "ismaster") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+        /* Currently, a request to an arbiter is (somewhat arbitrarily) an ismaster request that
+           is not authenticated.
+ */
+ if (cmdObj["forShell"].trueValue())
+ LastError::get(txn->getClient()).disable();
+
+ appendReplicationInfo(txn, result, 0);
+
+ result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
+ result.appendNumber("maxMessageSizeBytes", MaxMessageSizeBytes);
+ result.appendNumber("maxWriteBatchSize", BatchedCommandRequest::kMaxWriteBatchSize);
+ result.appendDate("localTime", jsTime());
+ result.append("maxWireVersion", maxWireVersion);
+ result.append("minWireVersion", minWireVersion);
+ return true;
+ }
+} cmdismaster;
+
+OpCounterServerStatusSection replOpCounterServerStatusSection("opcountersRepl", &replOpCounters);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 600fbfe52a8..c26bed4775b 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -61,757 +61,740 @@
namespace mongo {
namespace repl {
- using std::string;
- using std::stringstream;
-
- // Testing only, enabled via command-line.
- class CmdReplSetTest : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Just for regression tests.\n";
- }
- // No auth needed because it only works when enabled via command line.
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- return Status::OK();
- }
- CmdReplSetTest() : ReplSetCommand("replSetTest") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- log() << "replSetTest command received: " << cmdObj.toString();
-
- if( cmdObj.hasElement("forceInitialSyncFailure") ) {
- replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number();
- return true;
- }
-
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
- return false;
- }
- };
- MONGO_INITIALIZER(RegisterReplSetTestCmd)(InitializerContext* context) {
- if (Command::testCommandsEnabled) {
- // Leaked intentionally: a Command registers itself when constructed.
- new CmdReplSetTest();
- }
+using std::string;
+using std::stringstream;
+
+// Testing only, enabled via command-line.
+class CmdReplSetTest : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Just for regression tests.\n";
+ }
+ // No auth needed because it only works when enabled via command line.
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
return Status::OK();
}
+ CmdReplSetTest() : ReplSetCommand("replSetTest") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ log() << "replSetTest command received: " << cmdObj.toString();
+
+ if (cmdObj.hasElement("forceInitialSyncFailure")) {
+ replSetForceInitialSyncFailure = (unsigned)cmdObj["forceInitialSyncFailure"].Number();
+ return true;
+ }
- /** get rollback id. used to check if a rollback happened during some interval of time.
- as consumed, the rollback id is not in any particular order, it simply changes on each rollback.
- @see incRBID()
- */
- class CmdReplSetGetRBID : public ReplSetCommand {
- public:
- CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") {}
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
- status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetRBID;
-
- class CmdReplSetGetStatus : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Report status of a replica set from the POV of this server\n";
- help << "{ replSetGetStatus : 1 }";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetGetStatus);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- if ( cmdObj["forShell"].trueValue() )
- LastError::get(txn->getClient()).disable();
-
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
- status = getGlobalReplicationCoordinator()->processReplSetGetStatus(&result);
+ return false;
+ }
+};
+MONGO_INITIALIZER(RegisterReplSetTestCmd)(InitializerContext* context) {
+ if (Command::testCommandsEnabled) {
+ // Leaked intentionally: a Command registers itself when constructed.
+ new CmdReplSetTest();
+ }
+ return Status::OK();
+}
+
+/** Get rollback id. Used to check whether a rollback happened during some interval of
+    time. The rollback id carries no particular ordering; it simply changes on each
+    rollback.
+    @see incRBID()
+*/
+class CmdReplSetGetRBID : public ReplSetCommand {
+public:
+ CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
+
+ status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetRBID;
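
The rollback id is an opaque token: callers record it, do some work, and re-fetch it; a changed value means a rollback happened in between. A minimal sketch of that pattern, assuming a connected DBClientBase* and that the response field is named "rbid" (as appended by processReplSetGetRBID):

    // Hedged sketch: true if a rollback occurred while `work` ran. Only
    // (in)equality of rollback ids is meaningful; they carry no ordering.
    bool rollbackHappenedDuring(DBClientBase* conn, const stdx::function<void()>& work) {
        BSONObj before, after;
        conn->runCommand("admin", BSON("replSetGetRBID" << 1), before);
        work();
        conn->runCommand("admin", BSON("replSetGetRBID" << 1), after);
        return before["rbid"].numberInt() != after["rbid"].numberInt();
    }
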
+
+class CmdReplSetGetStatus : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Report status of a replica set from the POV of this server\n";
+ help << "{ replSetGetStatus : 1 }";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetGetStatus);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
}
- } cmdReplSetGetStatus;
-
- class CmdReplSetGetConfig : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Returns the current replica set configuration";
- help << "{ replSetGetConfig : 1 }";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetGetConfig);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetGetConfig() : ReplSetCommand("replSetGetConfig", true) { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ return Status::OK();
+ }
+ CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ if (cmdObj["forShell"].trueValue())
+ LastError::get(txn->getClient()).disable();
+
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- getGlobalReplicationCoordinator()->processReplSetGetConfig(&result);
- return true;
+ status = getGlobalReplicationCoordinator()->processReplSetGetStatus(&result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetGetStatus;
+
+class CmdReplSetGetConfig : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Returns the current replica set configuration";
+ help << "{ replSetGetConfig : 1 }";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetGetConfig);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
}
- } cmdReplSetGetConfig;
+ return Status::OK();
+ }
+ CmdReplSetGetConfig() : ReplSetCommand("replSetGetConfig", true) {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
+
+ getGlobalReplicationCoordinator()->processReplSetGetConfig(&result);
+ return true;
+ }
+} cmdReplSetGetConfig;
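
Both commands above are read-only delegations to the ReplicationCoordinator. A test-style invocation sketch, assuming an OperationContext* txn on a node started with --replSet:

    DBDirectClient client(txn);
    BSONObj statusDoc, configDoc;
    client.runCommand("admin", BSON("replSetGetStatus" << 1), statusDoc);
    client.runCommand("admin", BSON("replSetGetConfig" << 1), configDoc);
    // configDoc["config"] holds the current replica set configuration document.
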
namespace {
- HostAndPort someHostAndPortForMe() {
- const char* ips = serverGlobalParams.bind_ip.c_str();
- while (*ips) {
- std::string ip;
- const char* comma = strchr(ips, ',');
- if (comma) {
- ip = std::string(ips, comma - ips);
- ips = comma + 1;
- }
- else {
- ip = std::string(ips);
- ips = "";
- }
- HostAndPort h = HostAndPort(ip, serverGlobalParams.port);
- if (!h.isLocalHost()) {
- return h;
- }
+HostAndPort someHostAndPortForMe() {
+ const char* ips = serverGlobalParams.bind_ip.c_str();
+ while (*ips) {
+ std::string ip;
+ const char* comma = strchr(ips, ',');
+ if (comma) {
+ ip = std::string(ips, comma - ips);
+ ips = comma + 1;
+ } else {
+ ip = std::string(ips);
+ ips = "";
+ }
+ HostAndPort h = HostAndPort(ip, serverGlobalParams.port);
+ if (!h.isLocalHost()) {
+ return h;
}
+ }
+
+ std::string h = getHostName();
+ verify(!h.empty());
+ verify(h != "localhost");
+ return HostAndPort(h, serverGlobalParams.port);
+}
+
+void parseReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
+ const std::string& replSetString,
+ std::string* setname,
+ std::vector<HostAndPort>* seeds) {
+ const char* p = replSetString.c_str();
+ const char* slash = strchr(p, '/');
+ std::set<HostAndPort> seedSet;
+ if (slash) {
+ *setname = string(p, slash - p);
+ } else {
+ *setname = p;
+ }
- std::string h = getHostName();
- verify(!h.empty());
- verify(h != "localhost");
- return HostAndPort(h, serverGlobalParams.port);
+ if (slash == 0) {
+ return;
}
- void parseReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
- const std::string& replSetString,
- std::string* setname,
- std::vector<HostAndPort>* seeds) {
- const char *p = replSetString.c_str();
- const char *slash = strchr(p, '/');
- std::set<HostAndPort> seedSet;
- if (slash) {
- *setname = string(p, slash-p);
+ p = slash + 1;
+ while (1) {
+ const char* comma = strchr(p, ',');
+ if (comma == 0) {
+ comma = strchr(p, 0);
+ }
+ if (p == comma) {
+ break;
+ }
+ HostAndPort m;
+ try {
+ m = HostAndPort(string(p, comma - p));
+ } catch (...) {
+ uassert(13114, "bad --replSet seed hostname", false);
+ }
+ uassert(13096, "bad --replSet command line config string - dups?", seedSet.count(m) == 0);
+ seedSet.insert(m);
+ // uassert(13101, "can't use localhost in replset host list", !m.isLocalHost());
+ if (externalState->isSelf(m)) {
+ LOG(1) << "ignoring seed " << m.toString() << " (=self)";
+ } else {
+ seeds->push_back(m);
+ }
+ if (*comma == 0) {
+ break;
+ }
+ p = comma + 1;
+ }
+}
+} // namespace
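
To make the parsing contract concrete, a sketch of expected results, assuming an external state whose isSelf() returns false for every seed:

    // "rs0"                    -> setname "rs0", no seeds
    // "rs0/h1:27017,h2:27017"  -> setname "rs0", seeds {h1:27017, h2:27017}
    // "rs0/h1:27017,h1:27017"  -> uasserts 13096 (duplicate seed)
    std::string name;
    std::vector<HostAndPort> seeds;
    parseReplSetSeedList(&externalState, "rs0/h1:27017,h2:27017", &name, &seeds);
    invariant(name == "rs0");
    invariant(seeds.size() == 2U);
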
+
+class CmdReplSetInitiate : public ReplSetCommand {
+public:
+ CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") {}
+ virtual void help(stringstream& h) const {
+ h << "Initiate/christen a replica set.";
+ h << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetConfigure);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
}
- else {
- *setname = p;
+ return Status::OK();
+ }
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ BSONObj configObj;
+ if (cmdObj["replSetInitiate"].type() == Object) {
+ configObj = cmdObj["replSetInitiate"].Obj();
+ }
+
+ std::string replSetString = getGlobalReplicationCoordinator()->getSettings().replSet;
+ if (replSetString.empty()) {
+ return appendCommandStatus(result,
+ Status(ErrorCodes::NoReplicationEnabled,
+ "This node was not started with the replSet "
+ "option"));
+ }
+
+ if (configObj.isEmpty()) {
+ string noConfigMessage =
+ "no configuration specified. "
+ "Using a default configuration for the set";
+ result.append("info2", noConfigMessage);
+ log() << "initiate : " << noConfigMessage;
+
+ ReplicationCoordinatorExternalStateImpl externalState;
+ std::string name;
+ std::vector<HostAndPort> seeds;
+ parseReplSetSeedList(&externalState, replSetString, &name, &seeds); // may throw...
+
+ BSONObjBuilder b;
+ b.append("_id", name);
+ b.append("version", 1);
+ BSONObjBuilder members;
+ HostAndPort me = someHostAndPortForMe();
+ members.append("0", BSON("_id" << 0 << "host" << me.toString()));
+ result.append("me", me.toString());
+ for (unsigned i = 0; i < seeds.size(); i++) {
+ members.append(BSONObjBuilder::numStr(i + 1),
+ BSON("_id" << i + 1 << "host" << seeds[i].toString()));
+ }
+ b.appendArray("members", members.obj());
+ configObj = b.obj();
+ log() << "created this configuration for initiation : " << configObj.toString();
+ }
+
+ if (configObj.getField("version").eoo()) {
+ // Missing version field defaults to version 1.
+ BSONObjBuilder builder;
+ builder.appendElements(configObj);
+ builder.append("version", 1);
+ configObj = builder.obj();
+ }
+
+ Status status =
+ getGlobalReplicationCoordinator()->processReplSetInitiate(txn, configObj, &result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetInitiate;
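
When no config document is supplied, run() above synthesizes one from the --replSet seed list. Illustratively (hypothetical hosts), a node at h1:27017 started with --replSet "rs0/h2:27017" builds:

    BSONObj generated = BSON("_id" << "rs0" << "version" << 1 << "members"
                                   << BSON_ARRAY(BSON("_id" << 0 << "host" << "h1:27017")
                                                 << BSON("_id" << 1 << "host" << "h2:27017")));
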
+
+class CmdReplSetReconfig : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Adjust configuration of a replica set\n";
+ help << "{ replSetReconfig : config_object }";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetConfigure);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
}
-
- if (slash == 0) {
- return;
+ return Status::OK();
+ }
+ CmdReplSetReconfig() : ReplSetCommand("replSetReconfig") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK()) {
+ return appendCommandStatus(result, status);
}
- p = slash + 1;
- while (1) {
- const char *comma = strchr(p, ',');
- if (comma == 0) {
- comma = strchr(p,0);
- }
- if (p == comma) {
- break;
- }
- HostAndPort m;
- try {
- m = HostAndPort( string(p, comma-p) );
- }
- catch (...) {
- uassert(13114, "bad --replSet seed hostname", false);
- }
- uassert(13096, "bad --replSet command line config string - dups?",
- seedSet.count(m) == 0);
- seedSet.insert(m);
- //uassert(13101, "can't use localhost in replset host list", !m.isLocalHost());
- if (externalState->isSelf(m)) {
- LOG(1) << "ignoring seed " << m.toString() << " (=self)";
- }
- else {
- seeds->push_back(m);
- }
- if (*comma == 0) {
- break;
- }
- p = comma + 1;
- }
- }
-} // namespace
-
- class CmdReplSetInitiate : public ReplSetCommand {
- public:
- CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") { }
- virtual void help(stringstream& h) const {
- h << "Initiate/christen a replica set.";
- h << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetConfigure);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- virtual bool run(OperationContext* txn,
- const string& ,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
-
- BSONObj configObj;
- if( cmdObj["replSetInitiate"].type() == Object ) {
- configObj = cmdObj["replSetInitiate"].Obj();
- }
+ if (cmdObj["replSetReconfig"].type() != Object) {
+ errmsg = "no configuration specified";
+ return false;
+ }
- std::string replSetString = getGlobalReplicationCoordinator()->getSettings().replSet;
- if (replSetString.empty()) {
- return appendCommandStatus(result,
- Status(ErrorCodes::NoReplicationEnabled,
- "This node was not started with the replSet "
- "option"));
- }
+ ReplicationCoordinator::ReplSetReconfigArgs parsedArgs;
+ parsedArgs.newConfigObj = cmdObj["replSetReconfig"].Obj();
+ parsedArgs.force = cmdObj.hasField("force") && cmdObj["force"].trueValue();
+ status =
+ getGlobalReplicationCoordinator()->processReplSetReconfig(txn, parsedArgs, &result);
- if (configObj.isEmpty()) {
- string noConfigMessage = "no configuration specified. "
- "Using a default configuration for the set";
- result.append("info2", noConfigMessage);
- log() << "initiate : " << noConfigMessage;
-
- ReplicationCoordinatorExternalStateImpl externalState;
- std::string name;
- std::vector<HostAndPort> seeds;
- parseReplSetSeedList(
- &externalState,
- replSetString,
- &name,
- &seeds); // may throw...
-
- BSONObjBuilder b;
- b.append("_id", name);
- b.append("version", 1);
- BSONObjBuilder members;
- HostAndPort me = someHostAndPortForMe();
- members.append("0", BSON( "_id" << 0 << "host" << me.toString() ));
- result.append("me", me.toString());
- for( unsigned i = 0; i < seeds.size(); i++ ) {
- members.append(BSONObjBuilder::numStr(i+1),
- BSON( "_id" << i+1 << "host" << seeds[i].toString()));
- }
- b.appendArray("members", members.obj());
- configObj = b.obj();
- log() << "created this configuration for initiation : " <<
- configObj.toString();
- }
+ ScopedTransaction scopedXact(txn, MODE_X);
+ Lock::GlobalWrite globalWrite(txn->lockState());
- if (configObj.getField("version").eoo()) {
- // Missing version field defaults to version 1.
- BSONObjBuilder builder;
- builder.appendElements(configObj);
- builder.append("version", 1);
- configObj = builder.obj();
- }
+ WriteUnitOfWork wuow(txn);
+ if (status.isOK() && !parsedArgs.force) {
+ getGlobalServiceContext()->getOpObserver()->onOpMessage(
+ txn,
+ BSON("msg"
+ << "Reconfig set"
+ << "version" << parsedArgs.newConfigObj["version"]));
+ }
+ wuow.commit();
- Status status = getGlobalReplicationCoordinator()->processReplSetInitiate(txn,
- configObj,
- &result);
- return appendCommandStatus(result, status);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetReconfig;
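
run() above expects the complete new config under "replSetReconfig", plus an optional "force" flag; a successful non-forced reconfig also writes a no-op "Reconfig set" message to the oplog. Sketch of the command document (newConfig is the full desired config):

    BSONObj cmd = BSON("replSetReconfig" << newConfig << "force" << false);
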
+
+class CmdReplSetFreeze : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetFreeze : <seconds> }";
+ help << "'freeze' state of member to the extent we can do that. What this really means is "
+ "that\n";
+ help << "this node will not attempt to become primary until the time period specified "
+ "expires.\n";
+ help << "You can call again with {replSetFreeze:0} to unfreeze sooner.\n";
+ help << "A process restart unfreezes the member also.\n";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
}
- } cmdReplSetInitiate;
-
- class CmdReplSetReconfig : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Adjust configuration of a replica set\n";
- help << "{ replSetReconfig : config_object }";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetConfigure);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetReconfig() : ReplSetCommand("replSetReconfig") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
+ return Status::OK();
+ }
+ CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- if( cmdObj["replSetReconfig"].type() != Object ) {
- errmsg = "no configuration specified";
- return false;
- }
+ int secs = (int)cmdObj.firstElement().numberInt();
+ return appendCommandStatus(
+ result, getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result));
+ }
+} cmdReplSetFreeze;
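
The freeze duration is read from the command's first element, so the calling convention is simply:

    BSONObj freezeCmd = BSON("replSetFreeze" << 120);  // no election attempts for 120 seconds
    BSONObj unfreeze = BSON("replSetFreeze" << 0);     // unfreeze immediately
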
+
+class CmdReplSetStepDown : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetStepDown : <seconds> }\n";
+ help << "Step down as primary. Will not try to reelect self for the specified time period "
+ "(1 minute if no numeric secs value specified).\n";
+ help << "(If another member with same priority takes over in the meantime, it will stay "
+ "primary.)\n";
+ help << "http://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ }
+ return Status::OK();
+ }
+ CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- ReplicationCoordinator::ReplSetReconfigArgs parsedArgs;
- parsedArgs.newConfigObj = cmdObj["replSetReconfig"].Obj();
- parsedArgs.force = cmdObj.hasField("force") && cmdObj["force"].trueValue();
- status = getGlobalReplicationCoordinator()->processReplSetReconfig(txn,
- parsedArgs,
- &result);
-
- ScopedTransaction scopedXact(txn, MODE_X);
- Lock::GlobalWrite globalWrite(txn->lockState());
-
- WriteUnitOfWork wuow(txn);
- if (status.isOK() && !parsedArgs.force) {
- getGlobalServiceContext()->getOpObserver()->onOpMessage(
- txn,
- BSON("msg" << "Reconfig set" <<
- "version" << parsedArgs.newConfigObj["version"]));
- }
- wuow.commit();
+ const bool force = cmdObj["force"].trueValue();
+ long long stepDownForSecs = cmdObj.firstElement().numberLong();
+ if (stepDownForSecs == 0) {
+ stepDownForSecs = 60;
+ } else if (stepDownForSecs < 0) {
+ status = Status(ErrorCodes::BadValue, "stepdown period must be a positive integer");
return appendCommandStatus(result, status);
}
- } cmdReplSetReconfig;
-
- class CmdReplSetFreeze : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetFreeze : <seconds> }";
- help << "'freeze' state of member to the extent we can do that. What this really means is that\n";
- help << "this node will not attempt to become primary until the time period specified expires.\n";
- help << "You can call again with {replSetFreeze:0} to unfreeze sooner.\n";
- help << "A process restart unfreezes the member also.\n";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
- int secs = (int) cmdObj.firstElement().numberInt();
- return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result));
- }
- } cmdReplSetFreeze;
-
- class CmdReplSetStepDown: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetStepDown : <seconds> }\n";
- help << "Step down as primary. Will not try to reelect self for the specified time period (1 minute if no numeric secs value specified).\n";
- help << "(If another member with same priority takes over in the meantime, it will stay primary.)\n";
- help << "http://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
- const bool force = cmdObj["force"].trueValue();
- long long stepDownForSecs = cmdObj.firstElement().numberLong();
- if (stepDownForSecs == 0) {
- stepDownForSecs = 60;
- }
- else if (stepDownForSecs < 0) {
- status = Status(ErrorCodes::BadValue,
- "stepdown period must be a positive integer");
- return appendCommandStatus(result, status);
- }
-
- long long secondaryCatchUpPeriodSecs;
- status = bsonExtractIntegerField(cmdObj,
- "secondaryCatchUpPeriodSecs",
- &secondaryCatchUpPeriodSecs);
- if (status.code() == ErrorCodes::NoSuchKey) {
- // if field is absent, default values
- if (force) {
- secondaryCatchUpPeriodSecs = 0;
- }
- else {
- secondaryCatchUpPeriodSecs = 10;
- }
- }
- else if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
-
- if (secondaryCatchUpPeriodSecs < 0) {
- status = Status(ErrorCodes::BadValue,
- "secondaryCatchUpPeriodSecs period must be a positive or absent");
- return appendCommandStatus(result, status);
- }
-
- if (stepDownForSecs < secondaryCatchUpPeriodSecs) {
- status = Status(ErrorCodes::BadValue,
- "stepdown period must be longer than secondaryCatchUpPeriodSecs");
- return appendCommandStatus(result, status);
+ long long secondaryCatchUpPeriodSecs;
+ status = bsonExtractIntegerField(
+ cmdObj, "secondaryCatchUpPeriodSecs", &secondaryCatchUpPeriodSecs);
+ if (status.code() == ErrorCodes::NoSuchKey) {
+            // If the field is absent, apply the defaults.
+ if (force) {
+ secondaryCatchUpPeriodSecs = 0;
+ } else {
+ secondaryCatchUpPeriodSecs = 10;
}
+ } else if (!status.isOK()) {
+ return appendCommandStatus(result, status);
+ }
- log() << "Attempting to step down in response to replSetStepDown command";
+ if (secondaryCatchUpPeriodSecs < 0) {
+            status = Status(ErrorCodes::BadValue,
+                            "secondaryCatchUpPeriodSecs must be positive or absent");
+ return appendCommandStatus(result, status);
+ }
- status = getGlobalReplicationCoordinator()->stepDown(
- txn,
- force,
- Seconds(secondaryCatchUpPeriodSecs),
- Seconds(stepDownForSecs));
+ if (stepDownForSecs < secondaryCatchUpPeriodSecs) {
+ status = Status(ErrorCodes::BadValue,
+ "stepdown period must be longer than secondaryCatchUpPeriodSecs");
return appendCommandStatus(result, status);
}
- } cmdReplSetStepDown;
-
- class CmdReplSetMaintenance: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetMaintenance : bool }\n";
- help << "Enable or disable maintenance mode.";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetMaintenance() : ReplSetCommand("replSetMaintenance") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
- return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->setMaintenanceMode(
- cmdObj["replSetMaintenance"].trueValue()));
- }
- } cmdReplSetMaintenance;
-
- class CmdReplSetSyncFrom: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetSyncFrom : \"host:port\" }\n";
- help << "Change who this member is syncing from.";
- }
- virtual Status checkAuthForCommand(ClientBasic* client,
- const std::string& dbname,
- const BSONObj& cmdObj) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), actions)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
- }
- return Status::OK();
- }
- CmdReplSetSyncFrom() : ReplSetCommand("replSetSyncFrom") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ log() << "Attempting to step down in response to replSetStepDown command";
- HostAndPort targetHostAndPort;
- status = targetHostAndPort.initialize(cmdObj["replSetSyncFrom"].valuestrsafe());
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ status = getGlobalReplicationCoordinator()->stepDown(
+ txn, force, Seconds(secondaryCatchUpPeriodSecs), Seconds(stepDownForSecs));
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetStepDown;
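
Summarizing the argument resolution implemented by run() above:

    // { replSetStepDown: 0 }             -> stepDownForSecs defaults to 60
    // { replSetStepDown: -5 }            -> BadValue
    // secondaryCatchUpPeriodSecs absent  -> 10, or 0 when force: true
    // secondaryCatchUpPeriodSecs < 0     -> BadValue
    // stepDownForSecs < catch-up period  -> BadValue
    BSONObj stepDownCmd = BSON("replSetStepDown" << 60 << "secondaryCatchUpPeriodSecs" << 10);
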
- return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->processReplSetSyncFrom(targetHostAndPort,
- &result));
- }
- } cmdReplSetSyncFrom;
-
- class CmdReplSetUpdatePosition: public ReplSetCommand {
- public:
- CmdReplSetUpdatePosition() : ReplSetCommand("replSetUpdatePosition") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+class CmdReplSetMaintenance : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetMaintenance : bool }\n";
+ help << "Enable or disable maintenance mode.";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ }
+ return Status::OK();
+ }
+ CmdReplSetMaintenance() : ReplSetCommand("replSetMaintenance") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- // accept and ignore handshakes sent from old (3.0-series) nodes without erroring to
- // enable mixed-version operation, since we no longer use the handshakes
- if (cmdObj.hasField("handshake")) {
- return true;
- }
-
- UpdatePositionArgs args;
- status = args.initialize(cmdObj);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ return appendCommandStatus(result,
+ getGlobalReplicationCoordinator()->setMaintenanceMode(
+ cmdObj["replSetMaintenance"].trueValue()));
+ }
+} cmdReplSetMaintenance;
- // in the case of an update from a member with an invalid replica set config,
- // we return our current config version
- long long configVersion = -1;
- status = getGlobalReplicationCoordinator()->
- processReplSetUpdatePosition(args, &configVersion);
+class CmdReplSetSyncFrom : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetSyncFrom : \"host:port\" }\n";
+ help << "Change who this member is syncing from.";
+ }
+ virtual Status checkAuthForCommand(ClientBasic* client,
+ const std::string& dbname,
+ const BSONObj& cmdObj) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), actions)) {
+ return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ }
+ return Status::OK();
+ }
+ CmdReplSetSyncFrom() : ReplSetCommand("replSetSyncFrom") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- if (status == ErrorCodes::InvalidReplicaSetConfig) {
- result.append("configVersion", configVersion);
- }
+ HostAndPort targetHostAndPort;
+ status = targetHostAndPort.initialize(cmdObj["replSetSyncFrom"].valuestrsafe());
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetUpdatePosition;
-namespace {
- /**
- * Returns true if there is no data on this server. Useful when starting replication.
- * The "local" database does NOT count except for "rs.oplog" collection.
- * Used to set the hasData field on replset heartbeat command response.
- */
- bool replHasDatabases(OperationContext* txn) {
- std::vector<string> names;
- StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
- storageEngine->listDatabases(&names);
-
- if( names.size() >= 2 ) return true;
- if( names.size() == 1 ) {
- if( names[0] != "local" )
- return true;
-
- // we have a local database. return true if oplog isn't empty
- BSONObj o;
- if (Helpers::getSingleton(txn, repl::rsOplogName.c_str(), o)) {
- return true;
- }
- }
- return false;
+ return appendCommandStatus(
+ result,
+ getGlobalReplicationCoordinator()->processReplSetSyncFrom(targetHostAndPort, &result));
}
+} cmdReplSetSyncFrom;
+
+class CmdReplSetUpdatePosition : public ReplSetCommand {
+public:
+ CmdReplSetUpdatePosition() : ReplSetCommand("replSetUpdatePosition") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
-} // namespace
+ // accept and ignore handshakes sent from old (3.0-series) nodes without erroring to
+ // enable mixed-version operation, since we no longer use the handshakes
+ if (cmdObj.hasField("handshake")) {
+ return true;
+ }
- MONGO_FP_DECLARE(rsDelayHeartbeatResponse);
+ UpdatePositionArgs args;
+ status = args.initialize(cmdObj);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- /* { replSetHeartbeat : <setname> } */
- class CmdReplSetHeartbeat : public ReplSetCommand {
- public:
- CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
+ // in the case of an update from a member with an invalid replica set config,
+ // we return our current config version
+ long long configVersion = -1;
+ status =
+ getGlobalReplicationCoordinator()->processReplSetUpdatePosition(args, &configVersion);
- MONGO_FAIL_POINT_BLOCK(rsDelayHeartbeatResponse, delay) {
- const BSONObj& data = delay.getData();
- sleepsecs(data["delay"].numberInt());
- }
+ if (status == ErrorCodes::InvalidReplicaSetConfig) {
+ result.append("configVersion", configVersion);
+ }
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetUpdatePosition;
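
For reference, the document this handler parses has the shape built by the MockProgressManager in reporter_test.cpp further down:

    // { replSetUpdatePosition: 1,
    //   optimes: [ { optime: <Timestamp>, memberId: <int>, cfgver: <int> }, ... ] }
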
- Status status = Status(ErrorCodes::InternalError, "status not set in heartbeat code");
- /* we don't call ReplSetCommand::check() here because heartbeat
- checks many things that are pre-initialization. */
- if (!getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
- status = Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
- return appendCommandStatus(result, status);
- }
+namespace {
+/**
+ * Returns true if there is data on this server. Useful when starting replication.
+ * The "local" database does NOT count, except that a non-empty "local.oplog.rs"
+ * collection does count as data.
+ * Used to set the hasData field on the replset heartbeat command response.
+ */
+bool replHasDatabases(OperationContext* txn) {
+ std::vector<string> names;
+ StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
+ storageEngine->listDatabases(&names);
+
+ if (names.size() >= 2)
+ return true;
+ if (names.size() == 1) {
+ if (names[0] != "local")
+ return true;
- /* we want to keep heartbeat connections open when relinquishing primary.
- tag them here. */
- {
- AbstractMessagingPort *mp = txn->getClient()->port();
- if( mp )
- mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
- }
+ // we have a local database. return true if oplog isn't empty
+ BSONObj o;
+ if (Helpers::getSingleton(txn, repl::rsOplogName.c_str(), o)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace
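
A decision table for replHasDatabases() above:

    // databases present                 -> result
    // (none)                            -> false
    // only "local", empty oplog         -> false
    // only "local", non-empty oplog     -> true
    // any database besides "local"      -> true
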
+
+MONGO_FP_DECLARE(rsDelayHeartbeatResponse);
+
+/* { replSetHeartbeat : <setname> } */
+class CmdReplSetHeartbeat : public ReplSetCommand {
+public:
+ CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ MONGO_FAIL_POINT_BLOCK(rsDelayHeartbeatResponse, delay) {
+ const BSONObj& data = delay.getData();
+ sleepsecs(data["delay"].numberInt());
+ }
+
+ Status status = Status(ErrorCodes::InternalError, "status not set in heartbeat code");
+ /* we don't call ReplSetCommand::check() here because heartbeat
+ checks many things that are pre-initialization. */
+ if (!getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
+ status = Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
+ return appendCommandStatus(result, status);
+ }
- if (getGlobalReplicationCoordinator()->isV1ElectionProtocol()) {
- ReplSetHeartbeatArgsV1 args;
- status = args.initialize(cmdObj);
- if (status.isOK()) {
- ReplSetHeartbeatResponse response;
- status = getGlobalReplicationCoordinator()->processHeartbeatV1(args, &response);
- if (status.isOK())
- response.addToBSON(&result, true);
- return appendCommandStatus(result, status);
- }
- // else: fall through to old heartbeat protocol as it is likely that
- // a new node just joined the set
- }
+ /* we want to keep heartbeat connections open when relinquishing primary.
+ tag them here. */
+ {
+ AbstractMessagingPort* mp = txn->getClient()->port();
+ if (mp)
+ mp->tag |= executor::NetworkInterface::kMessagingPortKeepOpen;
+ }
- ReplSetHeartbeatArgs args;
+ if (getGlobalReplicationCoordinator()->isV1ElectionProtocol()) {
+ ReplSetHeartbeatArgsV1 args;
status = args.initialize(cmdObj);
- if (!status.isOK()) {
+ if (status.isOK()) {
+ ReplSetHeartbeatResponse response;
+ status = getGlobalReplicationCoordinator()->processHeartbeatV1(args, &response);
+ if (status.isOK())
+ response.addToBSON(&result, true);
return appendCommandStatus(result, status);
}
+ // else: fall through to old heartbeat protocol as it is likely that
+ // a new node just joined the set
+ }
- // ugh.
- if (args.getCheckEmpty()) {
- result.append("hasData", replHasDatabases(txn));
- }
-
- ReplSetHeartbeatResponse response;
- status = getGlobalReplicationCoordinator()->processHeartbeat(args, &response);
- if (status.isOK())
- response.addToBSON(&result, false);
+ ReplSetHeartbeatArgs args;
+ status = args.initialize(cmdObj);
+ if (!status.isOK()) {
return appendCommandStatus(result, status);
}
- } cmdReplSetHeartbeat;
-
- /** the first cmd called by a node seeking election and it's a basic sanity
- test: do any of the nodes it can reach know that it can't be the primary?
- */
- class CmdReplSetFresh : public ReplSetCommand {
- public:
- CmdReplSetFresh() : ReplSetCommand("replSetFresh") { }
-
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
- ReplicationCoordinator::ReplSetFreshArgs parsedArgs;
- parsedArgs.id = cmdObj["id"].Int();
- parsedArgs.setName = cmdObj["set"].String();
- parsedArgs.who = HostAndPort(cmdObj["who"].String());
- BSONElement cfgverElement = cmdObj["cfgver"];
- uassert(28525,
- str::stream() << "Expected cfgver argument to replSetFresh command to have "
- "numeric type, but found " << typeName(cfgverElement.type()),
- cfgverElement.isNumber());
- parsedArgs.cfgver = cfgverElement.safeNumberLong();
- parsedArgs.opTime = Timestamp(cmdObj["opTime"].Date());
-
- status = getGlobalReplicationCoordinator()->processReplSetFresh(parsedArgs, &result);
- return appendCommandStatus(result, status);
+ // ugh.
+ if (args.getCheckEmpty()) {
+ result.append("hasData", replHasDatabases(txn));
}
- } cmdReplSetFresh;
-
- class CmdReplSetElect : public ReplSetCommand {
- public:
- CmdReplSetElect() : ReplSetCommand("replSetElect") { }
- private:
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
- DEV log() << "received elect msg " << cmdObj.toString();
- else LOG(2) << "received elect msg " << cmdObj.toString();
-
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
- ReplicationCoordinator::ReplSetElectArgs parsedArgs;
- parsedArgs.set = cmdObj["set"].String();
- parsedArgs.whoid = cmdObj["whoid"].Int();
- BSONElement cfgverElement = cmdObj["cfgver"];
- uassert(28526,
- str::stream() << "Expected cfgver argument to replSetElect command to have "
- "numeric type, but found " << typeName(cfgverElement.type()),
- cfgverElement.isNumber());
- parsedArgs.cfgver = cfgverElement.safeNumberLong();
- parsedArgs.round = cmdObj["round"].OID();
-
- status = getGlobalReplicationCoordinator()->processReplSetElect(parsedArgs, &result);
+ ReplSetHeartbeatResponse response;
+ status = getGlobalReplicationCoordinator()->processHeartbeat(args, &response);
+ if (status.isOK())
+ response.addToBSON(&result, false);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetHeartbeat;
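
Protocol dispatch implemented by run() above:

    // V1 election protocol on, args parse as V1   -> processHeartbeatV1()
    // otherwise (e.g. an old-protocol node joins) -> processHeartbeat(), answering a
    //                                                checkEmpty probe via replHasDatabases()
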
+
+/** The first command called by a node seeking election. It is a basic sanity
+    test: do any of the nodes it can reach know that it can't be the primary?
+ */
+class CmdReplSetFresh : public ReplSetCommand {
+public:
+ CmdReplSetFresh() : ReplSetCommand("replSetFresh") {}
+
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetElect;
-} // namespace repl
-} // namespace mongo
+ ReplicationCoordinator::ReplSetFreshArgs parsedArgs;
+ parsedArgs.id = cmdObj["id"].Int();
+ parsedArgs.setName = cmdObj["set"].String();
+ parsedArgs.who = HostAndPort(cmdObj["who"].String());
+ BSONElement cfgverElement = cmdObj["cfgver"];
+ uassert(28525,
+ str::stream() << "Expected cfgver argument to replSetFresh command to have "
+ "numeric type, but found " << typeName(cfgverElement.type()),
+ cfgverElement.isNumber());
+ parsedArgs.cfgver = cfgverElement.safeNumberLong();
+ parsedArgs.opTime = Timestamp(cmdObj["opTime"].Date());
+
+ status = getGlobalReplicationCoordinator()->processReplSetFresh(parsedArgs, &result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetFresh;
+
+class CmdReplSetElect : public ReplSetCommand {
+public:
+ CmdReplSetElect() : ReplSetCommand("replSetElect") {}
+
+private:
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ DEV log() << "received elect msg " << cmdObj.toString();
+ else LOG(2) << "received elect msg " << cmdObj.toString();
+
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
+
+ ReplicationCoordinator::ReplSetElectArgs parsedArgs;
+ parsedArgs.set = cmdObj["set"].String();
+ parsedArgs.whoid = cmdObj["whoid"].Int();
+ BSONElement cfgverElement = cmdObj["cfgver"];
+ uassert(28526,
+ str::stream() << "Expected cfgver argument to replSetElect command to have "
+ "numeric type, but found " << typeName(cfgverElement.type()),
+ cfgverElement.isNumber());
+ parsedArgs.cfgver = cfgverElement.safeNumberLong();
+ parsedArgs.round = cmdObj["round"].OID();
+
+ status = getGlobalReplicationCoordinator()->processReplSetElect(parsedArgs, &result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetElect;
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replset_web_handler.cpp b/src/mongo/db/repl/replset_web_handler.cpp
index 12fe6ab8bd0..6ec53363396 100644
--- a/src/mongo/db/repl/replset_web_handler.cpp
+++ b/src/mongo/db/repl/replset_web_handler.cpp
@@ -41,56 +41,55 @@
namespace mongo {
namespace repl {
- using namespace html;
+using namespace html;
- class ReplSetHandler : public DbWebHandler {
- public:
- ReplSetHandler() : DbWebHandler( "_replSet" , 1 , false ) {}
+class ReplSetHandler : public DbWebHandler {
+public:
+ ReplSetHandler() : DbWebHandler("_replSet", 1, false) {}
- virtual bool handles( const std::string& url ) const {
- return str::startsWith( url , "/_replSet" );
- }
-
- virtual void handle( OperationContext* txn,
- const char *rq,
- const std::string& url,
- BSONObj params,
- std::string& responseMsg,
- int& responseCode,
- std::vector<std::string>& headers,
- const SockAddr &from ) {
- responseMsg = _replSet(txn);
- responseCode = 200;
- }
+ virtual bool handles(const std::string& url) const {
+ return str::startsWith(url, "/_replSet");
+ }
- /* /_replSet show replica set status in html format */
- std::string _replSet(OperationContext* txn) {
- std::stringstream s;
- s << start("Replica Set Status " + prettyHostName());
- s << p( a("/", "back", "Home") + " | " +
- a("/local/system.replset/?html=1", "", "View Replset Config") + " | " +
- a("/replSetGetStatus?text=1", "", "replSetGetStatus") + " | " +
- a("http://dochub.mongodb.org/core/replicasets", "", "Docs")
- );
+ virtual void handle(OperationContext* txn,
+ const char* rq,
+ const std::string& url,
+ BSONObj params,
+ std::string& responseMsg,
+ int& responseCode,
+ std::vector<std::string>& headers,
+ const SockAddr& from) {
+ responseMsg = _replSet(txn);
+ responseCode = 200;
+ }
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet) {
- s << p("Not using --replSet");
- s << _end();
- return s.str();
- }
+ /* /_replSet show replica set status in html format */
+ std::string _replSet(OperationContext* txn) {
+ std::stringstream s;
+ s << start("Replica Set Status " + prettyHostName());
+ s << p(a("/", "back", "Home") + " | " +
+ a("/local/system.replset/?html=1", "", "View Replset Config") + " | " +
+ a("/replSetGetStatus?text=1", "", "replSetGetStatus") + " | " +
+ a("http://dochub.mongodb.org/core/replicasets", "", "Docs"));
- ReplSetHtmlSummary summary;
- replCoord->summarizeAsHtml(&summary);
- s << summary.toHtmlString();
-
- s << p("Recent replset log activity:");
- fillRsLog(&s);
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet) {
+ s << p("Not using --replSet");
s << _end();
return s.str();
}
- } replSetHandler;
+ ReplSetHtmlSummary summary;
+ replCoord->summarizeAsHtml(&summary);
+ s << summary.toHtmlString();
+
+ s << p("Recent replset log activity:");
+ fillRsLog(&s);
+ s << _end();
+ return s.str();
+ }
+
+} replSetHandler;
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/reporter.cpp b/src/mongo/db/repl/reporter.cpp
index 90218fd017f..b05bc2dbdd9 100644
--- a/src/mongo/db/repl/reporter.cpp
+++ b/src/mongo/db/repl/reporter.cpp
@@ -38,130 +38,124 @@
namespace mongo {
namespace repl {
- ReplicationProgressManager::~ReplicationProgressManager() {}
-
- Reporter::Reporter(ReplicationExecutor* executor,
- ReplicationProgressManager* replicationProgressManager,
- const HostAndPort& target)
- : _executor(executor),
- _updatePositionSource(replicationProgressManager),
- _target(target),
- _status(Status::OK()),
- _willRunAgain(false),
- _active(false) {
-
- uassert(ErrorCodes::BadValue, "null replication executor", executor);
- uassert(ErrorCodes::BadValue,
- "null replication progress manager",
- replicationProgressManager);
- uassert(ErrorCodes::BadValue, "target name cannot be empty", !target.empty());
+ReplicationProgressManager::~ReplicationProgressManager() {}
+
+Reporter::Reporter(ReplicationExecutor* executor,
+ ReplicationProgressManager* replicationProgressManager,
+ const HostAndPort& target)
+ : _executor(executor),
+ _updatePositionSource(replicationProgressManager),
+ _target(target),
+ _status(Status::OK()),
+ _willRunAgain(false),
+ _active(false) {
+ uassert(ErrorCodes::BadValue, "null replication executor", executor);
+ uassert(ErrorCodes::BadValue, "null replication progress manager", replicationProgressManager);
+ uassert(ErrorCodes::BadValue, "target name cannot be empty", !target.empty());
+}
+
+Reporter::~Reporter() {
+ DESTRUCTOR_GUARD(cancel(););
+}
+
+void Reporter::cancel() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+ if (!_active) {
+ return;
}
- Reporter::~Reporter() {
- DESTRUCTOR_GUARD(
- cancel();
- );
- }
+ _status = Status(ErrorCodes::CallbackCanceled, "Reporter no longer valid");
+ _willRunAgain = false;
+ invariant(_remoteCommandCallbackHandle.isValid());
+ _executor->cancel(_remoteCommandCallbackHandle);
+}
- void Reporter::cancel() {
+void Reporter::wait() {
+ ReplicationExecutor::CallbackHandle handle;
+ {
stdx::lock_guard<stdx::mutex> lk(_mutex);
-
if (!_active) {
return;
}
-
- _status = Status(ErrorCodes::CallbackCanceled, "Reporter no longer valid");
- _willRunAgain = false;
- invariant(_remoteCommandCallbackHandle.isValid());
- _executor->cancel(_remoteCommandCallbackHandle);
- }
-
- void Reporter::wait() {
- ReplicationExecutor::CallbackHandle handle;
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (!_active) {
- return;
- }
- if (!_remoteCommandCallbackHandle.isValid()) {
- return;
- }
- handle = _remoteCommandCallbackHandle;
+ if (!_remoteCommandCallbackHandle.isValid()) {
+ return;
}
- _executor->wait(handle);
+ handle = _remoteCommandCallbackHandle;
}
+ _executor->wait(handle);
+}
- Status Reporter::trigger() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _schedule_inlock();
- }
-
- Status Reporter::_schedule_inlock() {
- if (!_status.isOK()) {
- return _status;
- }
-
- if (_active) {
- _willRunAgain = true;
- return _status;
- }
-
- LOG(2) << "Reporter scheduling report to : " << _target;
-
- BSONObjBuilder cmd;
- if (!_updatePositionSource->prepareReplSetUpdatePositionCommand(&cmd)) {
- // Returning NodeNotFound because currently this is the only way
- // prepareReplSetUpdatePositionCommand() can fail in production.
- return Status(ErrorCodes::NodeNotFound,
- "Reporter failed to create replSetUpdatePositionCommand command.");
- }
- auto cmdObj = cmd.obj();
- StatusWith<ReplicationExecutor::CallbackHandle> scheduleResult =
- _executor->scheduleRemoteCommand(
- RemoteCommandRequest(_target, "admin", cmdObj),
- stdx::bind(&Reporter::_callback, this, stdx::placeholders::_1));
-
- if (!scheduleResult.isOK()) {
- _status = scheduleResult.getStatus();
- LOG(2) << "Reporter failed to schedule with status: " << _status;
+Status Reporter::trigger() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _schedule_inlock();
+}
- return _status;
- }
-
- _active = true;
- _willRunAgain = false;
- _remoteCommandCallbackHandle = scheduleResult.getValue();
- return Status::OK();
+Status Reporter::_schedule_inlock() {
+ if (!_status.isOK()) {
+ return _status;
}
- void Reporter::_callback(const ReplicationExecutor::RemoteCommandCallbackArgs& rcbd) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_active) {
+ _willRunAgain = true;
+ return _status;
+ }
- _status = rcbd.response.getStatus();
- _active = false;
+    LOG(2) << "Reporter scheduling report to: " << _target;
- LOG(2) << "Reporter ended with status: " << _status << " after reporting to " << _target;
- if (_status.isOK() && _willRunAgain) {
- _schedule_inlock();
- }
- else {
- _willRunAgain = false;
- }
+ BSONObjBuilder cmd;
+ if (!_updatePositionSource->prepareReplSetUpdatePositionCommand(&cmd)) {
+ // Returning NodeNotFound because currently this is the only way
+ // prepareReplSetUpdatePositionCommand() can fail in production.
+ return Status(ErrorCodes::NodeNotFound,
+ "Reporter failed to create replSetUpdatePositionCommand command.");
}
+ auto cmdObj = cmd.obj();
+ StatusWith<ReplicationExecutor::CallbackHandle> scheduleResult =
+ _executor->scheduleRemoteCommand(
+ RemoteCommandRequest(_target, "admin", cmdObj),
+ stdx::bind(&Reporter::_callback, this, stdx::placeholders::_1));
+
+ if (!scheduleResult.isOK()) {
+ _status = scheduleResult.getStatus();
+ LOG(2) << "Reporter failed to schedule with status: " << _status;
- Status Reporter::getStatus() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
return _status;
}
- bool Reporter::isActive() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _active;
- }
+ _active = true;
+ _willRunAgain = false;
+ _remoteCommandCallbackHandle = scheduleResult.getValue();
+ return Status::OK();
+}
- bool Reporter::willRunAgain() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _willRunAgain;
+void Reporter::_callback(const ReplicationExecutor::RemoteCommandCallbackArgs& rcbd) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+ _status = rcbd.response.getStatus();
+ _active = false;
+
+ LOG(2) << "Reporter ended with status: " << _status << " after reporting to " << _target;
+ if (_status.isOK() && _willRunAgain) {
+ _schedule_inlock();
+ } else {
+ _willRunAgain = false;
}
-} // namespace repl
-} // namespace mongo
+}
+
+Status Reporter::getStatus() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _status;
+}
+
+bool Reporter::isActive() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _active;
+}
+
+bool Reporter::willRunAgain() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _willRunAgain;
+}
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/reporter.h b/src/mongo/db/repl/reporter.h
index 44145373326..c7c502e8f5f 100644
--- a/src/mongo/db/repl/reporter.h
+++ b/src/mongo/db/repl/reporter.h
@@ -35,90 +35,90 @@
namespace mongo {
namespace repl {
- class ReplicationProgressManager {
- public:
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
- virtual ~ReplicationProgressManager();
- };
-
- class Reporter {
- MONGO_DISALLOW_COPYING(Reporter);
-
- public:
- Reporter(ReplicationExecutor* executor,
- ReplicationProgressManager* replicationProgressManager,
- const HostAndPort& target);
- virtual ~Reporter();
-
- /**
- * Returns true if a remote command has been scheduled (but not completed)
- * with the executor.
- */
- bool isActive() const;
-
- /**
- * Returns true if a remote command should be scheduled once the current one returns
- * from the executor.
- */
- bool willRunAgain() const;
-
- /**
- * Cancels remote command request.
- * Returns immediately if the Reporter is not active.
- */
- void cancel();
-
- /**
- * Waits for last/current executor handle to finish.
- * Returns immediately if the handle is invalid.
- */
- void wait();
-
- /**
- * Signals to the Reporter that there is new information to be sent to the "_target" server.
- * Returns the _status, indicating any error the Reporter has encountered.
- */
- Status trigger();
-
- /**
- * Returns the previous return status so that the owner can decide whether the Reporter
- * needs a new target to whom it can report.
- */
- Status getStatus() const;
-
- private:
- /**
- * Schedules remote command to be run by the executor
- */
- Status _schedule_inlock();
-
- /**
- * Callback for remote command.
- */
- void _callback(const ReplicationExecutor::RemoteCommandCallbackArgs& rcbd);
-
- // Not owned by us.
- ReplicationExecutor* _executor;
- ReplicationProgressManager* _updatePositionSource;
-
- // Host to whom the Reporter sends updates.
- HostAndPort _target;
-
- // Protects member data of this Reporter.
- mutable stdx::mutex _mutex;
-
- // Stores the most recent Status returned from the ReplicationExecutor.
- Status _status;
-
- // _willRunAgain is true when Reporter is scheduled to be run by the executor and subsequent
- // updates have come in.
- bool _willRunAgain;
- // _active is true when Reporter is scheduled to be run by the executor.
- bool _active;
-
- // Callback handle to the scheduled remote command.
- ReplicationExecutor::CallbackHandle _remoteCommandCallbackHandle;
- };
-
-} // namespace repl
-} // namespace mongo
+class ReplicationProgressManager {
+public:
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
+ virtual ~ReplicationProgressManager();
+};
+
+class Reporter {
+ MONGO_DISALLOW_COPYING(Reporter);
+
+public:
+ Reporter(ReplicationExecutor* executor,
+ ReplicationProgressManager* replicationProgressManager,
+ const HostAndPort& target);
+ virtual ~Reporter();
+
+ /**
+ * Returns true if a remote command has been scheduled (but not completed)
+ * with the executor.
+ */
+ bool isActive() const;
+
+ /**
+ * Returns true if a remote command should be scheduled once the current one returns
+ * from the executor.
+ */
+ bool willRunAgain() const;
+
+ /**
+ * Cancels remote command request.
+ * Returns immediately if the Reporter is not active.
+ */
+ void cancel();
+
+ /**
+ * Waits for last/current executor handle to finish.
+ * Returns immediately if the handle is invalid.
+ */
+ void wait();
+
+ /**
+ * Signals to the Reporter that there is new information to be sent to the "_target" server.
+ * Returns the _status, indicating any error the Reporter has encountered.
+ */
+ Status trigger();
+
+ /**
+ * Returns the previous return status so that the owner can decide whether the Reporter
+ * needs a new target to whom it can report.
+ */
+ Status getStatus() const;
+
+private:
+ /**
+ * Schedules remote command to be run by the executor
+ */
+ Status _schedule_inlock();
+
+ /**
+ * Callback for remote command.
+ */
+ void _callback(const ReplicationExecutor::RemoteCommandCallbackArgs& rcbd);
+
+ // Not owned by us.
+ ReplicationExecutor* _executor;
+ ReplicationProgressManager* _updatePositionSource;
+
+ // Host to whom the Reporter sends updates.
+ HostAndPort _target;
+
+ // Protects member data of this Reporter.
+ mutable stdx::mutex _mutex;
+
+ // Stores the most recent Status returned from the ReplicationExecutor.
+ Status _status;
+
+ // _willRunAgain is true when Reporter is scheduled to be run by the executor and subsequent
+ // updates have come in.
+ bool _willRunAgain;
+ // _active is true when Reporter is scheduled to be run by the executor.
+ bool _active;
+
+ // Callback handle to the scheduled remote command.
+ ReplicationExecutor::CallbackHandle _remoteCommandCallbackHandle;
+};
+
+} // namespace repl
+} // namespace mongo
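
The trigger()/willRunAgain() contract documented above is what the Double- and TripleSchedule tests below exercise: while a command is in flight, any number of extra trigger() calls collapse into at most one follow-up run. As a reading aid, here is a minimal standalone sketch of that state machine (author's illustration, not MongoDB code; CoalescingTrigger and its members are made-up names):

#include <cassert>

class CoalescingTrigger {
public:
    // Returns true if a new command run was scheduled by this call.
    bool trigger() {
        if (_active) {
            _willRunAgain = true;  // coalesce with the in-flight run
            return false;
        }
        _active = true;
        return true;
    }

    // Invoked when the in-flight run completes; returns true if a
    // follow-up run must be scheduled immediately.
    bool onComplete() {
        assert(_active);
        _active = _willRunAgain;
        _willRunAgain = false;
        return _active;
    }

    bool isActive() const {
        return _active;
    }
    bool willRunAgain() const {
        return _willRunAgain;
    }

private:
    bool _active = false;
    bool _willRunAgain = false;
};

int main() {
    CoalescingTrigger t;
    assert(t.trigger());      // first trigger schedules a run
    assert(!t.trigger());     // second trigger only sets willRunAgain()
    assert(!t.trigger());     // a third trigger is absorbed as well
    assert(t.onComplete());   // exactly one follow-up run is scheduled
    assert(!t.onComplete());  // after which the state machine goes idle
    return 0;
}

The real Reporter layers Status propagation, the mutex, and executor callback handles on top of this core.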
diff --git a/src/mongo/db/repl/reporter_test.cpp b/src/mongo/db/repl/reporter_test.cpp
index 180ccf36088..507bd49017f 100644
--- a/src/mongo/db/repl/reporter_test.cpp
+++ b/src/mongo/db/repl/reporter_test.cpp
@@ -36,248 +36,246 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
- using executor::NetworkInterfaceMock;
-
- class MockProgressManager : public ReplicationProgressManager {
- public:
- void updateMap(int memberId, const Timestamp& ts) {
- progressMap[memberId] = ts;
- }
-
- void setResult(bool newResult) {
- _result = newResult;
- }
-
- bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
- if (!_result) {
- return _result;
- }
- cmdBuilder->append("replSetUpdatePosition", 1);
- BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
- for (auto itr = progressMap.begin(); itr != progressMap.end(); ++itr) {
- BSONObjBuilder entry(arrayBuilder.subobjStart());
- entry.append("optime", itr->second);
- entry.append("memberId", itr->first);
- entry.append("cfgver", 1);
- }
- return true;
- }
- private:
- std::map<int, Timestamp> progressMap;
- bool _result = true;
- };
-
- class ReporterTest : public ReplicationExecutorTest {
- public:
-
- ReporterTest();
- void scheduleNetworkResponse(const BSONObj& obj);
- void scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason);
- void finishProcessingNetworkResponse();
-
- protected:
-
- void setUp() override;
- void tearDown() override;
-
- std::unique_ptr<Reporter> reporter;
- std::unique_ptr<MockProgressManager> posUpdater;
-
- };
-
- ReporterTest::ReporterTest() {}
-
- void ReporterTest::setUp() {
- ReplicationExecutorTest::setUp();
- posUpdater.reset(new MockProgressManager());
- reporter.reset(new Reporter(&getExecutor(), posUpdater.get(), HostAndPort("h1")));
- launchExecutorThread();
+using namespace mongo;
+using namespace mongo::repl;
+using executor::NetworkInterfaceMock;
+
+class MockProgressManager : public ReplicationProgressManager {
+public:
+ void updateMap(int memberId, const Timestamp& ts) {
+ progressMap[memberId] = ts;
}
- void ReporterTest::tearDown() {
- ReplicationExecutorTest::tearDown();
- // Executor may still invoke reporter's callback before shutting down.
- posUpdater.reset();
- reporter.reset();
+ void setResult(bool newResult) {
+ _result = newResult;
}
- void ReporterTest::scheduleNetworkResponse(const BSONObj& obj) {
- NetworkInterfaceMock* net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- Milliseconds millis(0);
- RemoteCommandResponse response(obj, millis);
- ReplicationExecutor::ResponseStatus responseStatus(response);
- net->scheduleResponse(net->getNextReadyRequest(), net->now(), responseStatus);
+ bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
+ if (!_result) {
+ return _result;
+ }
+ cmdBuilder->append("replSetUpdatePosition", 1);
+ BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
+ for (auto itr = progressMap.begin(); itr != progressMap.end(); ++itr) {
+ BSONObjBuilder entry(arrayBuilder.subobjStart());
+ entry.append("optime", itr->second);
+ entry.append("memberId", itr->first);
+ entry.append("cfgver", 1);
+ }
+ return true;
}
- void ReporterTest::scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
- NetworkInterfaceMock* net = getNet();
- ASSERT_TRUE(net->hasReadyRequests());
- ReplicationExecutor::ResponseStatus responseStatus(code, reason);
- net->scheduleResponse(net->getNextReadyRequest(), net->now(), responseStatus);
- }
-
- TEST_F(ReporterTest, InvalidConstruction) {
- // null ReplicationProgressManager
- ASSERT_THROWS(Reporter(&getExecutor(), nullptr, HostAndPort("h1")), UserException);
-
- // null ReplicationExecutor
- ASSERT_THROWS(Reporter(nullptr, posUpdater.get(), HostAndPort("h1")), UserException);
-
- // empty HostAndPort
- ASSERT_THROWS(Reporter(&getExecutor(), posUpdater.get(), HostAndPort()), UserException);
- }
-
- TEST_F(ReporterTest, IsActiveOnceScheduled) {
- ASSERT_FALSE(reporter->isActive());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- }
-
- TEST_F(ReporterTest, CancelWithoutScheduled) {
- ASSERT_FALSE(reporter->isActive());
- reporter->cancel();
- ASSERT_FALSE(reporter->isActive());
- }
-
- TEST_F(ReporterTest, ShutdownBeforeSchedule) {
- getExecutor().shutdown();
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, reporter->trigger());
- ASSERT_FALSE(reporter->isActive());
- }
-
- // If an error is returned, it should be recorded in the Reporter and be returned when triggered
- TEST_F(ReporterTest, ErrorsAreStoredInTheReporter) {
- posUpdater->updateMap(0, Timestamp(3,0));
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- scheduleNetworkResponse(ErrorCodes::NoSuchKey, "waaaah");
- getNet()->runReadyNetworkOperations();
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, reporter->getStatus());
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, reporter->trigger());
- ASSERT_FALSE(reporter->isActive());
- ASSERT_FALSE(getNet()->hasReadyRequests());
- }
-
- // If an error is returned, it should be recorded in the Reporter and not run again.
- TEST_F(ReporterTest, ErrorsStopTheReporter) {
- posUpdater->updateMap(0, Timestamp(3,0));
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_TRUE(reporter->willRunAgain());
-
- scheduleNetworkResponse(ErrorCodes::NoSuchKey, "waaaah");
- getNet()->runReadyNetworkOperations();
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, reporter->getStatus());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_FALSE(reporter->isActive());
- ASSERT_FALSE(getNet()->hasReadyRequests());
- }
-
- // Schedule while we are already scheduled, it should set willRunAgain, then automatically
- // schedule itself after finishing.
- TEST_F(ReporterTest, DoubleScheduleShouldCauseRescheduleImmediatelyAfterRespondedTo) {
- posUpdater->updateMap(0, Timestamp(3,0));
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_TRUE(reporter->willRunAgain());
-
- scheduleNetworkResponse(BSON("ok" << 1));
- getNet()->runReadyNetworkOperations();
- ASSERT_TRUE(getNet()->hasReadyRequests());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
-
- scheduleNetworkResponse(BSON("ok" << 1));
- getNet()->runReadyNetworkOperations();
- ASSERT_FALSE(getNet()->hasReadyRequests());
- ASSERT_FALSE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- }
-
- // Schedule multiple times while we are already scheduled, it should set willRunAgain,
- // then automatically schedule itself after finishing, but not a third time since the latter
- // two will contain the same batch of updates.
- TEST_F(ReporterTest, TripleScheduleShouldCauseRescheduleImmediatelyAfterRespondedToOnlyOnce) {
- posUpdater->updateMap(0, Timestamp(3,0));
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_TRUE(reporter->willRunAgain());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_TRUE(reporter->willRunAgain());
-
- scheduleNetworkResponse(BSON("ok" << 1));
- getNet()->runReadyNetworkOperations();
- ASSERT_TRUE(getNet()->hasReadyRequests());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
-
- scheduleNetworkResponse(BSON("ok" << 1));
- getNet()->runReadyNetworkOperations();
- ASSERT_FALSE(getNet()->hasReadyRequests());
- ASSERT_FALSE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- }
-
- TEST_F(ReporterTest, CancelWhileScheduled) {
- posUpdater->updateMap(0, Timestamp(3,0));
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_TRUE(reporter->willRunAgain());
-
- reporter->cancel();
- getNet()->runReadyNetworkOperations();
- ASSERT_FALSE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_FALSE(getNet()->hasReadyRequests());
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->getStatus());
-
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->trigger());
- }
-
- TEST_F(ReporterTest, CancelAfterFirstReturns) {
- posUpdater->updateMap(0, Timestamp(3,0));
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_OK(reporter->trigger());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_TRUE(reporter->willRunAgain());
-
- scheduleNetworkResponse(BSON("ok" << 1));
- getNet()->runReadyNetworkOperations();
- ASSERT_TRUE(getNet()->hasReadyRequests());
- ASSERT_TRUE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
-
- reporter->cancel();
- getNet()->runReadyNetworkOperations();
- ASSERT_FALSE(reporter->isActive());
- ASSERT_FALSE(reporter->willRunAgain());
- ASSERT_FALSE(getNet()->hasReadyRequests());
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->getStatus());
-
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->trigger());
- }
-
- TEST_F(ReporterTest, ProgressManagerFails) {
- posUpdater->setResult(false);
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, reporter->trigger().code());
- }
-
-} // namespace
+private:
+ std::map<int, Timestamp> progressMap;
+ bool _result = true;
+};
+
+class ReporterTest : public ReplicationExecutorTest {
+public:
+ ReporterTest();
+ void scheduleNetworkResponse(const BSONObj& obj);
+ void scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason);
+ void finishProcessingNetworkResponse();
+
+protected:
+ void setUp() override;
+ void tearDown() override;
+
+ std::unique_ptr<Reporter> reporter;
+ std::unique_ptr<MockProgressManager> posUpdater;
+};
+
+ReporterTest::ReporterTest() {}
+
+void ReporterTest::setUp() {
+ ReplicationExecutorTest::setUp();
+ posUpdater.reset(new MockProgressManager());
+ reporter.reset(new Reporter(&getExecutor(), posUpdater.get(), HostAndPort("h1")));
+ launchExecutorThread();
+}
+
+void ReporterTest::tearDown() {
+ ReplicationExecutorTest::tearDown();
+ // Executor may still invoke reporter's callback before shutting down.
+ posUpdater.reset();
+ reporter.reset();
+}
+
+void ReporterTest::scheduleNetworkResponse(const BSONObj& obj) {
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ Milliseconds millis(0);
+ RemoteCommandResponse response(obj, millis);
+ ReplicationExecutor::ResponseStatus responseStatus(response);
+ net->scheduleResponse(net->getNextReadyRequest(), net->now(), responseStatus);
+}
+
+void ReporterTest::scheduleNetworkResponse(ErrorCodes::Error code, const std::string& reason) {
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_TRUE(net->hasReadyRequests());
+ ReplicationExecutor::ResponseStatus responseStatus(code, reason);
+ net->scheduleResponse(net->getNextReadyRequest(), net->now(), responseStatus);
+}
+
+TEST_F(ReporterTest, InvalidConstruction) {
+ // null ReplicationProgressManager
+ ASSERT_THROWS(Reporter(&getExecutor(), nullptr, HostAndPort("h1")), UserException);
+
+ // null ReplicationExecutor
+ ASSERT_THROWS(Reporter(nullptr, posUpdater.get(), HostAndPort("h1")), UserException);
+
+ // empty HostAndPort
+ ASSERT_THROWS(Reporter(&getExecutor(), posUpdater.get(), HostAndPort()), UserException);
+}
+
+TEST_F(ReporterTest, IsActiveOnceScheduled) {
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+}
+
+TEST_F(ReporterTest, CancelWithoutScheduled) {
+ ASSERT_FALSE(reporter->isActive());
+ reporter->cancel();
+ ASSERT_FALSE(reporter->isActive());
+}
+
+TEST_F(ReporterTest, ShutdownBeforeSchedule) {
+ getExecutor().shutdown();
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, reporter->trigger());
+ ASSERT_FALSE(reporter->isActive());
+}
+
+// If an error is returned, it should be recorded in the Reporter and returned by the next trigger()
+TEST_F(ReporterTest, ErrorsAreStoredInTheReporter) {
+ posUpdater->updateMap(0, Timestamp(3, 0));
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ scheduleNetworkResponse(ErrorCodes::NoSuchKey, "waaaah");
+ getNet()->runReadyNetworkOperations();
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, reporter->getStatus());
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, reporter->trigger());
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+}
+
+// If an error is returned, it should be recorded in the Reporter, which should not run again.
+TEST_F(ReporterTest, ErrorsStopTheReporter) {
+ posUpdater->updateMap(0, Timestamp(3, 0));
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_TRUE(reporter->willRunAgain());
+
+ scheduleNetworkResponse(ErrorCodes::NoSuchKey, "waaaah");
+ getNet()->runReadyNetworkOperations();
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, reporter->getStatus());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+}
+
+// Scheduling while already scheduled should set willRunAgain; the Reporter then automatically
+// reschedules itself after the current command completes.
+TEST_F(ReporterTest, DoubleScheduleShouldCauseRescheduleImmediatelyAfterRespondedTo) {
+ posUpdater->updateMap(0, Timestamp(3, 0));
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_TRUE(reporter->willRunAgain());
+
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getNet()->runReadyNetworkOperations();
+ ASSERT_TRUE(getNet()->hasReadyRequests());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getNet()->runReadyNetworkOperations();
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+}
+
+// Scheduling multiple times while already scheduled should set willRunAgain and cause
+// exactly one automatic reschedule after the current command completes; a third run is
+// unnecessary because the later triggers would all send the same batch of updates.
+TEST_F(ReporterTest, TripleScheduleShouldCauseRescheduleImmediatelyAfterRespondedToOnlyOnce) {
+ posUpdater->updateMap(0, Timestamp(3, 0));
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_TRUE(reporter->willRunAgain());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_TRUE(reporter->willRunAgain());
+
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getNet()->runReadyNetworkOperations();
+ ASSERT_TRUE(getNet()->hasReadyRequests());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getNet()->runReadyNetworkOperations();
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+}
+
+TEST_F(ReporterTest, CancelWhileScheduled) {
+ posUpdater->updateMap(0, Timestamp(3, 0));
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_TRUE(reporter->willRunAgain());
+
+ reporter->cancel();
+ getNet()->runReadyNetworkOperations();
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->getStatus());
+
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->trigger());
+}
+
+TEST_F(ReporterTest, CancelAfterFirstReturns) {
+ posUpdater->updateMap(0, Timestamp(3, 0));
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_OK(reporter->trigger());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_TRUE(reporter->willRunAgain());
+
+ scheduleNetworkResponse(BSON("ok" << 1));
+ getNet()->runReadyNetworkOperations();
+ ASSERT_TRUE(getNet()->hasReadyRequests());
+ ASSERT_TRUE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+
+ reporter->cancel();
+ getNet()->runReadyNetworkOperations();
+ ASSERT_FALSE(reporter->isActive());
+ ASSERT_FALSE(reporter->willRunAgain());
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->getStatus());
+
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, reporter->trigger());
+}
+
+TEST_F(ReporterTest, ProgressManagerFails) {
+ posUpdater->setResult(false);
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, reporter->trigger().code());
+}
+
+} // namespace
diff --git a/src/mongo/db/repl/resync.cpp b/src/mongo/db/repl/resync.cpp
index 0daa1ef3197..bdab9c63cc4 100644
--- a/src/mongo/db/repl/resync.cpp
+++ b/src/mongo/db/repl/resync.cpp
@@ -35,99 +35,99 @@
namespace mongo {
- using std::string;
- using std::stringstream;
+using std::string;
+using std::stringstream;
namespace repl {
- // operator requested resynchronization of replication (on a slave or secondary). {resync: 1}
- class CmdResync : public Command {
- public:
- virtual bool slaveOk() const {
- return true;
- }
- virtual bool adminOnly() const {
- return true;
- }
- virtual bool isWriteCommandForConfigServer() const { return true; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::resync);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
+// Operator-requested resynchronization of replication (on a slave or secondary): {resync: 1}
+class CmdResync : public Command {
+public:
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual bool adminOnly() const {
+ return true;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return true;
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::resync);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
- void help(stringstream& h) const {
- h << "resync (from scratch) a stale slave or replica set secondary node.\n";
- }
+ void help(stringstream& h) const {
+ h << "resync (from scratch) a stale slave or replica set secondary node.\n";
+ }
- CmdResync() : Command("resync") { }
- virtual bool run(OperationContext* txn,
- const string& dbname,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result) {
+ CmdResync() : Command("resync") {}
+ virtual bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result) {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite globalWriteLock(txn->lockState());
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite globalWriteLock(txn->lockState());
-
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
- const MemberState memberState = replCoord->getMemberState();
- if (memberState.startup()) {
- return appendCommandStatus(result, Status(ErrorCodes::NotYetInitialized,
- "no replication yet active"));
- }
- if (memberState.primary() ||
- !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
- return appendCommandStatus(result, Status(ErrorCodes::NotSecondary,
- "primaries cannot resync"));
- }
- BackgroundSync::get()->setInitialSyncRequestedFlag(true);
- return true;
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
+ const MemberState memberState = replCoord->getMemberState();
+ if (memberState.startup()) {
+ return appendCommandStatus(
+ result, Status(ErrorCodes::NotYetInitialized, "no replication yet active"));
}
-
- // below this comment pertains only to master/slave replication
- if ( cmdObj.getBoolField( "force" ) ) {
- if ( !waitForSyncToFinish(txn, errmsg ) )
- return false;
- replAllDead = "resync forced";
+ if (memberState.primary() || !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
+ return appendCommandStatus(
+ result, Status(ErrorCodes::NotSecondary, "primaries cannot resync"));
}
- // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
- if (!replAllDead) {
- errmsg = "not dead, no need to resync";
- return false;
- }
- if ( !waitForSyncToFinish(txn, errmsg ) )
- return false;
-
- ReplSource::forceResyncDead( txn, "client" );
- result.append( "info", "triggered resync for all sources" );
-
+ BackgroundSync::get()->setInitialSyncRequestedFlag(true);
return true;
}
- bool waitForSyncToFinish(OperationContext* txn, string &errmsg) const {
- // Wait for slave thread to finish syncing, so sources will be be
- // reloaded with new saved state on next pass.
- Timer t;
- while ( 1 ) {
- if ( syncing == 0 || t.millis() > 30000 )
- break;
- {
- Lock::TempRelease t(txn->lockState());
- relinquishSyncingSome = 1;
- sleepmillis(1);
- }
- }
- if ( syncing ) {
- errmsg = "timeout waiting for sync() to finish";
+ // below this comment pertains only to master/slave replication
+ if (cmdObj.getBoolField("force")) {
+ if (!waitForSyncToFinish(txn, errmsg))
return false;
+ replAllDead = "resync forced";
+ }
+ // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
+ if (!replAllDead) {
+ errmsg = "not dead, no need to resync";
+ return false;
+ }
+ if (!waitForSyncToFinish(txn, errmsg))
+ return false;
+
+ ReplSource::forceResyncDead(txn, "client");
+ result.append("info", "triggered resync for all sources");
+
+ return true;
+ }
+
+ bool waitForSyncToFinish(OperationContext* txn, string& errmsg) const {
+        // Wait for the slave thread to finish syncing, so sources will be
+        // reloaded with new saved state on next pass.
+ Timer t;
+ while (1) {
+ if (syncing == 0 || t.millis() > 30000)
+ break;
+ {
+ Lock::TempRelease t(txn->lockState());
+ relinquishSyncingSome = 1;
+ sleepmillis(1);
}
- return true;
}
- } cmdResync;
-} // namespace repl
-} // namespace mongo
+ if (syncing) {
+ errmsg = "timeout waiting for sync() to finish";
+ return false;
+ }
+ return true;
+ }
+} cmdResync;
+} // namespace repl
+} // namespace mongo
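
CmdResync::waitForSyncToFinish() above uses a bounded-poll pattern: check the 'syncing' counter for up to 30 seconds, releasing the global lock around each sleep so the sync thread can actually finish. A minimal standalone sketch of the same pattern (illustration only; std::mutex stands in for the global lock, the atomic stands in for the repl 'syncing' counter, and waitForSyncToFinishSketch is a made-up name):

#include <atomic>
#include <chrono>
#include <mutex>
#include <thread>

std::atomic<int> syncing{0};  // stands in for the repl 'syncing' counter

// Polls for up to 30 seconds, releasing the lock around each sleep
// (the role Lock::TempRelease plays in CmdResync::waitForSyncToFinish).
bool waitForSyncToFinishSketch(std::unique_lock<std::mutex>& globalLock) {
    const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(30);
    while (syncing.load() != 0 && std::chrono::steady_clock::now() < deadline) {
        globalLock.unlock();
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        globalLock.lock();
    }
    return syncing.load() == 0;  // false maps to "timeout waiting for sync() to finish"
}

int main() {
    std::mutex globalMutex;
    std::unique_lock<std::mutex> lock(globalMutex);
    syncing = 1;
    std::thread syncThread([] {
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        syncing = 0;  // the sync pass finishes shortly
    });
    const bool finished = waitForSyncToFinishSketch(lock);
    syncThread.join();
    return finished ? 0 : 1;
}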
diff --git a/src/mongo/db/repl/roll_back_local_operations.cpp b/src/mongo/db/repl/roll_back_local_operations.cpp
index 2cf07fa36b7..2117458e9f4 100644
--- a/src/mongo/db/repl/roll_back_local_operations.cpp
+++ b/src/mongo/db/repl/roll_back_local_operations.cpp
@@ -41,157 +41,145 @@ namespace repl {
namespace {
- Timestamp getTimestamp(const BSONObj& operation) {
- return operation["ts"].timestamp();
- }
-
- Timestamp getTimestamp(const OplogInterface::Iterator::Value& oplogValue) {
- return getTimestamp(oplogValue.first);
- }
-
- long long getHash(const BSONObj& operation) {
- return operation["h"].Long();
- }
-
- long long getHash(const OplogInterface::Iterator::Value& oplogValue) {
- return getHash(oplogValue.first);
- }
-
-} // namespace
-
- RollBackLocalOperations::RollBackLocalOperations(
- const OplogInterface& localOplog,
- const RollbackOperationFn& rollbackOperation)
-
- : _localOplogIterator(localOplog.makeIterator()),
- _rollbackOperation(rollbackOperation),
- _scanned(0) {
-
- uassert(ErrorCodes::BadValue, "invalid local oplog iterator", _localOplogIterator);
- uassert(ErrorCodes::BadValue, "null roll back operation function", rollbackOperation);
+Timestamp getTimestamp(const BSONObj& operation) {
+ return operation["ts"].timestamp();
+}
+
+Timestamp getTimestamp(const OplogInterface::Iterator::Value& oplogValue) {
+ return getTimestamp(oplogValue.first);
+}
+
+long long getHash(const BSONObj& operation) {
+ return operation["h"].Long();
+}
+
+long long getHash(const OplogInterface::Iterator::Value& oplogValue) {
+ return getHash(oplogValue.first);
+}
+
+} // namespace
+
+RollBackLocalOperations::RollBackLocalOperations(const OplogInterface& localOplog,
+ const RollbackOperationFn& rollbackOperation)
+
+ : _localOplogIterator(localOplog.makeIterator()),
+ _rollbackOperation(rollbackOperation),
+ _scanned(0) {
+ uassert(ErrorCodes::BadValue, "invalid local oplog iterator", _localOplogIterator);
+ uassert(ErrorCodes::BadValue, "null roll back operation function", rollbackOperation);
+}
+
+StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations::onRemoteOperation(
+ const BSONObj& operation) {
+ if (_scanned == 0) {
+ auto result = _localOplogIterator->next();
+ if (!result.isOK()) {
+ return StatusWith<RollbackCommonPoint>(ErrorCodes::OplogStartMissing,
+ "no oplog during initsync");
+ }
+ _localOplogValue = result.getValue();
+
+ long long diff = static_cast<long long>(getTimestamp(_localOplogValue).getSecs()) -
+ getTimestamp(operation).getSecs();
+ // diff could be positive, negative, or zero
+ log() << "rollback our last optime: " << getTimestamp(_localOplogValue).toStringPretty();
+ log() << "rollback their last optime: " << getTimestamp(operation).toStringPretty();
+ log() << "rollback diff in end of log times: " << diff << " seconds";
+ if (diff > 1800) {
+ severe() << "rollback too long a time period for a rollback.";
+ return StatusWith<RollbackCommonPoint>(
+ ErrorCodes::ExceededTimeLimit,
+ "rollback error: not willing to roll back more than 30 minutes of data");
+ }
}
- StatusWith<RollBackLocalOperations::RollbackCommonPoint>
- RollBackLocalOperations::onRemoteOperation(const BSONObj& operation) {
-
- if (_scanned == 0) {
- auto result = _localOplogIterator->next();
- if (!result.isOK()) {
- return StatusWith<RollbackCommonPoint>(ErrorCodes::OplogStartMissing,
- "no oplog during initsync");
- }
- _localOplogValue = result.getValue();
-
- long long diff =
- static_cast<long long>(getTimestamp(_localOplogValue).getSecs()) -
- getTimestamp(operation).getSecs();
- // diff could be positive, negative, or zero
- log() << "rollback our last optime: "
- << getTimestamp(_localOplogValue).toStringPretty();
- log() << "rollback their last optime: " << getTimestamp(operation).toStringPretty();
- log() << "rollback diff in end of log times: " << diff << " seconds";
- if (diff > 1800) {
- severe() << "rollback too long a time period for a rollback.";
- return StatusWith<RollbackCommonPoint>(
- ErrorCodes::ExceededTimeLimit,
- "rollback error: not willing to roll back more than 30 minutes of data");
- }
+ while (getTimestamp(_localOplogValue) > getTimestamp(operation)) {
+ _scanned++;
+ auto status = _rollbackOperation(_localOplogValue.first);
+ if (!status.isOK()) {
+ invariant(ErrorCodes::NoSuchKey != status.code());
+ return status;
}
-
- while (getTimestamp(_localOplogValue) > getTimestamp(operation)) {
- _scanned++;
- auto status = _rollbackOperation(_localOplogValue.first);
- if (!status.isOK()) {
- invariant(ErrorCodes::NoSuchKey != status.code());
- return status;
- }
- auto result = _localOplogIterator->next();
- if (!result.isOK()) {
- severe() << "rollback error RS101 reached beginning of local oplog";
- log() << " scanned: " << _scanned;
- log() << " theirTime: " << getTimestamp(operation).toStringLong();
- log() << " ourTime: " << getTimestamp(_localOplogValue).toStringLong();
- return StatusWith<RollbackCommonPoint>(
- ErrorCodes::NoMatchingDocument,
- "RS101 reached beginning of local oplog [2]");
- }
- _localOplogValue = result.getValue();
+ auto result = _localOplogIterator->next();
+ if (!result.isOK()) {
+ severe() << "rollback error RS101 reached beginning of local oplog";
+ log() << " scanned: " << _scanned;
+ log() << " theirTime: " << getTimestamp(operation).toStringLong();
+ log() << " ourTime: " << getTimestamp(_localOplogValue).toStringLong();
+ return StatusWith<RollbackCommonPoint>(ErrorCodes::NoMatchingDocument,
+ "RS101 reached beginning of local oplog [2]");
}
+ _localOplogValue = result.getValue();
+ }
- if (getTimestamp(_localOplogValue) == getTimestamp(operation)) {
- _scanned++;
- if (getHash(_localOplogValue) == getHash(operation)) {
- return StatusWith<RollbackCommonPoint>(
- std::make_pair(getTimestamp(_localOplogValue), _localOplogValue.second));
- }
- auto status = _rollbackOperation(_localOplogValue.first);
- if (!status.isOK()) {
- invariant(ErrorCodes::NoSuchKey != status.code());
- return status;
- }
- auto result = _localOplogIterator->next();
- if (!result.isOK()) {
- severe() << "rollback error RS101 reached beginning of local oplog";
- log() << " scanned: " << _scanned;
- log() << " theirTime: " << getTimestamp(operation).toStringLong();
- log() << " ourTime: " << getTimestamp(_localOplogValue).toStringLong();
- return StatusWith<RollbackCommonPoint>(
- ErrorCodes::NoMatchingDocument,
- "RS101 reached beginning of local oplog [1]");
- }
- _localOplogValue = result.getValue();
+ if (getTimestamp(_localOplogValue) == getTimestamp(operation)) {
+ _scanned++;
+ if (getHash(_localOplogValue) == getHash(operation)) {
return StatusWith<RollbackCommonPoint>(
- ErrorCodes::NoSuchKey,
- "Unable to determine common point - same timestamp but different hash. "
- "Need to process additional remote operations.");
+ std::make_pair(getTimestamp(_localOplogValue), _localOplogValue.second));
}
-
- if (getTimestamp(_localOplogValue) < getTimestamp(operation)) {
- _scanned++;
- return StatusWith<RollbackCommonPoint>(
- ErrorCodes::NoSuchKey,
- "Unable to determine common point. "
- "Need to process additional remote operations.");
+ auto status = _rollbackOperation(_localOplogValue.first);
+ if (!status.isOK()) {
+ invariant(ErrorCodes::NoSuchKey != status.code());
+ return status;
}
-
- return RollbackCommonPoint(Timestamp(Seconds(1), 0), RecordId());
+ auto result = _localOplogIterator->next();
+ if (!result.isOK()) {
+ severe() << "rollback error RS101 reached beginning of local oplog";
+ log() << " scanned: " << _scanned;
+ log() << " theirTime: " << getTimestamp(operation).toStringLong();
+ log() << " ourTime: " << getTimestamp(_localOplogValue).toStringLong();
+ return StatusWith<RollbackCommonPoint>(ErrorCodes::NoMatchingDocument,
+ "RS101 reached beginning of local oplog [1]");
+ }
+ _localOplogValue = result.getValue();
+ return StatusWith<RollbackCommonPoint>(
+ ErrorCodes::NoSuchKey,
+ "Unable to determine common point - same timestamp but different hash. "
+ "Need to process additional remote operations.");
}
- StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperations(
- const OplogInterface& localOplog,
- const OplogInterface& remoteOplog,
- const RollBackLocalOperations::RollbackOperationFn& rollbackOperation) {
-
- auto remoteIterator = remoteOplog.makeIterator();
- auto remoteResult = remoteIterator->next();
- if (!remoteResult.isOK()) {
- return StatusWith<RollBackLocalOperations::RollbackCommonPoint>(
- ErrorCodes::InvalidSyncSource,
- "remote oplog empty or unreadable");
- }
+ if (getTimestamp(_localOplogValue) < getTimestamp(operation)) {
+ _scanned++;
+ return StatusWith<RollbackCommonPoint>(ErrorCodes::NoSuchKey,
+ "Unable to determine common point. "
+ "Need to process additional remote operations.");
+ }
- RollBackLocalOperations finder(localOplog, rollbackOperation);
- Timestamp theirTime;
- while (remoteResult.isOK()) {
- theirTime = remoteResult.getValue().first["ts"].timestamp();
- BSONObj theirObj = remoteResult.getValue().first;
- auto result = finder.onRemoteOperation(theirObj);
- if (result.isOK()) {
- return result.getValue();
- }
- else if (result.getStatus().code() != ErrorCodes::NoSuchKey) {
- return result;
- }
- remoteResult = remoteIterator->next();
- }
+ return RollbackCommonPoint(Timestamp(Seconds(1), 0), RecordId());
+}
- severe() << "rollback error RS100 reached beginning of remote oplog";
- log() << " them: " << remoteOplog.toString();
- log() << " theirTime: " << theirTime.toStringLong();
+StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperations(
+ const OplogInterface& localOplog,
+ const OplogInterface& remoteOplog,
+ const RollBackLocalOperations::RollbackOperationFn& rollbackOperation) {
+ auto remoteIterator = remoteOplog.makeIterator();
+ auto remoteResult = remoteIterator->next();
+ if (!remoteResult.isOK()) {
return StatusWith<RollBackLocalOperations::RollbackCommonPoint>(
- ErrorCodes::NoMatchingDocument,
- "RS100 reached beginning of remote oplog [1]");
+ ErrorCodes::InvalidSyncSource, "remote oplog empty or unreadable");
+ }
+
+ RollBackLocalOperations finder(localOplog, rollbackOperation);
+ Timestamp theirTime;
+ while (remoteResult.isOK()) {
+ theirTime = remoteResult.getValue().first["ts"].timestamp();
+ BSONObj theirObj = remoteResult.getValue().first;
+ auto result = finder.onRemoteOperation(theirObj);
+ if (result.isOK()) {
+ return result.getValue();
+ } else if (result.getStatus().code() != ErrorCodes::NoSuchKey) {
+ return result;
+ }
+ remoteResult = remoteIterator->next();
}
-} // namespace repl
-} // namespace mongo
+ severe() << "rollback error RS100 reached beginning of remote oplog";
+ log() << " them: " << remoteOplog.toString();
+ log() << " theirTime: " << theirTime.toStringLong();
+ return StatusWith<RollBackLocalOperations::RollbackCommonPoint>(
+ ErrorCodes::NoMatchingDocument, "RS100 reached beginning of remote oplog [1]");
+}
+
+} // namespace repl
+} // namespace mongo
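
The core of onRemoteOperation() above is a backwards scan of the local oplog against a single remote entry: everything strictly newer than the remote timestamp is handed to the rollback function, and a common point requires both timestamp and hash to match. A simplified standalone model (illustration only; Entry and findCommonPoint are made-up names, and unlike the real code it omits the 30-minute guard and the rollback of the equal-timestamp/different-hash entry):

#include <cassert>
#include <cstdint>
#include <vector>

struct Entry {
    int64_t ts;    // stands in for the "ts" timestamp field
    int64_t hash;  // stands in for the "h" hash field
};

// Scans 'local' (newest first) for the common point with one remote entry.
// Every local-only entry is passed to 'rollback'. Returns the index of the
// common point, or -1 when the caller must fetch more remote entries or
// has reached the beginning of the local oplog (RS101 in the real code).
template <typename RollbackFn>
int findCommonPoint(const std::vector<Entry>& local, Entry remote, RollbackFn rollback) {
    for (size_t i = 0; i < local.size(); ++i) {
        if (local[i].ts > remote.ts) {
            rollback(local[i]);  // newer than the remote entry: undo it
            continue;
        }
        if (local[i].ts == remote.ts && local[i].hash == remote.hash) {
            return static_cast<int>(i);  // common point found
        }
        return -1;  // same ts/different hash, or older: need more remote ops
    }
    return -1;  // reached beginning of local oplog
}

int main() {
    // Mirrors RollbackMultipleLocalOperations: four entries to undo,
    // then a common point at ts 1.
    std::vector<Entry> local{{5, 1}, {4, 1}, {3, 1}, {2, 1}, {1, 1}};
    int rolledBack = 0;
    int idx = findCommonPoint(local, {1, 1}, [&](const Entry&) { ++rolledBack; });
    assert(idx == 4 && rolledBack == 4);
    return 0;
}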
diff --git a/src/mongo/db/repl/roll_back_local_operations.h b/src/mongo/db/repl/roll_back_local_operations.h
index 4a9d5b71cd8..20eb923083d 100644
--- a/src/mongo/db/repl/roll_back_local_operations.h
+++ b/src/mongo/db/repl/roll_back_local_operations.h
@@ -39,57 +39,55 @@
namespace mongo {
namespace repl {
- class RollBackLocalOperations {
- MONGO_DISALLOW_COPYING(RollBackLocalOperations);
- public:
+class RollBackLocalOperations {
+ MONGO_DISALLOW_COPYING(RollBackLocalOperations);
- /**
- * Type of function to roll back an operation or process it for future use.
- * It can return any status except ErrorCodes::NoSuchKey. See onRemoteOperation().
- */
- using RollbackOperationFn = stdx::function<Status (const BSONObj&)>;
-
- using RollbackCommonPoint = std::pair<Timestamp, RecordId>;
-
- /**
- * Initializes rollback processor with a valid local oplog.
- * Whenever we encounter an operation in the local oplog that has to be rolled back,
- * we will pass it to 'rollbackOperation'.
- */
- RollBackLocalOperations(const OplogInterface& localOplog,
- const RollbackOperationFn& rollbackOperation);
-
- virtual ~RollBackLocalOperations() = default;
-
- /**
- * Process single remote operation.
- * Returns ErrorCodes::NoSuchKey if common point has not been found and
- * additional operations have to be read from the remote oplog.
- */
- StatusWith<RollbackCommonPoint> onRemoteOperation(const BSONObj& operation);
+public:
+ /**
+ * Type of function to roll back an operation or process it for future use.
+ * It can return any status except ErrorCodes::NoSuchKey. See onRemoteOperation().
+ */
+ using RollbackOperationFn = stdx::function<Status(const BSONObj&)>;
- private:
+ using RollbackCommonPoint = std::pair<Timestamp, RecordId>;
- std::unique_ptr<OplogInterface::Iterator> _localOplogIterator;
- RollbackOperationFn _rollbackOperation;
- OplogInterface::Iterator::Value _localOplogValue;
- unsigned long long _scanned;
+ /**
+ * Initializes rollback processor with a valid local oplog.
+ * Whenever we encounter an operation in the local oplog that has to be rolled back,
+ * we will pass it to 'rollbackOperation'.
+ */
+ RollBackLocalOperations(const OplogInterface& localOplog,
+ const RollbackOperationFn& rollbackOperation);
- };
+ virtual ~RollBackLocalOperations() = default;
/**
- * Rolls back every operation in the local oplog that is not in the remote oplog, in reverse
- * order.
- *
- * Whenever we encounter an operation in the local oplog that has to be rolled back,
- * we will pass it to 'rollbackOperation' starting with the most recent operation.
- * It is up to 'rollbackOperation' to roll back this operation immediately or
- * process it for future use.
+     * Processes a single remote operation.
+     * Returns ErrorCodes::NoSuchKey if the common point has not been found and
+     * additional operations have to be read from the remote oplog.
*/
- StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperations(
- const OplogInterface& localOplog,
- const OplogInterface& remoteOplog,
- const RollBackLocalOperations::RollbackOperationFn& rollbackOperation);
+ StatusWith<RollbackCommonPoint> onRemoteOperation(const BSONObj& operation);
+
+private:
+ std::unique_ptr<OplogInterface::Iterator> _localOplogIterator;
+ RollbackOperationFn _rollbackOperation;
+ OplogInterface::Iterator::Value _localOplogValue;
+ unsigned long long _scanned;
+};
+
+/**
+ * Rolls back every operation in the local oplog that is not in the remote oplog, in reverse
+ * order.
+ *
+ * Whenever we encounter an operation in the local oplog that has to be rolled back,
+ * we will pass it to 'rollbackOperation' starting with the most recent operation.
+ * It is up to 'rollbackOperation' to roll back this operation immediately or
+ * process it for future use.
+ */
+StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperations(
+ const OplogInterface& localOplog,
+ const OplogInterface& remoteOplog,
+ const RollBackLocalOperations::RollbackOperationFn& rollbackOperation);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
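
syncRollBackLocalOperations(), declared above, wraps that scan in an outer loop: remote entries are consumed newest first until the finder stops answering with ErrorCodes::NoSuchKey. A standalone model of just that control flow (illustration only; Entry, Probe, and probeLocal are made-up names):

#include <cassert>
#include <cstdint>
#include <vector>

struct Entry {
    int64_t ts;
    int64_t hash;
};

enum class Probe { Found, NeedMoreRemote, Failed };

// One probe against the local oplog for a single remote entry; 'pos'
// tracks how far the local scan has advanced across probes.
Probe probeLocal(const std::vector<Entry>& local, size_t& pos, Entry remote) {
    while (pos < local.size() && local[pos].ts > remote.ts) {
        ++pos;  // local-only entry: the real code rolls it back here
    }
    if (pos == local.size()) {
        return Probe::Failed;  // RS101: reached beginning of local oplog
    }
    if (local[pos].ts == remote.ts && local[pos].hash == remote.hash) {
        return Probe::Found;  // common point
    }
    if (local[pos].ts == remote.ts) {
        ++pos;  // same ts, different hash: discard our entry as well
    }
    return Probe::NeedMoreRemote;  // NoSuchKey: fetch the next remote entry
}

int main() {
    std::vector<Entry> local{{5, 1}, {4, 1}, {2, 1}, {1, 1}};
    std::vector<Entry> remote{{6, 9}, {3, 9}, {1, 1}};
    size_t pos = 0;
    Probe p = Probe::NeedMoreRemote;
    for (size_t r = 0; r < remote.size() && p == Probe::NeedMoreRemote; ++r) {
        p = probeLocal(local, pos, remote[r]);
    }
    assert(p == Probe::Found && local[pos].ts == 1);  // common point at ts 1
    return 0;
}

Running out of remote entries while still in the NeedMoreRemote state corresponds to the RS100 error path in the real code.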
diff --git a/src/mongo/db/repl/roll_back_local_operations_test.cpp b/src/mongo/db/repl/roll_back_local_operations_test.cpp
index 9fae8ce648c..06af9890571 100644
--- a/src/mongo/db/repl/roll_back_local_operations_test.cpp
+++ b/src/mongo/db/repl/roll_back_local_operations_test.cpp
@@ -37,396 +37,369 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
+using namespace mongo;
+using namespace mongo::repl;
- const OplogInterfaceMock::Operations kEmptyMockOperations;
+const OplogInterfaceMock::Operations kEmptyMockOperations;
- BSONObj makeOp(int seconds, long long hash) {
- return BSON("ts" << Timestamp(Seconds(seconds), 0) << "h" << hash);
- }
-
- int recordId = 0;
- OplogInterfaceMock::Operation makeOpAndRecordId(int seconds, long long hash) {
- return std::make_pair(makeOp(seconds, hash), RecordId(++recordId));
- }
-
- TEST(RollBackLocalOperationsTest, InvalidLocalOplogIterator) {
- class InvalidOplogInterface : public OplogInterface {
- public:
- std::string toString() const override { return ""; }
- std::unique_ptr<Iterator> makeIterator() const override {
- return std::unique_ptr<Iterator>();
- }
- } invalidOplog;
- ASSERT_THROWS_CODE(
- RollBackLocalOperations(invalidOplog, [](const BSONObj&) { return Status::OK(); }),
- UserException,
- ErrorCodes::BadValue);
- }
-
- TEST(RollBackLocalOperationsTest, InvalidRollbackOperationFunction) {
- ASSERT_THROWS_CODE(
- RollBackLocalOperations(OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
- RollBackLocalOperations::RollbackOperationFn()),
- UserException,
- ErrorCodes::BadValue);
- }
-
- TEST(RollBackLocalOperationsTest, EmptyLocalOplog) {
- OplogInterfaceMock localOplog(kEmptyMockOperations);
- RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
- auto result = finder.onRemoteOperation(makeOp(1, 0));
- ASSERT_EQUALS(ErrorCodes::OplogStartMissing, result.getStatus().code());
- }
-
- TEST(RollBackLocalOperationsTest, RollbackPeriodTooLong) {
- OplogInterfaceMock localOplog({makeOpAndRecordId(1802, 0)});
- RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
- auto result = finder.onRemoteOperation(makeOp(1, 0));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, result.getStatus().code());
- }
-
- TEST(RollBackLocalOperationsTest, RollbackMultipleLocalOperations) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(5, 1),
- makeOpAndRecordId(4, 1),
- makeOpAndRecordId(3, 1),
- makeOpAndRecordId(2, 1),
- commonOperation,
- });
- OplogInterfaceMock localOplog(localOperations);
- auto i = localOperations.cbegin();
- auto rollbackOperation = [&](const BSONObj& operation) {
- ASSERT_EQUALS(i->first, operation);
- i++;
- return Status::OK();
- };
- RollBackLocalOperations finder(localOplog, rollbackOperation);
- auto result = finder.onRemoteOperation(commonOperation.first);
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
- ASSERT_EQUALS(commonOperation.second, result.getValue().second);
- ASSERT_FALSE(i == localOperations.cend());
- ASSERT_EQUALS(commonOperation.first, i->first);
- i++;
- ASSERT_TRUE(i == localOperations.cend());
- }
-
- TEST(RollBackLocalOperationsTest, RollbackOperationFailed) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(2, 1),
- commonOperation,
- });
- OplogInterfaceMock localOplog(localOperations);
- auto rollbackOperation = [&](const BSONObj& operation) {
- return Status(ErrorCodes::OperationFailed, "");
- };
- RollBackLocalOperations finder(localOplog, rollbackOperation);
- auto result = finder.onRemoteOperation(commonOperation.first);
- ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
- }
+BSONObj makeOp(int seconds, long long hash) {
+ return BSON("ts" << Timestamp(Seconds(seconds), 0) << "h" << hash);
+}
- TEST(RollBackLocalOperationsTest, EndOfLocalOplog) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(2, 1),
- });
- OplogInterfaceMock localOplog(localOperations);
- RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
- auto result = finder.onRemoteOperation(commonOperation.first);
- ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
- }
+int recordId = 0;
+OplogInterfaceMock::Operation makeOpAndRecordId(int seconds, long long hash) {
+ return std::make_pair(makeOp(seconds, hash), RecordId(++recordId));
+}
- TEST(RollBackLocalOperationsTest, SkipRemoteOperations) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(5, 1),
- makeOpAndRecordId(4, 1),
- makeOpAndRecordId(2, 1),
- commonOperation,
- });
- OplogInterfaceMock localOplog(localOperations);
- auto i = localOperations.cbegin();
- auto rollbackOperation = [&](const BSONObj& operation) {
- ASSERT_EQUALS(i->first, operation);
- i++;
- return Status::OK();
- };
- RollBackLocalOperations finder(localOplog, rollbackOperation);
- {
- auto result = finder.onRemoteOperation(makeOp(6,1));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
- ASSERT_TRUE(i == localOperations.cbegin());
+TEST(RollBackLocalOperationsTest, InvalidLocalOplogIterator) {
+ class InvalidOplogInterface : public OplogInterface {
+ public:
+ std::string toString() const override {
+ return "";
}
- {
- auto result = finder.onRemoteOperation(makeOp(3,1));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
- ASSERT_TRUE(std::distance(localOperations.cbegin(), i) == 2);
+ std::unique_ptr<Iterator> makeIterator() const override {
+ return std::unique_ptr<Iterator>();
}
- auto result = finder.onRemoteOperation(commonOperation.first);
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
- ASSERT_EQUALS(commonOperation.second, result.getValue().second);
- ASSERT_FALSE(i == localOperations.cend());
- ASSERT_EQUALS(commonOperation.first, i->first);
+ } invalidOplog;
+ ASSERT_THROWS_CODE(
+ RollBackLocalOperations(invalidOplog, [](const BSONObj&) { return Status::OK(); }),
+ UserException,
+ ErrorCodes::BadValue);
+}
+
+TEST(RollBackLocalOperationsTest, InvalidRollbackOperationFunction) {
+ ASSERT_THROWS_CODE(RollBackLocalOperations(OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
+ RollBackLocalOperations::RollbackOperationFn()),
+ UserException,
+ ErrorCodes::BadValue);
+}
+
+TEST(RollBackLocalOperationsTest, EmptyLocalOplog) {
+ OplogInterfaceMock localOplog(kEmptyMockOperations);
+ RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
+ auto result = finder.onRemoteOperation(makeOp(1, 0));
+ ASSERT_EQUALS(ErrorCodes::OplogStartMissing, result.getStatus().code());
+}
+
+TEST(RollBackLocalOperationsTest, RollbackPeriodTooLong) {
+ OplogInterfaceMock localOplog({makeOpAndRecordId(1802, 0)});
+ RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
+ auto result = finder.onRemoteOperation(makeOp(1, 0));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, result.getStatus().code());
+}
+
+TEST(RollBackLocalOperationsTest, RollbackMultipleLocalOperations) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(5, 1),
+ makeOpAndRecordId(4, 1),
+ makeOpAndRecordId(3, 1),
+ makeOpAndRecordId(2, 1),
+ commonOperation,
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ auto i = localOperations.cbegin();
+ auto rollbackOperation = [&](const BSONObj& operation) {
+ ASSERT_EQUALS(i->first, operation);
i++;
- ASSERT_TRUE(i == localOperations.cend());
- }
-
- TEST(RollBackLocalOperationsTest, SameTimestampDifferentHashess) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(1, 5),
- makeOpAndRecordId(1, 3),
- commonOperation,
- });
- OplogInterfaceMock localOplog(localOperations);
- auto i = localOperations.cbegin();
- auto rollbackOperation = [&](const BSONObj& operation) {
- ASSERT_EQUALS(i->first, operation);
- i++;
- return Status::OK();
- };
- RollBackLocalOperations finder(localOplog, rollbackOperation);
- {
- auto result = finder.onRemoteOperation(makeOp(1,4));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
- ASSERT_TRUE(std::distance(localOperations.cbegin(), i) == 1);
- }
- {
- auto result = finder.onRemoteOperation(makeOp(1,2));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
- ASSERT_TRUE(std::distance(localOperations.cbegin(), i) == 2);
- }
- auto result = finder.onRemoteOperation(commonOperation.first);
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
- ASSERT_EQUALS(commonOperation.second, result.getValue().second);
- ASSERT_FALSE(i == localOperations.cend());
- ASSERT_EQUALS(commonOperation.first, i->first);
+ return Status::OK();
+ };
+ RollBackLocalOperations finder(localOplog, rollbackOperation);
+ auto result = finder.onRemoteOperation(commonOperation.first);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
+ ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+ ASSERT_FALSE(i == localOperations.cend());
+ ASSERT_EQUALS(commonOperation.first, i->first);
+ i++;
+ ASSERT_TRUE(i == localOperations.cend());
+}
+
+TEST(RollBackLocalOperationsTest, RollbackOperationFailed) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(2, 1), commonOperation,
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ auto rollbackOperation =
+ [&](const BSONObj& operation) { return Status(ErrorCodes::OperationFailed, ""); };
+ RollBackLocalOperations finder(localOplog, rollbackOperation);
+ auto result = finder.onRemoteOperation(commonOperation.first);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
+}
+
+TEST(RollBackLocalOperationsTest, EndOfLocalOplog) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(2, 1),
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
+ auto result = finder.onRemoteOperation(commonOperation.first);
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+}
+
+TEST(RollBackLocalOperationsTest, SkipRemoteOperations) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(5, 1), makeOpAndRecordId(4, 1), makeOpAndRecordId(2, 1), commonOperation,
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ auto i = localOperations.cbegin();
+ auto rollbackOperation = [&](const BSONObj& operation) {
+ ASSERT_EQUALS(i->first, operation);
i++;
- ASSERT_TRUE(i == localOperations.cend());
+ return Status::OK();
+ };
+ RollBackLocalOperations finder(localOplog, rollbackOperation);
+ {
+ auto result = finder.onRemoteOperation(makeOp(6, 1));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+ ASSERT_TRUE(i == localOperations.cbegin());
}
-
- TEST(RollBackLocalOperationsTest, SameTimestampDifferentHashesRollbackOperationFailed) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(1, 3),
- commonOperation,
- });
- OplogInterfaceMock localOplog(localOperations);
- auto rollbackOperation = [&](const BSONObj& operation) {
- return Status(ErrorCodes::OperationFailed, "");
- };
- RollBackLocalOperations finder(localOplog, rollbackOperation);
- auto result = finder.onRemoteOperation(makeOp(1,2));
- ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
- }
-
- TEST(RollBackLocalOperationsTest, SameTimestampDifferentHashesEndOfLocalOplog) {
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(1, 3),
- });
- OplogInterfaceMock localOplog(localOperations);
- RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
- auto result = finder.onRemoteOperation(makeOp(1,2));
- ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+ {
+ auto result = finder.onRemoteOperation(makeOp(3, 1));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+ ASSERT_TRUE(std::distance(localOperations.cbegin(), i) == 2);
}
-
- TEST(SyncRollBackLocalOperationsTest, OplogStartMissing) {
- ASSERT_EQUALS(
- ErrorCodes::OplogStartMissing,
- syncRollBackLocalOperations(
- OplogInterfaceMock(kEmptyMockOperations),
- OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
- [](const BSONObj&) { return Status::OK(); }).getStatus().code());
- }
-
- TEST(SyncRollBackLocalOperationsTest, RemoteOplogMissing) {
- ASSERT_EQUALS(
- ErrorCodes::InvalidSyncSource,
- syncRollBackLocalOperations(
- OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
- OplogInterfaceMock(kEmptyMockOperations),
- [](const BSONObj&) { return Status::OK(); }).getStatus().code());
- }
-
- TEST(SyncRollBackLocalOperationsTest, RollbackPeriodTooLong) {
- ASSERT_EQUALS(
- ErrorCodes::ExceededTimeLimit,
- syncRollBackLocalOperations(
- OplogInterfaceMock({makeOpAndRecordId(1802, 0)}),
- OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
- [](const BSONObj&) { return Status::OK(); }).getStatus().code());
- }
-
- TEST(SyncRollBackLocalOperationsTest, RollbackTwoOperations) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- OplogInterfaceMock::Operations localOperations({
- makeOpAndRecordId(3, 1),
- makeOpAndRecordId(2, 1),
- commonOperation,
- });
- auto i = localOperations.cbegin();
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock(localOperations),
- OplogInterfaceMock({commonOperation}),
- [&](const BSONObj& operation) {
- ASSERT_EQUALS(i->first, operation);
- i++;
- return Status::OK();
- });
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
- ASSERT_EQUALS(commonOperation.second, result.getValue().second);
- ASSERT_FALSE(i == localOperations.cend());
- ASSERT_EQUALS(commonOperation.first, i->first);
+ auto result = finder.onRemoteOperation(commonOperation.first);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
+ ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+ ASSERT_FALSE(i == localOperations.cend());
+ ASSERT_EQUALS(commonOperation.first, i->first);
+ i++;
+ ASSERT_TRUE(i == localOperations.cend());
+}
+
+TEST(RollBackLocalOperationsTest, SameTimestampDifferentHashes) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(1, 5), makeOpAndRecordId(1, 3), commonOperation,
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ auto i = localOperations.cbegin();
+ auto rollbackOperation = [&](const BSONObj& operation) {
+ ASSERT_EQUALS(i->first, operation);
i++;
- ASSERT_TRUE(i == localOperations.cend());
- }
-
- TEST(SyncRollBackLocalOperationsTest, SkipOneRemoteOperation) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto remoteOperation = makeOpAndRecordId(2, 1);
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({commonOperation}),
- OplogInterfaceMock({remoteOperation, commonOperation}),
- [&](const BSONObj& operation) {
- FAIL("should not reach here");
- return Status::OK();
- });
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
- ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+ return Status::OK();
+ };
+ RollBackLocalOperations finder(localOplog, rollbackOperation);
+ {
+ auto result = finder.onRemoteOperation(makeOp(1, 4));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+ ASSERT_TRUE(std::distance(localOperations.cbegin(), i) == 1);
}
-
- TEST(SyncRollBackLocalOperationsTest, SameTimestampDifferentHashes) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto localOperation = makeOpAndRecordId(1, 2);
- auto remoteOperation = makeOpAndRecordId(1, 3);
- bool called = false;
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation, commonOperation}),
- OplogInterfaceMock({remoteOperation, commonOperation}),
- [&](const BSONObj& operation) {
- ASSERT_EQUALS(localOperation.first, operation);
- called = true;
- return Status::OK();
- });
- ASSERT_OK(result.getStatus());
- ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
- ASSERT_EQUALS(commonOperation.second, result.getValue().second);
- ASSERT_TRUE(called);
- }
-
- TEST(SyncRollBackLocalOperationsTest, SameTimestampEndOfLocalOplog) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto localOperation = makeOpAndRecordId(1, 2);
- auto remoteOperation = makeOpAndRecordId(1, 3);
- bool called = false;
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation}),
- OplogInterfaceMock({remoteOperation, commonOperation}),
- [&](const BSONObj& operation) {
- ASSERT_EQUALS(localOperation.first, operation);
- called = true;
- return Status::OK();
- });
- ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
- ASSERT_STRING_CONTAINS(result.getStatus().reason(),
- "RS101 reached beginning of local oplog [1]");
- ASSERT_TRUE(called);
+ {
+ auto result = finder.onRemoteOperation(makeOp(1, 2));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.getStatus().code());
+ ASSERT_TRUE(std::distance(localOperations.cbegin(), i) == 2);
}
-
- TEST(SyncRollBackLocalOperationsTest, SameTimestampRollbackOperationFailed) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto localOperation = makeOpAndRecordId(1, 2);
- auto remoteOperation = makeOpAndRecordId(1, 3);
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation, commonOperation}),
- OplogInterfaceMock({remoteOperation, commonOperation}),
- [&](const BSONObj& operation) {
- return Status(ErrorCodes::OperationFailed, "");
- });
- ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
- }
-
- TEST(SyncRollBackLocalOperationsTest, SameTimestampEndOfRemoteOplog) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto localOperation = makeOpAndRecordId(1, 2);
- auto remoteOperation = makeOpAndRecordId(1, 3);
- bool called = false;
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation, commonOperation}),
- OplogInterfaceMock({remoteOperation}),
- [&](const BSONObj& operation) {
- ASSERT_EQUALS(localOperation.first, operation);
- called = true;
- return Status::OK();
- });
- ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
- ASSERT_STRING_CONTAINS(result.getStatus().reason(),
- "RS100 reached beginning of remote oplog");
- ASSERT_TRUE(called);
- }
-
- TEST(SyncRollBackLocalOperationsTest, DifferentTimestampEndOfLocalOplog) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto localOperation = makeOpAndRecordId(3, 1);
- auto remoteOperation = makeOpAndRecordId(2, 1);
- bool called = false;
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation}),
- OplogInterfaceMock({remoteOperation, commonOperation}),
- [&](const BSONObj& operation) {
- ASSERT_EQUALS(localOperation.first, operation);
- called = true;
- return Status::OK();
- });
- ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
- ASSERT_STRING_CONTAINS(result.getStatus().reason(),
- "RS101 reached beginning of local oplog [2]");
- ASSERT_TRUE(called);
- }
-
- TEST(SyncRollBackLocalOperationsTest, DifferentTimestampRollbackOperationFailed) {
- auto localOperation = makeOpAndRecordId(3, 1);
- auto remoteOperation = makeOpAndRecordId(2, 1);
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation}),
- OplogInterfaceMock({remoteOperation}),
- [&](const BSONObj& operation) {
- return Status(ErrorCodes::OperationFailed, "");
- });
- ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
- }
-
- TEST(SyncRollBackLocalOperationsTest, DifferentTimestampEndOfRemoteOplog) {
- auto commonOperation = makeOpAndRecordId(1, 1);
- auto localOperation = makeOpAndRecordId(2, 1);
- auto remoteOperation = makeOpAndRecordId(3, 1);
- auto result =
- syncRollBackLocalOperations(
- OplogInterfaceMock({localOperation, commonOperation}),
- OplogInterfaceMock({remoteOperation}),
- [&](const BSONObj& operation) {
- FAIL("Should not reach here");
- return Status::OK();
- });
- ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
- ASSERT_STRING_CONTAINS(result.getStatus().reason(),
- "RS100 reached beginning of remote oplog [1]");
- }
-
-} // namespace
+ auto result = finder.onRemoteOperation(commonOperation.first);
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
+ ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+ ASSERT_FALSE(i == localOperations.cend());
+ ASSERT_EQUALS(commonOperation.first, i->first);
+ i++;
+ ASSERT_TRUE(i == localOperations.cend());
+}
+
+TEST(RollBackLocalOperationsTest, SameTimestampDifferentHashesRollbackOperationFailed) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(1, 3), commonOperation,
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ auto rollbackOperation =
+ [&](const BSONObj& operation) { return Status(ErrorCodes::OperationFailed, ""); };
+ RollBackLocalOperations finder(localOplog, rollbackOperation);
+ auto result = finder.onRemoteOperation(makeOp(1, 2));
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
+}
+
+TEST(RollBackLocalOperationsTest, SameTimestampDifferentHashesEndOfLocalOplog) {
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(1, 3),
+ });
+ OplogInterfaceMock localOplog(localOperations);
+ RollBackLocalOperations finder(localOplog, [](const BSONObj&) { return Status::OK(); });
+ auto result = finder.onRemoteOperation(makeOp(1, 2));
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+}
+
+TEST(SyncRollBackLocalOperationsTest, OplogStartMissing) {
+ ASSERT_EQUALS(ErrorCodes::OplogStartMissing,
+ syncRollBackLocalOperations(OplogInterfaceMock(kEmptyMockOperations),
+ OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
+ [](const BSONObj&) { return Status::OK(); })
+ .getStatus()
+ .code());
+}
+
+TEST(SyncRollBackLocalOperationsTest, RemoteOplogMissing) {
+ ASSERT_EQUALS(ErrorCodes::InvalidSyncSource,
+ syncRollBackLocalOperations(OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
+ OplogInterfaceMock(kEmptyMockOperations),
+ [](const BSONObj&) { return Status::OK(); })
+ .getStatus()
+ .code());
+}
+
+TEST(SyncRollBackLocalOperationsTest, RollbackPeriodTooLong) {
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ syncRollBackLocalOperations(OplogInterfaceMock({makeOpAndRecordId(1802, 0)}),
+ OplogInterfaceMock({makeOpAndRecordId(1, 0)}),
+ [](const BSONObj&) { return Status::OK(); })
+ .getStatus()
+ .code());
+}
+
+TEST(SyncRollBackLocalOperationsTest, RollbackTwoOperations) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ OplogInterfaceMock::Operations localOperations({
+ makeOpAndRecordId(3, 1), makeOpAndRecordId(2, 1), commonOperation,
+ });
+ auto i = localOperations.cbegin();
+ auto result = syncRollBackLocalOperations(OplogInterfaceMock(localOperations),
+ OplogInterfaceMock({commonOperation}),
+ [&](const BSONObj& operation) {
+ ASSERT_EQUALS(i->first, operation);
+ i++;
+ return Status::OK();
+ });
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
+ ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+ ASSERT_FALSE(i == localOperations.cend());
+ ASSERT_EQUALS(commonOperation.first, i->first);
+ i++;
+ ASSERT_TRUE(i == localOperations.cend());
+}
+
+TEST(SyncRollBackLocalOperationsTest, SkipOneRemoteOperation) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto remoteOperation = makeOpAndRecordId(2, 1);
+ auto result =
+ syncRollBackLocalOperations(OplogInterfaceMock({commonOperation}),
+ OplogInterfaceMock({remoteOperation, commonOperation}),
+ [&](const BSONObj& operation) {
+ FAIL("should not reach here");
+ return Status::OK();
+ });
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
+ ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+}
+
+TEST(SyncRollBackLocalOperationsTest, SameTimestampDifferentHashes) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto localOperation = makeOpAndRecordId(1, 2);
+ auto remoteOperation = makeOpAndRecordId(1, 3);
+ bool called = false;
+ auto result =
+ syncRollBackLocalOperations(OplogInterfaceMock({localOperation, commonOperation}),
+ OplogInterfaceMock({remoteOperation, commonOperation}),
+ [&](const BSONObj& operation) {
+ ASSERT_EQUALS(localOperation.first, operation);
+ called = true;
+ return Status::OK();
+ });
+ ASSERT_OK(result.getStatus());
+ ASSERT_EQUALS(commonOperation.first["ts"].timestamp(), result.getValue().first);
+ ASSERT_EQUALS(commonOperation.second, result.getValue().second);
+ ASSERT_TRUE(called);
+}
+
+TEST(SyncRollBackLocalOperationsTest, SameTimestampEndOfLocalOplog) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto localOperation = makeOpAndRecordId(1, 2);
+ auto remoteOperation = makeOpAndRecordId(1, 3);
+ bool called = false;
+ auto result =
+ syncRollBackLocalOperations(OplogInterfaceMock({localOperation}),
+ OplogInterfaceMock({remoteOperation, commonOperation}),
+ [&](const BSONObj& operation) {
+ ASSERT_EQUALS(localOperation.first, operation);
+ called = true;
+ return Status::OK();
+ });
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+ ASSERT_STRING_CONTAINS(result.getStatus().reason(),
+ "RS101 reached beginning of local oplog [1]");
+ ASSERT_TRUE(called);
+}
+
+TEST(SyncRollBackLocalOperationsTest, SameTimestampRollbackOperationFailed) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto localOperation = makeOpAndRecordId(1, 2);
+ auto remoteOperation = makeOpAndRecordId(1, 3);
+ auto result = syncRollBackLocalOperations(
+ OplogInterfaceMock({localOperation, commonOperation}),
+ OplogInterfaceMock({remoteOperation, commonOperation}),
+ [&](const BSONObj& operation) { return Status(ErrorCodes::OperationFailed, ""); });
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
+}
+
+TEST(SyncRollBackLocalOperationsTest, SameTimestampEndOfRemoteOplog) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto localOperation = makeOpAndRecordId(1, 2);
+ auto remoteOperation = makeOpAndRecordId(1, 3);
+ bool called = false;
+ auto result = syncRollBackLocalOperations(OplogInterfaceMock({localOperation, commonOperation}),
+ OplogInterfaceMock({remoteOperation}),
+ [&](const BSONObj& operation) {
+ ASSERT_EQUALS(localOperation.first, operation);
+ called = true;
+ return Status::OK();
+ });
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+ ASSERT_STRING_CONTAINS(result.getStatus().reason(), "RS100 reached beginning of remote oplog");
+ ASSERT_TRUE(called);
+}
+
+TEST(SyncRollBackLocalOperationsTest, DifferentTimestampEndOfLocalOplog) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto localOperation = makeOpAndRecordId(3, 1);
+ auto remoteOperation = makeOpAndRecordId(2, 1);
+ bool called = false;
+ auto result =
+ syncRollBackLocalOperations(OplogInterfaceMock({localOperation}),
+ OplogInterfaceMock({remoteOperation, commonOperation}),
+ [&](const BSONObj& operation) {
+ ASSERT_EQUALS(localOperation.first, operation);
+ called = true;
+ return Status::OK();
+ });
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+ ASSERT_STRING_CONTAINS(result.getStatus().reason(),
+ "RS101 reached beginning of local oplog [2]");
+ ASSERT_TRUE(called);
+}
+
+TEST(SyncRollBackLocalOperationsTest, DifferentTimestampRollbackOperationFailed) {
+ auto localOperation = makeOpAndRecordId(3, 1);
+ auto remoteOperation = makeOpAndRecordId(2, 1);
+ auto result = syncRollBackLocalOperations(
+ OplogInterfaceMock({localOperation}),
+ OplogInterfaceMock({remoteOperation}),
+ [&](const BSONObj& operation) { return Status(ErrorCodes::OperationFailed, ""); });
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, result.getStatus().code());
+}
+
+TEST(SyncRollBackLocalOperationsTest, DifferentTimestampEndOfRemoteOplog) {
+ auto commonOperation = makeOpAndRecordId(1, 1);
+ auto localOperation = makeOpAndRecordId(2, 1);
+ auto remoteOperation = makeOpAndRecordId(3, 1);
+ auto result = syncRollBackLocalOperations(OplogInterfaceMock({localOperation, commonOperation}),
+ OplogInterfaceMock({remoteOperation}),
+ [&](const BSONObj& operation) {
+ FAIL("Should not reach here");
+ return Status::OK();
+ });
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, result.getStatus().code());
+ ASSERT_STRING_CONTAINS(result.getStatus().reason(),
+ "RS100 reached beginning of remote oplog [1]");
+}
+
+} // namespace
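
The tests above pin down the contract of syncRollBackLocalOperations: walk both oplogs newest-first, hand every local-only entry to the rollback callback, and stop at the first entry whose timestamp and hash match on both sides. A minimal standalone sketch of that scan, using plain std types in place of the OplogInterface/BSON machinery (the names here are illustrative, not the real API):

    #include <cstdint>
    #include <functional>
    #include <stdexcept>
    #include <utility>
    #include <vector>

    // Simplified oplog entry: (timestamp, hash). Vectors are ordered newest-first,
    // matching the OplogInterfaceMock operation lists in the tests above.
    using Op = std::pair<std::uint64_t, std::int64_t>;

    // Returns the common point, invoking rollbackOp on each local entry discarded
    // along the way. Throws when either oplog runs out first, mirroring the
    // RS100/RS101 NoMatchingDocument failures asserted above.
    Op findCommonPoint(const std::vector<Op>& local,
                       const std::vector<Op>& remote,
                       const std::function<void(const Op&)>& rollbackOp) {
        auto l = local.begin();
        auto r = remote.begin();
        while (l != local.end() && r != remote.end()) {
            if (l->first == r->first && l->second == r->second) {
                return *l;       // identical ts and hash: common point found
            } else if (l->first > r->first) {
                rollbackOp(*l);  // local op is newer than the remote cursor: roll it back
                ++l;
            } else if (l->first < r->first) {
                ++r;             // remote op is newer: skip it (cf. SkipOneRemoteOperation)
            } else {
                rollbackOp(*l);  // same ts, different hash: discard the local op
                ++l;
                ++r;
            }
        }
        throw std::runtime_error(l == local.end() ? "reached beginning of local oplog"
                                                  : "reached beginning of remote oplog");
    }
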
diff --git a/src/mongo/db/repl/rollback_source.h b/src/mongo/db/repl/rollback_source.h
index 304a81717dc..3e8d6f55578 100644
--- a/src/mongo/db/repl/rollback_source.h
+++ b/src/mongo/db/repl/rollback_source.h
@@ -34,57 +34,56 @@
namespace mongo {
- class NamespaceString;
- class OperationContext;
+class NamespaceString;
+class OperationContext;
namespace repl {
- class OplogInterface;
+class OplogInterface;
+
+/**
+ * Interface for rollback-related operations on the sync source.
+ */
+class RollbackSource {
+ MONGO_DISALLOW_COPYING(RollbackSource);
+
+public:
+ RollbackSource() = default;
+
+ virtual ~RollbackSource() = default;
+
+ /**
+ * Returns remote oplog interface.
+ * Read oplog entries with OplogInterface::makeIterator().
+ */
+ virtual const OplogInterface& getOplog() const = 0;
+
+ /**
+ * Returns rollback ID.
+ */
+ virtual int getRollbackId() const = 0;
/**
- * Interface for rollback-related operations on the sync source.
+ * Returns last operation in oplog.
*/
- class RollbackSource {
- MONGO_DISALLOW_COPYING(RollbackSource);
- public:
-
- RollbackSource() = default;
-
- virtual ~RollbackSource() = default;
-
- /**
- * Returns remote oplog interface.
- * Read oplog entries with OplogInterface::makeIterator().
- */
- virtual const OplogInterface& getOplog() const = 0;
-
- /**
- * Returns rollback ID.
- */
- virtual int getRollbackId() const = 0;
-
- /**
- * Returns last operation in oplog.
- */
- virtual BSONObj getLastOperation() const = 0;
-
- /**
- * Fetch a single document from the sync source.
- */
- virtual BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const = 0;
-
- /**
- * Clones a single collection from the sync source.
- */
- virtual void copyCollectionFromRemote(OperationContext* txn,
- const NamespaceString& nss) const = 0;
-
- /**
- * Returns collection info.
- */
- virtual StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const = 0;
-
- };
-
-} // namespace repl
-} // namespace mongo
+ virtual BSONObj getLastOperation() const = 0;
+
+ /**
+ * Fetch a single document from the sync source.
+ */
+ virtual BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const = 0;
+
+ /**
+ * Clones a single collection from the sync source.
+ */
+ virtual void copyCollectionFromRemote(OperationContext* txn,
+ const NamespaceString& nss) const = 0;
+
+ /**
+ * Returns collection info.
+ */
+ virtual StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const = 0;
+};
+
+} // namespace repl
+} // namespace mongo
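
Because RollbackSource is a pure interface with a defaulted constructor and virtual destructor, rollback logic can be exercised against an in-process test double that never opens a connection. A sketch of such a stub, reusing OplogInterfaceMock from the tests above (StubRollbackSource is a hypothetical name, not a class in the tree):

    #include "mongo/db/repl/oplog_interface_mock.h"
    #include "mongo/db/repl/rollback_source.h"

    namespace mongo {
    namespace repl {

    // Hypothetical test double: serves canned oplog entries and inert answers
    // instead of querying a live sync source.
    class StubRollbackSource : public RollbackSource {
    public:
        explicit StubRollbackSource(OplogInterfaceMock::Operations ops) : _oplog(ops) {}

        const OplogInterface& getOplog() const override {
            return _oplog;
        }
        int getRollbackId() const override {
            return 0;  // pretend the remote rollback sequence number never moves
        }
        BSONObj getLastOperation() const override {
            return BSONObj();
        }
        BSONObj findOne(const NamespaceString&, const BSONObj&) const override {
            return BSONObj();
        }
        void copyCollectionFromRemote(OperationContext*, const NamespaceString&) const override {}
        StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const override {
            return StatusWith<BSONObj>(ErrorCodes::NoSuchKey,
                                       "no collection info found: " + nss.ns());
        }

    private:
        OplogInterfaceMock _oplog;
    };

    }  // namespace repl
    }  // namespace mongo
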
diff --git a/src/mongo/db/repl/rollback_source_impl.cpp b/src/mongo/db/repl/rollback_source_impl.cpp
index bdda5e331d9..7dbfd2e9cd7 100644
--- a/src/mongo/db/repl/rollback_source_impl.cpp
+++ b/src/mongo/db/repl/rollback_source_impl.cpp
@@ -41,58 +41,56 @@
namespace mongo {
namespace repl {
- RollbackSourceImpl::RollbackSourceImpl(DBClientConnection* conn,
- const std::string& collectionName)
- : _conn(conn),
- _collectionName(collectionName),
- _oplog(conn, collectionName) { }
+RollbackSourceImpl::RollbackSourceImpl(DBClientConnection* conn, const std::string& collectionName)
+ : _conn(conn), _collectionName(collectionName), _oplog(conn, collectionName) {}
- const OplogInterface& RollbackSourceImpl::getOplog() const {
- return _oplog;
- }
+const OplogInterface& RollbackSourceImpl::getOplog() const {
+ return _oplog;
+}
- int RollbackSourceImpl::getRollbackId() const {
- bo info;
- _conn->simpleCommand("admin", &info, "replSetGetRBID");
- return info["rbid"].numberInt();
- }
+int RollbackSourceImpl::getRollbackId() const {
+ bo info;
+ _conn->simpleCommand("admin", &info, "replSetGetRBID");
+ return info["rbid"].numberInt();
+}
- BSONObj RollbackSourceImpl::getLastOperation() const {
- const Query query = Query().sort(BSON("$natural" << -1));
- return _conn->findOne(_collectionName, query, 0, QueryOption_SlaveOk);
- }
+BSONObj RollbackSourceImpl::getLastOperation() const {
+ const Query query = Query().sort(BSON("$natural" << -1));
+ return _conn->findOne(_collectionName, query, 0, QueryOption_SlaveOk);
+}
- BSONObj RollbackSourceImpl::findOne(const NamespaceString& nss, const BSONObj& filter) const {
- return _conn->findOne(nss.toString(), filter, NULL, QueryOption_SlaveOk).getOwned();
- }
+BSONObj RollbackSourceImpl::findOne(const NamespaceString& nss, const BSONObj& filter) const {
+ return _conn->findOne(nss.toString(), filter, NULL, QueryOption_SlaveOk).getOwned();
+}
- void RollbackSourceImpl::copyCollectionFromRemote(OperationContext* txn,
- const NamespaceString& nss) const {
- std::string errmsg;
- std::unique_ptr<DBClientConnection> tmpConn(new DBClientConnection());
- uassert(15908,
- errmsg,
- tmpConn->connect(_conn->getServerHostAndPort(), errmsg) &&
+void RollbackSourceImpl::copyCollectionFromRemote(OperationContext* txn,
+ const NamespaceString& nss) const {
+ std::string errmsg;
+ std::unique_ptr<DBClientConnection> tmpConn(new DBClientConnection());
+ uassert(15908,
+ errmsg,
+ tmpConn->connect(_conn->getServerHostAndPort(), errmsg) &&
replAuthenticate(tmpConn.get()));
- // cloner owns _conn in unique_ptr
- Cloner cloner;
- cloner.setConnection(tmpConn.release());
- uassert(15909, str::stream() <<
- "replSet rollback error resyncing collection " << nss.ns() << ' ' << errmsg,
- cloner.copyCollection(txn, nss.ns(), BSONObj(), errmsg, true, false, true));
- }
+ // cloner owns _conn in unique_ptr
+ Cloner cloner;
+ cloner.setConnection(tmpConn.release());
+ uassert(15909,
+ str::stream() << "replSet rollback error resyncing collection " << nss.ns() << ' '
+ << errmsg,
+ cloner.copyCollection(txn, nss.ns(), BSONObj(), errmsg, true, false, true));
+}
- StatusWith<BSONObj> RollbackSourceImpl::getCollectionInfo(const NamespaceString& nss) const {
- std::list<BSONObj> info =
- _conn->getCollectionInfos(nss.db().toString(), BSON("name" << nss.coll()));
- if (info.empty()) {
- return StatusWith<BSONObj>(ErrorCodes::NoSuchKey, str::stream() <<
- "no collection info found: " << nss.ns());
- }
- invariant(info.size() == 1U);
- return info.front();
+StatusWith<BSONObj> RollbackSourceImpl::getCollectionInfo(const NamespaceString& nss) const {
+ std::list<BSONObj> info =
+ _conn->getCollectionInfos(nss.db().toString(), BSON("name" << nss.coll()));
+ if (info.empty()) {
+ return StatusWith<BSONObj>(ErrorCodes::NoSuchKey,
+ str::stream() << "no collection info found: " << nss.ns());
}
+ invariant(info.size() == 1U);
+ return info.front();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
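
getRollbackId is the hook that lets a caller notice a rollback happening on the sync source while its own rollback is in flight: sample the rbid before the fetch phase and compare it afterwards. A hedged sketch of that guard (the function name and wiring are illustrative; the real check lives at the call sites in rs_rollback.cpp):

    #include <functional>

    #include "mongo/base/status.h"
    #include "mongo/db/repl/rollback_source.h"

    namespace mongo {
    namespace repl {

    // Illustrative guard: fail if the sync source itself rolled back (its
    // replSetGetRBID counter moved) while we were fetching documents from it.
    Status fetchWithRbidGuard(const RollbackSource& rollbackSource,
                              const std::function<Status()>& doFetchPhase) {
        const int rbidBefore = rollbackSource.getRollbackId();
        Status status = doFetchPhase();
        if (!status.isOK()) {
            return status;
        }
        if (rollbackSource.getRollbackId() != rbidBefore) {
            return Status(ErrorCodes::UnrecoverableRollbackError,
                          "rbid on sync source changed during rollback; pick a fresh sync source");
        }
        return Status::OK();
    }

    }  // namespace repl
    }  // namespace mongo
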
diff --git a/src/mongo/db/repl/rollback_source_impl.h b/src/mongo/db/repl/rollback_source_impl.h
index 3be6ef4339b..8adad2fd0ac 100644
--- a/src/mongo/db/repl/rollback_source_impl.h
+++ b/src/mongo/db/repl/rollback_source_impl.h
@@ -35,40 +35,36 @@
namespace mongo {
- class DBClientConnection;
+class DBClientConnection;
namespace repl {
- /**
- * Rollback source implementation using a connection.
- */
-
- class RollbackSourceImpl : public RollbackSource {
- public:
-
- explicit RollbackSourceImpl(DBClientConnection* conn, const std::string& collectionName);
-
- const OplogInterface& getOplog() const override;
+/**
+ * Rollback source implementation using a connection.
+ */
- int getRollbackId() const override;
+class RollbackSourceImpl : public RollbackSource {
+public:
+ explicit RollbackSourceImpl(DBClientConnection* conn, const std::string& collectionName);
- BSONObj getLastOperation() const override;
+ const OplogInterface& getOplog() const override;
- BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const override;
+ int getRollbackId() const override;
- void copyCollectionFromRemote(OperationContext* txn,
- const NamespaceString& nss) const override;
+ BSONObj getLastOperation() const override;
- StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const override;
+ BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const override;
- private:
+ void copyCollectionFromRemote(OperationContext* txn, const NamespaceString& nss) const override;
- DBClientConnection* _conn;
- std::string _collectionName;
- OplogInterfaceRemote _oplog;
+ StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const override;
- };
+private:
+ DBClientConnection* _conn;
+ std::string _collectionName;
+ OplogInterfaceRemote _oplog;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
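
Wiring the implementation up is small: RollbackSourceImpl borrows a connected DBClientConnection (it does not take ownership) plus the oplog namespace. A construction fragment under those assumptions (host name and error handling are illustrative, and the usual client headers are assumed):

    DBClientConnection conn;
    std::string errmsg;
    if (!conn.connect(HostAndPort("sync-source.example.net", 27017), errmsg)) {
        // connection failed; errmsg carries the details
    }
    RollbackSourceImpl rollbackSource(&conn, "local.oplog.rs");
    BSONObj lastOp = rollbackSource.getLastOperation();  // newest remote entry, via {$natural: -1}
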
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp
index fc1a10de72d..af233900267 100644
--- a/src/mongo/db/repl/rs_initialsync.cpp
+++ b/src/mongo/db/repl/rs_initialsync.cpp
@@ -63,478 +63,467 @@ namespace mongo {
namespace repl {
namespace {
- using std::list;
- using std::string;
-
- // Failpoint which fails initial sync and leaves on oplog entry in the buffer.
- MONGO_FP_DECLARE(failInitSyncWithBufferedEntriesLeft);
-
- /**
- * Truncates the oplog (removes any documents) and resets internal variables that were
- * originally initialized or affected by using values from the oplog at startup time. These
- * include the last applied optime, the last fetched optime, and the sync source blacklist.
- * Also resets the bgsync thread so that it reconnects its sync source after the oplog has been
- * truncated.
- */
- void truncateAndResetOplog(OperationContext* txn,
- ReplicationCoordinator* replCoord,
- BackgroundSync* bgsync) {
- // Clear minvalid
- setMinValid(txn, OpTime());
-
- AutoGetDb autoDb(txn, "local", MODE_X);
- massert(28585, "no local database found", autoDb.getDb());
- invariant(txn->lockState()->isCollectionLockedForMode(rsOplogName, MODE_X));
- // Note: the following order is important.
- // The bgsync thread uses an empty optime as a sentinel to know to wait
- // for initial sync; thus, we must
- // ensure the lastAppliedOptime is empty before restarting the bgsync thread
- // via stop().
- // We must clear the sync source blacklist after calling stop()
- // because the bgsync thread, while running, may update the blacklist.
- replCoord->resetMyLastOptime();
- bgsync->stop();
- bgsync->setLastAppliedHash(0);
- bgsync->clearBuffer();
-
- replCoord->clearSyncSourceBlacklist();
-
- // Truncate the oplog in case there was a prior initial sync that failed.
- Collection* collection = autoDb.getDb()->getCollection(rsOplogName);
- fassert(28565, collection);
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- WriteUnitOfWork wunit(txn);
- Status status = collection->truncate(txn);
- fassert(28564, status);
- wunit.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "truncate", collection->ns().ns());
+using std::list;
+using std::string;
+
+// Failpoint which fails initial sync and leaves an oplog entry in the buffer.
+MONGO_FP_DECLARE(failInitSyncWithBufferedEntriesLeft);
+
+/**
+ * Truncates the oplog (removes any documents) and resets internal variables that were
+ * originally initialized or affected by using values from the oplog at startup time. These
+ * include the last applied optime, the last fetched optime, and the sync source blacklist.
+ * Also resets the bgsync thread so that it reconnects its sync source after the oplog has been
+ * truncated.
+ */
+void truncateAndResetOplog(OperationContext* txn,
+ ReplicationCoordinator* replCoord,
+ BackgroundSync* bgsync) {
+ // Clear minvalid
+ setMinValid(txn, OpTime());
+
+ AutoGetDb autoDb(txn, "local", MODE_X);
+ massert(28585, "no local database found", autoDb.getDb());
+ invariant(txn->lockState()->isCollectionLockedForMode(rsOplogName, MODE_X));
+ // Note: the following order is important.
+ // The bgsync thread uses an empty optime as a sentinel to know to wait
+ // for initial sync; thus, we must
+ // ensure the lastAppliedOptime is empty before restarting the bgsync thread
+ // via stop().
+ // We must clear the sync source blacklist after calling stop()
+ // because the bgsync thread, while running, may update the blacklist.
+ replCoord->resetMyLastOptime();
+ bgsync->stop();
+ bgsync->setLastAppliedHash(0);
+ bgsync->clearBuffer();
+
+ replCoord->clearSyncSourceBlacklist();
+
+ // Truncate the oplog in case there was a prior initial sync that failed.
+ Collection* collection = autoDb.getDb()->getCollection(rsOplogName);
+ fassert(28565, collection);
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork wunit(txn);
+ Status status = collection->truncate(txn);
+ fassert(28564, status);
+ wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "truncate", collection->ns().ns());
+}
- /**
- * Confirms that the "admin" database contains a supported version of the auth
- * data schema. Terminates the process if the "admin" contains clearly incompatible
- * auth data.
- */
- void checkAdminDatabasePostClone(OperationContext* txn, Database* adminDb) {
- // Assumes txn holds MODE_X or MODE_S lock on "admin" database.
- if (!adminDb) {
+/**
+ * Confirms that the "admin" database contains a supported version of the auth
+ * data schema. Terminates the process if the "admin" contains clearly incompatible
+ * auth data.
+ */
+void checkAdminDatabasePostClone(OperationContext* txn, Database* adminDb) {
+ // Assumes txn holds MODE_X or MODE_S lock on "admin" database.
+ if (!adminDb) {
+ return;
+ }
+ Collection* const usersCollection =
+ adminDb->getCollection(AuthorizationManager::usersCollectionNamespace);
+ const bool hasUsers =
+ usersCollection && !Helpers::findOne(txn, usersCollection, BSONObj(), false).isNull();
+ Collection* const adminVersionCollection =
+ adminDb->getCollection(AuthorizationManager::versionCollectionNamespace);
+ BSONObj authSchemaVersionDocument;
+ if (!adminVersionCollection ||
+ !Helpers::findOne(txn,
+ adminVersionCollection,
+ AuthorizationManager::versionDocumentQuery,
+ authSchemaVersionDocument)) {
+ if (!hasUsers) {
+ // It's OK to have no auth version document if there are no user documents.
return;
}
- Collection* const usersCollection =
- adminDb->getCollection(AuthorizationManager::usersCollectionNamespace);
- const bool hasUsers = usersCollection &&
- !Helpers::findOne(txn, usersCollection, BSONObj(), false).isNull();
- Collection* const adminVersionCollection =
- adminDb->getCollection(AuthorizationManager::versionCollectionNamespace);
- BSONObj authSchemaVersionDocument;
- if (!adminVersionCollection || !Helpers::findOne(txn,
- adminVersionCollection,
- AuthorizationManager::versionDocumentQuery,
- authSchemaVersionDocument)) {
- if (!hasUsers) {
- // It's OK to have no auth version document if there are no user documents.
- return;
- }
- severe() << "During initial sync, found documents in " <<
- AuthorizationManager::usersCollectionNamespace <<
- " but could not find an auth schema version document in " <<
- AuthorizationManager::versionCollectionNamespace;
- severe() << "This indicates that the primary of this replica set was not successfully "
- "upgraded to schema version " << AuthorizationManager::schemaVersion26Final <<
- ", which is the minimum supported schema version in this version of MongoDB";
- fassertFailedNoTrace(28620);
- }
- long long foundSchemaVersion;
- Status status = bsonExtractIntegerField(authSchemaVersionDocument,
- AuthorizationManager::schemaVersionFieldName,
- &foundSchemaVersion);
+ severe() << "During initial sync, found documents in "
+ << AuthorizationManager::usersCollectionNamespace
+ << " but could not find an auth schema version document in "
+ << AuthorizationManager::versionCollectionNamespace;
+ severe() << "This indicates that the primary of this replica set was not successfully "
+ "upgraded to schema version " << AuthorizationManager::schemaVersion26Final
+ << ", which is the minimum supported schema version in this version of MongoDB";
+ fassertFailedNoTrace(28620);
+ }
+ long long foundSchemaVersion;
+ Status status = bsonExtractIntegerField(authSchemaVersionDocument,
+ AuthorizationManager::schemaVersionFieldName,
+ &foundSchemaVersion);
+ if (!status.isOK()) {
+ severe() << "During initial sync, found malformed auth schema version document: " << status
+ << "; document: " << authSchemaVersionDocument;
+ fassertFailedNoTrace(28618);
+ }
+ if ((foundSchemaVersion != AuthorizationManager::schemaVersion26Final) &&
+ (foundSchemaVersion != AuthorizationManager::schemaVersion28SCRAM)) {
+ severe() << "During initial sync, found auth schema version " << foundSchemaVersion
+ << ", but this version of MongoDB only supports schema versions "
+ << AuthorizationManager::schemaVersion26Final << " and "
+ << AuthorizationManager::schemaVersion28SCRAM;
+ fassertFailedNoTrace(28619);
+ }
+}
+
+bool _initialSyncClone(OperationContext* txn,
+ Cloner& cloner,
+ const std::string& host,
+ const list<string>& dbs,
+ bool dataPass) {
+ for (list<string>::const_iterator i = dbs.begin(); i != dbs.end(); i++) {
+ const string db = *i;
+ if (db == "local")
+ continue;
+
+ if (dataPass)
+ log() << "initial sync cloning db: " << db;
+ else
+            log() << "initial sync cloning indexes for: " << db;
+
+ CloneOptions options;
+ options.fromDB = db;
+ options.slaveOk = true;
+ options.useReplAuth = true;
+ options.snapshot = false;
+ options.mayYield = true;
+ options.mayBeInterrupted = false;
+ options.syncData = dataPass;
+ options.syncIndexes = !dataPass;
+
+ // Make database stable
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dbWrite(txn->lockState(), db, MODE_X);
+
+ Status status = cloner.copyDb(txn, db, host, options, NULL);
if (!status.isOK()) {
- severe() << "During initial sync, found malformed auth schema version document: " <<
- status << "; document: " << authSchemaVersionDocument;
- fassertFailedNoTrace(28618);
+ log() << "initial sync: error while " << (dataPass ? "cloning " : "indexing ") << db
+ << ". " << status.toString();
+ return false;
}
- if ((foundSchemaVersion != AuthorizationManager::schemaVersion26Final) &&
- (foundSchemaVersion != AuthorizationManager::schemaVersion28SCRAM)) {
- severe() << "During initial sync, found auth schema version " << foundSchemaVersion <<
- ", but this version of MongoDB only supports schema versions " <<
- AuthorizationManager::schemaVersion26Final << " and " <<
- AuthorizationManager::schemaVersion28SCRAM;
- fassertFailedNoTrace(28619);
+
+ if (db == "admin") {
+ checkAdminDatabasePostClone(txn, dbHolder().get(txn, db));
}
}
- bool _initialSyncClone(OperationContext* txn,
- Cloner& cloner,
- const std::string& host,
- const list<string>& dbs,
- bool dataPass) {
-
- for( list<string>::const_iterator i = dbs.begin(); i != dbs.end(); i++ ) {
- const string db = *i;
- if ( db == "local" )
- continue;
-
- if ( dataPass )
- log() << "initial sync cloning db: " << db;
- else
- log() << "initial sync cloning indexes for : " << db;
-
- CloneOptions options;
- options.fromDB = db;
- options.slaveOk = true;
- options.useReplAuth = true;
- options.snapshot = false;
- options.mayYield = true;
- options.mayBeInterrupted = false;
- options.syncData = dataPass;
- options.syncIndexes = ! dataPass;
-
- // Make database stable
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dbWrite(txn->lockState(), db, MODE_X);
-
- Status status = cloner.copyDb(txn, db, host, options, NULL);
- if (!status.isOK()) {
- log() << "initial sync: error while "
- << (dataPass ? "cloning " : "indexing ") << db
- << ". " << status.toString();
- return false;
- }
-
- if (db == "admin") {
- checkAdminDatabasePostClone(txn, dbHolder().get(txn, db));
- }
- }
+ return true;
+}
- return true;
+/**
+ * Replays the sync target's oplog from lastOp to the latest op on the sync target.
+ *
+ * @param syncer either initial sync (can reclone missing docs) or "normal" sync (no recloning)
+ * @param r the oplog reader
+ * @return if applying the oplog succeeded
+ */
+bool _initialSyncApplyOplog(OperationContext* ctx, repl::SyncTail& syncer, OplogReader* r) {
+ const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastOptime();
+ BSONObj lastOp;
+
+ // If the fail point is set, exit failing.
+ if (MONGO_FAIL_POINT(failInitSyncWithBufferedEntriesLeft)) {
+ log() << "adding fake oplog entry to buffer.";
+ BackgroundSync::get()->pushTestOpToBuffer(BSON(
+ "ts" << startOpTime.getTimestamp() << "t" << startOpTime.getTerm() << "v" << 1 << "op"
+ << "n"));
+ return false;
}
- /**
- * Replays the sync target's oplog from lastOp to the latest op on the sync target.
- *
- * @param syncer either initial sync (can reclone missing docs) or "normal" sync (no recloning)
- * @param r the oplog reader
- * @return if applying the oplog succeeded
- */
- bool _initialSyncApplyOplog( OperationContext* ctx,
- repl::SyncTail& syncer,
- OplogReader* r) {
- const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastOptime();
- BSONObj lastOp;
-
- // If the fail point is set, exit failing.
- if (MONGO_FAIL_POINT(failInitSyncWithBufferedEntriesLeft)) {
- log() << "adding fake oplog entry to buffer.";
- BackgroundSync::get()->pushTestOpToBuffer(
- BSON("ts" << startOpTime.getTimestamp() <<
- "t" << startOpTime.getTerm() <<
- "v" << 1 <<
- "op" << "n"));
- return false;
- }
-
- try {
- // It may have been a long time since we last used this connection to
- // query the oplog, depending on the size of the databases we needed to clone.
- // A common problem is that TCP keepalives are set too infrequent, and thus
- // our connection here is terminated by a firewall due to inactivity.
- // Solution is to increase the TCP keepalive frequency.
- lastOp = r->getLastOp(rsOplogName);
- } catch ( SocketException & ) {
- HostAndPort host = r->getHost();
- log() << "connection lost to " << host.toString() <<
- "; is your tcp keepalive interval set appropriately?";
- if ( !r->connect(host) ) {
- error() << "initial sync couldn't connect to " << host.toString();
- throw;
- }
- // retry
- lastOp = r->getLastOp(rsOplogName);
+ try {
+ // It may have been a long time since we last used this connection to
+ // query the oplog, depending on the size of the databases we needed to clone.
+        // A common problem is that TCP keepalives are set too infrequently, and thus
+ // our connection here is terminated by a firewall due to inactivity.
+ // Solution is to increase the TCP keepalive frequency.
+ lastOp = r->getLastOp(rsOplogName);
+ } catch (SocketException&) {
+ HostAndPort host = r->getHost();
+ log() << "connection lost to " << host.toString()
+ << "; is your tcp keepalive interval set appropriately?";
+ if (!r->connect(host)) {
+ error() << "initial sync couldn't connect to " << host.toString();
+ throw;
}
+ // retry
+ lastOp = r->getLastOp(rsOplogName);
+ }
- if (lastOp.isEmpty()) {
- error() << "initial sync lastOp is empty";
- sleepsecs(1);
- return false;
- }
+ if (lastOp.isEmpty()) {
+ error() << "initial sync lastOp is empty";
+ sleepsecs(1);
+ return false;
+ }
- OpTime stopOpTime = extractOpTime(lastOp);
+ OpTime stopOpTime = extractOpTime(lastOp);
- // If we already have what we need then return.
- if (stopOpTime == startOpTime)
- return true;
+ // If we already have what we need then return.
+ if (stopOpTime == startOpTime)
+ return true;
- verify( !stopOpTime.isNull() );
- verify( stopOpTime > startOpTime );
+ verify(!stopOpTime.isNull());
+ verify(stopOpTime > startOpTime);
- // apply till stopOpTime
- try {
- LOG(2) << "Applying oplog entries from " << startOpTime << " until " << stopOpTime;
- syncer.oplogApplication(ctx, stopOpTime);
+ // apply till stopOpTime
+ try {
+ LOG(2) << "Applying oplog entries from " << startOpTime << " until " << stopOpTime;
+ syncer.oplogApplication(ctx, stopOpTime);
- if (inShutdown()) {
- return false;
- }
- }
- catch (const DBException&) {
- warning() << "initial sync failed during oplog application phase, and will retry";
- sleepsecs(5);
+ if (inShutdown()) {
return false;
}
-
- return true;
+ } catch (const DBException&) {
+ warning() << "initial sync failed during oplog application phase, and will retry";
+ sleepsecs(5);
+ return false;
}
- void _tryToApplyOpWithRetry(OperationContext* txn, SyncTail* init, const BSONObj& op) {
- try {
- if (!SyncTail::syncApply(txn, op, false).isOK()) {
- bool retry;
- {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- retry = init->shouldRetry(txn, op);
- }
+ return true;
+}
+
+void _tryToApplyOpWithRetry(OperationContext* txn, SyncTail* init, const BSONObj& op) {
+ try {
+ if (!SyncTail::syncApply(txn, op, false).isOK()) {
+ bool retry;
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ retry = init->shouldRetry(txn, op);
+ }
- if (retry) {
- // retry
- if (!SyncTail::syncApply(txn, op, false).isOK()) {
- uasserted(28542,
- str::stream() << "During initial sync, failed to apply op: "
- << op);
- }
+ if (retry) {
+ // retry
+ if (!SyncTail::syncApply(txn, op, false).isOK()) {
+ uasserted(28542,
+ str::stream() << "During initial sync, failed to apply op: " << op);
}
- // If shouldRetry() returns false, fall through.
- // This can happen if the document that was moved and missed by Cloner
- // subsequently got deleted and no longer exists on the Sync Target at all
}
+ // If shouldRetry() returns false, fall through.
+ // This can happen if the document that was moved and missed by Cloner
+ // subsequently got deleted and no longer exists on the Sync Target at all
}
- catch (const DBException& e) {
- error() << "exception: " << causedBy(e) << " on: " << op.toString();
- uasserted(28541,
- str::stream() << "During initial sync, failed to apply op: "
- << op);
- }
+ } catch (const DBException& e) {
+ error() << "exception: " << causedBy(e) << " on: " << op.toString();
+ uasserted(28541, str::stream() << "During initial sync, failed to apply op: " << op);
}
+}
- /**
- * Do the initial sync for this member. There are several steps to this process:
- *
- * 0. Add _initialSyncFlag to minValid collection to tell us to restart initial sync if we
- * crash in the middle of this procedure
- * 1. Record start time.
- * 2. Clone.
- * 3. Set minValid1 to sync target's latest op time.
- * 4. Apply ops from start to minValid1, fetching missing docs as needed.
- * 5. Set minValid2 to sync target's latest op time.
- * 6. Apply ops from minValid1 to minValid2.
- * 7. Build indexes.
- * 8. Set minValid3 to sync target's latest op time.
- * 9. Apply ops from minValid2 to minValid3.
- 10. Cleanup minValid collection: remove _initialSyncFlag field, set ts to minValid3 OpTime
- *
- * At that point, initial sync is finished. Note that the oplog from the sync target is applied
- * three times: step 4, 6, and 8. 4 may involve refetching, 6 should not. By the end of 6,
- * this member should have consistent data. 8 is "cosmetic," it is only to get this member
- * closer to the latest op time before it can transition out of startup state
- *
- * Returns a Status with ErrorCode::ShutdownInProgress if the node enters shutdown,
- * ErrorCode::InitialSyncOplogSourceMissing if the node fails to find an sync source, Status::OK
- * if everything worked, and ErrorCode::InitialSyncFailure for all other error cases.
- */
- Status _initialSync() {
-
- log() << "initial sync pending";
-
- BackgroundSync* bgsync(BackgroundSync::get());
- OperationContextImpl txn;
- txn.setReplicatedWrites(false);
- DisableDocumentValidation validationDisabler(&txn);
- ReplicationCoordinator* replCoord(getGlobalReplicationCoordinator());
-
- // reset state for initial sync
- truncateAndResetOplog(&txn, replCoord, bgsync);
-
- OplogReader r;
- Timestamp now(duration_cast<Seconds>(Milliseconds(curTimeMillis64())), 0);
- OpTime nowOpTime(now, std::numeric_limits<long long>::max());
-
- while (r.getHost().empty()) {
- // We must prime the sync source selector so that it considers all candidates regardless
- // of oplog position, by passing in "now" with max term as the last op fetched time.
- r.connectToSyncSource(&txn, nowOpTime, replCoord);
- if (r.getHost().empty()) {
- std::string msg =
- "no valid sync sources found in current replset to do an initial sync";
- log() << msg;
- return Status(ErrorCodes::InitialSyncOplogSourceMissing, msg);
- }
-
- if (inShutdown()) {
- return Status(ErrorCodes::ShutdownInProgress, "shutting down");
- }
+/**
+ * Do the initial sync for this member. There are several steps to this process:
+ *
+ * 0. Add _initialSyncFlag to minValid collection to tell us to restart initial sync if we
+ * crash in the middle of this procedure
+ * 1. Record start time.
+ * 2. Clone.
+ * 3. Set minValid1 to sync target's latest op time.
+ * 4. Apply ops from start to minValid1, fetching missing docs as needed.
+ * 5. Set minValid2 to sync target's latest op time.
+ * 6. Apply ops from minValid1 to minValid2.
+ * 7. Build indexes.
+ * 8. Set minValid3 to sync target's latest op time.
+ * 9. Apply ops from minValid2 to minValid3.
+ * 10. Cleanup minValid collection: remove _initialSyncFlag field, set ts to minValid3 OpTime
+ *
+ * At that point, initial sync is finished. Note that the oplog from the sync target is applied
+ * three times: step 4, 6, and 8. 4 may involve refetching, 6 should not. By the end of 6,
+ * this member should have consistent data. 8 is "cosmetic," it is only to get this member
+ * closer to the latest op time before it can transition out of startup state
+ *
+ * Returns a Status with ErrorCodes::ShutdownInProgress if the node enters shutdown,
+ * ErrorCodes::InitialSyncOplogSourceMissing if the node fails to find a sync source, Status::OK
+ * if everything worked, and ErrorCodes::InitialSyncFailure for all other error cases.
+ */
+Status _initialSync() {
+ log() << "initial sync pending";
+
+ BackgroundSync* bgsync(BackgroundSync::get());
+ OperationContextImpl txn;
+ txn.setReplicatedWrites(false);
+ DisableDocumentValidation validationDisabler(&txn);
+ ReplicationCoordinator* replCoord(getGlobalReplicationCoordinator());
+
+ // reset state for initial sync
+ truncateAndResetOplog(&txn, replCoord, bgsync);
+
+ OplogReader r;
+ Timestamp now(duration_cast<Seconds>(Milliseconds(curTimeMillis64())), 0);
+ OpTime nowOpTime(now, std::numeric_limits<long long>::max());
+
+ while (r.getHost().empty()) {
+ // We must prime the sync source selector so that it considers all candidates regardless
+ // of oplog position, by passing in "now" with max term as the last op fetched time.
+ r.connectToSyncSource(&txn, nowOpTime, replCoord);
+ if (r.getHost().empty()) {
+ std::string msg =
+ "no valid sync sources found in current replset to do an initial sync";
+ log() << msg;
+ return Status(ErrorCodes::InitialSyncOplogSourceMissing, msg);
}
- InitialSync init(bgsync);
- init.setHostname(r.getHost().toString());
-
- BSONObj lastOp = r.getLastOp(rsOplogName);
- if ( lastOp.isEmpty() ) {
- std::string msg = "initial sync couldn't read remote oplog";
- log() << msg;
- sleepsecs(15);
- return Status(ErrorCodes::InitialSyncFailure, msg);
+ if (inShutdown()) {
+ return Status(ErrorCodes::ShutdownInProgress, "shutting down");
}
+ }
- // Add field to minvalid document to tell us to restart initial sync if we crash
- setInitialSyncFlag(&txn);
+ InitialSync init(bgsync);
+ init.setHostname(r.getHost().toString());
- log() << "initial sync drop all databases";
- dropAllDatabasesExceptLocal(&txn);
+ BSONObj lastOp = r.getLastOp(rsOplogName);
+ if (lastOp.isEmpty()) {
+ std::string msg = "initial sync couldn't read remote oplog";
+ log() << msg;
+ sleepsecs(15);
+ return Status(ErrorCodes::InitialSyncFailure, msg);
+ }
- log() << "initial sync clone all databases";
+ // Add field to minvalid document to tell us to restart initial sync if we crash
+ setInitialSyncFlag(&txn);
- list<string> dbs = r.conn()->getDatabaseNames();
- {
- // Clone admin database first, to catch schema errors.
- list<string>::iterator admin = std::find(dbs.begin(), dbs.end(), "admin");
- if (admin != dbs.end()) {
- dbs.splice(dbs.begin(), dbs, admin);
- }
- }
+ log() << "initial sync drop all databases";
+ dropAllDatabasesExceptLocal(&txn);
- Cloner cloner;
- if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, true)) {
- return Status(ErrorCodes::InitialSyncFailure, "initial sync failed data cloning");
+ log() << "initial sync clone all databases";
+
+ list<string> dbs = r.conn()->getDatabaseNames();
+ {
+ // Clone admin database first, to catch schema errors.
+ list<string>::iterator admin = std::find(dbs.begin(), dbs.end(), "admin");
+ if (admin != dbs.end()) {
+ dbs.splice(dbs.begin(), dbs, admin);
}
+ }
- log() << "initial sync data copy, starting syncup";
+ Cloner cloner;
+ if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, true)) {
+ return Status(ErrorCodes::InitialSyncFailure, "initial sync failed data cloning");
+ }
- // prime oplog
- _tryToApplyOpWithRetry(&txn, &init, lastOp);
- std::deque<BSONObj> ops;
- ops.push_back(lastOp);
+ log() << "initial sync data copy, starting syncup";
- OpTime lastOptime = writeOpsToOplog(&txn, ops);
- ReplClientInfo::forClient(txn.getClient()).setLastOp(lastOptime);
- replCoord->setMyLastOptime(lastOptime);
- setNewTimestamp(lastOptime.getTimestamp());
+ // prime oplog
+ _tryToApplyOpWithRetry(&txn, &init, lastOp);
+ std::deque<BSONObj> ops;
+ ops.push_back(lastOp);
- std::string msg = "oplog sync 1 of 3";
- log() << msg;
- if (!_initialSyncApplyOplog(&txn, init, &r)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
+ OpTime lastOptime = writeOpsToOplog(&txn, ops);
+ ReplClientInfo::forClient(txn.getClient()).setLastOp(lastOptime);
+ replCoord->setMyLastOptime(lastOptime);
+ setNewTimestamp(lastOptime.getTimestamp());
- // Now we sync to the latest op on the sync target _again_, as we may have recloned ops
- // that were "from the future" compared with minValid. During this second application,
- // nothing should need to be recloned.
- msg = "oplog sync 2 of 3";
- log() << msg;
- if (!_initialSyncApplyOplog(&txn, init, &r)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
- // data should now be consistent
+ std::string msg = "oplog sync 1 of 3";
+ log() << msg;
+ if (!_initialSyncApplyOplog(&txn, init, &r)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
- msg = "initial sync building indexes";
- log() << msg;
- if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, false)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
+ // Now we sync to the latest op on the sync target _again_, as we may have recloned ops
+ // that were "from the future" compared with minValid. During this second application,
+ // nothing should need to be recloned.
+ msg = "oplog sync 2 of 3";
+ log() << msg;
+ if (!_initialSyncApplyOplog(&txn, init, &r)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
+ // data should now be consistent
- // WARNING: If the 3rd oplog sync step is removed we must reset minValid
- // to the last entry on the source server so that we don't come
- // out of recovering until we get there (since the previous steps
- // could have fetched newer document than the oplog entry we were applying from).
- msg = "oplog sync 3 of 3";
- log() << msg;
+ msg = "initial sync building indexes";
+ log() << msg;
+ if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, false)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
- SyncTail tail(bgsync, multiSyncApply);
- if (!_initialSyncApplyOplog(&txn, tail, &r)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
+ // WARNING: If the 3rd oplog sync step is removed we must reset minValid
+ // to the last entry on the source server so that we don't come
+ // out of recovering until we get there (since the previous steps
+ // could have fetched newer document than the oplog entry we were applying from).
+ msg = "oplog sync 3 of 3";
+ log() << msg;
+
+ SyncTail tail(bgsync, multiSyncApply);
+ if (!_initialSyncApplyOplog(&txn, tail, &r)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
- // ---------
+ // ---------
- Status status = getGlobalAuthorizationManager()->initialize(&txn);
- if (!status.isOK()) {
- warning() << "Failed to reinitialize auth data after initial sync. " << status;
- return status;
- }
+ Status status = getGlobalAuthorizationManager()->initialize(&txn);
+ if (!status.isOK()) {
+ warning() << "Failed to reinitialize auth data after initial sync. " << status;
+ return status;
+ }
- log() << "initial sync finishing up";
+ log() << "initial sync finishing up";
- {
- ScopedTransaction scopedXact(&txn, MODE_IX);
- AutoGetDb autodb(&txn, "local", MODE_X);
- OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastOptime());
- log() << "set minValid=" << lastOpTimeWritten;
+ {
+ ScopedTransaction scopedXact(&txn, MODE_IX);
+ AutoGetDb autodb(&txn, "local", MODE_X);
+ OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastOptime());
+ log() << "set minValid=" << lastOpTimeWritten;
- // Initial sync is now complete. Flag this by setting minValid to the last thing
- // we synced.
- setMinValid(&txn, lastOpTimeWritten);
+ // Initial sync is now complete. Flag this by setting minValid to the last thing
+ // we synced.
+ setMinValid(&txn, lastOpTimeWritten);
- // Clear the initial sync flag.
- clearInitialSyncFlag(&txn);
- BackgroundSync::get()->setInitialSyncRequestedFlag(false);
- }
+ // Clear the initial sync flag.
+ clearInitialSyncFlag(&txn);
+ BackgroundSync::get()->setInitialSyncRequestedFlag(false);
+ }
- // If we just cloned & there were no ops applied, we still want the primary to know where
- // we're up to
- bgsync->notify(&txn);
+ // If we just cloned & there were no ops applied, we still want the primary to know where
+ // we're up to
+ bgsync->notify(&txn);
- log() << "initial sync done";
- return Status::OK();
- }
-} // namespace
+ log() << "initial sync done";
+ return Status::OK();
+}
+} // namespace
- void syncDoInitialSync() {
- static const int maxFailedAttempts = 10;
+void syncDoInitialSync() {
+ static const int maxFailedAttempts = 10;
- {
- OperationContextImpl txn;
- createOplog(&txn);
- }
+ {
+ OperationContextImpl txn;
+ createOplog(&txn);
+ }
- int failedAttempts = 0;
- while ( failedAttempts < maxFailedAttempts ) {
- try {
- // leave loop when successful
- Status status = _initialSync();
- if (status.isOK()) {
- break;
- }
- if (status == ErrorCodes::InitialSyncOplogSourceMissing) {
- sleepsecs(1);
- return;
- }
+ int failedAttempts = 0;
+ while (failedAttempts < maxFailedAttempts) {
+ try {
+ // leave loop when successful
+ Status status = _initialSync();
+ if (status.isOK()) {
+ break;
}
- catch(const DBException& e) {
- error() << e ;
- // Return if in shutdown
- if (inShutdown()) {
- return;
- }
+ if (status == ErrorCodes::InitialSyncOplogSourceMissing) {
+ sleepsecs(1);
+ return;
}
-
+ } catch (const DBException& e) {
+ error() << e;
+ // Return if in shutdown
if (inShutdown()) {
return;
}
-
- error() << "initial sync attempt failed, "
- << (maxFailedAttempts - ++failedAttempts) << " attempts remaining";
- sleepsecs(5);
}
- // No need to print a stack
- if (failedAttempts >= maxFailedAttempts) {
- severe() << "The maximum number of retries have been exhausted for initial sync.";
- fassertFailedNoTrace(16233);
+ if (inShutdown()) {
+ return;
}
+
+ error() << "initial sync attempt failed, " << (maxFailedAttempts - ++failedAttempts)
+ << " attempts remaining";
+ sleepsecs(5);
+ }
+
+ // No need to print a stack
+ if (failedAttempts >= maxFailedAttempts) {
+        severe() << "The maximum number of retries has been exhausted for initial sync.";
+ fassertFailedNoTrace(16233);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
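
The minValid bookkeeping in _initialSync amounts to a small crash-recovery protocol: make the attempt durable before any data is destroyed, and clear the marker only once a consistent point has been recorded. Its shape in isolation (cloneAndApplyThreePasses is a hypothetical stand-in for steps 2-9 above; setInitialSyncFlag, setMinValid, and clearInitialSyncFlag are the real primitives from minvalid.h):

    // Sketch of the crash-safety protocol wrapped around initial sync.
    Status runGuardedInitialSync(OperationContext* txn) {
        // Durable marker: a crash anywhere past this point makes the next
        // startup detect the flag and redo initial sync from scratch.
        setInitialSyncFlag(txn);

        Status status = cloneAndApplyThreePasses(txn);  // hypothetical: steps 2-9
        if (!status.isOK()) {
            return status;  // flag stays set, so the attempt will be retried
        }

        // Record the consistent point, then commit by clearing the flag.
        OpTime lastOpTimeWritten = getGlobalReplicationCoordinator()->getMyLastOptime();
        setMinValid(txn, lastOpTimeWritten);
        clearInitialSyncFlag(txn);
        return Status::OK();
    }
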
diff --git a/src/mongo/db/repl/rs_initialsync.h b/src/mongo/db/repl/rs_initialsync.h
index 659bb5ad577..7add22b9a37 100644
--- a/src/mongo/db/repl/rs_initialsync.h
+++ b/src/mongo/db/repl/rs_initialsync.h
@@ -30,10 +30,10 @@
namespace mongo {
namespace repl {
- /**
- * Begins an initial sync of a node. This drops all data, chooses a sync source,
- * and runs the cloner from that sync source. The node's state is not changed.
- */
- void syncDoInitialSync();
+/**
+ * Begins an initial sync of a node. This drops all data, chooses a sync source,
+ * and runs the cloner from that sync source. The node's state is not changed.
+ */
+void syncDoInitialSync();
}
}
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index e5f01a6c8c7..d7a4c151910 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -102,762 +102,711 @@
namespace mongo {
- using std::shared_ptr;
- using std::unique_ptr;
- using std::endl;
- using std::list;
- using std::map;
- using std::set;
- using std::string;
- using std::pair;
+using std::shared_ptr;
+using std::unique_ptr;
+using std::endl;
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::pair;
namespace repl {
namespace {
- class RSFatalException : public std::exception {
- public:
- RSFatalException(std::string m = "replica set fatal exception")
- : msg(m) {}
- virtual ~RSFatalException() throw() {};
- virtual const char* what() const throw() {
- return msg.c_str();
- }
- private:
- std::string msg;
- };
-
- struct DocID {
- // ns and _id both point into ownedObj's buffer
- BSONObj ownedObj;
- const char* ns;
- BSONElement _id;
- bool operator<(const DocID& other) const {
- int comp = strcmp(ns, other.ns);
- if (comp < 0)
- return true;
- if (comp > 0)
- return false;
- return _id < other._id;
- }
- };
+class RSFatalException : public std::exception {
+public:
+ RSFatalException(std::string m = "replica set fatal exception") : msg(m) {}
+ virtual ~RSFatalException() throw(){};
+ virtual const char* what() const throw() {
+ return msg.c_str();
+ }
- struct FixUpInfo {
- // note this is a set -- if there are many $inc's on a single document we need to rollback,
- // we only need to refetch it once.
- set<DocID> toRefetch;
+private:
+ std::string msg;
+};
+
+struct DocID {
+ // ns and _id both point into ownedObj's buffer
+ BSONObj ownedObj;
+ const char* ns;
+ BSONElement _id;
+ bool operator<(const DocID& other) const {
+ int comp = strcmp(ns, other.ns);
+ if (comp < 0)
+ return true;
+ if (comp > 0)
+ return false;
+ return _id < other._id;
+ }
+};
- // collections to drop
- set<string> toDrop;
+struct FixUpInfo {
+ // note this is a set -- if there are many $inc's on a single document we need to rollback,
+ // we only need to refetch it once.
+ set<DocID> toRefetch;
- set<string> collectionsToResyncData;
- set<string> collectionsToResyncMetadata;
+ // collections to drop
+ set<string> toDrop;
- Timestamp commonPoint;
- RecordId commonPointOurDiskloc;
+ set<string> collectionsToResyncData;
+ set<string> collectionsToResyncMetadata;
- int rbid; // remote server's current rollback sequence #
- };
+ Timestamp commonPoint;
+ RecordId commonPointOurDiskloc;
+ int rbid; // remote server's current rollback sequence #
+};
- Status refetch(FixUpInfo& fixUpInfo, const BSONObj& ourObj) {
- const char* op = ourObj.getStringField("op");
- if (*op == 'n')
- return Status::OK();
- if (ourObj.objsize() > 512 * 1024 * 1024)
- throw RSFatalException("rollback too large");
+Status refetch(FixUpInfo& fixUpInfo, const BSONObj& ourObj) {
+ const char* op = ourObj.getStringField("op");
+ if (*op == 'n')
+ return Status::OK();
- DocID doc;
- doc.ownedObj = ourObj.getOwned();
- doc.ns = doc.ownedObj.getStringField("ns");
- if (*doc.ns == '\0') {
- warning() << "ignoring op on rollback no ns TODO : "
- << doc.ownedObj.toString();
- return Status::OK();
- }
+ if (ourObj.objsize() > 512 * 1024 * 1024)
+ throw RSFatalException("rollback too large");
- BSONObj obj = doc.ownedObj.getObjectField(*op=='u' ? "o2" : "o");
- if (obj.isEmpty()) {
- warning() << "ignoring op on rollback : " << doc.ownedObj.toString();
- return Status::OK();
- }
+ DocID doc;
+ doc.ownedObj = ourObj.getOwned();
+ doc.ns = doc.ownedObj.getStringField("ns");
+ if (*doc.ns == '\0') {
+ warning() << "ignoring op on rollback no ns TODO : " << doc.ownedObj.toString();
+ return Status::OK();
+ }
- if (*op == 'c') {
- BSONElement first = obj.firstElement();
- NamespaceString nss(doc.ns); // foo.$cmd
- string cmdname = first.fieldName();
- Command* cmd = Command::findCommand(cmdname.c_str());
- if (cmd == NULL) {
- severe() << "rollback no such command " << first.fieldName();
- return Status(ErrorCodes::UnrecoverableRollbackError, str::stream() <<
- "rollback no such command " << first.fieldName(),
- 18751);
- }
- if (cmdname == "create") {
- // Create collection operation
- // { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } }
- string ns = nss.db().toString() + '.' + obj["create"].String(); // -> foo.abc
- fixUpInfo.toDrop.insert(ns);
- return Status::OK();
- }
- else if (cmdname == "drop") {
- string ns = nss.db().toString() + '.' + first.valuestr();
- fixUpInfo.collectionsToResyncData.insert(ns);
- return Status::OK();
- }
- else if (cmdname == "dropIndexes" || cmdname == "deleteIndexes") {
- // TODO: this is bad. we simply full resync the collection here,
- // which could be very slow.
- warning() << "rollback of dropIndexes is slow in this version of "
- << "mongod";
- string ns = nss.db().toString() + '.' + first.valuestr();
- fixUpInfo.collectionsToResyncData.insert(ns);
- return Status::OK();
- }
- else if (cmdname == "renameCollection") {
- // TODO: slow.
- warning() << "rollback of renameCollection is slow in this version of "
- << "mongod";
- string from = first.valuestr();
- string to = obj["to"].String();
- fixUpInfo.collectionsToResyncData.insert(from);
- fixUpInfo.collectionsToResyncData.insert(to);
- return Status::OK();
- }
- else if (cmdname == "dropDatabase") {
- severe() << "rollback : can't rollback drop database full resync "
- << "will be required";
- log() << obj.toString();
- throw RSFatalException();
- }
- else if (cmdname == "collMod") {
- const auto ns = NamespaceString(cmd->parseNs(nss.db().toString(), obj));
- for (auto field : obj) {
- const auto modification = field.fieldNameStringData();
- if (modification == cmdname) {
- continue; // Skipping command name.
- }
+ BSONObj obj = doc.ownedObj.getObjectField(*op == 'u' ? "o2" : "o");
+ if (obj.isEmpty()) {
+ warning() << "ignoring op on rollback : " << doc.ownedObj.toString();
+ return Status::OK();
+ }
- if (modification == "validator"
- || modification == "usePowerOf2Sizes"
- || modification == "noPadding") {
- fixUpInfo.collectionsToResyncMetadata.insert(ns);
- continue;
- }
+ if (*op == 'c') {
+ BSONElement first = obj.firstElement();
+ NamespaceString nss(doc.ns); // foo.$cmd
+ string cmdname = first.fieldName();
+ Command* cmd = Command::findCommand(cmdname.c_str());
+ if (cmd == NULL) {
+ severe() << "rollback no such command " << first.fieldName();
+ return Status(ErrorCodes::UnrecoverableRollbackError,
+ str::stream() << "rollback no such command " << first.fieldName(),
+ 18751);
+ }
+ if (cmdname == "create") {
+ // Create collection operation
+ // { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } }
+ string ns = nss.db().toString() + '.' + obj["create"].String(); // -> foo.abc
+ fixUpInfo.toDrop.insert(ns);
+ return Status::OK();
+ } else if (cmdname == "drop") {
+ string ns = nss.db().toString() + '.' + first.valuestr();
+ fixUpInfo.collectionsToResyncData.insert(ns);
+ return Status::OK();
+ } else if (cmdname == "dropIndexes" || cmdname == "deleteIndexes") {
+ // TODO: this is bad. we simply do a full resync of the collection here,
+ // which could be very slow.
+ warning() << "rollback of dropIndexes is slow in this version of "
+ << "mongod";
+ string ns = nss.db().toString() + '.' + first.valuestr();
+ fixUpInfo.collectionsToResyncData.insert(ns);
+ return Status::OK();
+ } else if (cmdname == "renameCollection") {
+ // TODO: slow.
+ warning() << "rollback of renameCollection is slow in this version of "
+ << "mongod";
+ string from = first.valuestr();
+ string to = obj["to"].String();
+ fixUpInfo.collectionsToResyncData.insert(from);
+ fixUpInfo.collectionsToResyncData.insert(to);
+ return Status::OK();
+ } else if (cmdname == "dropDatabase") {
+ severe() << "rollback : can't rollback drop database full resync "
+ << "will be required";
+ log() << obj.toString();
+ throw RSFatalException();
+ } else if (cmdname == "collMod") {
+ const auto ns = NamespaceString(cmd->parseNs(nss.db().toString(), obj));
+ for (auto field : obj) {
+ const auto modification = field.fieldNameStringData();
+ if (modification == cmdname) {
+ continue; // Skipping command name.
+ }
- severe() << "cannot rollback a collMod command: " << obj;
- throw RSFatalException();
+ if (modification == "validator" || modification == "usePowerOf2Sizes" ||
+ modification == "noPadding") {
+ fixUpInfo.collectionsToResyncMetadata.insert(ns);
+ continue;
}
- }
- else {
- severe() << "can't rollback this command yet: "
- << obj.toString();
- log() << "cmdname=" << cmdname;
+
+ severe() << "cannot rollback a collMod command: " << obj;
throw RSFatalException();
}
+ } else {
+ severe() << "can't rollback this command yet: " << obj.toString();
+ log() << "cmdname=" << cmdname;
+ throw RSFatalException();
}
+ }
- doc._id = obj["_id"];
- if (doc._id.eoo()) {
- warning() << "ignoring op on rollback no _id TODO : " << doc.ns << ' '
- << doc.ownedObj.toString();
- return Status::OK();
- }
-
- fixUpInfo.toRefetch.insert(doc);
+ doc._id = obj["_id"];
+ if (doc._id.eoo()) {
+ warning() << "ignoring op on rollback no _id TODO : " << doc.ns << ' '
+ << doc.ownedObj.toString();
return Status::OK();
}
+ fixUpInfo.toRefetch.insert(doc);
+ return Status::OK();
+}
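
A hedged sketch of how refetch() classifies a create-command entry, using the oplog shape quoted in the comment above; it assumes the surrounding mongo/repl headers and test macros, so it is illustrative rather than a drop-in test (refetch() lives in this file's anonymous namespace):

    // { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc" } }
    BSONObj createOp = BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL
                                 << "op" << "c"
                                 << "ns" << "foo.$cmd"
                                 << "o" << BSON("create" << "abc"));
    FixUpInfo fixUpInfo;
    ASSERT_OK(refetch(fixUpInfo, createOp));
    // Rolling back a create drops the collection locally:
    ASSERT_EQUALS(1U, fixUpInfo.toDrop.count("foo.abc"));
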
- void syncFixUp(OperationContext* txn,
- FixUpInfo& fixUpInfo,
- const RollbackSource& rollbackSource,
- ReplicationCoordinator* replCoord) {
- // fetch all first so we needn't handle interruption in a fancy way
- unsigned long long totalSize = 0;
+void syncFixUp(OperationContext* txn,
+ FixUpInfo& fixUpInfo,
+ const RollbackSource& rollbackSource,
+ ReplicationCoordinator* replCoord) {
+ // fetch all first so we needn't handle interruption in a fancy way
- list< pair<DocID, BSONObj> > goodVersions;
+ unsigned long long totalSize = 0;
- BSONObj newMinValid;
+ list<pair<DocID, BSONObj>> goodVersions;
- // fetch all the goodVersions of each document from current primary
- DocID doc;
- unsigned long long numFetched = 0;
- try {
- for (set<DocID>::iterator it = fixUpInfo.toRefetch.begin();
- it != fixUpInfo.toRefetch.end();
- it++) {
- doc = *it;
-
- verify(!doc._id.eoo());
-
- {
- // TODO : slow. lots of round trips.
- numFetched++;
- BSONObj good = rollbackSource.findOne(NamespaceString(doc.ns), doc._id.wrap());
- totalSize += good.objsize();
- uassert(13410, "replSet too much data to roll back",
- totalSize < 300 * 1024 * 1024);
-
- // note good might be eoo, indicating we should delete it
- goodVersions.push_back(pair<DocID, BSONObj>(doc,good));
- }
- }
- newMinValid = rollbackSource.getLastOperation();
- if (newMinValid.isEmpty()) {
- error() << "rollback error newMinValid empty?";
- return;
- }
- }
- catch (const DBException& e) {
- LOG(1) << "rollback re-get objects: " << e.toString();
- error() << "rollback couldn't re-get ns:" << doc.ns << " _id:" << doc._id << ' '
- << numFetched << '/' << fixUpInfo.toRefetch.size();
- throw e;
- }
+ BSONObj newMinValid;
- log() << "rollback 3.5";
- if (fixUpInfo.rbid != rollbackSource.getRollbackId()) {
- // Our source rolled back itself so the data we received isn't necessarily consistent.
- warning() << "rollback rbid on source changed during rollback, "
- << "cancelling this attempt";
- return;
- }
+ // fetch all the goodVersions of each document from current primary
+ DocID doc;
+ unsigned long long numFetched = 0;
+ try {
+ for (set<DocID>::iterator it = fixUpInfo.toRefetch.begin(); it != fixUpInfo.toRefetch.end();
+ it++) {
+ doc = *it;
- // update them
- log() << "rollback 4 n:" << goodVersions.size();
+ verify(!doc._id.eoo());
- bool warn = false;
+ {
+ // TODO : slow. lots of round trips.
+ numFetched++;
+ BSONObj good = rollbackSource.findOne(NamespaceString(doc.ns), doc._id.wrap());
+ totalSize += good.objsize();
+ uassert(13410, "replSet too much data to roll back", totalSize < 300 * 1024 * 1024);
- invariant(!fixUpInfo.commonPointOurDiskloc.isNull());
+ // note good might be eoo, indicating we should delete it
+ goodVersions.push_back(pair<DocID, BSONObj>(doc, good));
+ }
+ }
+ newMinValid = rollbackSource.getLastOperation();
+ if (newMinValid.isEmpty()) {
+ error() << "rollback error newMinValid empty?";
+ return;
+ }
+ } catch (const DBException& e) {
+ LOG(1) << "rollback re-get objects: " << e.toString();
+ error() << "rollback couldn't re-get ns:" << doc.ns << " _id:" << doc._id << ' '
+ << numFetched << '/' << fixUpInfo.toRefetch.size();
+ throw e;
+ }
- // we have items we are writing that aren't from a point-in-time. thus best not to come
- // online until we get to that point in freshness.
- OpTime minValid = extractOpTime(newMinValid);
- log() << "minvalid=" << minValid;
- setMinValid(txn, minValid);
+ log() << "rollback 3.5";
+ if (fixUpInfo.rbid != rollbackSource.getRollbackId()) {
+ // Our source rolled back itself so the data we received isn't necessarily consistent.
+ warning() << "rollback rbid on source changed during rollback, "
+ << "cancelling this attempt";
+ return;
+ }
- // any full collection resyncs required?
- if (!fixUpInfo.collectionsToResyncData.empty()
- || !fixUpInfo.collectionsToResyncMetadata.empty()) {
+ // update them
+ log() << "rollback 4 n:" << goodVersions.size();
- for (const string& ns : fixUpInfo.collectionsToResyncData) {
- log() << "rollback 4.1.1 coll resync " << ns;
+ bool warn = false;
- fixUpInfo.collectionsToResyncMetadata.erase(ns);
+ invariant(!fixUpInfo.commonPointOurDiskloc.isNull());
- const NamespaceString nss(ns);
+ // we are writing items that don't all come from a single point in time, so it's best not
+ // to come online until we catch up to that point in freshness.
+ OpTime minValid = extractOpTime(newMinValid);
+ log() << "minvalid=" << minValid;
+ setMinValid(txn, minValid);
+ // any full collection resyncs required?
+ if (!fixUpInfo.collectionsToResyncData.empty() ||
+ !fixUpInfo.collectionsToResyncMetadata.empty()) {
+ for (const string& ns : fixUpInfo.collectionsToResyncData) {
+ log() << "rollback 4.1.1 coll resync " << ns;
- {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
- Database* db = dbHolder().openDb(txn, nss.db().toString());
- invariant(db);
- WriteUnitOfWork wunit(txn);
- db->dropCollection(txn, ns);
- wunit.commit();
- }
+ fixUpInfo.collectionsToResyncMetadata.erase(ns);
- rollbackSource.copyCollectionFromRemote(txn, nss);
- }
+ const NamespaceString nss(ns);
- for (const string& ns : fixUpInfo.collectionsToResyncMetadata) {
- log() << "rollback 4.1.2 coll metadata resync " << ns;
- const NamespaceString nss(ns);
+ {
ScopedTransaction transaction(txn, MODE_IX);
Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
- auto db = dbHolder().openDb(txn, nss.db().toString());
+ Database* db = dbHolder().openDb(txn, nss.db().toString());
invariant(db);
- auto collection = db->getCollection(ns);
- invariant(collection);
- auto cce = collection->getCatalogEntry();
+ WriteUnitOfWork wunit(txn);
+ db->dropCollection(txn, ns);
+ wunit.commit();
+ }
- auto infoResult = rollbackSource.getCollectionInfo(nss);
+ rollbackSource.copyCollectionFromRemote(txn, nss);
+ }
- if (!infoResult.isOK()) {
- // Collection dropped by "them" so we should drop it too.
- log() << ns << " not found on remote host, dropping";
- fixUpInfo.toDrop.insert(ns);
- continue;
- }
+ for (const string& ns : fixUpInfo.collectionsToResyncMetadata) {
+ log() << "rollback 4.1.2 coll metadata resync " << ns;
- auto info = infoResult.getValue();
- CollectionOptions options;
- if (auto optionsField = info["options"]) {
- if (optionsField.type() != Object) {
- throw RSFatalException(str::stream() << "Failed to parse options "
- << info << ": expected 'options' to be an "
- << "Object, got " << typeName(optionsField.type()));
- }
+ const NamespaceString nss(ns);
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
+ auto db = dbHolder().openDb(txn, nss.db().toString());
+ invariant(db);
+ auto collection = db->getCollection(ns);
+ invariant(collection);
+ auto cce = collection->getCatalogEntry();
- auto status = options.parse(optionsField.Obj());
- if (!status.isOK()) {
- throw RSFatalException(str::stream() << "Failed to parse options "
- << info << ": "
- << status.toString());
- }
- }
- else {
- // Use default options.
- }
+ auto infoResult = rollbackSource.getCollectionInfo(nss);
- WriteUnitOfWork wuow(txn);
- if (options.flagsSet || cce->getCollectionOptions(txn).flagsSet) {
- cce->updateFlags(txn, options.flags);
+ if (!infoResult.isOK()) {
+ // Collection dropped by "them" so we should drop it too.
+ log() << ns << " not found on remote host, dropping";
+ fixUpInfo.toDrop.insert(ns);
+ continue;
+ }
+
+ auto info = infoResult.getValue();
+ CollectionOptions options;
+ if (auto optionsField = info["options"]) {
+ if (optionsField.type() != Object) {
+ throw RSFatalException(str::stream() << "Failed to parse options " << info
+ << ": expected 'options' to be an "
+ << "Object, got "
+ << typeName(optionsField.type()));
}
- auto status = collection->setValidator(txn, options.validator);
+ auto status = options.parse(optionsField.Obj());
if (!status.isOK()) {
- throw RSFatalException(str::stream() << "Failed to set validator: "
- << status.toString());
+ throw RSFatalException(str::stream() << "Failed to parse options " << info
+ << ": " << status.toString());
}
- wuow.commit();
+ } else {
+ // Use default options.
}
- // we did more reading from primary, so check it again for a rollback (which would mess
- // us up), and make minValid newer.
- log() << "rollback 4.2";
-
- string err;
- try {
- newMinValid = rollbackSource.getLastOperation();
- if (newMinValid.isEmpty()) {
- err = "can't get minvalid from sync source";
- }
- else {
- OpTime minValid = extractOpTime(newMinValid);
- log() << "minvalid=" << minValid;
- setMinValid(txn, minValid);
- }
- }
- catch (const DBException& e) {
- err = "can't get/set minvalid: ";
- err += e.what();
- }
- if (fixUpInfo.rbid != rollbackSource.getRollbackId()) {
- // our source rolled back itself. so the data we received isn't necessarily
- // consistent. however, we've now done writes. thus we have a problem.
- err += "rbid at primary changed during resync/rollback";
+ WriteUnitOfWork wuow(txn);
+ if (options.flagsSet || cce->getCollectionOptions(txn).flagsSet) {
+ cce->updateFlags(txn, options.flags);
}
- if (!err.empty()) {
- severe() << "rolling back : " << err
- << ". A full resync will be necessary.";
- // TODO: reset minvalid so that we are permanently in fatal state
- // TODO: don't be fatal, but rather, get all the data first.
- throw RSFatalException();
+
+ auto status = collection->setValidator(txn, options.validator);
+ if (!status.isOK()) {
+ throw RSFatalException(str::stream()
+ << "Failed to set validator: " << status.toString());
}
- log() << "rollback 4.3";
+ wuow.commit();
}
- map<string,shared_ptr<Helpers::RemoveSaver> > removeSavers;
+ // we did more reading from primary, so check it again for a rollback (which would mess
+ // us up), and make minValid newer.
+ log() << "rollback 4.2";
- log() << "rollback 4.6";
- // drop collections to drop before doing individual fixups - that might make things faster
- // below actually if there were subsequent inserts to rollback
- for (set<string>::iterator it = fixUpInfo.toDrop.begin();
- it != fixUpInfo.toDrop.end();
- it++) {
- log() << "rollback drop: " << *it;
-
- ScopedTransaction transaction(txn, MODE_IX);
- const NamespaceString nss(*it);
- Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
- Database* db = dbHolder().get(txn, nsToDatabaseSubstring(*it));
- if (db) {
- WriteUnitOfWork wunit(txn);
+ string err;
+ try {
+ newMinValid = rollbackSource.getLastOperation();
+ if (newMinValid.isEmpty()) {
+ err = "can't get minvalid from sync source";
+ } else {
+ OpTime minValid = extractOpTime(newMinValid);
+ log() << "minvalid=" << minValid;
+ setMinValid(txn, minValid);
+ }
+ } catch (const DBException& e) {
+ err = "can't get/set minvalid: ";
+ err += e.what();
+ }
+ if (fixUpInfo.rbid != rollbackSource.getRollbackId()) {
+ // our source rolled back itself, so the data we received isn't necessarily consistent.
+ // however, we've already performed writes, so we have a problem.
+ err += "rbid at primary changed during resync/rollback";
+ }
+ if (!err.empty()) {
+ severe() << "rolling back : " << err << ". A full resync will be necessary.";
+ // TODO: reset minvalid so that we are permanently in fatal state
+ // TODO: don't be fatal, but rather, get all the data first.
+ throw RSFatalException();
+ }
+ log() << "rollback 4.3";
+ }
- shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[*it];
- if (!removeSaver)
- removeSaver.reset(new Helpers::RemoveSaver("rollback", "", *it));
-
- // perform a collection scan and write all documents in the collection to disk
- std::unique_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- *it,
- db->getCollection(*it)));
- BSONObj curObj;
- PlanExecutor::ExecState execState;
- while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) {
- removeSaver->goingToDelete(curObj);
- }
- if (execState != PlanExecutor::IS_EOF) {
- if (execState == PlanExecutor::FAILURE &&
- WorkingSetCommon::isValidStatusMemberObject(curObj)) {
- Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj);
- severe() << "rolling back createCollection on " << *it
- << " failed with " << errorStatus
- << ". A full resync is necessary.";
- }
- else {
- severe() << "rolling back createCollection on " << *it
- << " failed. A full resync is necessary.";
- }
-
- throw RSFatalException();
+ map<string, shared_ptr<Helpers::RemoveSaver>> removeSavers;
+
+ log() << "rollback 4.6";
+ // drop the collections marked for dropping before doing the individual document fixups -
+ // that might actually make things faster below if there were subsequent inserts to roll back
+ for (set<string>::iterator it = fixUpInfo.toDrop.begin(); it != fixUpInfo.toDrop.end(); it++) {
+ log() << "rollback drop: " << *it;
+
+ ScopedTransaction transaction(txn, MODE_IX);
+ const NamespaceString nss(*it);
+ Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
+ Database* db = dbHolder().get(txn, nsToDatabaseSubstring(*it));
+ if (db) {
+ WriteUnitOfWork wunit(txn);
+
+ shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[*it];
+ if (!removeSaver)
+ removeSaver.reset(new Helpers::RemoveSaver("rollback", "", *it));
+
+ // perform a collection scan and write all documents in the collection to disk
+ std::unique_ptr<PlanExecutor> exec(
+ InternalPlanner::collectionScan(txn, *it, db->getCollection(*it)));
+ BSONObj curObj;
+ PlanExecutor::ExecState execState;
+ while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) {
+ removeSaver->goingToDelete(curObj);
+ }
+ if (execState != PlanExecutor::IS_EOF) {
+ if (execState == PlanExecutor::FAILURE &&
+ WorkingSetCommon::isValidStatusMemberObject(curObj)) {
+ Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj);
+ severe() << "rolling back createCollection on " << *it << " failed with "
+ << errorStatus << ". A full resync is necessary.";
+ } else {
+ severe() << "rolling back createCollection on " << *it
+ << " failed. A full resync is necessary.";
}
- db->dropCollection(txn, *it);
- wunit.commit();
+ throw RSFatalException();
}
+
+ db->dropCollection(txn, *it);
+ wunit.commit();
}
+ }
- log() << "rollback 4.7";
- unsigned deletes = 0, updates = 0;
- time_t lastProgressUpdate = time(0);
- time_t progressUpdateGap = 10;
- for (list<pair<DocID, BSONObj> >::iterator it = goodVersions.begin();
- it != goodVersions.end();
- it++) {
- time_t now = time(0);
- if (now - lastProgressUpdate > progressUpdateGap) {
- log() << deletes << " delete and "
- << updates << " update operations processed out of "
- << goodVersions.size() << " total operations";
- lastProgressUpdate = now;
+ log() << "rollback 4.7";
+ unsigned deletes = 0, updates = 0;
+ time_t lastProgressUpdate = time(0);
+ time_t progressUpdateGap = 10;
+ for (list<pair<DocID, BSONObj>>::iterator it = goodVersions.begin(); it != goodVersions.end();
+ it++) {
+ time_t now = time(0);
+ if (now - lastProgressUpdate > progressUpdateGap) {
+ log() << deletes << " delete and " << updates << " update operations processed out of "
+ << goodVersions.size() << " total operations";
+ lastProgressUpdate = now;
+ }
+ const DocID& doc = it->first;
+ BSONObj pattern = doc._id.wrap(); // { _id : ... }
+ try {
+ verify(doc.ns && *doc.ns);
+ if (fixUpInfo.collectionsToResyncData.count(doc.ns)) {
+ // we just synced this entire collection
+ continue;
}
- const DocID& doc = it->first;
- BSONObj pattern = doc._id.wrap(); // { _id : ... }
- try {
- verify(doc.ns && *doc.ns);
- if (fixUpInfo.collectionsToResyncData.count(doc.ns)) {
- // we just synced this entire collection
- continue;
- }
- // keep an archive of items rolled back
- shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[doc.ns];
- if (!removeSaver)
- removeSaver.reset(new Helpers::RemoveSaver("rollback", "", doc.ns));
+ // keep an archive of items rolled back
+ shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[doc.ns];
+ if (!removeSaver)
+ removeSaver.reset(new Helpers::RemoveSaver("rollback", "", doc.ns));
- // todo: lots of overhead in context, this can be faster
- const NamespaceString docNss(doc.ns);
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock docDbLock(txn->lockState(), docNss.db(), MODE_X);
- OldClientContext ctx(txn, doc.ns);
+ // todo: lots of overhead in context, this can be faster
+ const NamespaceString docNss(doc.ns);
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock docDbLock(txn->lockState(), docNss.db(), MODE_X);
+ OldClientContext ctx(txn, doc.ns);
+
+ // Add the doc to our rollback file
+ BSONObj obj;
+ Collection* collection = ctx.db()->getCollection(doc.ns);
+
+ // Do not log an error when undoing an insert on a collection that no longer exists.
+ // It is likely that the collection was dropped as part of rolling back a
+ // createCollection command and regardless, the document no longer exists.
+ if (collection) {
+ bool found = Helpers::findOne(txn, collection, pattern, obj, false);
+ if (found) {
+ removeSaver->goingToDelete(obj);
+ } else {
+ error() << "rollback cannot find object: " << pattern << " in namespace "
+ << doc.ns;
+ }
+ }
- // Add the doc to our rollback file
- BSONObj obj;
- Collection* collection = ctx.db()->getCollection(doc.ns);
+ if (it->second.isEmpty()) {
+ // wasn't on the primary; delete.
+ // TODO 1.6 : can't delete from a capped collection. need to handle that here.
+ deletes++;
- // Do not log an error when undoing an insert on a no longer existent collection.
- // It is likely that the collection was dropped as part of rolling back a
- // createCollection command and regardless, the document no longer exists.
if (collection) {
- bool found = Helpers::findOne(txn, collection, pattern, obj, false);
- if (found) {
- removeSaver->goingToDelete(obj);
- }
- else {
- error() << "rollback cannot find object: " << pattern
- << " in namespace " << doc.ns;
- }
- }
-
- if (it->second.isEmpty()) {
- // wasn't on the primary; delete.
- // TODO 1.6 : can't delete from a capped collection. need to handle that here.
- deletes++;
-
- if (collection) {
- if (collection->isCapped()) {
- // can't delete from a capped collection - so we truncate instead. if
- // this item must go, so must all successors!!!
- try {
- // TODO: IIRC cappedTruncateAfter does not handle completely empty.
- // this will crazy slow if no _id index.
- long long start = Listener::getElapsedTimeMillis();
- RecordId loc = Helpers::findOne(txn, collection, pattern, false);
- if (Listener::getElapsedTimeMillis() - start > 200)
- warning() << "roll back slow no _id index for "
- << doc.ns << " perhaps?";
- // would be faster but requires index:
- // RecordId loc = Helpers::findById(nsd, pattern);
- if (!loc.isNull()) {
- try {
- collection->temp_cappedTruncateAfter(txn, loc, true);
- }
- catch (const DBException& e) {
- if (e.getCode() == 13415) {
- // hack: need to just make cappedTruncate do this...
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- WriteUnitOfWork wunit(txn);
- uassertStatusOK(collection->truncate(txn));
- wunit.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
- txn,
- "truncate",
- collection->ns().ns());
- }
- else {
- throw e;
+ if (collection->isCapped()) {
+ // can't delete from a capped collection - so we truncate instead. if
+ // this item must go, so must all successors!!!
+ try {
+ // TODO: IIRC cappedTruncateAfter does not handle a completely empty collection.
+ // this will be crazy slow if there is no _id index.
+ long long start = Listener::getElapsedTimeMillis();
+ RecordId loc = Helpers::findOne(txn, collection, pattern, false);
+ if (Listener::getElapsedTimeMillis() - start > 200)
+ warning() << "roll back slow no _id index for " << doc.ns
+ << " perhaps?";
+ // would be faster but requires index:
+ // RecordId loc = Helpers::findById(nsd, pattern);
+ if (!loc.isNull()) {
+ try {
+ collection->temp_cappedTruncateAfter(txn, loc, true);
+ } catch (const DBException& e) {
+ if (e.getCode() == 13415) {
+ // hack: need to just make cappedTruncate do this...
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork wunit(txn);
+ uassertStatusOK(collection->truncate(txn));
+ wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
+ txn, "truncate", collection->ns().ns());
+ } else {
+ throw e;
}
}
}
- catch (const DBException& e) {
- error() << "rolling back capped collection rec "
- << doc.ns << ' ' << e.toString();
- }
- }
- else {
- deleteObjects(txn,
- ctx.db(),
- doc.ns,
- pattern,
- PlanExecutor::YIELD_MANUAL,
- true, // justone
- true); // god
+ } catch (const DBException& e) {
+ error() << "rolling back capped collection rec " << doc.ns << ' '
+ << e.toString();
}
- // did we just empty the collection? if so let's check if it even
- // exists on the source.
- if (collection->numRecords(txn) == 0) {
- try {
- NamespaceString nss(doc.ns);
- auto infoResult = rollbackSource.getCollectionInfo(nss);
- if (!infoResult.isOK()) {
- // we should drop
- WriteUnitOfWork wunit(txn);
- ctx.db()->dropCollection(txn, doc.ns);
- wunit.commit();
- }
- }
- catch (const DBException&) {
- // this isn't *that* big a deal, but is bad.
- warning() << "rollback error querying for existence of "
- << doc.ns << " at the primary, ignoring";
+ } else {
+ deleteObjects(txn,
+ ctx.db(),
+ doc.ns,
+ pattern,
+ PlanExecutor::YIELD_MANUAL,
+ true, // justone
+ true); // god
+ }
+ // did we just empty the collection? if so let's check if it even
+ // exists on the source.
+ if (collection->numRecords(txn) == 0) {
+ try {
+ NamespaceString nss(doc.ns);
+ auto infoResult = rollbackSource.getCollectionInfo(nss);
+ if (!infoResult.isOK()) {
+ // we should drop
+ WriteUnitOfWork wunit(txn);
+ ctx.db()->dropCollection(txn, doc.ns);
+ wunit.commit();
}
+ } catch (const DBException&) {
+ // this isn't *that* big a deal, but is bad.
+ warning() << "rollback error querying for existence of " << doc.ns
+ << " at the primary, ignoring";
}
}
}
- else {
- // TODO faster...
- OpDebug debug;
- updates++;
-
- const NamespaceString requestNs(doc.ns);
- UpdateRequest request(requestNs);
-
- request.setQuery(pattern);
- request.setUpdates(it->second);
- request.setGod();
- request.setUpsert();
- UpdateLifecycleImpl updateLifecycle(true, requestNs);
- request.setLifecycle(&updateLifecycle);
-
- update(txn, ctx.db(), request, &debug);
-
- }
- }
- catch (const DBException& e) {
- log() << "exception in rollback ns:" << doc.ns << ' ' << pattern.toString()
- << ' ' << e.toString() << " ndeletes:" << deletes;
- warn = true;
- }
- }
-
- removeSavers.clear(); // this effectively closes all of them
- log() << "rollback 5 d:" << deletes << " u:" << updates;
- log() << "rollback 6";
-
- // clean up oplog
- LOG(2) << "rollback truncate oplog after " <<
- fixUpInfo.commonPoint.toStringPretty();
- {
- const NamespaceString oplogNss(rsOplogName);
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock oplogDbLock(txn->lockState(), oplogNss.db(), MODE_IX);
- Lock::CollectionLock oplogCollectionLoc(txn->lockState(), oplogNss.ns(), MODE_X);
- OldClientContext ctx(txn, rsOplogName);
- Collection* oplogCollection = ctx.db()->getCollection(rsOplogName);
- if (!oplogCollection) {
- fassertFailedWithStatusNoTrace(
- 13423,
- Status(ErrorCodes::UnrecoverableRollbackError, str::stream() <<
- "Can't find " << rsOplogName));
+ } else {
+ // TODO faster...
+ OpDebug debug;
+ updates++;
+
+ const NamespaceString requestNs(doc.ns);
+ UpdateRequest request(requestNs);
+
+ request.setQuery(pattern);
+ request.setUpdates(it->second);
+ request.setGod();
+ request.setUpsert();
+ UpdateLifecycleImpl updateLifecycle(true, requestNs);
+ request.setLifecycle(&updateLifecycle);
+
+ update(txn, ctx.db(), request, &debug);
}
- // TODO: fatal error if this throws?
- oplogCollection->temp_cappedTruncateAfter(txn, fixUpInfo.commonPointOurDiskloc, false);
- }
-
- Status status = getGlobalAuthorizationManager()->initialize(txn);
- if (!status.isOK()) {
- warning() << "Failed to reinitialize auth data after rollback: " << status;
+ } catch (const DBException& e) {
+ log() << "exception in rollback ns:" << doc.ns << ' ' << pattern.toString() << ' '
+ << e.toString() << " ndeletes:" << deletes;
warn = true;
}
-
- // Reload the lastOpTimeApplied value in the replcoord and the lastAppliedHash value in
- // bgsync to reflect our new last op.
- replCoord->resetLastOpTimeFromOplog(txn);
- BackgroundSync::get()->loadLastAppliedHash(txn);
-
- // done
- if (warn)
- warning() << "issues during syncRollback, see log";
- else
- log() << "rollback done";
}
- Status _syncRollback(OperationContext* txn,
- const OplogInterface& localOplog,
- const RollbackSource& rollbackSource,
- ReplicationCoordinator* replCoord,
- const SleepSecondsFn& sleepSecondsFn) {
- invariant(!txn->lockState()->isLocked());
-
- log() << "rollback 0";
-
- /** by doing this, we will not service reads (return an error as we aren't in secondary
- * state. that perhaps is moot because of the write lock above, but that write lock
- * probably gets deferred or removed or yielded later anyway.
- *
- * also, this is better for status reporting - we know what is happening.
- */
- {
- Lock::GlobalWrite globalWrite(txn->lockState());
- if (!replCoord->setFollowerMode(MemberState::RS_ROLLBACK)) {
- return Status(
- ErrorCodes::OperationFailed, str::stream() <<
- "Cannot transition from " << replCoord->getMemberState().toString() <<
- " to " << MemberState(MemberState::RS_ROLLBACK).toString());
- }
+ removeSavers.clear(); // this effectively closes all of them
+ log() << "rollback 5 d:" << deletes << " u:" << updates;
+ log() << "rollback 6";
+
+ // clean up oplog
+ LOG(2) << "rollback truncate oplog after " << fixUpInfo.commonPoint.toStringPretty();
+ {
+ const NamespaceString oplogNss(rsOplogName);
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock oplogDbLock(txn->lockState(), oplogNss.db(), MODE_IX);
+ Lock::CollectionLock oplogCollectionLoc(txn->lockState(), oplogNss.ns(), MODE_X);
+ OldClientContext ctx(txn, rsOplogName);
+ Collection* oplogCollection = ctx.db()->getCollection(rsOplogName);
+ if (!oplogCollection) {
+ fassertFailedWithStatusNoTrace(13423,
+ Status(ErrorCodes::UnrecoverableRollbackError,
+ str::stream() << "Can't find " << rsOplogName));
}
+ // TODO: fatal error if this throws?
+ oplogCollection->temp_cappedTruncateAfter(txn, fixUpInfo.commonPointOurDiskloc, false);
+ }
- FixUpInfo how;
- log() << "rollback 1";
- how.rbid = rollbackSource.getRollbackId();
- {
- log() << "rollback 2 FindCommonPoint";
- try {
- auto processOperationForFixUp = [&how](const BSONObj& operation) {
- return refetch(how, operation);
- };
- auto res = syncRollBackLocalOperations(
- localOplog,
- rollbackSource.getOplog(),
- processOperationForFixUp);
- if (!res.isOK()) {
- switch (res.getStatus().code()) {
- case ErrorCodes::OplogStartMissing:
- case ErrorCodes::UnrecoverableRollbackError:
- sleepSecondsFn(Seconds(1));
- return res.getStatus();
- default:
- throw RSFatalException(res.getStatus().toString());
- }
- }
- else {
- how.commonPoint = res.getValue().first;
- how.commonPointOurDiskloc = res.getValue().second;
- }
- }
- catch (const RSFatalException& e) {
- error() << string(e.what());
- return Status(ErrorCodes::UnrecoverableRollbackError, str::stream() <<
- "need to rollback, but unable to determine common point between"
- "local and remote oplog: " << e.what(),
- 18752);
- }
- catch (const DBException& e) {
- warning() << "rollback 2 exception " << e.toString() << "; sleeping 1 min";
+ Status status = getGlobalAuthorizationManager()->initialize(txn);
+ if (!status.isOK()) {
+ warning() << "Failed to reinitialize auth data after rollback: " << status;
+ warn = true;
+ }
- sleepSecondsFn(Seconds(60));
- throw;
- }
+ // Reload the lastOpTimeApplied value in the replcoord and the lastAppliedHash value in
+ // bgsync to reflect our new last op.
+ replCoord->resetLastOpTimeFromOplog(txn);
+ BackgroundSync::get()->loadLastAppliedHash(txn);
+
+ // done
+ if (warn)
+ warning() << "issues during syncRollback, see log";
+ else
+ log() << "rollback done";
+}
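
The rollback-id (rbid) guard used twice in syncFixUp() above reduces to this pattern -- capture the sync source's rbid before reading from it, then verify it has not changed afterwards (a sketch of the existing control flow, not new behavior):

    // Capture the source's rollback id before fetching anything from it.
    int rbidBefore = rollbackSource.getRollbackId();
    // ... refetch documents / resync collections from the source ...
    if (rbidBefore != rollbackSource.getRollbackId()) {
        // The source itself rolled back mid-flight, so what we read is not
        // necessarily consistent; abandon this attempt.
        return;
    }
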
+
+Status _syncRollback(OperationContext* txn,
+ const OplogInterface& localOplog,
+ const RollbackSource& rollbackSource,
+ ReplicationCoordinator* replCoord,
+ const SleepSecondsFn& sleepSecondsFn) {
+ invariant(!txn->lockState()->isLocked());
+
+ log() << "rollback 0";
+
+ /** by doing this, we will not service reads (we return an error because we aren't in
+ * secondary state). that is perhaps moot because of the global write lock below, but that
+ * write lock probably gets deferred or removed or yielded later anyway.
+ *
+ * also, this is better for status reporting - we know what is happening.
+ */
+ {
+ Lock::GlobalWrite globalWrite(txn->lockState());
+ if (!replCoord->setFollowerMode(MemberState::RS_ROLLBACK)) {
+ return Status(ErrorCodes::OperationFailed,
+ str::stream() << "Cannot transition from "
+ << replCoord->getMemberState().toString() << " to "
+ << MemberState(MemberState::RS_ROLLBACK).toString());
}
+ }
- log() << "rollback 3 fixup";
-
- replCoord->incrementRollbackID();
+ FixUpInfo how;
+ log() << "rollback 1";
+ how.rbid = rollbackSource.getRollbackId();
+ {
+ log() << "rollback 2 FindCommonPoint";
try {
- syncFixUp(txn, how, rollbackSource, replCoord);
- }
- catch (const RSFatalException& e) {
- error() << "exception during rollback: " << e.what();
- return Status(ErrorCodes::UnrecoverableRollbackError, str::stream() <<
- "exception during rollback: " << e.what(),
- 18753);
- }
- catch (...) {
- replCoord->incrementRollbackID();
-
- if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
- warning() << "Failed to transition into " <<
- MemberState(MemberState::RS_RECOVERING) << "; expected to be in state " <<
- MemberState(MemberState::RS_ROLLBACK) << "but found self in " <<
- replCoord->getMemberState();
+ auto processOperationForFixUp =
+ [&how](const BSONObj& operation) { return refetch(how, operation); };
+ auto res = syncRollBackLocalOperations(
+ localOplog, rollbackSource.getOplog(), processOperationForFixUp);
+ if (!res.isOK()) {
+ switch (res.getStatus().code()) {
+ case ErrorCodes::OplogStartMissing:
+ case ErrorCodes::UnrecoverableRollbackError:
+ sleepSecondsFn(Seconds(1));
+ return res.getStatus();
+ default:
+ throw RSFatalException(res.getStatus().toString());
+ }
+ } else {
+ how.commonPoint = res.getValue().first;
+ how.commonPointOurDiskloc = res.getValue().second;
}
-
+ } catch (const RSFatalException& e) {
+ error() << string(e.what());
+ return Status(ErrorCodes::UnrecoverableRollbackError,
+ str::stream()
+ << "need to rollback, but unable to determine common point between "
+ "local and remote oplog: " << e.what(),
+ 18752);
+ } catch (const DBException& e) {
+ warning() << "rollback 2 exception " << e.toString() << "; sleeping 1 min";
+
+ sleepSecondsFn(Seconds(60));
throw;
}
+ }
+
+ log() << "rollback 3 fixup";
+
+ replCoord->incrementRollbackID();
+ try {
+ syncFixUp(txn, how, rollbackSource, replCoord);
+ } catch (const RSFatalException& e) {
+ error() << "exception during rollback: " << e.what();
+ return Status(ErrorCodes::UnrecoverableRollbackError,
+ str::stream() << "exception during rollback: " << e.what(),
+ 18753);
+ } catch (...) {
replCoord->incrementRollbackID();
- // success - leave "ROLLBACK" state
- // can go to SECONDARY once minvalid is achieved
if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
- warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING) <<
- "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK) <<
- "but found self in " << replCoord->getMemberState();
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
+ << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
+ << " but found self in " << replCoord->getMemberState();
}
- return Status::OK();
+ throw;
}
-
-} // namespace
-
- Status syncRollback(OperationContext* txn,
- const OpTime& lastOpTimeApplied,
- const OplogInterface& localOplog,
- const RollbackSource& rollbackSource,
- ReplicationCoordinator* replCoord,
- const SleepSecondsFn& sleepSecondsFn) {
-
- invariant(txn);
- invariant(replCoord);
-
- // check that we are at minvalid, otherwise we cannot rollback as we may be in an
- // inconsistent state
- {
- OpTime minvalid = getMinValid(txn);
- if( minvalid > lastOpTimeApplied ) {
- severe() << "need to rollback, but in inconsistent state" << endl;
- return Status(ErrorCodes::UnrecoverableRollbackError, str::stream() <<
- "need to rollback, but in inconsistent state. " <<
- "minvalid: " << minvalid.toString() << " our last optime: " <<
- lastOpTimeApplied.toString(),
- 18750);
- }
- }
-
- log() << "beginning rollback" << rsLog;
-
- DisableDocumentValidation validationDisabler(txn);
- txn->setReplicatedWrites(false);
- Status status = _syncRollback(txn,
- localOplog,
- rollbackSource,
- replCoord,
- sleepSecondsFn);
-
- log() << "rollback finished" << rsLog;
- return status;
+ replCoord->incrementRollbackID();
+
+ // success - leave "ROLLBACK" state
+ // can go to SECONDARY once minvalid is achieved
+ if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
+ << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
+ << " but found self in " << replCoord->getMemberState();
}
- Status syncRollback(OperationContext* txn,
- const OpTime& lastOpTimeWritten,
- const OplogInterface& localOplog,
- const RollbackSource& rollbackSource,
- ReplicationCoordinator* replCoord) {
-
- return syncRollback(txn,
- lastOpTimeWritten,
- localOplog,
- rollbackSource,
- replCoord,
- [](Seconds seconds) { sleepsecs(seconds.count()); });
+ return Status::OK();
+}
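
_syncRollback() above brackets the fix-up with follower-state transitions and rollback-id bumps; on both the success path and the rethrow path it attempts to leave ROLLBACK for RECOVERING. In outline (a simplified sketch of the control flow above; the message string is abbreviated):

    if (!replCoord->setFollowerMode(MemberState::RS_ROLLBACK)) {
        return Status(ErrorCodes::OperationFailed, "cannot transition to ROLLBACK");
    }
    replCoord->incrementRollbackID();
    try {
        syncFixUp(txn, how, rollbackSource, replCoord);
    } catch (...) {
        replCoord->incrementRollbackID();
        replCoord->setFollowerMode(MemberState::RS_RECOVERING);  // best effort; failure is logged
        throw;
    }
    replCoord->incrementRollbackID();
    // Can go to SECONDARY once minvalid is achieved.
    replCoord->setFollowerMode(MemberState::RS_RECOVERING);
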
+
+} // namespace
+
+Status syncRollback(OperationContext* txn,
+ const OpTime& lastOpTimeApplied,
+ const OplogInterface& localOplog,
+ const RollbackSource& rollbackSource,
+ ReplicationCoordinator* replCoord,
+ const SleepSecondsFn& sleepSecondsFn) {
+ invariant(txn);
+ invariant(replCoord);
+
+ // check that we are at minvalid, otherwise we cannot rollback as we may be in an
+ // inconsistent state
+ {
+ OpTime minvalid = getMinValid(txn);
+ if (minvalid > lastOpTimeApplied) {
+ severe() << "need to rollback, but in inconsistent state" << endl;
+ return Status(ErrorCodes::UnrecoverableRollbackError,
+ str::stream() << "need to rollback, but in inconsistent state. "
+ << "minvalid: " << minvalid.toString()
+ << " our last optime: " << lastOpTimeApplied.toString(),
+ 18750);
+ }
}
-} // namespace repl
-} // namespace mongo
+ log() << "beginning rollback" << rsLog;
+
+ DisableDocumentValidation validationDisabler(txn);
+ txn->setReplicatedWrites(false);
+ Status status = _syncRollback(txn, localOplog, rollbackSource, replCoord, sleepSecondsFn);
+
+ log() << "rollback finished" << rsLog;
+ return status;
+}
+
+Status syncRollback(OperationContext* txn,
+ const OpTime& lastOpTimeWritten,
+ const OplogInterface& localOplog,
+ const RollbackSource& rollbackSource,
+ ReplicationCoordinator* replCoord) {
+ return syncRollback(txn,
+ lastOpTimeWritten,
+ localOplog,
+ rollbackSource,
+ replCoord,
+ [](Seconds seconds) { sleepsecs(seconds.count()); });
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_rollback.h b/src/mongo/db/repl/rs_rollback.h
index 4409d9e0be9..793521393a9 100644
--- a/src/mongo/db/repl/rs_rollback.h
+++ b/src/mongo/db/repl/rs_rollback.h
@@ -37,56 +37,56 @@
namespace mongo {
- class DBClientConnection;
- class NamespaceString;
- class OperationContext;
+class DBClientConnection;
+class NamespaceString;
+class OperationContext;
namespace repl {
- class OplogInterface;
- class OpTime;
- class ReplicationCoordinator;
- class RollbackSource;
+class OplogInterface;
+class OpTime;
+class ReplicationCoordinator;
+class RollbackSource;
- /**
- * Initiates the rollback process.
- * This function assumes the preconditions for undertaking rollback have already been met;
- * we have ops in our oplog that our sync source does not have, and we are not currently
- * PRIMARY.
- * The rollback procedure is:
- * - find the common point between this node and its sync source
- * - undo operations by fetching all documents affected, then replaying
- * the sync source's oplog until we reach the time in the oplog when we fetched the last
- * document.
- * This function can throw std::exception on failures.
- * This function runs a command on the sync source to detect if the sync source rolls back
- * while our rollback is in progress.
- *
- * @param txn Used to read and write from this node's databases
- * @param lastOpTimeWritten The last OpTime applied by the applier
- * @param localOplog reads the oplog on this server.
- * @param rollbackSource interface for sync source:
- * provides oplog; and
- * supports fetching documents and copying collections.
- * @param replCoord Used to track the rollback ID and to change the follower state
- *
- * Failures: Most failures are returned as a status but some failures throw an std::exception.
- */
+/**
+ * Initiates the rollback process.
+ * This function assumes the preconditions for undertaking rollback have already been met;
+ * we have ops in our oplog that our sync source does not have, and we are not currently
+ * PRIMARY.
+ * The rollback procedure is:
+ * - find the common point between this node and its sync source
+ * - undo operations by fetching all documents affected, then replaying
+ * the sync source's oplog until we reach the time in the oplog when we fetched the last
+ * document.
+ * This function can throw std::exception on failures.
+ * This function runs a command on the sync source to detect if the sync source rolls back
+ * while our rollback is in progress.
+ *
+ * @param txn Used to read and write from this node's databases
+ * @param lastOpTimeWritten The last OpTime applied by the applier
+ * @param localOplog reads the oplog on this server.
+ * @param rollbackSource interface for sync source:
+ * provides oplog; and
+ * supports fetching documents and copying collections.
+ * @param replCoord Used to track the rollback ID and to change the follower state
+ *
+ * Failures: Most failures are returned as a status but some failures throw an std::exception.
+ */
- using SleepSecondsFn = stdx::function<void (Seconds)>;
+using SleepSecondsFn = stdx::function<void(Seconds)>;
- Status syncRollback(OperationContext* txn,
- const OpTime& lastOpTimeWritten,
- const OplogInterface& localOplog,
- const RollbackSource& rollbackSource,
- ReplicationCoordinator* replCoord,
- const SleepSecondsFn& sleepSecondsFn);
+Status syncRollback(OperationContext* txn,
+ const OpTime& lastOpTimeWritten,
+ const OplogInterface& localOplog,
+ const RollbackSource& rollbackSource,
+ ReplicationCoordinator* replCoord,
+ const SleepSecondsFn& sleepSecondsFn);
- Status syncRollback(OperationContext* txn,
- const OpTime& lastOpTimeWritten,
- const OplogInterface& localOplog,
- const RollbackSource& rollbackSource,
- ReplicationCoordinator* replCoord);
+Status syncRollback(OperationContext* txn,
+ const OpTime& lastOpTimeWritten,
+ const OplogInterface& localOplog,
+ const RollbackSource& rollbackSource,
+ ReplicationCoordinator* replCoord);
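
A hedged usage sketch of the five-argument overload above -- the caller-side names are hypothetical, and the no-op sleep mirrors what the unit tests inject:

    Status status = syncRollback(txn,
                                 lastOpTimeWritten,
                                 localOplog,
                                 rollbackSource,
                                 replCoord,
                                 [](Seconds) { /* no-op sleep for tests */ });
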
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_rollback_test.cpp b/src/mongo/db/repl/rs_rollback_test.cpp
index c794f280244..a51d6aff511 100644
--- a/src/mongo/db/repl/rs_rollback_test.cpp
+++ b/src/mongo/db/repl/rs_rollback_test.cpp
@@ -54,522 +54,509 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
-
- const OplogInterfaceMock::Operations kEmptyMockOperations;
-
- ReplSettings createReplSettings() {
- ReplSettings settings;
- settings.oplogSize = 5 * 1024 * 1024;
- settings.replSet = "mySet/node1:12345";
- return settings;
+using namespace mongo;
+using namespace mongo::repl;
+
+const OplogInterfaceMock::Operations kEmptyMockOperations;
+
+ReplSettings createReplSettings() {
+ ReplSettings settings;
+ settings.oplogSize = 5 * 1024 * 1024;
+ settings.replSet = "mySet/node1:12345";
+ return settings;
+}
+
+class ReplicationCoordinatorRollbackMock : public ReplicationCoordinatorMock {
+public:
+ ReplicationCoordinatorRollbackMock();
+ void resetLastOpTimeFromOplog(OperationContext* txn) override;
+};
+
+ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollbackMock()
+ : ReplicationCoordinatorMock(createReplSettings()) {}
+
+void ReplicationCoordinatorRollbackMock::resetLastOpTimeFromOplog(OperationContext* txn) {}
+
+class RollbackSourceMock : public RollbackSource {
+public:
+ RollbackSourceMock(std::unique_ptr<OplogInterface> oplog);
+ int getRollbackId() const override;
+ const OplogInterface& getOplog() const override;
+ BSONObj getLastOperation() const override;
+ BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const override;
+ void copyCollectionFromRemote(OperationContext* txn, const NamespaceString& nss) const override;
+ StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const override;
+
+private:
+ std::unique_ptr<OplogInterface> _oplog;
+};
+
+RollbackSourceMock::RollbackSourceMock(std::unique_ptr<OplogInterface> oplog)
+ : _oplog(std::move(oplog)) {}
+
+const OplogInterface& RollbackSourceMock::getOplog() const {
+ return *_oplog;
+}
+
+int RollbackSourceMock::getRollbackId() const {
+ return 0;
+}
+
+BSONObj RollbackSourceMock::getLastOperation() const {
+ auto iter = _oplog->makeIterator();
+ auto result = iter->next();
+ ASSERT_OK(result.getStatus());
+ return result.getValue().first;
+}
+
+BSONObj RollbackSourceMock::findOne(const NamespaceString& nss, const BSONObj& filter) const {
+ return BSONObj();
+}
+
+void RollbackSourceMock::copyCollectionFromRemote(OperationContext* txn,
+ const NamespaceString& nss) const {}
+
+StatusWith<BSONObj> RollbackSourceMock::getCollectionInfo(const NamespaceString& nss) const {
+ return BSON("name" << nss.ns() << "options" << BSONObj());
+}
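
Tests later in this file specialize RollbackSourceMock by overriding a single method; a minimal sketch of that pattern (the class name here is hypothetical; the override mirrors the RollbackSourceLocal used further below):

    class ThrowingRollbackSource : public RollbackSourceMock {
    public:
        using RollbackSourceMock::RollbackSourceMock;  // inherit the oplog constructor
        int getRollbackId() const override {
            uassert(ErrorCodes::UnknownError, "getRollbackId() failed", false);
            return 0;  // unreachable: the uassert above always throws
        }
    };
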
+
+class RSRollbackTest : public unittest::Test {
+protected:
+ std::unique_ptr<OperationContext> _txn;
+
+ // Owned by service context
+ ReplicationCoordinator* _coordinator;
+
+private:
+ void setUp() override;
+ void tearDown() override;
+};
+
+void RSRollbackTest::setUp() {
+ ServiceContext* serviceContext = getGlobalServiceContext();
+ if (!serviceContext->getGlobalStorageEngine()) {
+ // When using the 'devnull' storage engine, it is fine for the temporary directory to
+ // go away after the global storage engine is initialized.
+ unittest::TempDir tempDir("rs_rollback_test");
+ mongo::storageGlobalParams.dbpath = tempDir.path();
+ mongo::storageGlobalParams.engine = "inMemoryExperiment";
+ mongo::storageGlobalParams.engineSetByUser = true;
+ serviceContext->initializeGlobalStorageEngine();
}
- class ReplicationCoordinatorRollbackMock : public ReplicationCoordinatorMock {
- public:
- ReplicationCoordinatorRollbackMock();
- void resetLastOpTimeFromOplog(OperationContext* txn) override;
- };
+ Client::initThreadIfNotAlready();
+ _txn.reset(new OperationContextReplMock(&cc(), 1));
+ _coordinator = new ReplicationCoordinatorRollbackMock();
- ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollbackMock()
- : ReplicationCoordinatorMock(createReplSettings()) { }
+ setGlobalReplicationCoordinator(_coordinator);
- void ReplicationCoordinatorRollbackMock::resetLastOpTimeFromOplog(OperationContext* txn) { }
+ setOplogCollectionName();
+}
- class RollbackSourceMock : public RollbackSource {
- public:
- RollbackSourceMock(std::unique_ptr<OplogInterface> oplog);
- int getRollbackId() const override;
- const OplogInterface& getOplog() const override;
- BSONObj getLastOperation() const override;
- BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const override;
- void copyCollectionFromRemote(OperationContext* txn,
- const NamespaceString& nss) const override;
- StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const override;
- private:
- std::unique_ptr<OplogInterface> _oplog;
- };
-
- RollbackSourceMock::RollbackSourceMock(std::unique_ptr<OplogInterface> oplog)
- : _oplog(std::move(oplog)) { }
-
- const OplogInterface& RollbackSourceMock::getOplog() const {
- return *_oplog;
+void RSRollbackTest::tearDown() {
+ {
+ Lock::GlobalWrite globalLock(_txn->lockState());
+ BSONObjBuilder unused;
+ invariant(mongo::dbHolder().closeAll(_txn.get(), unused, false));
}
-
- int RollbackSourceMock::getRollbackId() const {
- return 0;
- }
-
- BSONObj RollbackSourceMock::getLastOperation() const {
- auto iter = _oplog->makeIterator();
- auto result = iter->next();
- ASSERT_OK(result.getStatus());
- return result.getValue().first;
- }
-
- BSONObj RollbackSourceMock::findOne(const NamespaceString& nss, const BSONObj& filter) const {
- return BSONObj();
- }
-
- void RollbackSourceMock::copyCollectionFromRemote(OperationContext* txn,
- const NamespaceString& nss) const { }
-
- StatusWith<BSONObj> RollbackSourceMock::getCollectionInfo(const NamespaceString& nss) const {
- return BSON("name" << nss.ns() << "options" << BSONObj());
- }
-
- class RSRollbackTest : public unittest::Test {
- protected:
- std::unique_ptr<OperationContext> _txn;
-
- // Owned by service context
- ReplicationCoordinator* _coordinator;
-
- private:
- void setUp() override;
- void tearDown() override;
+ _txn.reset();
+ setGlobalReplicationCoordinator(nullptr);
+}
+
+void noSleep(Seconds seconds) {}
+
+TEST_F(RSRollbackTest, InconsistentMinValid) {
+ repl::setMinValid(_txn.get(), OpTime(Timestamp(Seconds(1), 0), 0));
+ auto status = syncRollback(_txn.get(),
+ OpTime(),
+ OplogInterfaceMock(kEmptyMockOperations),
+ RollbackSourceMock(std::unique_ptr<OplogInterface>(
+ new OplogInterfaceMock(kEmptyMockOperations))),
+ _coordinator,
+ noSleep);
+ ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
+ ASSERT_EQUALS(18750, status.location());
+}
+
+TEST_F(RSRollbackTest, SetFollowerModeFailed) {
+ class ReplicationCoordinatorSetFollowerModeMock : public ReplicationCoordinatorMock {
+ public:
+ ReplicationCoordinatorSetFollowerModeMock()
+ : ReplicationCoordinatorMock(createReplSettings()) {}
+ MemberState getMemberState() const override {
+ return MemberState::RS_DOWN;
+ }
+ bool setFollowerMode(const MemberState& newState) override {
+ return false;
+ }
};
-
- void RSRollbackTest::setUp() {
- ServiceContext* serviceContext = getGlobalServiceContext();
- if (!serviceContext->getGlobalStorageEngine()) {
- // When using the 'devnull' storage engine, it is fine for the temporary directory to
- // go away after the global storage engine is initialized.
- unittest::TempDir tempDir("rs_rollback_test");
- mongo::storageGlobalParams.dbpath = tempDir.path();
- mongo::storageGlobalParams.dbpath = tempDir.path();
- mongo::storageGlobalParams.engine = "inMemoryExperiment";
- mongo::storageGlobalParams.engineSetByUser = true;
- serviceContext->initializeGlobalStorageEngine();
+ _coordinator = new ReplicationCoordinatorSetFollowerModeMock();
+ setGlobalReplicationCoordinator(_coordinator);
+
+ ASSERT_EQUALS(ErrorCodes::OperationFailed,
+ syncRollback(_txn.get(),
+ OpTime(),
+ OplogInterfaceMock(kEmptyMockOperations),
+ RollbackSourceMock(std::unique_ptr<OplogInterface>(
+ new OplogInterfaceMock(kEmptyMockOperations))),
+ _coordinator,
+ noSleep).code());
+}
+
+TEST_F(RSRollbackTest, OplogStartMissing) {
+ OpTime ts(Timestamp(Seconds(1), 0), 0);
+ auto operation =
+ std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId());
+ ASSERT_EQUALS(
+ ErrorCodes::OplogStartMissing,
+ syncRollback(_txn.get(),
+ OpTime(),
+ OplogInterfaceMock(kEmptyMockOperations),
+ RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ operation,
+ }))),
+ _coordinator,
+ noSleep).code());
+}
+
+TEST_F(RSRollbackTest, NoRemoteOpLog) {
+ OpTime ts(Timestamp(Seconds(1), 0), 0);
+ auto operation =
+ std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId());
+ auto status = syncRollback(_txn.get(),
+ ts,
+ OplogInterfaceMock({operation}),
+ RollbackSourceMock(std::unique_ptr<OplogInterface>(
+ new OplogInterfaceMock(kEmptyMockOperations))),
+ _coordinator,
+ noSleep);
+ ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
+ ASSERT_EQUALS(18752, status.location());
+}
+
+TEST_F(RSRollbackTest, RemoteGetRollbackIdThrows) {
+ OpTime ts(Timestamp(Seconds(1), 0), 0);
+ auto operation =
+ std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId());
+ class RollbackSourceLocal : public RollbackSourceMock {
+ public:
+ RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
+ : RollbackSourceMock(std::move(oplog)) {}
+ int getRollbackId() const override {
+ uassert(ErrorCodes::UnknownError, "getRollbackId() failed", false);
}
+ };
+ ASSERT_THROWS_CODE(syncRollback(_txn.get(),
+ ts,
+ OplogInterfaceMock({operation}),
+ RollbackSourceLocal(std::unique_ptr<OplogInterface>(
+ new OplogInterfaceMock(kEmptyMockOperations))),
+ _coordinator,
+ noSleep),
+ UserException,
+ ErrorCodes::UnknownError);
+}
+
+TEST_F(RSRollbackTest, BothOplogsAtCommonPoint) {
+ createOplog(_txn.get());
+ OpTime ts(Timestamp(Seconds(1), 0), 1);
+ auto operation =
+ std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId(1));
+ ASSERT_OK(
+ syncRollback(_txn.get(),
+ ts,
+ OplogInterfaceMock({operation}),
+ RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ operation,
+ }))),
+ _coordinator,
+ noSleep));
+}
- Client::initThreadIfNotAlready();
- _txn.reset(new OperationContextReplMock(&cc(), 1));
- _coordinator = new ReplicationCoordinatorRollbackMock();
-
- setGlobalReplicationCoordinator(_coordinator);
-
- setOplogCollectionName();
- }
+/**
+ * Create test collection
+ */
+void _createCollection(OperationContext* txn,
+ const NamespaceString& nss,
+ const CollectionOptions& options) {
+ Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
+ mongo::WriteUnitOfWork wuow(txn);
+ auto db = dbHolder().openDb(txn, nss.db());
+ ASSERT_TRUE(db);
+ db->dropCollection(txn, nss.ns());
+ ASSERT_TRUE(db->createCollection(txn, nss.ns(), options));
+ wuow.commit();
+}
+
+void _createCollection(OperationContext* txn,
+ const std::string& nss,
+ const CollectionOptions& options) {
+ _createCollection(txn, NamespaceString(nss), options);
+}
- void RSRollbackTest::tearDown() {
- {
- Lock::GlobalWrite globalLock(_txn->lockState());
- BSONObjBuilder unused;
- invariant(mongo::dbHolder().closeAll(_txn.get(), unused, false));
+/**
+ * Test function to roll back a delete operation.
+ * Returns the number of records in the collection after rolling back the delete operation.
+ * If the collection does not exist after rolling back, returns -1.
+ */
+int _testRollBackDelete(OperationContext* txn,
+ ReplicationCoordinator* coordinator,
+ const BSONObj& documentAtSource) {
+ auto commonOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
+ auto deleteOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op"
+ << "d"
+ << "ns"
+ << "test.t"
+ << "o" << BSON("_id" << 0)),
+ RecordId(2));
+ class RollbackSourceLocal : public RollbackSourceMock {
+ public:
+ RollbackSourceLocal(const BSONObj& documentAtSource, std::unique_ptr<OplogInterface> oplog)
+ : RollbackSourceMock(std::move(oplog)),
+ called(false),
+ _documentAtSource(documentAtSource) {}
+ BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const {
+ called = true;
+ return _documentAtSource;
}
- _txn.reset();
- setGlobalReplicationCoordinator(nullptr);
- }
-
- void noSleep(Seconds seconds) {}
-
- TEST_F(RSRollbackTest, InconsistentMinValid) {
- repl::setMinValid(_txn.get(), OpTime(Timestamp(Seconds(1), 0), 0));
- auto status =
- syncRollback(
- _txn.get(),
- OpTime(),
- OplogInterfaceMock(kEmptyMockOperations),
- RollbackSourceMock(std::unique_ptr<OplogInterface>(
- new OplogInterfaceMock(kEmptyMockOperations))),
- _coordinator,
- noSleep);
- ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
- ASSERT_EQUALS(18750, status.location());
- }
-
- TEST_F(RSRollbackTest, SetFollowerModeFailed) {
- class ReplicationCoordinatorSetFollowerModeMock : public ReplicationCoordinatorMock {
- public:
- ReplicationCoordinatorSetFollowerModeMock()
- : ReplicationCoordinatorMock(createReplSettings()) { }
- MemberState getMemberState() const override { return MemberState::RS_DOWN; }
- bool setFollowerMode(const MemberState& newState) override { return false; }
- };
- _coordinator = new ReplicationCoordinatorSetFollowerModeMock();
- setGlobalReplicationCoordinator(_coordinator);
-
- ASSERT_EQUALS(
- ErrorCodes::OperationFailed,
- syncRollback(
- _txn.get(),
- OpTime(),
- OplogInterfaceMock(kEmptyMockOperations),
- RollbackSourceMock(std::unique_ptr<OplogInterface>(
- new OplogInterfaceMock(kEmptyMockOperations))),
- _coordinator,
- noSleep).code());
- }
-
- TEST_F(RSRollbackTest, OplogStartMissing) {
- OpTime ts(Timestamp(Seconds(1), 0), 0);
- auto operation =
- std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId());
- ASSERT_EQUALS(
- ErrorCodes::OplogStartMissing,
- syncRollback(
- _txn.get(),
- OpTime(),
- OplogInterfaceMock(kEmptyMockOperations),
- RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- operation,
- }))),
- _coordinator,
- noSleep).code());
- }
+ mutable bool called;
- TEST_F(RSRollbackTest, NoRemoteOpLog) {
- OpTime ts(Timestamp(Seconds(1), 0), 0);
- auto operation =
- std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId());
- auto status =
- syncRollback(
- _txn.get(),
- ts,
- OplogInterfaceMock({operation}),
- RollbackSourceMock(std::unique_ptr<OplogInterface>(
- new OplogInterfaceMock(kEmptyMockOperations))),
- _coordinator,
- noSleep);
- ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
- ASSERT_EQUALS(18752, status.location());
- }
-
- TEST_F(RSRollbackTest, RemoteGetRollbackIdThrows) {
- OpTime ts(Timestamp(Seconds(1), 0), 0);
- auto operation =
- std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId());
- class RollbackSourceLocal : public RollbackSourceMock {
- public:
- RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
- : RollbackSourceMock(std::move(oplog)) { }
- int getRollbackId() const override {
- uassert(ErrorCodes::UnknownError, "getRollbackId() failed", false);
- }
- };
- ASSERT_THROWS_CODE(
- syncRollback(
- _txn.get(),
- ts,
- OplogInterfaceMock({operation}),
- RollbackSourceLocal(std::unique_ptr<OplogInterface>(
- new OplogInterfaceMock(kEmptyMockOperations))),
- _coordinator,
- noSleep),
- UserException,
- ErrorCodes::UnknownError);
- }
-
- TEST_F(RSRollbackTest, BothOplogsAtCommonPoint) {
- createOplog(_txn.get());
- OpTime ts(Timestamp(Seconds(1), 0), 1);
- auto operation =
- std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId(1));
- ASSERT_OK(
- syncRollback(
- _txn.get(),
- ts,
- OplogInterfaceMock({operation}),
- RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- operation,
- }))),
- _coordinator,
- noSleep));
+ private:
+ BSONObj _documentAtSource;
+ };
+ RollbackSourceLocal rollbackSource(documentAtSource,
+ std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ commonOperation,
+ })));
+ OpTime opTime(deleteOperation.first["ts"].timestamp(), deleteOperation.first["h"].Long());
+ ASSERT_OK(syncRollback(txn,
+ opTime,
+ OplogInterfaceMock({deleteOperation, commonOperation}),
+ rollbackSource,
+ coordinator,
+ noSleep));
+ ASSERT_TRUE(rollbackSource.called);
+
+ Lock::DBLock dbLock(txn->lockState(), "test", MODE_S);
+ Lock::CollectionLock collLock(txn->lockState(), "test.t", MODE_S);
+ auto db = dbHolder().get(txn, "test");
+ ASSERT_TRUE(db);
+ auto collection = db->getCollection("test.t");
+ if (!collection) {
+ return -1;
}
-
- /**
- * Create test collection
- */
- void _createCollection(OperationContext* txn,
- const NamespaceString& nss,
- const CollectionOptions& options) {
- Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_X);
- mongo::WriteUnitOfWork wuow(txn);
- auto db = dbHolder().openDb(txn, nss.db());
+ return collection->getRecordStore()->numRecords(txn);
+}
+
+TEST_F(RSRollbackTest, RollBackDeleteNoDocumentAtSourceCollectionDoesNotExist) {
+ createOplog(_txn.get());
+ ASSERT_EQUALS(-1, _testRollBackDelete(_txn.get(), _coordinator, BSONObj()));
+}
+
+TEST_F(RSRollbackTest, RollBackDeleteNoDocumentAtSourceCollectionExistsNonCapped) {
+ createOplog(_txn.get());
+ _createCollection(_txn.get(), "test.t", CollectionOptions());
+ _testRollBackDelete(_txn.get(), _coordinator, BSONObj());
+ ASSERT_EQUALS(0, _testRollBackDelete(_txn.get(), _coordinator, BSONObj()));
+}
+
+TEST_F(RSRollbackTest, RollBackDeleteNoDocumentAtSourceCollectionExistsCapped) {
+ createOplog(_txn.get());
+ CollectionOptions options;
+ options.capped = true;
+ _createCollection(_txn.get(), "test.t", options);
+ ASSERT_EQUALS(0, _testRollBackDelete(_txn.get(), _coordinator, BSONObj()));
+}
+
+TEST_F(RSRollbackTest, RollBackDeleteRestoreDocument) {
+ createOplog(_txn.get());
+ _createCollection(_txn.get(), "test.t", CollectionOptions());
+ BSONObj doc = BSON("_id" << 0 << "a" << 1);
+ _testRollBackDelete(_txn.get(), _coordinator, doc);
+ ASSERT_EQUALS(1, _testRollBackDelete(_txn.get(), _coordinator, doc));
+}
+
+TEST_F(RSRollbackTest, RollbackUnknownCommand) {
+ createOplog(_txn.get());
+ auto commonOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
+ auto unknownCommandOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op"
+ << "c"
+ << "ns"
+ << "test.t"
+ << "o" << BSON("unknown_command"
+ << "t")),
+ RecordId(2));
+ {
+ Lock::DBLock dbLock(_txn->lockState(), "test", MODE_X);
+ mongo::WriteUnitOfWork wuow(_txn.get());
+ auto db = dbHolder().openDb(_txn.get(), "test");
ASSERT_TRUE(db);
- db->dropCollection(txn, nss.ns());
- ASSERT_TRUE(db->createCollection(txn, nss.ns(), options));
+ ASSERT_TRUE(db->getOrCreateCollection(_txn.get(), "test.t"));
wuow.commit();
}
-
- void _createCollection(OperationContext* txn,
- const std::string& nss,
- const CollectionOptions& options) {
- _createCollection(txn, NamespaceString(nss), options);
- }
-
- /**
- * Test function to roll back a delete operation.
- * Returns number of records in collection after rolling back delete operation.
- * If collection does not exist after rolling back, returns -1.
- */
- int _testRollBackDelete(OperationContext* txn,
- ReplicationCoordinator* coordinator,
- const BSONObj& documentAtSource) {
- auto commonOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
- auto deleteOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) <<
- "h" << 1LL <<
- "op" << "d" <<
- "ns" << "test.t" <<
- "o" << BSON("_id" << 0)),
- RecordId(2));
- class RollbackSourceLocal : public RollbackSourceMock {
- public:
- RollbackSourceLocal(const BSONObj& documentAtSource,
- std::unique_ptr<OplogInterface> oplog)
- : RollbackSourceMock(std::move(oplog)),
- called(false),
- _documentAtSource(documentAtSource) { }
- BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const {
- called = true;
- return _documentAtSource;
- }
- mutable bool called;
- private:
- BSONObj _documentAtSource;
- };
- RollbackSourceLocal rollbackSource(
- documentAtSource,
- std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- commonOperation,
- })));
- OpTime opTime(deleteOperation.first["ts"].timestamp(),
- deleteOperation.first["h"].Long());
- ASSERT_OK(
- syncRollback(
- txn,
- opTime,
- OplogInterfaceMock({deleteOperation, commonOperation}),
- rollbackSource,
- coordinator,
- noSleep));
- ASSERT_TRUE(rollbackSource.called);
-
- Lock::DBLock dbLock(txn->lockState(), "test", MODE_S);
- Lock::CollectionLock collLock(txn->lockState(), "test.t", MODE_S);
- auto db = dbHolder().get(txn, "test");
- ASSERT_TRUE(db);
- auto collection = db->getCollection("test.t");
- if (!collection) {
- return -1;
+ OpTime opTime(unknownCommandOperation.first["ts"].timestamp(),
+ unknownCommandOperation.first["h"].Long());
+ auto status =
+ syncRollback(_txn.get(),
+ opTime,
+ OplogInterfaceMock({unknownCommandOperation, commonOperation}),
+ RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ commonOperation,
+ }))),
+ _coordinator,
+ noSleep);
+ ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
+ ASSERT_EQUALS(18751, status.location());
+}
+
+TEST_F(RSRollbackTest, RollbackDropCollectionCommand) {
+ createOplog(_txn.get());
+ auto commonOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
+ auto dropCollectionOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op"
+ << "c"
+ << "ns"
+ << "test.t"
+ << "o" << BSON("drop"
+ << "t")),
+ RecordId(2));
+ class RollbackSourceLocal : public RollbackSourceMock {
+ public:
+ RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
+ : RollbackSourceMock(std::move(oplog)), called(false) {}
+ void copyCollectionFromRemote(OperationContext* txn,
+ const NamespaceString& nss) const override {
+ called = true;
}
- return collection->getRecordStore()->numRecords(txn);
- }
-
- TEST_F(RSRollbackTest, RollBackDeleteNoDocumentAtSourceCollectionDoesNotExist) {
- createOplog(_txn.get());
- ASSERT_EQUALS(-1, _testRollBackDelete(_txn.get(), _coordinator, BSONObj()));
- }
-
- TEST_F(RSRollbackTest, RollBackDeleteNoDocumentAtSourceCollectionExistsNonCapped) {
- createOplog(_txn.get());
- _createCollection(_txn.get(), "test.t", CollectionOptions());
- _testRollBackDelete(_txn.get(), _coordinator, BSONObj());
- ASSERT_EQUALS(0, _testRollBackDelete(_txn.get(), _coordinator, BSONObj()));
- }
-
- TEST_F(RSRollbackTest, RollBackDeleteNoDocumentAtSourceCollectionExistsCapped) {
- createOplog(_txn.get());
- CollectionOptions options;
- options.capped = true;
- _createCollection(_txn.get(), "test.t", options);
- ASSERT_EQUALS(0, _testRollBackDelete(_txn.get(), _coordinator, BSONObj()));
- }
-
- TEST_F(RSRollbackTest, RollBackDeleteRestoreDocument) {
- createOplog(_txn.get());
- _createCollection(_txn.get(), "test.t", CollectionOptions());
- BSONObj doc = BSON("_id" << 0 << "a" << 1);
- _testRollBackDelete(_txn.get(), _coordinator, doc);
- ASSERT_EQUALS(1, _testRollBackDelete(_txn.get(), _coordinator, doc));
+ mutable bool called;
+ };
+ RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ commonOperation,
+ })));
+ _createCollection(_txn.get(), "test.t", CollectionOptions());
+ OpTime opTime(dropCollectionOperation.first["ts"].timestamp(),
+ dropCollectionOperation.first["h"].Long());
+ ASSERT_OK(syncRollback(_txn.get(),
+ opTime,
+ OplogInterfaceMock({dropCollectionOperation, commonOperation}),
+ rollbackSource,
+ _coordinator,
+ noSleep));
+ ASSERT_TRUE(rollbackSource.called);
+}
+
+TEST_F(RSRollbackTest, RollbackCreateCollectionCommand) {
+ createOplog(_txn.get());
+ auto commonOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
+ auto createCollectionOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op"
+ << "c"
+ << "ns"
+ << "test.t"
+ << "o" << BSON("create"
+ << "t")),
+ RecordId(2));
+ RollbackSourceMock rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ commonOperation,
+ })));
+ _createCollection(_txn.get(), "test.t", CollectionOptions());
+ OpTime opTime(createCollectionOperation.first["ts"].timestamp(),
+ createCollectionOperation.first["h"].Long());
+ ASSERT_OK(syncRollback(_txn.get(),
+ opTime,
+ OplogInterfaceMock({createCollectionOperation, commonOperation}),
+ rollbackSource,
+ _coordinator,
+ noSleep));
+ {
+ Lock::DBLock dbLock(_txn->lockState(), "test", MODE_S);
+ auto db = dbHolder().get(_txn.get(), "test");
+ ASSERT_TRUE(db);
+ ASSERT_FALSE(db->getCollection("test.t"));
}
-
- TEST_F(RSRollbackTest, RollbackUnknownCommand) {
- createOplog(_txn.get());
- auto commonOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
- auto unknownCommandOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) <<
- "h" << 1LL <<
- "op" << "c" <<
- "ns" << "test.t" <<
- "o" << BSON("unknown_command" << "t")),
- RecordId(2));
- {
- Lock::DBLock dbLock(_txn->lockState(), "test", MODE_X);
- mongo::WriteUnitOfWork wuow(_txn.get());
- auto db = dbHolder().openDb(_txn.get(), "test");
- ASSERT_TRUE(db);
- ASSERT_TRUE(db->getOrCreateCollection(_txn.get(), "test.t"));
- wuow.commit();
+}
+
+TEST_F(RSRollbackTest, RollbackCollectionModificationCommand) {
+ createOplog(_txn.get());
+ auto commonOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
+ auto collectionModificationOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op"
+ << "c"
+ << "ns"
+ << "test.t"
+ << "o" << BSON("collMod"
+ << "t"
+ << "noPadding" << false)),
+ RecordId(2));
+ class RollbackSourceLocal : public RollbackSourceMock {
+ public:
+ RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
+ : RollbackSourceMock(std::move(oplog)), called(false) {}
+ StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const {
+ called = true;
+ return RollbackSourceMock::getCollectionInfo(nss);
}
- OpTime opTime(unknownCommandOperation.first["ts"].timestamp(),
- unknownCommandOperation.first["h"].Long());
- auto status =
- syncRollback(
- _txn.get(),
- opTime,
- OplogInterfaceMock({unknownCommandOperation, commonOperation}),
- RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- commonOperation,
- }))),
- _coordinator,
- noSleep);
- ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
- ASSERT_EQUALS(18751, status.location());
- }
-
- TEST_F(RSRollbackTest, RollbackDropCollectionCommand) {
- createOplog(_txn.get());
- auto commonOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
- auto dropCollectionOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) <<
- "h" << 1LL <<
- "op" << "c" <<
- "ns" << "test.t" <<
- "o" << BSON("drop" << "t")),
- RecordId(2));
- class RollbackSourceLocal : public RollbackSourceMock {
- public:
- RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
- : RollbackSourceMock(std::move(oplog)),
- called(false) { }
- void copyCollectionFromRemote(OperationContext* txn,
- const NamespaceString& nss) const override {
- called = true;
- }
- mutable bool called;
- };
- RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- commonOperation,
- })));
- _createCollection(_txn.get(), "test.t", CollectionOptions());
- OpTime opTime(dropCollectionOperation.first["ts"].timestamp(),
- dropCollectionOperation.first["h"].Long());
- ASSERT_OK(
- syncRollback(
- _txn.get(),
- opTime,
- OplogInterfaceMock({dropCollectionOperation, commonOperation}),
- rollbackSource,
- _coordinator,
- noSleep));
- ASSERT_TRUE(rollbackSource.called);
- }
-
- TEST_F(RSRollbackTest, RollbackCreateCollectionCommand) {
- createOplog(_txn.get());
- auto commonOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
- auto createCollectionOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) <<
- "h" << 1LL <<
- "op" << "c" <<
- "ns" << "test.t" <<
- "o" << BSON("create" << "t")),
- RecordId(2));
- RollbackSourceMock rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- commonOperation,
- })));
- _createCollection(_txn.get(), "test.t", CollectionOptions());
- OpTime opTime(createCollectionOperation.first["ts"].timestamp(),
- createCollectionOperation.first["h"].Long());
- ASSERT_OK(
- syncRollback(
- _txn.get(),
- opTime,
- OplogInterfaceMock({createCollectionOperation, commonOperation}),
- rollbackSource,
- _coordinator,
- noSleep));
- {
- Lock::DBLock dbLock(_txn->lockState(), "test", MODE_S);
- auto db = dbHolder().get(_txn.get(), "test");
- ASSERT_TRUE(db);
- ASSERT_FALSE(db->getCollection("test.t"));
+ mutable bool called;
+ };
+ RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ commonOperation,
+ })));
+ _createCollection(_txn.get(), "test.t", CollectionOptions());
+ OpTime opTime(collectionModificationOperation.first["ts"].timestamp(),
+ collectionModificationOperation.first["h"].Long());
+ ASSERT_OK(syncRollback(_txn.get(),
+ opTime,
+ OplogInterfaceMock({collectionModificationOperation, commonOperation}),
+ rollbackSource,
+ _coordinator,
+ noSleep));
+ ASSERT_TRUE(rollbackSource.called);
+}
+
+TEST_F(RSRollbackTest, RollbackCollectionModificationCommandInvalidCollectionOptions) {
+ createOplog(_txn.get());
+ auto commonOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
+ auto collectionModificationOperation =
+ std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op"
+ << "c"
+ << "ns"
+ << "test.t"
+ << "o" << BSON("collMod"
+ << "t"
+ << "noPadding" << false)),
+ RecordId(2));
+ class RollbackSourceLocal : public RollbackSourceMock {
+ public:
+ RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
+ : RollbackSourceMock(std::move(oplog)) {}
+ StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const {
+ return BSON("name" << nss.ns() << "options" << 12345);
}
- }
-
- TEST_F(RSRollbackTest, RollbackCollectionModificationCommand) {
- createOplog(_txn.get());
- auto commonOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
- auto collectionModificationOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) <<
- "h" << 1LL <<
- "op" << "c" <<
- "ns" << "test.t" <<
- "o" << BSON("collMod" << "t" << "noPadding" << false)),
- RecordId(2));
- class RollbackSourceLocal : public RollbackSourceMock {
- public:
- RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
- : RollbackSourceMock(std::move(oplog)),
- called(false) { }
- StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const {
- called = true;
- return RollbackSourceMock::getCollectionInfo(nss);
- }
- mutable bool called;
- };
- RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- commonOperation,
- })));
- _createCollection(_txn.get(), "test.t", CollectionOptions());
- OpTime opTime(collectionModificationOperation.first["ts"].timestamp(),
- collectionModificationOperation.first["h"].Long());
- ASSERT_OK(
- syncRollback(
- _txn.get(),
- opTime,
- OplogInterfaceMock({collectionModificationOperation, commonOperation}),
- rollbackSource,
- _coordinator,
- noSleep));
- ASSERT_TRUE(rollbackSource.called);
- }
-
- TEST_F(RSRollbackTest, RollbackCollectionModificationCommandInvalidCollectionOptions) {
- createOplog(_txn.get());
- auto commonOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1));
- auto collectionModificationOperation =
- std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) <<
- "h" << 1LL <<
- "op" << "c" <<
- "ns" << "test.t" <<
- "o" << BSON("collMod" << "t" << "noPadding" << false)),
- RecordId(2));
- class RollbackSourceLocal : public RollbackSourceMock {
- public:
- RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog)
- : RollbackSourceMock(std::move(oplog)) { }
- StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const {
- return BSON("name" << nss.ns() << "options" << 12345);
- }
- };
- RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
- commonOperation,
- })));
- _createCollection(_txn.get(), "test.t", CollectionOptions());
- OpTime opTime(collectionModificationOperation.first["ts"].timestamp(),
- collectionModificationOperation.first["h"].Long());
- auto status =
- syncRollback(
- _txn.get(),
- opTime,
- OplogInterfaceMock({collectionModificationOperation, commonOperation}),
- rollbackSource,
- _coordinator,
- noSleep);
- ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
- ASSERT_EQUALS(18753, status.location());
- }
-
-} // namespace
+ };
+ RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({
+ commonOperation,
+ })));
+ _createCollection(_txn.get(), "test.t", CollectionOptions());
+ OpTime opTime(collectionModificationOperation.first["ts"].timestamp(),
+ collectionModificationOperation.first["h"].Long());
+ auto status =
+ syncRollback(_txn.get(),
+ opTime,
+ OplogInterfaceMock({collectionModificationOperation, commonOperation}),
+ rollbackSource,
+ _coordinator,
+ noSleep);
+ ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code());
+ ASSERT_EQUALS(18753, status.location());
+}
+
+} // namespace
diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp
index 3f2ee40b963..b1332337370 100644
--- a/src/mongo/db/repl/rs_sync.cpp
+++ b/src/mongo/db/repl/rs_sync.cpp
@@ -62,88 +62,84 @@
namespace mongo {
namespace repl {
- void runSyncThread() {
- Client::initThread("rsSync");
- AuthorizationSession::get(cc())->grantInternalAuthorization();
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
-
- // Set initial indexPrefetch setting
- const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
- if (!prefetch.empty()) {
- BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
- if (prefetch == "none")
- prefetchConfig = BackgroundSync::PREFETCH_NONE;
- else if (prefetch == "_id_only")
- prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
- else if (prefetch == "all")
- prefetchConfig = BackgroundSync::PREFETCH_ALL;
- else {
- warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
- << "to \"all\"";
- }
- BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
+void runSyncThread() {
+ Client::initThread("rsSync");
+ AuthorizationSession::get(cc())->grantInternalAuthorization();
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+
+ // Set initial indexPrefetch setting
+ const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
+ if (!prefetch.empty()) {
+ BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
+ if (prefetch == "none")
+ prefetchConfig = BackgroundSync::PREFETCH_NONE;
+ else if (prefetch == "_id_only")
+ prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
+ else if (prefetch == "all")
+ prefetchConfig = BackgroundSync::PREFETCH_ALL;
+ else {
+ warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
+ << "to \"all\"";
}
+ BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
+ }
- while (!inShutdown()) {
- // After a reconfig, we may not be in the replica set anymore, so
- // check that we are in the set (and not an arbiter) before
- // trying to sync with other replicas.
- // TODO(spencer): Use a condition variable to await loading a config
- if (replCoord->getMemberState().startup()) {
- warning() << "did not receive a valid config yet, sleeping 5 seconds ";
- sleepsecs(5);
- continue;
- }
+ while (!inShutdown()) {
+ // After a reconfig, we may not be in the replica set anymore, so
+ // check that we are in the set (and not an arbiter) before
+ // trying to sync with other replicas.
+ // TODO(spencer): Use a condition variable to await loading a config
+ if (replCoord->getMemberState().startup()) {
+ warning() << "did not receive a valid config yet, sleeping 5 seconds ";
+ sleepsecs(5);
+ continue;
+ }
- const MemberState memberState = replCoord->getMemberState();
+ const MemberState memberState = replCoord->getMemberState();
- // An arbiter can never transition to any other state, and doesn't replicate, ever
- if (memberState.arbiter()) {
- break;
- }
+ // An arbiter can never transition to any other state, and doesn't replicate, ever
+ if (memberState.arbiter()) {
+ break;
+ }
+
+ // If we are removed then we don't belong to the set anymore
+ if (memberState.removed()) {
+ sleepsecs(5);
+ continue;
+ }
- // If we are removed then we don't belong to the set anymore
- if (memberState.removed()) {
- sleepsecs(5);
+ try {
+ if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
+ sleepsecs(1);
continue;
}
- try {
-
- if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
- sleepsecs(1);
- continue;
- }
-
- bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
- // Check criteria for doing an initial sync:
- // 1. If the oplog is empty, do an initial sync
- // 2. If minValid has _initialSyncFlag set, do an initial sync
- // 3. If initialSyncRequested is true
- if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
- getInitialSyncFlag() ||
- initialSyncRequested) {
- syncDoInitialSync();
- continue; // start from top again in case sync failed.
- }
- if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
- continue;
- }
-
- /* we have some data. continue tailing. */
- SyncTail tail(BackgroundSync::get(), multiSyncApply);
- tail.oplogApplication();
+ bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
+ // Check criteria for doing an initial sync:
+ // 1. If the oplog is empty, do an initial sync
+ // 2. If minValid has _initialSyncFlag set, do an initial sync
+ // 3. If initialSyncRequested is true
+ if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
+ getInitialSyncFlag() || initialSyncRequested) {
+ syncDoInitialSync();
+ continue; // start from top again in case sync failed.
}
- catch(const DBException& e) {
- log() << "Received exception while syncing: " << e.toString();
- sleepsecs(10);
- }
- catch(const std::exception& e) {
- log() << "Received exception while syncing: " << e.what();
- sleepsecs(10);
+ if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
+ continue;
}
+
+ /* we have some data. continue tailing. */
+ SyncTail tail(BackgroundSync::get(), multiSyncApply);
+ tail.oplogApplication();
+ } catch (const DBException& e) {
+ log() << "Received exception while syncing: " << e.toString();
+ sleepsecs(10);
+ } catch (const std::exception& e) {
+ log() << "Received exception while syncing: " << e.what();
+ sleepsecs(10);
}
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_sync.h b/src/mongo/db/repl/rs_sync.h
index af099db43a5..ec174268b5c 100644
--- a/src/mongo/db/repl/rs_sync.h
+++ b/src/mongo/db/repl/rs_sync.h
@@ -40,8 +40,8 @@
namespace mongo {
namespace repl {
- // Body of the thread that will do the background sync.
- void runSyncThread();
+// Body of the thread that will do the background sync.
+void runSyncThread();
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
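As a sketch of how the declaration above is meant to be used: runSyncThread() is a thread body that loops until shutdown. The launch site below is illustrative only and is not the tree's actual startup wiring; stdx::thread is assumed here because it is what the neighboring test code uses.

    stdx::thread syncThread(runSyncThread);  // loops until inShutdown() is true
    syncThread.detach();  // hypothetical: real lifetime management lives in
                          // the replication startup code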
diff --git a/src/mongo/db/repl/rslog.cpp b/src/mongo/db/repl/rslog.cpp
index 9a02f64ce27..f48d6399847 100644
--- a/src/mongo/db/repl/rslog.cpp
+++ b/src/mongo/db/repl/rslog.cpp
@@ -36,12 +36,12 @@
namespace mongo {
namespace repl {
- static RamLog* _rsLog = RamLog::get("rs");
- logger::Tee* rsLog = _rsLog;
+static RamLog* _rsLog = RamLog::get("rs");
+logger::Tee* rsLog = _rsLog;
- void fillRsLog(std::stringstream* s) {
- _rsLog->toHTML(*s);
- }
+void fillRsLog(std::stringstream* s) {
+ _rsLog->toHTML(*s);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rslog.h b/src/mongo/db/repl/rslog.h
index 7a6624c876d..5b0b694d7bf 100644
--- a/src/mongo/db/repl/rslog.h
+++ b/src/mongo/db/repl/rslog.h
@@ -33,15 +33,15 @@
namespace mongo {
namespace logger {
- class Tee;
-} // namespace logger
+class Tee;
+} // namespace logger
namespace repl {
- void fillRsLog(std::stringstream* s);
+void fillRsLog(std::stringstream* s);
- // ramlog used for replSet actions
- extern logger::Tee* rsLog;
+// ramlog used for replSet actions
+extern logger::Tee* rsLog;
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
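A minimal usage sketch of the interface above, assuming a caller that wants the buffered replica-set log rendered for display (the surrounding wiring is illustrative):

    std::stringstream ss;
    repl::fillRsLog(&ss);  // renders the "rs" RamLog buffer as HTML into ss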
diff --git a/src/mongo/db/repl/scatter_gather_algorithm.cpp b/src/mongo/db/repl/scatter_gather_algorithm.cpp
index 5e65a8f2df1..78fc22fa38f 100644
--- a/src/mongo/db/repl/scatter_gather_algorithm.cpp
+++ b/src/mongo/db/repl/scatter_gather_algorithm.cpp
@@ -33,7 +33,7 @@
namespace mongo {
namespace repl {
- ScatterGatherAlgorithm::~ScatterGatherAlgorithm() {}
+ScatterGatherAlgorithm::~ScatterGatherAlgorithm() {}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/scatter_gather_algorithm.h b/src/mongo/db/repl/scatter_gather_algorithm.h
index 9b6bd05d896..c5e06e4d3c9 100644
--- a/src/mongo/db/repl/scatter_gather_algorithm.h
+++ b/src/mongo/db/repl/scatter_gather_algorithm.h
@@ -34,46 +34,47 @@
namespace mongo {
- template <typename T> class StatusWith;
+template <typename T>
+class StatusWith;
namespace repl {
+/**
+ * Interface for a specialization of a scatter-gather algorithm that sends
+ * requests to a set of targets, and then processes responses until it has
+ * seen enough.
+ *
+ * To use, call getRequests() to get a vector of request objects describing network operations.
+ * Start performing the network operations in any order, and then, until
+ * hasReceivedSufficientResponses() returns true, call processResponse for each response as it
+ * arrives. Once hasReceivedSufficientResponses() returns true, you may cancel outstanding network
+ * operations, and must stop calling processResponse. Implementations of this interface may
+ * assume that processResponse() is never called after hasReceivedSufficientResponses() returns
+ * true.
+ */
+class ScatterGatherAlgorithm {
+public:
/**
- * Interface for a specialization of a scatter-gather algorithm that sends
- * requests to a set of targets, and then processes responses until it has
- * seen enough.
- *
- * To use, call getRequests() to get a vector of request objects describing network operations.
- * Start performing the network operations in any order, and then, until
- * hasReceivedSufficientResponses() returns true, call processResponse for each response as it
- * arrives. Once hasReceivedSufficientResponses() you may cancel outstanding network
- * operations, and must stop calling processResponse. Implementations of this interface may
- * assume that processResponse() is never called after hasReceivedSufficientResponses() returns
- * true.
+ * Returns the list of requests that should be sent.
*/
- class ScatterGatherAlgorithm {
- public:
- /**
- * Returns the list of requests that should be sent.
- */
- virtual std::vector<RemoteCommandRequest> getRequests() const = 0;
+ virtual std::vector<RemoteCommandRequest> getRequests() const = 0;
- /**
- * Method to call once for each received response.
- */
- virtual void processResponse(const RemoteCommandRequest& request,
- const ResponseStatus& response) = 0;
+ /**
+ * Method to call once for each received response.
+ */
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) = 0;
- /**
- * Returns true if no more calls to processResponse are needed to consider the
- * algorithm complete. Once this method returns true, one should no longer
- * call processResponse.
- */
- virtual bool hasReceivedSufficientResponses() const = 0;
+ /**
+ * Returns true if no more calls to processResponse are needed to consider the
+ * algorithm complete. Once this method returns true, one should no longer
+ * call processResponse.
+ */
+ virtual bool hasReceivedSufficientResponses() const = 0;
- protected:
- virtual ~ScatterGatherAlgorithm(); // Shouldn't actually be virtual.
- };
+protected:
+ virtual ~ScatterGatherAlgorithm(); // Shouldn't actually be virtual.
+};
} // namespace repl
} // namespace mongo
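To make the protocol documented above concrete, here is a minimal sketch of an implementation that declares itself done after a fixed number of responses. QuorumWaiter and the hard-coded three targets are illustrative only (they mirror the request shape used by scatter_gather_test.cpp), not part of the tree:

    namespace mongo {
    namespace repl {

    // Minimal sketch: finishes after "quorum" responses, whatever their contents.
    class QuorumWaiter : public ScatterGatherAlgorithm {
    public:
        explicit QuorumWaiter(size_t quorum) : _quorum(quorum), _responses(0) {}

        virtual std::vector<RemoteCommandRequest> getRequests() const {
            // One request per target; the hosts are placeholders, as in the unit tests.
            std::vector<RemoteCommandRequest> requests;
            for (int i = 0; i < 3; i++) {
                requests.push_back(RemoteCommandRequest(
                    HostAndPort("hostname", i), "admin", BSONObj(), Milliseconds(30 * 1000)));
            }
            return requests;
        }

        virtual void processResponse(const RemoteCommandRequest& request,
                                     const ResponseStatus& response) {
            // Per the contract, never called once we report "sufficient".
            ++_responses;
        }

        virtual bool hasReceivedSufficientResponses() const {
            return _responses >= _quorum;
        }

    private:
        size_t _quorum;
        size_t _responses;
    };

    }  // namespace repl
    }  // namespace mongo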
diff --git a/src/mongo/db/repl/scatter_gather_runner.cpp b/src/mongo/db/repl/scatter_gather_runner.cpp
index 52b8b3ce43f..6161f71db63 100644
--- a/src/mongo/db/repl/scatter_gather_runner.cpp
+++ b/src/mongo/db/repl/scatter_gather_runner.cpp
@@ -41,127 +41,111 @@
namespace mongo {
namespace repl {
- ScatterGatherRunner::ScatterGatherRunner(ScatterGatherAlgorithm* algorithm) :
- _algorithm(algorithm),
- _started(false) {
+ScatterGatherRunner::ScatterGatherRunner(ScatterGatherAlgorithm* algorithm)
+ : _algorithm(algorithm), _started(false) {}
+
+ScatterGatherRunner::~ScatterGatherRunner() {}
+
+static void startTrampoline(const ReplicationExecutor::CallbackArgs& cbData,
+ ScatterGatherRunner* runner,
+ StatusWith<ReplicationExecutor::EventHandle>* result) {
+ // TODO: remove static cast once ScatterGatherRunner is designed to work with a generic
+ // TaskExecutor.
+ ReplicationExecutor* executor = static_cast<ReplicationExecutor*>(cbData.executor);
+ *result = runner->start(executor);
+}
+
+Status ScatterGatherRunner::run(ReplicationExecutor* executor) {
+ StatusWith<ReplicationExecutor::EventHandle> finishEvh(ErrorCodes::InternalError, "Not set");
+ StatusWith<ReplicationExecutor::CallbackHandle> startCBH = executor->scheduleWork(
+ stdx::bind(startTrampoline, stdx::placeholders::_1, this, &finishEvh));
+ if (!startCBH.isOK()) {
+ return startCBH.getStatus();
}
-
- ScatterGatherRunner::~ScatterGatherRunner() {
- }
-
- static void startTrampoline(const ReplicationExecutor::CallbackArgs& cbData,
- ScatterGatherRunner* runner,
- StatusWith<ReplicationExecutor::EventHandle>* result) {
-
- // TODO: remove static cast once ScatterGatherRunner is designed to work with a generic
- // TaskExecutor.
- ReplicationExecutor* executor = static_cast<ReplicationExecutor*>(cbData.executor);
- *result = runner->start(executor);
+ executor->wait(startCBH.getValue());
+ if (!finishEvh.isOK()) {
+ return finishEvh.getStatus();
}
-
- Status ScatterGatherRunner::run(ReplicationExecutor* executor) {
- StatusWith<ReplicationExecutor::EventHandle> finishEvh(ErrorCodes::InternalError,
- "Not set");
- StatusWith<ReplicationExecutor::CallbackHandle> startCBH = executor->scheduleWork(
- stdx::bind(startTrampoline, stdx::placeholders::_1, this, &finishEvh));
- if (!startCBH.isOK()) {
- return startCBH.getStatus();
- }
- executor->wait(startCBH.getValue());
- if (!finishEvh.isOK()) {
- return finishEvh.getStatus();
- }
- executor->waitForEvent(finishEvh.getValue());
- return Status::OK();
+ executor->waitForEvent(finishEvh.getValue());
+ return Status::OK();
+}
+
+StatusWith<ReplicationExecutor::EventHandle> ScatterGatherRunner::start(
+ ReplicationExecutor* executor, const stdx::function<void()>& onCompletion) {
+ invariant(!_started);
+ _started = true;
+ _actualResponses = 0;
+ _onCompletion = onCompletion;
+ StatusWith<ReplicationExecutor::EventHandle> evh = executor->makeEvent();
+ if (!evh.isOK()) {
+ return evh;
}
-
- StatusWith<ReplicationExecutor::EventHandle> ScatterGatherRunner::start(
- ReplicationExecutor* executor,
- const stdx::function<void ()>& onCompletion) {
-
- invariant(!_started);
- _started = true;
- _actualResponses = 0;
- _onCompletion = onCompletion;
- StatusWith<ReplicationExecutor::EventHandle> evh = executor->makeEvent();
- if (!evh.isOK()) {
- return evh;
- }
- _sufficientResponsesReceived = evh.getValue();
- ScopeGuard earlyReturnGuard = MakeGuard(
- &ScatterGatherRunner::_signalSufficientResponsesReceived,
- this,
- executor);
-
- const ReplicationExecutor::RemoteCommandCallbackFn cb = stdx::bind(
- &ScatterGatherRunner::_processResponse,
- stdx::placeholders::_1,
- this);
-
- std::vector<RemoteCommandRequest> requests = _algorithm->getRequests();
- for (size_t i = 0; i < requests.size(); ++i) {
- const StatusWith<ReplicationExecutor::CallbackHandle> cbh =
- executor->scheduleRemoteCommand(requests[i], cb);
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return StatusWith<ReplicationExecutor::EventHandle>(cbh.getStatus());
- }
- fassert(18743, cbh.getStatus());
- _callbacks.push_back(cbh.getValue());
+ _sufficientResponsesReceived = evh.getValue();
+ ScopeGuard earlyReturnGuard =
+ MakeGuard(&ScatterGatherRunner::_signalSufficientResponsesReceived, this, executor);
+
+ const ReplicationExecutor::RemoteCommandCallbackFn cb =
+ stdx::bind(&ScatterGatherRunner::_processResponse, stdx::placeholders::_1, this);
+
+ std::vector<RemoteCommandRequest> requests = _algorithm->getRequests();
+ for (size_t i = 0; i < requests.size(); ++i) {
+ const StatusWith<ReplicationExecutor::CallbackHandle> cbh =
+ executor->scheduleRemoteCommand(requests[i], cb);
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return StatusWith<ReplicationExecutor::EventHandle>(cbh.getStatus());
}
-
- if (_callbacks.empty() || _algorithm->hasReceivedSufficientResponses()) {
- invariant(_algorithm->hasReceivedSufficientResponses());
- _signalSufficientResponsesReceived(executor);
- }
-
- earlyReturnGuard.Dismiss();
- return evh;
+ fassert(18743, cbh.getStatus());
+ _callbacks.push_back(cbh.getValue());
}
- void ScatterGatherRunner::cancel(ReplicationExecutor* executor) {
- invariant(_started);
+ if (_callbacks.empty() || _algorithm->hasReceivedSufficientResponses()) {
+ invariant(_algorithm->hasReceivedSufficientResponses());
_signalSufficientResponsesReceived(executor);
}
- void ScatterGatherRunner::_processResponse(
- const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
- ScatterGatherRunner* runner) {
-
- // It is possible that the ScatterGatherRunner has already gone out of scope, if the
- // response indicates the callback was canceled. In that case, do not access any members
- // of "runner" and return immediately.
- if (cbData.response.getStatus() == ErrorCodes::CallbackCanceled) {
- return;
- }
-
- ++runner->_actualResponses;
- runner->_algorithm->processResponse(cbData.request, cbData.response);
- if (runner->_algorithm->hasReceivedSufficientResponses()) {
- // TODO: remove static cast once ScatterGatherRunner is designed to work with a generic
- // TaskExecutor.
- ReplicationExecutor* executor = static_cast<ReplicationExecutor*>(cbData.executor);
- runner->_signalSufficientResponsesReceived(executor);
- }
- else {
- invariant(runner->_actualResponses < runner->_callbacks.size());
- }
+ earlyReturnGuard.Dismiss();
+ return evh;
+}
+
+void ScatterGatherRunner::cancel(ReplicationExecutor* executor) {
+ invariant(_started);
+ _signalSufficientResponsesReceived(executor);
+}
+
+void ScatterGatherRunner::_processResponse(
+ const ReplicationExecutor::RemoteCommandCallbackArgs& cbData, ScatterGatherRunner* runner) {
+ // It is possible that the ScatterGatherRunner has already gone out of scope, if the
+ // response indicates the callback was canceled. In that case, do not access any members
+ // of "runner" and return immediately.
+ if (cbData.response.getStatus() == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ScatterGatherRunner::_signalSufficientResponsesReceived(ReplicationExecutor* executor) {
- if (_sufficientResponsesReceived.isValid()) {
- std::for_each(_callbacks.begin(),
- _callbacks.end(),
- stdx::bind(&ReplicationExecutor::cancel,
- executor,
- stdx::placeholders::_1));
- const ReplicationExecutor::EventHandle h = _sufficientResponsesReceived;
- _sufficientResponsesReceived = ReplicationExecutor::EventHandle();
- if (_onCompletion) {
- _onCompletion();
- }
- executor->signalEvent(h);
+ ++runner->_actualResponses;
+ runner->_algorithm->processResponse(cbData.request, cbData.response);
+ if (runner->_algorithm->hasReceivedSufficientResponses()) {
+ // TODO: remove static cast once ScatterGatherRunner is designed to work with a generic
+ // TaskExecutor.
+ ReplicationExecutor* executor = static_cast<ReplicationExecutor*>(cbData.executor);
+ runner->_signalSufficientResponsesReceived(executor);
+ } else {
+ invariant(runner->_actualResponses < runner->_callbacks.size());
+ }
+}
+
+void ScatterGatherRunner::_signalSufficientResponsesReceived(ReplicationExecutor* executor) {
+ if (_sufficientResponsesReceived.isValid()) {
+ std::for_each(_callbacks.begin(),
+ _callbacks.end(),
+ stdx::bind(&ReplicationExecutor::cancel, executor, stdx::placeholders::_1));
+ const ReplicationExecutor::EventHandle h = _sufficientResponsesReceived;
+ _sufficientResponsesReceived = ReplicationExecutor::EventHandle();
+ if (_onCompletion) {
+ _onCompletion();
}
+ executor->signalEvent(h);
}
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/scatter_gather_runner.h b/src/mongo/db/repl/scatter_gather_runner.h
index 089fa178dde..ad7b8d93aa5 100644
--- a/src/mongo/db/repl/scatter_gather_runner.h
+++ b/src/mongo/db/repl/scatter_gather_runner.h
@@ -36,88 +36,90 @@
namespace mongo {
- template <typename T> class StatusWith;
+template <typename T>
+class StatusWith;
namespace repl {
- class ScatterGatherAlgorithm;
+class ScatterGatherAlgorithm;
+
+/**
+ * Implementation of a scatter-gather behavior using a ReplicationExecutor.
+ */
+class ScatterGatherRunner {
+ MONGO_DISALLOW_COPYING(ScatterGatherRunner);
+
+public:
+ /**
+ * Constructs a new runner whose underlying algorithm is "algorithm".
+ *
+ * "algorithm" must remain in scope until the runner's destructor completes.
+ */
+ explicit ScatterGatherRunner(ScatterGatherAlgorithm* algorithm);
+
+ ~ScatterGatherRunner();
+
+ /**
+ * Runs the scatter-gather process using "executor", and blocks until it completes.
+ *
+ * Must _not_ be run from inside the executor context.
+ *
+ * Returns ErrorCodes::ShutdownInProgress if the executor enters or is already in
+ * the shutdown state before run() can schedule execution of the scatter-gather
+ * in the executor. Note that if the executor is shut down after the algorithm
+ * is scheduled but before it completes, this method will return Status::OK(),
+ * just as it does when it runs successfully to completion.
+ */
+ Status run(ReplicationExecutor* executor);
+
+ /**
+ * Starts executing the scatter-gather process using "executor".
+ *
+ * On success, returns an event handle that will be signaled when the runner has
+ * finished executing the scatter-gather process. After that event has been
+ * signaled, it is safe for the caller to examine any state on "algorithm".
+ *
+ * This method must be called inside the executor context.
+ *
+ * onCompletion is an optional callback that will be executed in executor context
+ * immediately prior to signaling the event handle returned here. It must never
+ * throw exceptions. It may examine the state of the algorithm object.
+ *
+ * NOTE: If the executor starts to shut down before onCompletion executes, onCompletion may
+ * never execute, even though the returned event will eventually be signaled.
+ */
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the runner to cancel further processing. The "executor" argument
+ * must point to the same executor passed to "start()".
+ *
+ * Like start, this method must be called from within the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+private:
+ /**
+ * Callback invoked once for every response from the network.
+ */
+ static void _processResponse(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
+ ScatterGatherRunner* runner);
/**
- * Implementation of a scatter-gather behavior using a ReplicationExecutor.
+ * Method that performs all actions required when _algorithm indicates a sufficient
+ * number of responses have been received.
*/
- class ScatterGatherRunner {
- MONGO_DISALLOW_COPYING(ScatterGatherRunner);
- public:
- /**
- * Constructs a new runner whose underlying algorithm is "algorithm".
- *
- * "algorithm" must remain in scope until the runner's destructor completes.
- */
- explicit ScatterGatherRunner(ScatterGatherAlgorithm* algorithm);
-
- ~ScatterGatherRunner();
-
- /**
- * Runs the scatter-gather process using "executor", and blocks until it completes.
- *
- * Must _not_ be run from inside the executor context.
- *
- * Returns ErrorCodes::ShutdownInProgress if the executor enters or is already in
- * the shutdown state before run() can schedule execution of the scatter-gather
- * in the executor. Note that if the executor is shut down after the algorithm
- * is scheduled but before it completes, this method will return Status::OK(),
- * just as it does when it runs successfully to completion.
- */
- Status run(ReplicationExecutor* executor);
-
- /**
- * Starts executing the scatter-gather process using "executor".
- *
- * On success, returns an event handle that will be signaled when the runner has
- * finished executing the scatter-gather process. After that event has been
- * signaled, it is safe for the caller to examine any state on "algorithm".
- *
- * This method must be called inside the executor context.
- *
- * onCompletion is an optional callback that will be executed in executor context
- * immediately prior to signaling the event handle returned here. It must never
- * throw exceptions. It may examine the state of the algorithm object.
- *
- * NOTE: If the executor starts to shut down before onCompletion executes, onCompletion may
- * never execute, even though the returned event will eventually be signaled.
- */
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
-
- /**
- * Informs the runner to cancel further processing. The "executor" argument
- * must point to the same executor passed to "start()".
- *
- * Like start, this method must be called from within the executor context.
- */
- void cancel(ReplicationExecutor* executor);
-
- private:
- /**
- * Callback invoked once for every response from the network.
- */
- static void _processResponse(const ReplicationExecutor::RemoteCommandCallbackArgs& cbData,
- ScatterGatherRunner* runner);
-
- /**
- * Method that performs all actions required when _algorithm indicates a sufficient
- * number of respones have been received.
- */
- void _signalSufficientResponsesReceived(ReplicationExecutor* executor);
-
- ScatterGatherAlgorithm* _algorithm;
- stdx::function<void ()> _onCompletion;
- ReplicationExecutor::EventHandle _sufficientResponsesReceived;
- std::vector<ReplicationExecutor::CallbackHandle> _callbacks;
- size_t _actualResponses;
- bool _started;
- };
+ void _signalSufficientResponsesReceived(ReplicationExecutor* executor);
+
+ ScatterGatherAlgorithm* _algorithm;
+ stdx::function<void()> _onCompletion;
+ ReplicationExecutor::EventHandle _sufficientResponsesReceived;
+ std::vector<ReplicationExecutor::CallbackHandle> _callbacks;
+ size_t _actualResponses;
+ bool _started;
+};
} // namespace repl
} // namespace mongo
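A minimal sketch of driving the runner synchronously, per the run() contract documented above. It assumes an already-running ReplicationExecutor (named executor here) and any ScatterGatherAlgorithm implementation, such as the QuorumWaiter sketch earlier; the algorithm must outlive the runner:

    QuorumWaiter algorithm(2);               // illustrative algorithm from the sketch above
    ScatterGatherRunner runner(&algorithm);  // "algorithm" must stay in scope

    // Blocks the calling thread, which must not be an executor thread, until the
    // algorithm reports sufficient responses or the executor shuts down first.
    Status status = runner.run(&executor);
    if (!status.isOK()) {
        // ErrorCodes::ShutdownInProgress: the executor was already shutting down
        // before run() could schedule the scatter-gather.
    }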
diff --git a/src/mongo/db/repl/scatter_gather_test.cpp b/src/mongo/db/repl/scatter_gather_test.cpp
index 6d3673fa927..fb605a597e0 100644
--- a/src/mongo/db/repl/scatter_gather_test.cpp
+++ b/src/mongo/db/repl/scatter_gather_test.cpp
@@ -41,308 +41,288 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
-
- /**
- * Algorithm for testing the ScatterGatherRunner, which will finish running when finish() is
- * called, or upon receiving responses from two nodes. Creates a three requests algorithm
- * simulating running an algorithm against three other nodes.
- */
- class ScatterGatherTestAlgorithm : public ScatterGatherAlgorithm {
- public:
- ScatterGatherTestAlgorithm(int64_t maxResponses = 2) :
- _done(false),
- _numResponses(0),
- _maxResponses(maxResponses) {}
-
- virtual std::vector<RemoteCommandRequest> getRequests() const {
- std::vector<RemoteCommandRequest> requests;
- for (int i = 0; i < 3; i++) {
- requests.push_back(RemoteCommandRequest(
- HostAndPort("hostname", i),
- "admin",
- BSONObj(),
- Milliseconds(30*1000)));
- }
- return requests;
- }
-
- virtual void processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
- _numResponses++;
- }
-
- void finish() {
- _done = true;
- }
+using executor::NetworkInterfaceMock;
- virtual bool hasReceivedSufficientResponses() const {
- if (_done) {
- return _done;
- }
-
- return _numResponses >= _maxResponses;
- }
-
- int getResponseCount() {
- return _numResponses;
+/**
+ * Algorithm for testing the ScatterGatherRunner, which will finish running when finish() is
+ * called, or upon receiving responses from two nodes. Creates three requests,
+ * simulating a run of the algorithm against three other nodes.
+ */
+class ScatterGatherTestAlgorithm : public ScatterGatherAlgorithm {
+public:
+ ScatterGatherTestAlgorithm(int64_t maxResponses = 2)
+ : _done(false), _numResponses(0), _maxResponses(maxResponses) {}
+
+ virtual std::vector<RemoteCommandRequest> getRequests() const {
+ std::vector<RemoteCommandRequest> requests;
+ for (int i = 0; i < 3; i++) {
+ requests.push_back(RemoteCommandRequest(
+ HostAndPort("hostname", i), "admin", BSONObj(), Milliseconds(30 * 1000)));
}
-
- private:
-
- bool _done;
- int64_t _numResponses;
- int64_t _maxResponses;
- };
-
- /**
- * ScatterGatherTest base class which sets up the ReplicationExecutor and NetworkInterfaceMock.
- */
- class ScatterGatherTest : public mongo::unittest::Test {
- protected:
-
- NetworkInterfaceMock* getNet() { return _net; }
- ReplicationExecutor* getExecutor() { return _executor.get(); }
-
- int64_t countLogLinesContaining(const std::string& needle);
- private:
-
- void setUp();
- void tearDown();
-
- // owned by _executor
- NetworkInterfaceMock* _net;
- StorageInterfaceMock* _storage;
- std::unique_ptr<ReplicationExecutor> _executor;
- std::unique_ptr<stdx::thread> _executorThread;
- };
-
- void ScatterGatherTest::setUp() {
- _net = new NetworkInterfaceMock;
- _storage = new StorageInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng seed */));
- _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
+ return requests;
}
- void ScatterGatherTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ _numResponses++;
}
+ void finish() {
+ _done = true;
+ }
- // Used to run a ScatterGatherRunner in a separate thread, to avoid blocking test execution.
- class ScatterGatherRunnerRunner {
- public:
-
- ScatterGatherRunnerRunner(ScatterGatherRunner* sgr, ReplicationExecutor* executor) :
- _sgr(sgr),
- _executor(executor),
- _result(Status(ErrorCodes::BadValue, "failed to set status")) {}
-
- // Could block if _sgr has not finished
- Status getResult() {
- _thread->join();
- return _result;
- }
-
- void run() {
- _thread.reset(new stdx::thread(stdx::bind(&ScatterGatherRunnerRunner::_run,
- this,
- _executor)));
- }
-
- private:
-
- void _run(ReplicationExecutor* executor) {
- _result = _sgr->run(_executor);
+ virtual bool hasReceivedSufficientResponses() const {
+ if (_done) {
+ return _done;
}
- ScatterGatherRunner* _sgr;
- ReplicationExecutor* _executor;
- Status _result;
- std::unique_ptr<stdx::thread> _thread;
- };
-
- // Simple onCompletion function which will toggle a bool, so that we can check the logs to
- // ensure the onCompletion function ran when expected.
- void onCompletionTestFunction(bool* ran) {
- *ran = true;
+ return _numResponses >= _maxResponses;
}
- // Confirm that running via start() will finish and run the onComplete function once sufficient
- // responses have been received.
- // Confirm that deleting both the ScatterGatherTestAlgorithm and ScatterGatherRunner while
- // scheduled callbacks still exist will not be unsafe (ASAN builder) after the algorithm has
- // completed.
- TEST_F(ScatterGatherTest, DeleteAlgorithmAfterItHasCompleted) {
- ScatterGatherTestAlgorithm* sga = new ScatterGatherTestAlgorithm();
- ScatterGatherRunner* sgr = new ScatterGatherRunner(sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr->start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- ASSERT_OK(status.getStatus());
- ASSERT_FALSE(ranCompletion);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now() + Seconds(2),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now() + Seconds(2),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now() + Seconds(5),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- net->runUntil(net->now() + Seconds(2));
- ASSERT_TRUE(ranCompletion);
-
- delete sga;
- delete sgr;
-
- net->runReadyNetworkOperations();
-
- net->exitNetwork();
+ int getResponseCount() {
+ return _numResponses;
}
- // Confirm that shutting the ReplicationExecutor down before calling run() will cause run()
- // to return ErrorCodes::ShutdownInProgress.
- TEST_F(ScatterGatherTest, ShutdownExecutorBeforeRun) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- getExecutor()->shutdown();
- sga.finish();
- Status status = sgr.run(getExecutor());
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status);
- }
+private:
+ bool _done;
+ int64_t _numResponses;
+ int64_t _maxResponses;
+};
- // Confirm that shutting the ReplicationExecutor down after calling run(), but before run()
- // finishes will cause run() to return Status::OK().
- TEST_F(ScatterGatherTest, ShutdownExecutorAfterRun) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
- sgrr.run();
- // need to wait for the scatter-gather to be scheduled in the executor
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->blackHole(noi);
- net->exitNetwork();
- getExecutor()->shutdown();
- Status status = sgrr.getResult();
- ASSERT_OK(status);
+/**
+ * ScatterGatherTest base class which sets up the ReplicationExecutor and NetworkInterfaceMock.
+ */
+class ScatterGatherTest : public mongo::unittest::Test {
+protected:
+ NetworkInterfaceMock* getNet() {
+ return _net;
}
-
- // Confirm that shutting the ReplicationExecutor down before calling start() will cause start()
- // to return ErrorCodes::ShutdownInProgress and should not run onCompletion().
- TEST_F(ScatterGatherTest, ShutdownExecutorBeforeStart) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- getExecutor()->shutdown();
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- sga.finish();
- ASSERT_FALSE(ranCompletion);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status.getStatus());
+ ReplicationExecutor* getExecutor() {
+ return _executor.get();
}
- // Confirm that shutting the ReplicationExecutor down after calling start() will cause start()
- // to return Status::OK and should not run onCompletion().
- TEST_F(ScatterGatherTest, ShutdownExecutorAfterStart) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- getExecutor()->shutdown();
- sga.finish();
- ASSERT_FALSE(ranCompletion);
- ASSERT_OK(status.getStatus());
+ int64_t countLogLinesContaining(const std::string& needle);
+
+private:
+ void setUp();
+ void tearDown();
+
+ // owned by _executor
+ NetworkInterfaceMock* _net;
+ StorageInterfaceMock* _storage;
+ std::unique_ptr<ReplicationExecutor> _executor;
+ std::unique_ptr<stdx::thread> _executorThread;
+};
+
+void ScatterGatherTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _storage = new StorageInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, _storage, 1 /* prng seed */));
+ _executorThread.reset(new stdx::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+}
+
+void ScatterGatherTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+
+// Used to run a ScatterGatherRunner in a separate thread, to avoid blocking test execution.
+class ScatterGatherRunnerRunner {
+public:
+ ScatterGatherRunnerRunner(ScatterGatherRunner* sgr, ReplicationExecutor* executor)
+ : _sgr(sgr),
+ _executor(executor),
+ _result(Status(ErrorCodes::BadValue, "failed to set status")) {}
+
+ // Could block if _sgr has not finished
+ Status getResult() {
+ _thread->join();
+ return _result;
}
- // Confirm that responses are not processed once sufficient responses have been received.
- TEST_F(ScatterGatherTest, DoNotProcessMoreThanSufficientResponses) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- ASSERT_OK(status.getStatus());
- ASSERT_FALSE(ranCompletion);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now() + Seconds(2),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now() + Seconds(2),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now() + Seconds(5),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- net->runUntil(net->now() + Seconds(2));
- ASSERT_TRUE(ranCompletion);
-
- net->runReadyNetworkOperations();
- // the third resposne should not be processed, so the count should not increment
- ASSERT_EQUALS(2, sga.getResponseCount());
-
- net->exitNetwork();
+ void run() {
+ _thread.reset(
+ new stdx::thread(stdx::bind(&ScatterGatherRunnerRunner::_run, this, _executor)));
}
- // Confirm that starting with sufficient responses received will immediate complete.
- TEST_F(ScatterGatherTest, DoNotCreateCallbacksIfHasSufficientResponsesReturnsTrueImmediately) {
- ScatterGatherTestAlgorithm sga;
- // set hasReceivedSufficientResponses to return true before the run starts
- sga.finish();
- ScatterGatherRunner sgr(&sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- ASSERT_OK(status.getStatus());
- ASSERT_TRUE(ranCompletion);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ASSERT_FALSE(net->hasReadyRequests());
- net->exitNetwork();
+private:
+ void _run(ReplicationExecutor* executor) {
+ _result = _sgr->run(_executor);
}
+ ScatterGatherRunner* _sgr;
+ ReplicationExecutor* _executor;
+ Status _result;
+ std::unique_ptr<stdx::thread> _thread;
+};
+
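+// Typical use of the helper above (a sketch; SuccessfulScatterGatherViaRun below is the
+// canonical example):
+//
+//     ScatterGatherTestAlgorithm sga;
+//     ScatterGatherRunner sgr(&sga);
+//     ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
+//     sgrr.run();                    // starts sgr.run() on a background thread
+//     ... feed responses through the NetworkInterfaceMock ...
+//     ASSERT_OK(sgrr.getResult());   // joins the thread and returns run()'s Status
+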
+// Simple onCompletion function that sets a flag, so that tests can assert whether the
+// onCompletion function ran when expected.
+void onCompletionTestFunction(bool* ran) {
+ *ran = true;
+}
+
+// Confirm that running via start() finishes and runs the onCompletion function once sufficient
+// responses have been received.
+// Also confirm that deleting both the ScatterGatherTestAlgorithm and ScatterGatherRunner while
+// scheduled callbacks still exist is safe (verified on the ASAN builder) after the algorithm
+// has completed.
+TEST_F(ScatterGatherTest, DeleteAlgorithmAfterItHasCompleted) {
+ ScatterGatherTestAlgorithm* sga = new ScatterGatherTestAlgorithm();
+ ScatterGatherRunner* sgr = new ScatterGatherRunner(sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr->start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ ASSERT_OK(status.getStatus());
+ ASSERT_FALSE(ranCompletion);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + Seconds(2),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + Seconds(2),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + Seconds(5),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ net->runUntil(net->now() + Seconds(2));
+ ASSERT_TRUE(ranCompletion);
+
+ delete sga;
+ delete sgr;
+
+ net->runReadyNetworkOperations();
+
+ net->exitNetwork();
+}
+
+// Confirm that shutting the ReplicationExecutor down before calling run() will cause run()
+// to return ErrorCodes::ShutdownInProgress.
+TEST_F(ScatterGatherTest, ShutdownExecutorBeforeRun) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ getExecutor()->shutdown();
+ sga.finish();
+ Status status = sgr.run(getExecutor());
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status);
+}
+
+// Confirm that shutting the ReplicationExecutor down after calling run(), but before run()
+// finishes, will cause run() to return Status::OK().
+TEST_F(ScatterGatherTest, ShutdownExecutorAfterRun) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
+ sgrr.run();
+ // need to wait for the scatter-gather to be scheduled in the executor
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->blackHole(noi);
+ net->exitNetwork();
+ getExecutor()->shutdown();
+ Status status = sgrr.getResult();
+ ASSERT_OK(status);
+}
+
+// Confirm that shutting the ReplicationExecutor down before calling start() will cause start()
+// to return ErrorCodes::ShutdownInProgress and that onCompletion() is not run.
+TEST_F(ScatterGatherTest, ShutdownExecutorBeforeStart) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ getExecutor()->shutdown();
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ sga.finish();
+ ASSERT_FALSE(ranCompletion);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status.getStatus());
+}
+
+// Confirm that shutting the ReplicationExecutor down after calling start() will cause start()
+// to return Status::OK and that onCompletion() is not run.
+TEST_F(ScatterGatherTest, ShutdownExecutorAfterStart) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ getExecutor()->shutdown();
+ sga.finish();
+ ASSERT_FALSE(ranCompletion);
+ ASSERT_OK(status.getStatus());
+}
+
+// Confirm that responses are not processed once sufficient responses have been received.
+TEST_F(ScatterGatherTest, DoNotProcessMoreThanSufficientResponses) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ ASSERT_OK(status.getStatus());
+ ASSERT_FALSE(ranCompletion);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + Seconds(2),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + Seconds(2),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + Seconds(5),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ net->runUntil(net->now() + Seconds(2));
+ ASSERT_TRUE(ranCompletion);
+
+ net->runReadyNetworkOperations();
+ // the third response should not be processed, so the count should not increment
+ ASSERT_EQUALS(2, sga.getResponseCount());
+
+ net->exitNetwork();
+}
+
+// Confirm that a run started after sufficient responses have already been received completes immediately.
+TEST_F(ScatterGatherTest, DoNotCreateCallbacksIfHasSufficientResponsesReturnsTrueImmediately) {
+ ScatterGatherTestAlgorithm sga;
+ // set hasReceivedSufficientResponses to return true before the run starts
+ sga.finish();
+ ScatterGatherRunner sgr(&sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ ASSERT_OK(status.getStatus());
+ ASSERT_TRUE(ranCompletion);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ASSERT_FALSE(net->hasReadyRequests());
+ net->exitNetwork();
+}
+
#if 0
// TODO Enable this test once we have a way to test for invariants.
@@ -386,41 +366,35 @@ namespace {
net->exitNetwork();
ASSERT_FALSE(ranCompletion);
}
-#endif // 0
-
- // Confirm that running via run() will finish once sufficient responses have been received.
- TEST_F(ScatterGatherTest, SuccessfulScatterGatherViaRun) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
- sgrr.run();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- net->runReadyNetworkOperations();
-
- noi = net->getNextReadyRequest();
- net->blackHole(noi);
- net->runReadyNetworkOperations();
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(10))));
- net->runReadyNetworkOperations();
- net->exitNetwork();
-
- Status status = sgrr.getResult();
- ASSERT_OK(status);
- }
+#endif // 0
+
+// Confirm that running via run() will finish once sufficient responses have been received.
+TEST_F(ScatterGatherTest, SuccessfulScatterGatherViaRun) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
+ sgrr.run();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(
+ noi, net->now(), ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ net->runReadyNetworkOperations();
+
+ noi = net->getNextReadyRequest();
+ net->blackHole(noi);
+ net->runReadyNetworkOperations();
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(
+ noi, net->now(), ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), Milliseconds(10))));
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ Status status = sgrr.getResult();
+ ASSERT_OK(status);
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/storage_interface.cpp b/src/mongo/db/repl/storage_interface.cpp
index c09d76ad9ff..0b883d827b9 100644
--- a/src/mongo/db/repl/storage_interface.cpp
+++ b/src/mongo/db/repl/storage_interface.cpp
@@ -35,8 +35,8 @@
namespace mongo {
namespace repl {
- StorageInterface::StorageInterface() {}
- StorageInterface::~StorageInterface() {}
+StorageInterface::StorageInterface() {}
+StorageInterface::~StorageInterface() {}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h
index df51692b2f1..1f9d0576741 100644
--- a/src/mongo/db/repl/storage_interface.h
+++ b/src/mongo/db/repl/storage_interface.h
@@ -32,28 +32,26 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
+/**
+ * Storage interface used by the ReplicationExecutor inside mongod to support the
+ * ReplicationExecutor's ability to take database locks.
+ */
+class StorageInterface {
+public:
+ virtual ~StorageInterface();
+
/**
- * Storage interface used by used by the ReplicationExecutor inside mongod for supporting
- * ReplicationExectutor's ability to take database locks.
+ * Creates an operation context for running database operations.
*/
- class StorageInterface {
- public:
- virtual ~StorageInterface();
-
- /**
- * Creates an operation context for running database operations.
- */
- virtual OperationContext* createOperationContext() = 0;
-
- protected:
-
- StorageInterface();
+ virtual OperationContext* createOperationContext() = 0;
- };
+protected:
+ StorageInterface();
+};
} // namespace repl
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 73a14ce6330..a58f85964b4 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -39,16 +39,16 @@
namespace mongo {
namespace repl {
- StorageInterfaceImpl::StorageInterfaceImpl() : StorageInterface() {}
- StorageInterfaceImpl::~StorageInterfaceImpl() { }
-
- OperationContext* StorageInterfaceImpl::createOperationContext() {
- if (!ClientBasic::getCurrent()) {
- Client::initThreadIfNotAlready();
- AuthorizationSession::get(*ClientBasic::getCurrent())->grantInternalAuthorization();
- }
- return new OperationContextImpl();
+StorageInterfaceImpl::StorageInterfaceImpl() : StorageInterface() {}
+StorageInterfaceImpl::~StorageInterfaceImpl() {}
+
+OperationContext* StorageInterfaceImpl::createOperationContext() {
+ if (!ClientBasic::getCurrent()) {
+ Client::initThreadIfNotAlready();
+ AuthorizationSession::get(*ClientBasic::getCurrent())->grantInternalAuthorization();
}
+ return new OperationContextImpl();
+}
} // namespace repl
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h
index 24cc8268f17..fa378e537fd 100644
--- a/src/mongo/db/repl/storage_interface_impl.h
+++ b/src/mongo/db/repl/storage_interface_impl.h
@@ -33,18 +33,17 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class StorageInterfaceImpl : public StorageInterface {
- public:
- explicit StorageInterfaceImpl();
- virtual ~StorageInterfaceImpl();
+class StorageInterfaceImpl : public StorageInterface {
+public:
+ explicit StorageInterfaceImpl();
+ virtual ~StorageInterfaceImpl();
- OperationContext* createOperationContext() override;
-
- };
+ OperationContext* createOperationContext() override;
+};
} // namespace repl
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface_mock.cpp b/src/mongo/db/repl/storage_interface_mock.cpp
index 4a6f4a7a293..b620c65276d 100644
--- a/src/mongo/db/repl/storage_interface_mock.cpp
+++ b/src/mongo/db/repl/storage_interface_mock.cpp
@@ -37,13 +37,13 @@
namespace mongo {
namespace repl {
- StorageInterfaceMock::StorageInterfaceMock() {}
+StorageInterfaceMock::StorageInterfaceMock() {}
- StorageInterfaceMock::~StorageInterfaceMock() { }
+StorageInterfaceMock::~StorageInterfaceMock() {}
- OperationContext* StorageInterfaceMock::createOperationContext() {
- return new OperationContextReplMock();
- }
+OperationContext* StorageInterfaceMock::createOperationContext() {
+ return new OperationContextReplMock();
+}
} // namespace repl
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h
index 4bd3e63ec9d..8ce76adb642 100644
--- a/src/mongo/db/repl/storage_interface_mock.h
+++ b/src/mongo/db/repl/storage_interface_mock.h
@@ -33,17 +33,17 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class StorageInterfaceMock : public StorageInterface {
- public:
- explicit StorageInterfaceMock();
- virtual ~StorageInterfaceMock();
+class StorageInterfaceMock : public StorageInterface {
+public:
+ explicit StorageInterfaceMock();
+ virtual ~StorageInterfaceMock();
- OperationContext* createOperationContext() override;
- };
+ OperationContext* createOperationContext() override;
+};
} // namespace repl
-} // namespace mongo
+} // namespace mongo
diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp
index 49c70c3c2b7..602523a5471 100644
--- a/src/mongo/db/repl/sync_source_feedback.cpp
+++ b/src/mongo/db/repl/sync_source_feedback.cpp
@@ -51,161 +51,158 @@
namespace mongo {
- using std::endl;
- using std::string;
+using std::endl;
+using std::string;
namespace repl {
- SyncSourceFeedback::SyncSourceFeedback() : _positionChanged(false),
- _shutdownSignaled(false) {}
- SyncSourceFeedback::~SyncSourceFeedback() {}
+SyncSourceFeedback::SyncSourceFeedback() : _positionChanged(false), _shutdownSignaled(false) {}
+SyncSourceFeedback::~SyncSourceFeedback() {}
- void SyncSourceFeedback::_resetConnection() {
- LOG(1) << "resetting connection in sync source feedback";
- _connection.reset();
- }
+void SyncSourceFeedback::_resetConnection() {
+ LOG(1) << "resetting connection in sync source feedback";
+ _connection.reset();
+}
- bool SyncSourceFeedback::replAuthenticate() {
- if (!getGlobalAuthorizationManager()->isAuthEnabled())
- return true;
+bool SyncSourceFeedback::replAuthenticate() {
+ if (!getGlobalAuthorizationManager()->isAuthEnabled())
+ return true;
- if (!isInternalAuthSet())
- return false;
- return authenticateInternalUser(_connection.get());
- }
+ if (!isInternalAuthSet())
+ return false;
+ return authenticateInternalUser(_connection.get());
+}
- bool SyncSourceFeedback::_connect(OperationContext* txn, const HostAndPort& host) {
- if (hasConnection()) {
- return true;
- }
- log() << "setting syncSourceFeedback to " << host.toString();
- _connection.reset(new DBClientConnection(false, OplogReader::tcp_timeout));
- string errmsg;
- try {
- if (!_connection->connect(host, errmsg) ||
- (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate())) {
- _resetConnection();
- log() << errmsg << endl;
- return false;
- }
- }
- catch (const DBException& e) {
- error() << "Error connecting to " << host.toString() << ": " << e.what();
+bool SyncSourceFeedback::_connect(OperationContext* txn, const HostAndPort& host) {
+ if (hasConnection()) {
+ return true;
+ }
+ log() << "setting syncSourceFeedback to " << host.toString();
+ _connection.reset(new DBClientConnection(false, OplogReader::tcp_timeout));
+ string errmsg;
+ try {
+ if (!_connection->connect(host, errmsg) ||
+ (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate())) {
_resetConnection();
+ log() << errmsg << endl;
return false;
}
-
- return hasConnection();
+ } catch (const DBException& e) {
+ error() << "Error connecting to " << host.toString() << ": " << e.what();
+ _resetConnection();
+ return false;
}
- void SyncSourceFeedback::forwardSlaveProgress() {
- stdx::unique_lock<stdx::mutex> lock(_mtx);
- _positionChanged = true;
- _cond.notify_all();
- }
+ return hasConnection();
+}
+
+void SyncSourceFeedback::forwardSlaveProgress() {
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ _positionChanged = true;
+ _cond.notify_all();
+}
- Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getMemberState().primary()) {
- // primary has no one to update to
+Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getMemberState().primary()) {
+ // primary has no one to update to
+ return Status::OK();
+ }
+ BSONObjBuilder cmd;
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ // the command could not be created, likely because the node was removed from the set
+ if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
return Status::OK();
}
- BSONObjBuilder cmd;
- {
- stdx::unique_lock<stdx::mutex> lock(_mtx);
- // the command could not be created, likely because the node was removed from the set
- if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
- return Status::OK();
- }
- }
- BSONObj res;
+ }
+ BSONObj res;
+
+ LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
+ try {
+ _connection->runCommand("admin", cmd.obj(), res);
+ } catch (const DBException& e) {
+ log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
+ // blacklist sync target for .5 seconds and find a new one
+ replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
+ BackgroundSync::get()->clearSyncTarget();
+ _resetConnection();
+ return e.toStatus();
+ }
- LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
- try {
- _connection->runCommand("admin", cmd.obj(), res);
- }
- catch (const DBException& e) {
- log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
- // blacklist sync target for .5 seconds and find a new one
+ Status status = Command::getStatusFromCommandResult(res);
+ if (!status.isOK()) {
+ log() << "SyncSourceFeedback error sending update, response: " << res.toString() << endl;
+ // blacklist sync target for .5 seconds and find a new one, unless we were rejected due
+ // to the syncsource having a newer config
+ if (status != ErrorCodes::InvalidReplicaSetConfig || res["cfgver"].eoo() ||
+ res["cfgver"].numberLong() < replCoord->getConfig().getConfigVersion()) {
replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
BackgroundSync::get()->clearSyncTarget();
_resetConnection();
- return e.toStatus();
- }
-
- Status status = Command::getStatusFromCommandResult(res);
- if (!status.isOK()) {
- log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
- // blacklist sync target for .5 seconds and find a new one, unless we were rejected due
- // to the syncsource having a newer config
- if (status != ErrorCodes::InvalidReplicaSetConfig || res["cfgver"].eoo() ||
- res["cfgver"].numberLong() < replCoord->getConfig().getConfigVersion()) {
- replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
- BackgroundSync::get()->clearSyncTarget();
- _resetConnection();
- }
}
-
- return status;
}
- void SyncSourceFeedback::shutdown() {
- stdx::unique_lock<stdx::mutex> lock(_mtx);
- _shutdownSignaled = true;
- _cond.notify_all();
- }
+ return status;
+}
- void SyncSourceFeedback::run() {
- Client::initThread("SyncSourceFeedback");
+void SyncSourceFeedback::shutdown() {
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ _shutdownSignaled = true;
+ _cond.notify_all();
+}
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- while (true) { // breaks once _shutdownSignaled is true
- {
- stdx::unique_lock<stdx::mutex> lock(_mtx);
- while (!_positionChanged && !_shutdownSignaled) {
- _cond.wait(lock);
- }
+void SyncSourceFeedback::run() {
+ Client::initThread("SyncSourceFeedback");
- if (_shutdownSignaled) {
- break;
- }
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ while (true) { // breaks once _shutdownSignaled is true
+ {
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ while (!_positionChanged && !_shutdownSignaled) {
+ _cond.wait(lock);
+ }
- _positionChanged = false;
+ if (_shutdownSignaled) {
+ break;
}
- auto txn = cc().makeOperationContext();
- MemberState state = replCoord->getMemberState();
- if (state.primary() || state.startup()) {
- _resetConnection();
+ _positionChanged = false;
+ }
+
+ auto txn = cc().makeOperationContext();
+ MemberState state = replCoord->getMemberState();
+ if (state.primary() || state.startup()) {
+ _resetConnection();
+ continue;
+ }
+ const HostAndPort target = BackgroundSync::get()->getSyncTarget();
+ if (_syncTarget != target) {
+ _resetConnection();
+ _syncTarget = target;
+ }
+ if (!hasConnection()) {
+ // fix connection if need be
+ if (target.empty()) {
+ sleepmillis(500);
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ _positionChanged = true;
continue;
}
- const HostAndPort target = BackgroundSync::get()->getSyncTarget();
- if (_syncTarget != target) {
- _resetConnection();
- _syncTarget = target;
- }
- if (!hasConnection()) {
- // fix connection if need be
- if (target.empty()) {
- sleepmillis(500);
- stdx::unique_lock<stdx::mutex> lock(_mtx);
- _positionChanged = true;
- continue;
- }
- if (!_connect(txn.get(), target)) {
- sleepmillis(500);
- stdx::unique_lock<stdx::mutex> lock(_mtx);
- _positionChanged = true;
- continue;
- }
- }
- Status status = updateUpstream(txn.get());
- if (!status.isOK()) {
+ if (!_connect(txn.get(), target)) {
sleepmillis(500);
stdx::unique_lock<stdx::mutex> lock(_mtx);
_positionChanged = true;
+ continue;
}
}
+ Status status = updateUpstream(txn.get());
+ if (!status.isOK()) {
+ sleepmillis(500);
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ _positionChanged = true;
+ }
}
-} // namespace repl
-} // namespace mongo
+}
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/sync_source_feedback.h b/src/mongo/db/repl/sync_source_feedback.h
index ebbd55b1dbf..8e08fcf25b0 100644
--- a/src/mongo/db/repl/sync_source_feedback.h
+++ b/src/mongo/db/repl/sync_source_feedback.h
@@ -36,59 +36,59 @@
#include "mongo/util/net/hostandport.h"
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class SyncSourceFeedback {
- public:
- SyncSourceFeedback();
- ~SyncSourceFeedback();
+class SyncSourceFeedback {
+public:
+ SyncSourceFeedback();
+ ~SyncSourceFeedback();
- /// Notifies the SyncSourceFeedbackThread to wake up and send an update upstream of slave
- /// replication progress.
- void forwardSlaveProgress();
+ /// Notifies the SyncSourceFeedbackThread to wake up and send an update upstream of slave
+ /// replication progress.
+ void forwardSlaveProgress();
- /// Loops continuously until shutdown() is called, passing updates when they are present.
- void run();
+ /// Loops continuously until shutdown() is called, passing updates when they are present.
+ void run();
- /// Signals the run() method to terminate.
- void shutdown();
+ /// Signals the run() method to terminate.
+ void shutdown();
- private:
- void _resetConnection();
+private:
+ void _resetConnection();
- /**
- * Authenticates _connection using the server's cluster-membership credentials.
- *
- * Returns true on successful authentication.
- */
- bool replAuthenticate();
+ /**
+ * Authenticates _connection using the server's cluster-membership credentials.
+ *
+ * Returns true on successful authentication.
+ */
+ bool replAuthenticate();
- /* Inform the sync target of our current position in the oplog, as well as the positions
- * of all secondaries chained through us.
- */
- Status updateUpstream(OperationContext* txn);
+ /* Inform the sync target of our current position in the oplog, as well as the positions
+ * of all secondaries chained through us.
+ */
+ Status updateUpstream(OperationContext* txn);
- bool hasConnection() {
- return _connection.get();
- }
+ bool hasConnection() {
+ return _connection.get();
+ }
- /// Connect to sync target.
- bool _connect(OperationContext* txn, const HostAndPort& host);
+ /// Connect to sync target.
+ bool _connect(OperationContext* txn, const HostAndPort& host);
- // the member we are currently syncing from
- HostAndPort _syncTarget;
- // our connection to our sync target
- std::unique_ptr<DBClientConnection> _connection;
- // protects cond, _shutdownSignaled, and _positionChanged.
- stdx::mutex _mtx;
- // used to alert our thread of changes which need to be passed up the chain
- stdx::condition_variable _cond;
- // used to indicate a position change which has not yet been pushed along
- bool _positionChanged;
- // Once this is set to true the _run method will terminate
- bool _shutdownSignaled;
- };
-} // namespace repl
-} // namespace mongo
+ // the member we are currently syncing from
+ HostAndPort _syncTarget;
+ // our connection to our sync target
+ std::unique_ptr<DBClientConnection> _connection;
+ // protects _cond, _shutdownSignaled, and _positionChanged.
+ stdx::mutex _mtx;
+ // used to alert our thread of changes which need to be passed up the chain
+ stdx::condition_variable _cond;
+ // used to indicate a position change which has not yet been pushed along
+ bool _positionChanged;
+ // Once this is set to true the run() method will terminate
+ bool _shutdownSignaled;
+};
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 2bfe05728fe..2a1e4463139 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -66,260 +66,247 @@
namespace mongo {
- using std::endl;
+using std::endl;
namespace repl {
#if defined(MONGO_PLATFORM_64)
- const int replWriterThreadCount = 16;
- const int replPrefetcherThreadCount = 16;
+const int replWriterThreadCount = 16;
+const int replPrefetcherThreadCount = 16;
#elif defined(MONGO_PLATFORM_32)
- const int replWriterThreadCount = 2;
- const int replPrefetcherThreadCount = 2;
+const int replWriterThreadCount = 2;
+const int replPrefetcherThreadCount = 2;
#else
#error need to include something that defines MONGO_PLATFORM_XX
#endif
- static Counter64 opsAppliedStats;
+static Counter64 opsAppliedStats;
- //The oplog entries applied
- static ServerStatusMetricField<Counter64> displayOpsApplied( "repl.apply.ops",
- &opsAppliedStats );
+// The oplog entries applied
+static ServerStatusMetricField<Counter64> displayOpsApplied("repl.apply.ops", &opsAppliedStats);
- MONGO_FP_DECLARE(rsSyncApplyStop);
+MONGO_FP_DECLARE(rsSyncApplyStop);
- // Number and time of each ApplyOps worker pool round
- static TimerStats applyBatchStats;
- static ServerStatusMetricField<TimerStats> displayOpBatchesApplied(
- "repl.apply.batches",
- &applyBatchStats );
- void initializePrefetchThread() {
- if (!ClientBasic::getCurrent()) {
- Client::initThreadIfNotAlready();
- AuthorizationSession::get(cc())->grantInternalAuthorization();
- }
+// Number and time of each ApplyOps worker pool round
+static TimerStats applyBatchStats;
+static ServerStatusMetricField<TimerStats> displayOpBatchesApplied("repl.apply.batches",
+ &applyBatchStats);
+void initializePrefetchThread() {
+ if (!ClientBasic::getCurrent()) {
+ Client::initThreadIfNotAlready();
+ AuthorizationSession::get(cc())->grantInternalAuthorization();
}
- namespace {
- bool isCrudOpType( const char* field ) {
- switch ( field[0] ) {
- case 'd':
- case 'i':
- case 'u':
- return field[1] == 0;
- }
- return false;
- }
+}
+namespace {
+bool isCrudOpType(const char* field) {
+ switch (field[0]) {
+ case 'd':
+ case 'i':
+ case 'u':
+ return field[1] == 0;
}
+ return false;
+}
+}  // namespace
- SyncTail::SyncTail(BackgroundSyncInterface *q, MultiSyncApplyFunc func) :
- _networkQueue(q),
- _applyFunc(func),
- _writerPool(replWriterThreadCount, "repl writer worker "),
- _prefetcherPool(replPrefetcherThreadCount, "repl prefetch worker ")
- {}
-
- SyncTail::~SyncTail() {}
-
- bool SyncTail::peek(BSONObj* op) {
- return _networkQueue->peek(op);
- }
+SyncTail::SyncTail(BackgroundSyncInterface* q, MultiSyncApplyFunc func)
+ : _networkQueue(q),
+ _applyFunc(func),
+ _writerPool(replWriterThreadCount, "repl writer worker "),
+ _prefetcherPool(replPrefetcherThreadCount, "repl prefetch worker ") {}
- // static
- Status SyncTail::syncApply(OperationContext* txn,
- const BSONObj &op,
- bool convertUpdateToUpsert,
- ApplyOperationInLockFn applyOperationInLock,
- ApplyCommandInLockFn applyCommandInLock,
- IncrementOpsAppliedStatsFn incrementOpsAppliedStats) {
+SyncTail::~SyncTail() {}
- if (inShutdown()) {
- return Status::OK();
- }
+bool SyncTail::peek(BSONObj* op) {
+ return _networkQueue->peek(op);
+}
- // Count each log op application as a separate operation, for reporting purposes
- CurOp individualOp(txn);
+// static
+Status SyncTail::syncApply(OperationContext* txn,
+ const BSONObj& op,
+ bool convertUpdateToUpsert,
+ ApplyOperationInLockFn applyOperationInLock,
+ ApplyCommandInLockFn applyCommandInLock,
+ IncrementOpsAppliedStatsFn incrementOpsAppliedStats) {
+ if (inShutdown()) {
+ return Status::OK();
+ }
- const char *ns = op.getStringField("ns");
- verify(ns);
+ // Count each log op application as a separate operation, for reporting purposes
+ CurOp individualOp(txn);
- const char* opType = op["op"].valuestrsafe();
+ const char* ns = op.getStringField("ns");
+ verify(ns);
- bool isCommand(opType[0] == 'c');
- bool isNoOp(opType[0] == 'n');
+ const char* opType = op["op"].valuestrsafe();
- if ( (*ns == '\0') || (*ns == '.') ) {
- // this is ugly
- // this is often a no-op
- // but can't be 100% sure
- if (!isNoOp) {
- error() << "skipping bad op in oplog: " << op.toString();
- }
- return Status::OK();
- }
+ bool isCommand(opType[0] == 'c');
+ bool isNoOp(opType[0] == 'n');
- if (isCommand) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- // a command may need a global write lock. so we will conservatively go
- // ahead and grab one here. suboptimal. :-(
- Lock::GlobalWrite globalWriteLock(txn->lockState());
-
- // special case apply for commands to avoid implicit database creation
- Status status = applyCommandInLock(txn, op);
- incrementOpsAppliedStats();
- return status;
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "syncApply_command", ns);
+ if ((*ns == '\0') || (*ns == '.')) {
+ // An empty or malformed namespace is usually a no-op, but we can't be 100% sure,
+ // so log any such op that isn't explicitly marked as a no-op before skipping it.
+ if (!isNoOp) {
+ error() << "skipping bad op in oplog: " << op.toString();
}
+ return Status::OK();
+ }
- auto applyOp = [&](Database* db) {
- // For non-initial-sync, we convert updates to upserts
- // to suppress errors when replaying oplog entries.
- txn->setReplicatedWrites(false);
- DisableDocumentValidation validationDisabler(txn);
+ if (isCommand) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ // A command may need a global write lock, so we conservatively grab one here.
+ // This is suboptimal.
+ Lock::GlobalWrite globalWriteLock(txn->lockState());
- Status status = applyOperationInLock(txn, db, op, convertUpdateToUpsert);
+ // special case apply for commands to avoid implicit database creation
+ Status status = applyCommandInLock(txn, op);
incrementOpsAppliedStats();
return status;
- };
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "syncApply_command", ns);
+ }
+
+ auto applyOp = [&](Database* db) {
+ // For non-initial-sync, we convert updates to upserts
+ // to suppress errors when replaying oplog entries.
+ txn->setReplicatedWrites(false);
+ DisableDocumentValidation validationDisabler(txn);
+
+ Status status = applyOperationInLock(txn, db, op, convertUpdateToUpsert);
+ incrementOpsAppliedStats();
+ return status;
+ };
- if (isNoOp ||
- (opType[0] == 'i' && nsToCollectionSubstring( ns ) == "system.indexes")) {
- auto opStr = isNoOp ? "syncApply_noop" : "syncApply_indexBuild";
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- Lock::DBLock dbLock(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X);
- OldClientContext ctx(txn, ns);
- return applyOp(ctx.db());
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, opStr, ns);
+ if (isNoOp || (opType[0] == 'i' && nsToCollectionSubstring(ns) == "system.indexes")) {
+ auto opStr = isNoOp ? "syncApply_noop" : "syncApply_indexBuild";
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ Lock::DBLock dbLock(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X);
+ OldClientContext ctx(txn, ns);
+ return applyOp(ctx.db());
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, opStr, ns);
+ }
- if (isCrudOpType(opType)) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- // DB lock always acquires the global lock
- std::unique_ptr<Lock::DBLock> dbLock;
- std::unique_ptr<Lock::CollectionLock> collectionLock;
- std::unique_ptr<OldClientContext> ctx;
+ if (isCrudOpType(opType)) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ // DB lock always acquires the global lock
+ std::unique_ptr<Lock::DBLock> dbLock;
+ std::unique_ptr<Lock::CollectionLock> collectionLock;
+ std::unique_ptr<OldClientContext> ctx;
- auto dbName = nsToDatabaseSubstring(ns);
+ auto dbName = nsToDatabaseSubstring(ns);
- auto resetLocks = [&](LockMode mode) {
- collectionLock.reset();
- dbLock.reset(new Lock::DBLock(txn->lockState(), dbName, mode));
- collectionLock.reset(new Lock::CollectionLock(txn->lockState(), ns, mode));
- };
+ auto resetLocks = [&](LockMode mode) {
+ collectionLock.reset();
+ dbLock.reset(new Lock::DBLock(txn->lockState(), dbName, mode));
+ collectionLock.reset(new Lock::CollectionLock(txn->lockState(), ns, mode));
+ };
- resetLocks(MODE_IX);
- if (!dbHolder().get(txn, dbName)) {
- // need to create database, try again
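+ // First acquire intent locks (MODE_IX); escalate to exclusive locks (MODE_X)
+ // only if the database or collection must be created.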
+ resetLocks(MODE_IX);
+ if (!dbHolder().get(txn, dbName)) {
+ // need to create database, try again
+ resetLocks(MODE_X);
+ ctx.reset(new OldClientContext(txn, ns));
+ } else {
+ ctx.reset(new OldClientContext(txn, ns));
+ if (!ctx->db()->getCollection(ns)) {
+ // the collection does not exist; re-acquire locks in MODE_X and try again
+ ctx.reset();
resetLocks(MODE_X);
ctx.reset(new OldClientContext(txn, ns));
}
- else {
- ctx.reset(new OldClientContext(txn, ns));
- if (!ctx->db()->getCollection(ns)) {
- // uh, oh, we need to create collection
- // try again
- ctx.reset();
- resetLocks(MODE_X);
- ctx.reset(new OldClientContext(txn, ns));
- }
- }
+ }
- return applyOp(ctx->db());
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "syncApply_CRUD", ns);
+ return applyOp(ctx->db());
}
-
- // unknown opType
- str::stream ss;
- ss << "bad opType '" << opType << "' in oplog entry: " << op.toString();
- error() << std::string(ss);
- return Status(ErrorCodes::BadValue, ss);
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "syncApply_CRUD", ns);
}
- Status SyncTail::syncApply(OperationContext* txn,
- const BSONObj &op,
- bool convertUpdateToUpsert) {
- return syncApply(txn,
- op,
- convertUpdateToUpsert,
- applyOperation_inlock,
- applyCommand_inlock,
- stdx::bind(&Counter64::increment, &opsAppliedStats, 1ULL));
- }
+ // unknown opType
+ str::stream ss;
+ ss << "bad opType '" << opType << "' in oplog entry: " << op.toString();
+ error() << std::string(ss);
+ return Status(ErrorCodes::BadValue, ss);
+}
+
+Status SyncTail::syncApply(OperationContext* txn, const BSONObj& op, bool convertUpdateToUpsert) {
+ return syncApply(txn,
+ op,
+ convertUpdateToUpsert,
+ applyOperation_inlock,
+ applyCommand_inlock,
+ stdx::bind(&Counter64::increment, &opsAppliedStats, 1ULL));
+}
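+
+// For illustration only (hypothetical caller; assumes an OperationContext* txn and a
+// BSONObj op are already in hand), replaying one oplog entry during steady-state
+// replication reduces to:
+//
+//     Status s = SyncTail::syncApply(txn, op, true /* convertUpdateToUpsert */);
+//     if (!s.isOK()) {
+//         // handle the failed apply
+//     }
+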
namespace {
- // The pool threads call this to prefetch each op
- void prefetchOp(const BSONObj& op) {
- initializePrefetchThread();
-
- const char *ns = op.getStringField("ns");
- if (ns && (ns[0] != '\0')) {
- try {
- // one possible tweak here would be to stay in the read lock for this database
- // for multiple prefetches if they are for the same database.
- OperationContextImpl txn;
- AutoGetCollectionForRead ctx(&txn, ns);
- Database* db = ctx.getDb();
- if (db) {
- prefetchPagesForReplicatedOp(&txn, db, op);
- }
- }
- catch (const DBException& e) {
- LOG(2) << "ignoring exception in prefetchOp(): " << e.what() << endl;
- }
- catch (const std::exception& e) {
- log() << "Unhandled std::exception in prefetchOp(): " << e.what() << endl;
- fassertFailed(16397);
+// The pool threads call this to prefetch each op
+void prefetchOp(const BSONObj& op) {
+ initializePrefetchThread();
+
+ const char* ns = op.getStringField("ns");
+ if (ns && (ns[0] != '\0')) {
+ try {
+ // one possible tweak here would be to stay in the read lock for this database
+ // for multiple prefetches if they are for the same database.
+ OperationContextImpl txn;
+ AutoGetCollectionForRead ctx(&txn, ns);
+ Database* db = ctx.getDb();
+ if (db) {
+ prefetchPagesForReplicatedOp(&txn, db, op);
}
+ } catch (const DBException& e) {
+ LOG(2) << "ignoring exception in prefetchOp(): " << e.what() << endl;
+ } catch (const std::exception& e) {
+ log() << "Unhandled std::exception in prefetchOp(): " << e.what() << endl;
+ fassertFailed(16397);
}
}
+}
- // Doles out all the work to the reader pool threads and waits for them to complete
- void prefetchOps(const std::deque<BSONObj>& ops,
- OldThreadPool* prefetcherPool) {
- invariant(prefetcherPool);
- for (std::deque<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- prefetcherPool->schedule(&prefetchOp, *it);
- }
- prefetcherPool->join();
+// Doles out all the work to the reader pool threads and waits for them to complete
+void prefetchOps(const std::deque<BSONObj>& ops, OldThreadPool* prefetcherPool) {
+ invariant(prefetcherPool);
+ for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ prefetcherPool->schedule(&prefetchOp, *it);
}
+ prefetcherPool->join();
+}
- // Doles out all the work to the writer pool threads and waits for them to complete
- void applyOps(const std::vector< std::vector<BSONObj> >& writerVectors,
- OldThreadPool* writerPool,
- SyncTail::MultiSyncApplyFunc func,
- SyncTail* sync) {
- TimerHolder timer(&applyBatchStats);
- for (std::vector< std::vector<BSONObj> >::const_iterator it = writerVectors.begin();
- it != writerVectors.end();
- ++it) {
- if (!it->empty()) {
- writerPool->schedule(func, boost::cref(*it), sync);
- }
+// Doles out all the work to the writer pool threads and waits for them to complete
+void applyOps(const std::vector<std::vector<BSONObj>>& writerVectors,
+ OldThreadPool* writerPool,
+ SyncTail::MultiSyncApplyFunc func,
+ SyncTail* sync) {
+ TimerHolder timer(&applyBatchStats);
+ for (std::vector<std::vector<BSONObj>>::const_iterator it = writerVectors.begin();
+ it != writerVectors.end();
+ ++it) {
+ if (!it->empty()) {
+ writerPool->schedule(func, boost::cref(*it), sync);
}
- writerPool->join();
}
+ writerPool->join();
+}
- void fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors) {
-
- for (std::deque<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- const BSONElement e = it->getField("ns");
- verify(e.type() == String);
- const char* ns = e.valuestr();
- int len = e.valuestrsize();
- uint32_t hash = 0;
- MurmurHash3_x86_32( ns, len, 0, &hash);
-
- const char* opType = it->getField( "op" ).valuestrsafe();
-
- if (getGlobalServiceContext()->getGlobalStorageEngine()->supportsDocLocking() &&
- isCrudOpType(opType)) {
- BSONElement id;
- switch (opType[0]) {
+void fillWriterVectors(const std::deque<BSONObj>& ops,
+ std::vector<std::vector<BSONObj>>* writerVectors) {
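+ // Partition the ops across the writer threads by hashing each op's namespace, so
+ // that all ops for a given collection are applied in oplog order by a single thread.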
+ for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ const BSONElement e = it->getField("ns");
+ verify(e.type() == String);
+ const char* ns = e.valuestr();
+ int len = e.valuestrsize();
+ uint32_t hash = 0;
+ MurmurHash3_x86_32(ns, len, 0, &hash);
+
+ const char* opType = it->getField("op").valuestrsafe();
+
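+ // Under document-level locking we can go finer-grained: fold the document _id into
+ // the hash so ops on distinct documents in one collection may go to different
+ // writer threads, while ops on the same document stay ordered.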
+ if (getGlobalServiceContext()->getGlobalStorageEngine()->supportsDocLocking() &&
+ isCrudOpType(opType)) {
+ BSONElement id;
+ switch (opType[0]) {
case 'u':
id = it->getField("o2").Obj()["_id"];
break;
@@ -327,571 +314,554 @@ namespace {
case 'i':
id = it->getField("o").Obj()["_id"];
break;
- }
-
- const size_t idHash = BSONElement::Hasher()( id );
- MurmurHash3_x86_32(&idHash, sizeof(idHash), hash, &hash);
}
- (*writerVectors)[hash % writerVectors->size()].push_back(*it);
- }
- }
-
-} // namespace
-
- // Doles out all the work to the writer pool threads and waits for them to complete
- // static
- OpTime SyncTail::multiApply(OperationContext* txn,
- const OpQueue& ops,
- OldThreadPool* prefetcherPool,
- OldThreadPool* writerPool,
- MultiSyncApplyFunc func,
- SyncTail* sync,
- bool supportsWaitingUntilDurable) {
- invariant(prefetcherPool);
- invariant(writerPool);
- invariant(func);
- invariant(sync);
-
- if (getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1()) {
- // Use a ThreadPool to prefetch all the operations in a batch.
- prefetchOps(ops.getDeque(), prefetcherPool);
+ const size_t idHash = BSONElement::Hasher()(id);
+ MurmurHash3_x86_32(&idHash, sizeof(idHash), hash, &hash);
}
-
- std::vector< std::vector<BSONObj> > writerVectors(replWriterThreadCount);
-
- fillWriterVectors(ops.getDeque(), &writerVectors);
- LOG(2) << "replication batch size is " << ops.getDeque().size() << endl;
- // We must grab this because we're going to grab write locks later.
- // We hold this mutex the entire time we're writing; it doesn't matter
- // because all readers are blocked anyway.
- stdx::lock_guard<SimpleMutex> fsynclk(filesLockedFsync);
- // stop all readers until we're done
- Lock::ParallelBatchWriterMode pbwm(txn->lockState());
-
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getMemberState().primary() &&
- !replCoord->isWaitingForApplierToDrain()) {
+ (*writerVectors)[hash % writerVectors->size()].push_back(*it);
+ }
+}
- severe() << "attempting to replicate ops while primary";
- fassertFailed(28527);
- }
+} // namespace
- applyOps(writerVectors, writerPool, func, sync);
+// Doles out all the work to the writer pool threads and waits for them to complete
+// static
+OpTime SyncTail::multiApply(OperationContext* txn,
+ const OpQueue& ops,
+ OldThreadPool* prefetcherPool,
+ OldThreadPool* writerPool,
+ MultiSyncApplyFunc func,
+ SyncTail* sync,
+ bool supportsWaitingUntilDurable) {
+ invariant(prefetcherPool);
+ invariant(writerPool);
+ invariant(func);
+ invariant(sync);
+
+ if (getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1()) {
+ // Use a ThreadPool to prefetch all the operations in a batch.
+ prefetchOps(ops.getDeque(), prefetcherPool);
+ }
- if (inShutdown()) {
- return OpTime();
- }
+ std::vector<std::vector<BSONObj>> writerVectors(replWriterThreadCount);
- const bool mustWaitUntilDurable = replCoord->isV1ElectionProtocol() &&
- supportsWaitingUntilDurable;
- if (mustWaitUntilDurable) {
- txn->recoveryUnit()->goingToWaitUntilDurable();
- }
+ fillWriterVectors(ops.getDeque(), &writerVectors);
+ LOG(2) << "replication batch size is " << ops.getDeque().size() << endl;
+ // We must grab this because we're going to grab write locks later.
+ // We hold this mutex the entire time we're writing; it doesn't matter
+ // because all readers are blocked anyway.
+ stdx::lock_guard<SimpleMutex> fsynclk(filesLockedFsync);
- OpTime lastOpTime = writeOpsToOplog(txn, ops.getDeque());
+ // stop all readers until we're done
+ Lock::ParallelBatchWriterMode pbwm(txn->lockState());
- if (mustWaitUntilDurable) {
- txn->recoveryUnit()->waitUntilDurable();
- }
- ReplClientInfo::forClient(txn->getClient()).setLastOp(lastOpTime);
- replCoord->setMyLastOptime(lastOpTime);
- setNewTimestamp(lastOpTime.getTimestamp());
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getMemberState().primary() && !replCoord->isWaitingForApplierToDrain()) {
+ severe() << "attempting to replicate ops while primary";
+ fassertFailed(28527);
+ }
- BackgroundSync::get()->notify(txn);
+ applyOps(writerVectors, writerPool, func, sync);
- return lastOpTime;
+ if (inShutdown()) {
+ return OpTime();
}
- void SyncTail::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
- _applyOplogUntil(txn, endOpTime);
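+ // With the V1 election protocol the batch is journaled before the new optime is
+ // advertised; give the recovery unit advance notice that waitUntilDurable() follows.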
+ const bool mustWaitUntilDurable =
+ replCoord->isV1ElectionProtocol() && supportsWaitingUntilDurable;
+ if (mustWaitUntilDurable) {
+ txn->recoveryUnit()->goingToWaitUntilDurable();
}
- /* applies oplog from "now" until endOpTime using the applier threads for initial sync*/
- void SyncTail::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTime) {
- unsigned long long bytesApplied = 0;
- unsigned long long entriesApplied = 0;
- while (true) {
- OpQueue ops;
-
- while (!tryPopAndWaitForMore(txn, &ops, getGlobalReplicationCoordinator())) {
- // nothing came back last time, so go again
- if (ops.empty()) continue;
+ OpTime lastOpTime = writeOpsToOplog(txn, ops.getDeque());
- // Check if we reached the end
- const BSONObj currentOp = ops.back();
- const OpTime currentOpTime = extractOpTime(currentOp);
+ if (mustWaitUntilDurable) {
+ txn->recoveryUnit()->waitUntilDurable();
+ }
+ ReplClientInfo::forClient(txn->getClient()).setLastOp(lastOpTime);
+ replCoord->setMyLastOptime(lastOpTime);
+ setNewTimestamp(lastOpTime.getTimestamp());
- // When we reach the end return this batch
- if (currentOpTime == endOpTime) {
- break;
- }
- else if (currentOpTime > endOpTime) {
- severe() << "Applied past expected end " << endOpTime << " to " << currentOpTime
- << " without seeing it. Rollback?";
- fassertFailedNoTrace(18693);
- }
+ BackgroundSync::get()->notify(txn);
- // apply replication batch limits
- if (ops.getSize() > replBatchLimitBytes)
- break;
- if (ops.getDeque().size() > replBatchLimitOperations)
- break;
- };
+ return lastOpTime;
+}
- if (ops.empty()) {
- severe() << "got no ops for batch...";
- fassertFailedNoTrace(18692);
- }
+void SyncTail::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
+ _applyOplogUntil(txn, endOpTime);
+}
- const BSONObj lastOp = ops.back().getOwned();
+/* applies oplog from "now" until endOpTime using the applier threads for initial sync*/
+void SyncTail::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTime) {
+ unsigned long long bytesApplied = 0;
+ unsigned long long entriesApplied = 0;
+ while (true) {
+ OpQueue ops;
- // Tally operation information
- bytesApplied += ops.getSize();
- entriesApplied += ops.getDeque().size();
+ while (!tryPopAndWaitForMore(txn, &ops, getGlobalReplicationCoordinator())) {
+ // nothing came back last time, so go again
+ if (ops.empty())
+ continue;
- const OpTime lastOpTime = multiApply(txn,
- ops,
- &_prefetcherPool,
- &_writerPool,
- _applyFunc,
- this,
- supportsWaitingUntilDurable());
- if (inShutdown()) {
- return;
+ // Check if we reached the end
+ const BSONObj currentOp = ops.back();
+ const OpTime currentOpTime = extractOpTime(currentOp);
+
+ // When we reach the end return this batch
+ if (currentOpTime == endOpTime) {
+ break;
+ } else if (currentOpTime > endOpTime) {
+ severe() << "Applied past expected end " << endOpTime << " to " << currentOpTime
+ << " without seeing it. Rollback?";
+ fassertFailedNoTrace(18693);
}
- // if the last op applied was our end, return
- if (lastOpTime == endOpTime) {
- LOG(1) << "SyncTail applied " << entriesApplied
- << " entries (" << bytesApplied << " bytes)"
- << " and finished at opTime " << endOpTime;
- return;
- }
- } // end of while (true)
- }
+ // apply replication batch limits
+ if (ops.getSize() > replBatchLimitBytes)
+ break;
+ if (ops.getDeque().size() > replBatchLimitOperations)
+ break;
+ };
-namespace {
- void tryToGoLiveAsASecondary(OperationContext* txn, ReplicationCoordinator* replCoord) {
- if (replCoord->isInPrimaryOrSecondaryState()) {
- return;
+ if (ops.empty()) {
+ severe() << "got no ops for batch...";
+ fassertFailedNoTrace(18692);
}
- ScopedTransaction transaction(txn, MODE_S);
- Lock::GlobalRead readLock(txn->lockState());
+ const BSONObj lastOp = ops.back().getOwned();
- if (replCoord->getMaintenanceMode()) {
- // we're not actually going live
- return;
- }
+ // Tally operation information
+ bytesApplied += ops.getSize();
+ entriesApplied += ops.getDeque().size();
- // Only state RECOVERING can transition to SECONDARY.
- MemberState state(replCoord->getMemberState());
- if (!state.recovering()) {
+ const OpTime lastOpTime = multiApply(txn,
+ ops,
+ &_prefetcherPool,
+ &_writerPool,
+ _applyFunc,
+ this,
+ supportsWaitingUntilDurable());
+ if (inShutdown()) {
return;
}
- OpTime minvalid = getMinValid(txn);
- if (minvalid > replCoord->getMyLastOptime()) {
+ // if the last op applied was our end, return
+ if (lastOpTime == endOpTime) {
+ LOG(1) << "SyncTail applied " << entriesApplied << " entries (" << bytesApplied
+ << " bytes)"
+ << " and finished at opTime " << endOpTime;
return;
}
+ } // end of while (true)
+}
- bool worked = replCoord->setFollowerMode(MemberState::RS_SECONDARY);
- if (!worked) {
- warning() << "Failed to transition into " << MemberState(MemberState::RS_SECONDARY)
- << ". Current state: " << replCoord->getMemberState();
- }
+namespace {
+void tryToGoLiveAsASecondary(OperationContext* txn, ReplicationCoordinator* replCoord) {
+ if (replCoord->isInPrimaryOrSecondaryState()) {
+ return;
}
-}
- /* tail an oplog. ok to return, will be re-called. */
- void SyncTail::oplogApplication() {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ ScopedTransaction transaction(txn, MODE_S);
+ Lock::GlobalRead readLock(txn->lockState());
- while(!inShutdown()) {
- OpQueue ops;
- OperationContextImpl txn;
+ if (replCoord->getMaintenanceMode()) {
+ // we're not actually going live
+ return;
+ }
- Timer batchTimer;
- int lastTimeChecked = 0;
+ // Only state RECOVERING can transition to SECONDARY.
+ MemberState state(replCoord->getMemberState());
+ if (!state.recovering()) {
+ return;
+ }
- do {
- int now = batchTimer.seconds();
+ OpTime minvalid = getMinValid(txn);
+ if (minvalid > replCoord->getMyLastOptime()) {
+ return;
+ }
- // apply replication batch limits
- if (!ops.empty()) {
- if (now > replBatchLimitSeconds)
- break;
- if (ops.getDeque().size() > replBatchLimitOperations)
- break;
- }
- // occasionally check some things
- // (always checked in the first iteration of this do-while loop, because
- // ops is empty)
- if (ops.empty() || now > lastTimeChecked) {
- BackgroundSync* bgsync = BackgroundSync::get();
- if (bgsync->getInitialSyncRequestedFlag()) {
- // got a resync command
- return;
- }
- lastTimeChecked = now;
- // can we become secondary?
- // we have to check this before calling mgr, as we must be a secondary to
- // become primary
- tryToGoLiveAsASecondary(&txn, replCoord);
- }
+ bool worked = replCoord->setFollowerMode(MemberState::RS_SECONDARY);
+ if (!worked) {
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_SECONDARY)
+ << ". Current state: " << replCoord->getMemberState();
+ }
+}
+}  // namespace
- const int slaveDelaySecs = replCoord->getSlaveDelaySecs().count();
- if (!ops.empty() && slaveDelaySecs > 0) {
- const BSONObj lastOp = ops.back();
- const unsigned int opTimestampSecs = lastOp["ts"].timestamp().getSecs();
+/* tail an oplog. ok to return, will be re-called. */
+void SyncTail::oplogApplication() {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- // Stop the batch as the lastOp is too new to be applied. If we continue
- // on, we can get ops that are way ahead of the delay and this will
- // make this thread sleep longer when handleSlaveDelay is called
- // and apply ops much sooner than we like.
- if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
- break;
- }
- }
- // keep fetching more ops as long as we haven't filled up a full batch yet
- } while (!tryPopAndWaitForMore(&txn, &ops, replCoord) && // tryPopAndWaitForMore returns
- // true when we need to end a
- // batch early
- (ops.getSize() < replBatchLimitBytes) &&
- !inShutdown());
-
- // For pausing replication in tests
- while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
- sleepmillis(0);
- }
+ while (!inShutdown()) {
+ OpQueue ops;
+ OperationContextImpl txn;
- if (ops.empty()) {
- continue;
- }
+ Timer batchTimer;
+ int lastTimeChecked = 0;
- const BSONObj lastOp = ops.back();
- handleSlaveDelay(lastOp);
-
- // Set minValid to the last op to be applied in this next batch.
- // This will cause this node to go into RECOVERING state
- // if we should crash and restart before updating the oplog
- setMinValid(&txn, extractOpTime(lastOp));
- multiApply(&txn,
- ops,
- &_prefetcherPool,
- &_writerPool,
- _applyFunc,
- this,
- supportsWaitingUntilDurable());
- }
- }
+ do {
+ int now = batchTimer.seconds();
- // Copies ops out of the bgsync queue into the deque passed in as a parameter.
- // Returns true if the batch should be ended early.
- // Batch should end early if we encounter a command, or if
- // there are no further ops in the bgsync queue to read.
- // This function also blocks 1 second waiting for new ops to appear in the bgsync
- // queue. We can't block forever because there are maintenance things we need
- // to periodically check in the loop.
- bool SyncTail::tryPopAndWaitForMore(OperationContext* txn,
- SyncTail::OpQueue* ops,
- ReplicationCoordinator* replCoord) {
- BSONObj op;
- // Check to see if there are ops waiting in the bgsync queue
- bool peek_success = peek(&op);
-
- if (!peek_success) {
- // if we don't have anything in the queue, wait a bit for something to appear
- if (ops->empty()) {
- if (replCoord->isWaitingForApplierToDrain()) {
- BackgroundSync::get()->waitUntilPaused();
- if (peek(&op)) {
- // The producer generated a last batch of ops before pausing so return
- // false so that we'll come back and apply them before signaling the drain
- // is complete.
- return false;
- }
- replCoord->signalDrainComplete(txn);
+ // apply replication batch limits
+ if (!ops.empty()) {
+ if (now > replBatchLimitSeconds)
+ break;
+ if (ops.getDeque().size() > replBatchLimitOperations)
+ break;
+ }
+ // occasionally check some things
+ // (always checked in the first iteration of this do-while loop, because
+ // ops is empty)
+ if (ops.empty() || now > lastTimeChecked) {
+ BackgroundSync* bgsync = BackgroundSync::get();
+ if (bgsync->getInitialSyncRequestedFlag()) {
+ // got a resync command
+ return;
}
- // block up to 1 second
- _networkQueue->waitForMore();
- return false;
+ lastTimeChecked = now;
+ // can we become secondary?
+ // we have to check this before calling mgr, as we must be a secondary to
+ // become primary
+ tryToGoLiveAsASecondary(&txn, replCoord);
}
- // otherwise, apply what we have
- return true;
- }
-
- const char* ns = op["ns"].valuestrsafe();
-
- // check for commands
- if ((op["op"].valuestrsafe()[0] == 'c') ||
- // Index builds are acheived through the use of an insert op, not a command op.
- // The following line is the same as what the insert code uses to detect an index build.
- ( *ns != '\0' && nsToCollectionSubstring(ns) == "system.indexes" )) {
+ const int slaveDelaySecs = replCoord->getSlaveDelaySecs().count();
+ if (!ops.empty() && slaveDelaySecs > 0) {
+ const BSONObj lastOp = ops.back();
+ const unsigned int opTimestampSecs = lastOp["ts"].timestamp().getSecs();
- if (ops->empty()) {
- // apply commands one-at-a-time
- ops->push_back(op);
- _networkQueue->consume();
+ // Stop the batch because the lastOp is too new to be applied yet. If we
+ // kept going we could pull in ops far ahead of the delay window, which
+ // would make this thread sleep longer in handleSlaveDelay and apply
+ // ops much sooner than the configured delay allows.
+ if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
+ break;
+ }
}
-
- // otherwise, apply what we have so far and come back for the command
- return true;
+ // keep fetching more ops as long as we haven't filled up a full batch yet
+ } while (!tryPopAndWaitForMore(&txn, &ops, replCoord) && // tryPopAndWaitForMore returns
+ // true when we need to end a
+ // batch early
+ (ops.getSize() < replBatchLimitBytes) &&
+ !inShutdown());
+
+ // For pausing replication in tests
+ while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
+ sleepmillis(0);
}
- // check for oplog version change
- BSONElement elemVersion = op["v"];
- int curVersion = 0;
- if (elemVersion.eoo())
- // missing version means version 1
- curVersion = 1;
- else
- curVersion = elemVersion.Int();
-
- if (curVersion != OPLOG_VERSION) {
- severe() << "expected oplog version " << OPLOG_VERSION << " but found version "
- << curVersion << " in oplog entry: " << op;
- fassertFailedNoTrace(18820);
+ if (ops.empty()) {
+ continue;
}
-
- // Copy the op to the deque and remove it from the bgsync queue.
- ops->push_back(op);
- _networkQueue->consume();
- // Go back for more ops
- return false;
+ const BSONObj lastOp = ops.back();
+ handleSlaveDelay(lastOp);
+
+ // Set minValid to the last op to be applied in this next batch.
+ // This will cause this node to go into RECOVERING state
+ // if we should crash and restart before updating the oplog
+ setMinValid(&txn, extractOpTime(lastOp));
+ multiApply(&txn,
+ ops,
+ &_prefetcherPool,
+ &_writerPool,
+ _applyFunc,
+ this,
+ supportsWaitingUntilDurable());
}
+}
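
The do-while above closes a batch on whichever limit trips first: elapsed time (replBatchLimitSeconds), op count (replBatchLimitOperations), or accumulated bytes (replBatchLimitBytes). A minimal standalone sketch of that cutoff policy follows; the 100 MB figure assumes a 64-bit build, per the comment in sync_tail.h, and the function name is invented for illustration, not part of the patch.

    #include <cstddef>

    // Returns true when the batch assembled so far should be handed to multiApply.
    // Mirrors the three checks in the loop above.
    bool batchIsFull(std::size_t bytes, std::size_t numOps, int elapsedSeconds) {
        const std::size_t kLimitBytes = 100U * 1024U * 1024U;  // dur::UncommittedBytesLimit (64-bit)
        const std::size_t kLimitOps = 5000U;                   // replBatchLimitOperations
        const int kLimitSeconds = 1;                           // replBatchLimitSeconds
        return bytes >= kLimitBytes || numOps > kLimitOps || elapsedSeconds > kLimitSeconds;
    }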
- void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- int slaveDelaySecs = replCoord->getSlaveDelaySecs().count();
-
- // ignore slaveDelay if the box is still initializing. once
- // it becomes secondary we can worry about it.
- if( slaveDelaySecs > 0 && replCoord->getMemberState().secondary() ) {
- const Timestamp ts = lastOp["ts"].timestamp();
- long long a = ts.getSecs();
- long long b = time(0);
- long long lag = b - a;
- long long sleeptime = slaveDelaySecs - lag;
- if( sleeptime > 0 ) {
- uassert(12000, "rs slaveDelay differential too big check clocks and systems",
- sleeptime < 0x40000000);
- if( sleeptime < 60 ) {
- sleepsecs((int) sleeptime);
- }
- else {
- warning() << "slavedelay causing a long sleep of " << sleeptime
- << " seconds";
- // sleep(hours) would prevent reconfigs from taking effect & such!
- long long waitUntil = b + sleeptime;
- while(time(0) < waitUntil) {
- sleepsecs(6);
-
- // Handle reconfigs that changed the slave delay
- if (replCoord->getSlaveDelaySecs().count() != slaveDelaySecs)
- break;
- }
+// Copies ops out of the bgsync queue into the deque passed in as a parameter.
+// Returns true if the batch should be ended early.
+// Batch should end early if we encounter a command, or if
+// there are no further ops in the bgsync queue to read.
+// This function also blocks up to 1 second waiting for new ops to appear in the bgsync
+// queue. We can't block forever because there are maintenance things we need
+// to periodically check in the loop.
+bool SyncTail::tryPopAndWaitForMore(OperationContext* txn,
+ SyncTail::OpQueue* ops,
+ ReplicationCoordinator* replCoord) {
+ BSONObj op;
+ // Check to see if there are ops waiting in the bgsync queue
+ bool peek_success = peek(&op);
+
+ if (!peek_success) {
+ // if we don't have anything in the queue, wait a bit for something to appear
+ if (ops->empty()) {
+ if (replCoord->isWaitingForApplierToDrain()) {
+ BackgroundSync::get()->waitUntilPaused();
+ if (peek(&op)) {
+ // The producer generated a last batch of ops before pausing so return
+ // false so that we'll come back and apply them before signaling the drain
+ // is complete.
+ return false;
}
+ replCoord->signalDrainComplete(txn);
}
- } // endif slaveDelay
+ // block up to 1 second
+ _networkQueue->waitForMore();
+ return false;
+ }
+
+ // otherwise, apply what we have
+ return true;
}
- void SyncTail::setHostname(const std::string& hostname) {
- _hostname = hostname;
+ const char* ns = op["ns"].valuestrsafe();
+
+ // check for commands
+ if ((op["op"].valuestrsafe()[0] == 'c') ||
+ // Index builds are achieved through the use of an insert op, not a command op.
+ // The following line is the same as what the insert code uses to detect an index build.
+ (*ns != '\0' && nsToCollectionSubstring(ns) == "system.indexes")) {
+ if (ops->empty()) {
+ // apply commands one-at-a-time
+ ops->push_back(op);
+ _networkQueue->consume();
+ }
+
+ // otherwise, apply what we have so far and come back for the command
+ return true;
}
- BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
- OplogReader missingObjReader; // why are we using OplogReader to run a non-oplog query?
- const char *ns = o.getStringField("ns");
+ // check for oplog version change
+ BSONElement elemVersion = op["v"];
+ int curVersion = 0;
+ if (elemVersion.eoo())
+ // missing version means version 1
+ curVersion = 1;
+ else
+ curVersion = elemVersion.Int();
+
+ if (curVersion != OPLOG_VERSION) {
+ severe() << "expected oplog version " << OPLOG_VERSION << " but found version "
+ << curVersion << " in oplog entry: " << op;
+ fassertFailedNoTrace(18820);
+ }
- // capped collections
- Collection* collection = db->getCollection(ns);
- if ( collection && collection->isCapped() ) {
- log() << "missing doc, but this is okay for a capped collection (" << ns << ")";
- return BSONObj();
- }
+ // Copy the op to the deque and remove it from the bgsync queue.
+ ops->push_back(op);
+ _networkQueue->consume();
- const int retryMax = 3;
- for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
- if (retryCount != 1) {
- // if we are retrying, sleep a bit to let the network possibly recover
- sleepsecs(retryCount * retryCount);
- }
- try {
- bool ok = missingObjReader.connect(HostAndPort(_hostname));
- if (!ok) {
- warning() << "network problem detected while connecting to the "
- << "sync source, attempt " << retryCount << " of "
- << retryMax << endl;
- continue; // try again
+ // Go back for more ops
+ return false;
+}
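
One consequence of the contract above: a command op is a hard batch boundary and is applied in a batch of its own. A worked trace with an invented queue (illustrative only):

    // queue: [insert A, insert B, {op:"c", ...}, insert C]
    //   call 1: pops A, returns false            -> batch = [A]
    //   call 2: pops B, returns false            -> batch = [A, B]
    //   call 3: sees the command, returns true   -> [A, B] is applied; command stays queued
    //   call 4: batch is empty, so the command is popped and true returned -> applied alone
    //   call 5: pops C, returns false            -> the next batch begins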
+
+void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ int slaveDelaySecs = replCoord->getSlaveDelaySecs().count();
+
+ // ignore slaveDelay if the box is still initializing. once
+ // it becomes secondary we can worry about it.
+ if (slaveDelaySecs > 0 && replCoord->getMemberState().secondary()) {
+ const Timestamp ts = lastOp["ts"].timestamp();
+ long long a = ts.getSecs();
+ long long b = time(0);
+ long long lag = b - a;
+ long long sleeptime = slaveDelaySecs - lag;
+ if (sleeptime > 0) {
+ uassert(12000,
+ "rs slaveDelay differential too big check clocks and systems",
+ sleeptime < 0x40000000);
+ if (sleeptime < 60) {
+ sleepsecs((int)sleeptime);
+ } else {
+ warning() << "slavedelay causing a long sleep of " << sleeptime << " seconds";
+ // sleep(hours) would prevent reconfigs from taking effect & such!
+ long long waitUntil = b + sleeptime;
+ while (time(0) < waitUntil) {
+ sleepsecs(6);
+
+ // Handle reconfigs that changed the slave delay
+ if (replCoord->getSlaveDelaySecs().count() != slaveDelaySecs)
+ break;
}
}
- catch (const SocketException&) {
- warning() << "network problem detected while connecting to the "
- << "sync source, attempt " << retryCount << " of "
- << retryMax << endl;
- continue; // try again
- }
+ }
+ } // endif slaveDelay
+}
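
A worked example of the arithmetic above, with invented numbers: slaveDelaySecs = 3600 and a lastOp timestamped 600 seconds ago give lag = 600 and sleeptime = 3000; since 3000 >= 60, the node sleeps in 6-second slices until time(0) reaches b + 3000, re-reading the configured delay on each wakeup so a reconfig can cut the wait short. The core computation, as a self-contained sketch:

    #include <ctime>

    // Seconds to sleep so an op stamped at 'opSecs' is applied no earlier than
    // 'slaveDelaySecs' after it was generated; <= 0 means it is already old enough.
    long long slaveDelaySleepSecs(long long opSecs, long long slaveDelaySecs) {
        const long long lag = static_cast<long long>(std::time(nullptr)) - opSecs;
        return slaveDelaySecs - lag;
    }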
- // might be more than just _id in the update criteria
- BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
- BSONObj missingObj;
- try {
- missingObj = missingObjReader.findOne(ns, query);
- }
- catch (const SocketException&) {
- warning() << "network problem detected while fetching a missing document from the "
- << "sync source, attempt " << retryCount << " of "
- << retryMax << endl;
- continue; // try again
- }
- catch (DBException& e) {
- error() << "assertion fetching missing object: " << e.what() << endl;
- throw;
+void SyncTail::setHostname(const std::string& hostname) {
+ _hostname = hostname;
+}
+
+BSONObj SyncTail::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
+ OplogReader missingObjReader; // why are we using OplogReader to run a non-oplog query?
+ const char* ns = o.getStringField("ns");
+
+ // capped collections
+ Collection* collection = db->getCollection(ns);
+ if (collection && collection->isCapped()) {
+ log() << "missing doc, but this is okay for a capped collection (" << ns << ")";
+ return BSONObj();
+ }
+
+ const int retryMax = 3;
+ for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
+ if (retryCount != 1) {
+ // if we are retrying, sleep a bit to let the network possibly recover
+ sleepsecs(retryCount * retryCount);
+ }
+ try {
+ bool ok = missingObjReader.connect(HostAndPort(_hostname));
+ if (!ok) {
+ warning() << "network problem detected while connecting to the "
+ << "sync source, attempt " << retryCount << " of " << retryMax << endl;
+ continue; // try again
}
+ } catch (const SocketException&) {
+ warning() << "network problem detected while connecting to the "
+ << "sync source, attempt " << retryCount << " of " << retryMax << endl;
+ continue; // try again
+ }
- // success!
- return missingObj;
+ // might be more than just _id in the update criteria
+ BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
+ BSONObj missingObj;
+ try {
+ missingObj = missingObjReader.findOne(ns, query);
+ } catch (const SocketException&) {
+ warning() << "network problem detected while fetching a missing document from the "
+ << "sync source, attempt " << retryCount << " of " << retryMax << endl;
+ continue; // try again
+ } catch (DBException& e) {
+ error() << "assertion fetching missing object: " << e.what() << endl;
+ throw;
}
- // retry count exceeded
- msgasserted(15916, str::stream() <<
- "Can no longer connect to initial sync source: " << _hostname);
- }
- bool SyncTail::shouldRetry(OperationContext* txn, const BSONObj& o) {
- const NamespaceString nss(o.getStringField("ns"));
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- // Take an X lock on the database in order to preclude other modifications.
- // Also, the database might not exist yet, so create it.
- AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
- Database* const db = autoDb.getDb();
+ // success!
+ return missingObj;
+ }
+ // retry count exceeded
+ msgasserted(15916,
+ str::stream() << "Can no longer connect to initial sync source: " << _hostname);
+}
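
The sleepsecs(retryCount * retryCount) above yields a simple quadratic backoff; laid out explicitly (attempt numbers per the loop above):

    // attempt 1: immediate
    // attempt 2: after a 4 s sleep
    // attempt 3: after a 9 s sleep
    // After attempt 3 fails, msgasserted(15916) aborts the fetch, so a flaky
    // network gets roughly 13 s (plus connect/query time) to recover.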
- // we don't have the object yet, which is possible on initial sync. get it.
- log() << "adding missing object" << endl; // rare enough we can log
+bool SyncTail::shouldRetry(OperationContext* txn, const BSONObj& o) {
+ const NamespaceString nss(o.getStringField("ns"));
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ // Take an X lock on the database in order to preclude other modifications.
+ // Also, the database might not exist yet, so create it.
+ AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
+ Database* const db = autoDb.getDb();
- BSONObj missingObj = getMissingDoc(txn, db, o);
+ // we don't have the object yet, which is possible on initial sync. get it.
+ log() << "adding missing object" << endl; // rare enough we can log
- if( missingObj.isEmpty() ) {
- log() << "missing object not found on source."
- " presumably deleted later in oplog";
- log() << "o2: " << o.getObjectField("o2").toString();
- log() << "o firstfield: " << o.getObjectField("o").firstElementFieldName();
+ BSONObj missingObj = getMissingDoc(txn, db, o);
- return false;
- }
- else {
- WriteUnitOfWork wunit(txn);
+ if (missingObj.isEmpty()) {
+ log() << "missing object not found on source."
+ " presumably deleted later in oplog";
+ log() << "o2: " << o.getObjectField("o2").toString();
+ log() << "o firstfield: " << o.getObjectField("o").firstElementFieldName();
- Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
- invariant(coll);
+ return false;
+ } else {
+ WriteUnitOfWork wunit(txn);
- StatusWith<RecordId> result = coll->insertDocument(txn, missingObj, true);
- uassert(15917,
- str::stream() << "failed to insert missing doc: "
- << result.getStatus().toString(),
- result.isOK() );
+ Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
+ invariant(coll);
- LOG(1) << "inserted missing doc: " << missingObj.toString() << endl;
+ StatusWith<RecordId> result = coll->insertDocument(txn, missingObj, true);
+ uassert(
+ 15917,
+ str::stream() << "failed to insert missing doc: " << result.getStatus().toString(),
+ result.isOK());
- wunit.commit();
- return true;
- }
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "InsertRetry", nss.ns());
+ LOG(1) << "inserted missing doc: " << missingObj.toString() << endl;
- // fixes compile errors on GCC - see SERVER-18219 for details
- MONGO_UNREACHABLE;
+ wunit.commit();
+ return true;
+ }
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "InsertRetry", nss.ns());
- static AtomicUInt32 replWriterWorkerId;
+ // fixes compile errors on GCC - see SERVER-18219 for details
+ MONGO_UNREACHABLE;
+}
- static void initializeWriterThread() {
- // Only do this once per thread
- if (!ClientBasic::getCurrent()) {
- Client::initThreadIfNotAlready();
- AuthorizationSession::get(cc())->grantInternalAuthorization();
- }
+static AtomicUInt32 replWriterWorkerId;
+
+static void initializeWriterThread() {
+ // Only do this once per thread
+ if (!ClientBasic::getCurrent()) {
+ Client::initThreadIfNotAlready();
+ AuthorizationSession::get(cc())->grantInternalAuthorization();
}
+}
- // This free function is used by the writer threads to apply each op
- void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
- initializeWriterThread();
+// This free function is used by the writer threads to apply each op
+void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
+ initializeWriterThread();
- OperationContextImpl txn;
- txn.setReplicatedWrites(false);
- DisableDocumentValidation validationDisabler(&txn);
+ OperationContextImpl txn;
+ txn.setReplicatedWrites(false);
+ DisableDocumentValidation validationDisabler(&txn);
- // allow us to get through the magic barrier
- txn.lockState()->setIsBatchWriter(true);
+ // allow us to get through the magic barrier
+ txn.lockState()->setIsBatchWriter(true);
- bool convertUpdatesToUpserts = true;
+ bool convertUpdatesToUpserts = true;
- for (std::vector<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- try {
- if (!SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts).isOK()) {
- fassertFailedNoTrace(16359);
- }
+ for (std::vector<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ try {
+ if (!SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts).isOK()) {
+ fassertFailedNoTrace(16359);
}
- catch (const DBException& e) {
- error() << "writer worker caught exception: " << causedBy(e)
- << " on: " << it->toString();
-
- if (inShutdown()) {
- return;
- }
+ } catch (const DBException& e) {
+ error() << "writer worker caught exception: " << causedBy(e)
+ << " on: " << it->toString();
- fassertFailedNoTrace(16360);
+ if (inShutdown()) {
+ return;
}
+
+ fassertFailedNoTrace(16360);
}
}
+}
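
multiSyncApply is the MultiSyncApplyFunc that steady-state replication hands to SyncTail (initial sync uses multiInitialSyncApply below). A hedged usage sketch; 'bgsync' stands in for a BackgroundSyncInterface implementation and 'syncSourceHost' is an invented variable:

    SyncTail syncTail(&bgsync, multiSyncApply);
    syncTail.setHostname(syncSourceHost);  // sync source later used by getMissingDoc()
    syncTail.oplogApplication();           // the batching loop above; runs until inShutdown()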
- // This free function is used by the initial sync writer threads to apply each op
- void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
- initializeWriterThread();
-
- OperationContextImpl txn;
- txn.setReplicatedWrites(false);
- DisableDocumentValidation validationDisabler(&txn);
+// This free function is used by the initial sync writer threads to apply each op
+void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
+ initializeWriterThread();
- // allow us to get through the magic barrier
- txn.lockState()->setIsBatchWriter(true);
+ OperationContextImpl txn;
+ txn.setReplicatedWrites(false);
+ DisableDocumentValidation validationDisabler(&txn);
- bool convertUpdatesToUpserts = false;
+ // allow us to get through the magic barrier
+ txn.lockState()->setIsBatchWriter(true);
- for (std::vector<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- try {
- if (!SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts).isOK()) {
+ bool convertUpdatesToUpserts = false;
- if (st->shouldRetry(&txn, *it)) {
- if (!SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts).isOK()) {
- fassertFailedNoTrace(15915);
- }
+ for (std::vector<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ try {
+ if (!SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts).isOK()) {
+ if (st->shouldRetry(&txn, *it)) {
+ if (!SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts).isOK()) {
+ fassertFailedNoTrace(15915);
}
-
- // If shouldRetry() returns false, fall through.
- // This can happen if the document that was moved and missed by Cloner
- // subsequently got deleted and no longer exists on the Sync Target at all
}
- }
- catch (const DBException& e) {
- error() << "writer worker caught exception: " << causedBy(e)
- << " on: " << it->toString();
- if (inShutdown()) {
- return;
- }
+ // If shouldRetry() returns false, fall through.
+ // This can happen if the document that was moved and missed by Cloner
+ // subsequently got deleted and no longer exists on the Sync Target at all
+ }
+ } catch (const DBException& e) {
+ error() << "writer worker caught exception: " << causedBy(e)
+ << " on: " << it->toString();
- fassertFailedNoTrace(16361);
+ if (inShutdown()) {
+ return;
}
+
+ fassertFailedNoTrace(16361);
}
}
+}
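
The initial-sync variant differs from multiSyncApply in two ways: updates are not converted to upserts, and a failed apply goes through shouldRetry(), which fetches the missing document from the sync source before one more attempt. The retry path, traced (sketch):

    // syncApply fails -> shouldRetry() calls getMissingDoc() against _hostname
    //   doc found   -> inserted locally, syncApply re-run; a second failure is fatal (15915)
    //   doc missing -> op is skipped: it was presumably deleted later in the oplog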
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h
index 74f17cdea12..2e4424b71cf 100644
--- a/src/mongo/db/repl/sync_tail.h
+++ b/src/mongo/db/repl/sync_tail.h
@@ -38,160 +38,162 @@
namespace mongo {
- class Database;
- class OperationContext;
+class Database;
+class OperationContext;
namespace repl {
- class BackgroundSyncInterface;
- class ReplicationCoordinator;
- class OpTime;
+class BackgroundSyncInterface;
+class ReplicationCoordinator;
+class OpTime;
+
+/**
+ * "Normal" replica set syncing
+ */
+class SyncTail {
+public:
+ using MultiSyncApplyFunc = stdx::function<void(const std::vector<BSONObj>& ops, SyncTail* st)>;
+
+ /**
+ * Type of function that takes a non-command op and applies it locally.
+ * Used for applying from an oplog.
+ * Last boolean argument 'convertUpdateToUpsert' converts some updates to upserts for
+ * idempotency reasons.
+ * Returns failure status if the op was an update that could not be applied.
+ */
+ using ApplyOperationInLockFn =
+ stdx::function<Status(OperationContext*, Database*, const BSONObj&, bool)>;
+
+ /**
+ * Type of function that takes a command op and applies it locally.
+ * Used for applying from an oplog.
+ * Returns a failure status if the op could not be applied.
+ */
+ using ApplyCommandInLockFn = stdx::function<Status(OperationContext*, const BSONObj&)>;
+
+ /**
+ * Type of function to increment "repl.apply.ops" server status metric.
+ */
+ using IncrementOpsAppliedStatsFn = stdx::function<void()>;
+
+ SyncTail(BackgroundSyncInterface* q, MultiSyncApplyFunc func);
+ virtual ~SyncTail();
/**
- * "Normal" replica set syncing
+ * Applies the operation that is in param o.
+ * Functions for applying operations/commands and increment server status counters may
+ * be overridden for testing.
*/
- class SyncTail {
+ static Status syncApply(OperationContext* txn,
+ const BSONObj& o,
+ bool convertUpdateToUpsert,
+ ApplyOperationInLockFn applyOperationInLock,
+ ApplyCommandInLockFn applyCommandInLock,
+ IncrementOpsAppliedStatsFn incrementOpsAppliedStats);
+
+ static Status syncApply(OperationContext* txn, const BSONObj& o, bool convertUpdateToUpsert);
+
+ /**
+ * Runs _applyOplogUntil(stopOpTime)
+ */
+ virtual void oplogApplication(OperationContext* txn, const OpTime& stopOpTime);
+
+ void oplogApplication();
+ bool peek(BSONObj* obj);
+
+ class OpQueue {
public:
- using MultiSyncApplyFunc =
- stdx::function<void (const std::vector<BSONObj>& ops, SyncTail* st)>;
-
- /**
- * Type of function that takes a non-command op and applies it locally.
- * Used for applying from an oplog.
- * Last boolean argument 'convertUpdateToUpsert' converts some updates to upserts for
- * idempotency reasons.
- * Returns failure status if the op was an update that could not be applied.
- */
- using ApplyOperationInLockFn =
- stdx::function<Status (OperationContext*, Database*, const BSONObj&, bool)>;
-
- /**
- * Type of function that takes a command op and applies it locally.
- * Used for applying from an oplog.
- * Returns failure status if the op that could not be applied.
- */
- using ApplyCommandInLockFn = stdx::function<Status (OperationContext*, const BSONObj&)>;
-
- /**
- * Type of function to increment "repl.apply.ops" server status metric.
- */
- using IncrementOpsAppliedStatsFn = stdx::function<void ()>;
-
- SyncTail(BackgroundSyncInterface *q, MultiSyncApplyFunc func);
- virtual ~SyncTail();
-
- /**
- * Applies the operation that is in param o.
- * Functions for applying operations/commands and increment server status counters may
- * be overridden for testing.
- */
- static Status syncApply(OperationContext* txn,
- const BSONObj &o,
- bool convertUpdateToUpsert,
- ApplyOperationInLockFn applyOperationInLock,
- ApplyCommandInLockFn applyCommandInLock,
- IncrementOpsAppliedStatsFn incrementOpsAppliedStats);
-
- static Status syncApply(OperationContext* txn,
- const BSONObj &o,
- bool convertUpdateToUpsert);
-
- /**
- * Runs _applyOplogUntil(stopOpTime)
- */
- virtual void oplogApplication(OperationContext* txn, const OpTime& stopOpTime);
-
- void oplogApplication();
- bool peek(BSONObj* obj);
-
- class OpQueue {
- public:
- OpQueue() : _size(0) {}
- size_t getSize() const { return _size; }
- const std::deque<BSONObj>& getDeque() const { return _deque; }
- void push_back(BSONObj& op) {
- _deque.push_back(op);
- _size += op.objsize();
- }
- bool empty() const {
- return _deque.empty();
- }
-
- BSONObj back() const {
- invariant(!_deque.empty());
- return _deque.back();
- }
-
- private:
- std::deque<BSONObj> _deque;
- size_t _size;
- };
-
- // returns true if we should continue waiting for BSONObjs, false if we should
- // stop waiting and apply the queue we have. Only returns false if !ops.empty().
- bool tryPopAndWaitForMore(OperationContext* txn,
- OpQueue* ops,
- ReplicationCoordinator* replCoord);
-
- /**
- * Fetch a single document referenced in the operation from the sync source.
- */
- virtual BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o);
-
- /**
- * If applyOperation_inlock should be called again after an update fails.
- */
- virtual bool shouldRetry(OperationContext* txn, const BSONObj& o);
- void setHostname(const std::string& hostname);
-
- protected:
- // Cap the batches using the limit on journal commits.
- // This works out to be 100 MB (64 bit) or 50 MB (32 bit)
- static const unsigned int replBatchLimitBytes = dur::UncommittedBytesLimit;
- static const int replBatchLimitSeconds = 1;
- static const unsigned int replBatchLimitOperations = 5000;
-
- // SyncTail base class always supports awaiting commit if any op has j:true flag
- // that indicates awaiting commit before updating last OpTime.
- virtual bool supportsWaitingUntilDurable() { return true; }
-
- // Prefetch and write a deque of operations, using the supplied function.
- // Initial Sync and Sync Tail each use a different function.
- // Returns the last OpTime applied.
- static OpTime multiApply(OperationContext* txn,
- const OpQueue& ops,
- OldThreadPool* prefetcherPool,
- OldThreadPool* writerPool,
- MultiSyncApplyFunc func,
- SyncTail* sync,
- bool supportsAwaitingCommit);
-
- /**
- * Applies oplog entries until reaching "endOpTime".
- *
- * NOTE:Will not transition or check states
- */
- void _applyOplogUntil(OperationContext* txn, const OpTime& endOpTime);
+ OpQueue() : _size(0) {}
+ size_t getSize() const {
+ return _size;
+ }
+ const std::deque<BSONObj>& getDeque() const {
+ return _deque;
+ }
+ void push_back(BSONObj& op) {
+ _deque.push_back(op);
+ _size += op.objsize();
+ }
+ bool empty() const {
+ return _deque.empty();
+ }
+
+ BSONObj back() const {
+ invariant(!_deque.empty());
+ return _deque.back();
+ }
private:
- std::string _hostname;
+ std::deque<BSONObj> _deque;
+ size_t _size;
+ };
- BackgroundSyncInterface* _networkQueue;
+ // returns true if we should continue waiting for BSONObjs, false if we should
+ // stop waiting and apply the queue we have. Only returns false if !ops.empty().
+ bool tryPopAndWaitForMore(OperationContext* txn,
+ OpQueue* ops,
+ ReplicationCoordinator* replCoord);
- // Function to use during applyOps
- MultiSyncApplyFunc _applyFunc;
+ /**
+ * Fetch a single document referenced in the operation from the sync source.
+ */
+ virtual BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o);
- void handleSlaveDelay(const BSONObj& op);
+ /**
+ * If applyOperation_inlock should be called again after an update fails.
+ */
+ virtual bool shouldRetry(OperationContext* txn, const BSONObj& o);
+ void setHostname(const std::string& hostname);
+
+protected:
+ // Cap the batches using the limit on journal commits.
+ // This works out to be 100 MB (64 bit) or 50 MB (32 bit)
+ static const unsigned int replBatchLimitBytes = dur::UncommittedBytesLimit;
+ static const int replBatchLimitSeconds = 1;
+ static const unsigned int replBatchLimitOperations = 5000;
+
+ // SyncTail base class always supports awaiting commit if any op has j:true flag
+ // that indicates awaiting commit before updating last OpTime.
+ virtual bool supportsWaitingUntilDurable() {
+ return true;
+ }
+
+ // Prefetch and write a deque of operations, using the supplied function.
+ // Initial Sync and Sync Tail each use a different function.
+ // Returns the last OpTime applied.
+ static OpTime multiApply(OperationContext* txn,
+ const OpQueue& ops,
+ OldThreadPool* prefetcherPool,
+ OldThreadPool* writerPool,
+ MultiSyncApplyFunc func,
+ SyncTail* sync,
+ bool supportsAwaitingCommit);
- // persistent pool of worker threads for writing ops to the databases
- OldThreadPool _writerPool;
- // persistent pool of worker threads for prefetching
- OldThreadPool _prefetcherPool;
+ /**
+ * Applies oplog entries until reaching "endOpTime".
+ *
+ * NOTE: Will not transition or check states.
+ */
+ void _applyOplogUntil(OperationContext* txn, const OpTime& endOpTime);
- };
+private:
+ std::string _hostname;
+
+ BackgroundSyncInterface* _networkQueue;
+
+ // Function to use during applyOps
+ MultiSyncApplyFunc _applyFunc;
+
+ void handleSlaveDelay(const BSONObj& op);
+
+ // persistent pool of worker threads for writing ops to the databases
+ OldThreadPool _writerPool;
+ // persistent pool of worker threads for prefetching
+ OldThreadPool _prefetcherPool;
+};
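
One subtlety of OpQueue above: getSize() is the accumulated BSON byte total (fed to the replBatchLimitBytes check), not the element count, which comes from getDeque().size(). A small illustrative sketch, not part of the patch:

    SyncTail::OpQueue ops;
    BSONObj op = BSON("op" << "i" << "ns" << "test.t" << "o" << BSON("_id" << 1));
    ops.push_back(op);                      // adds op.objsize() to the running byte total
    size_t bytes = ops.getSize();           // bytes queued, compared to replBatchLimitBytes
    size_t count = ops.getDeque().size();   // op count, compared to replBatchLimitOperations
    BSONObj last = ops.back();              // newest op; used for minValid and slave delay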
- // These free functions are used by the thread pool workers to write ops to the db.
- void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
- void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
+// These free functions are used by the thread pool workers to write ops to the db.
+void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
+void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/sync_tail_test.cpp b/src/mongo/db/repl/sync_tail_test.cpp
index f1e2a3051da..d718f188b5e 100644
--- a/src/mongo/db/repl/sync_tail_test.cpp
+++ b/src/mongo/db/repl/sync_tail_test.cpp
@@ -50,253 +50,272 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
+using namespace mongo;
+using namespace mongo::repl;
- class BackgroundSyncMock : public BackgroundSyncInterface {
- public:
- bool peek(BSONObj* op) override;
- void consume() override;
- void waitForMore() override;
- };
+class BackgroundSyncMock : public BackgroundSyncInterface {
+public:
+ bool peek(BSONObj* op) override;
+ void consume() override;
+ void waitForMore() override;
+};
- bool BackgroundSyncMock::peek(BSONObj* op) { return false; }
- void BackgroundSyncMock::consume() { }
- void BackgroundSyncMock::waitForMore() { }
+bool BackgroundSyncMock::peek(BSONObj* op) {
+ return false;
+}
+void BackgroundSyncMock::consume() {}
+void BackgroundSyncMock::waitForMore() {}
- class SyncTailTest : public unittest::Test {
- protected:
- void _testSyncApplyInsertDocument(LockMode expectedMode);
+class SyncTailTest : public unittest::Test {
+protected:
+ void _testSyncApplyInsertDocument(LockMode expectedMode);
- std::unique_ptr<OperationContext> _txn;
- unsigned int _opsApplied;
- SyncTail::ApplyOperationInLockFn _applyOp;
- SyncTail::ApplyCommandInLockFn _applyCmd;
- SyncTail::IncrementOpsAppliedStatsFn _incOps;
+ std::unique_ptr<OperationContext> _txn;
+ unsigned int _opsApplied;
+ SyncTail::ApplyOperationInLockFn _applyOp;
+ SyncTail::ApplyCommandInLockFn _applyCmd;
+ SyncTail::IncrementOpsAppliedStatsFn _incOps;
- private:
- void setUp() override;
- void tearDown() override;
- };
+private:
+ void setUp() override;
+ void tearDown() override;
+};
- void SyncTailTest::setUp() {
- ServiceContext* serviceContext = getGlobalServiceContext();
- if (!serviceContext->getGlobalStorageEngine()) {
- // When using the 'devnull' storage engine, it is fine for the temporary directory to
- // go away after the global storage engine is initialized.
- unittest::TempDir tempDir("sync_tail_test");
- mongo::storageGlobalParams.dbpath = tempDir.path();
- mongo::storageGlobalParams.engine = "devnull";
- mongo::storageGlobalParams.engineSetByUser = true;
- serviceContext->initializeGlobalStorageEngine();
- }
- ReplSettings replSettings;
- replSettings.oplogSize = 5 * 1024 * 1024;
+void SyncTailTest::setUp() {
+ ServiceContext* serviceContext = getGlobalServiceContext();
+ if (!serviceContext->getGlobalStorageEngine()) {
+ // When using the 'devnull' storage engine, it is fine for the temporary directory to
+ // go away after the global storage engine is initialized.
+ unittest::TempDir tempDir("sync_tail_test");
+ mongo::storageGlobalParams.dbpath = tempDir.path();
+ mongo::storageGlobalParams.engine = "devnull";
+ mongo::storageGlobalParams.engineSetByUser = true;
+ serviceContext->initializeGlobalStorageEngine();
+ }
+ ReplSettings replSettings;
+ replSettings.oplogSize = 5 * 1024 * 1024;
- setGlobalReplicationCoordinator(new ReplicationCoordinatorMock(replSettings));
+ setGlobalReplicationCoordinator(new ReplicationCoordinatorMock(replSettings));
- Client::initThreadIfNotAlready();
- _txn.reset(new OperationContextReplMock(&cc(), 0));
- _opsApplied = 0;
- _applyOp = [](OperationContext* txn,
- Database* db,
- const BSONObj& op,
- bool convertUpdateToUpsert) {
- return Status::OK();
- };
- _applyCmd = [](OperationContext* txn, const BSONObj& op) {
+ Client::initThreadIfNotAlready();
+ _txn.reset(new OperationContextReplMock(&cc(), 0));
+ _opsApplied = 0;
+ _applyOp =
+ [](OperationContext* txn, Database* db, const BSONObj& op, bool convertUpdateToUpsert) {
return Status::OK();
};
- _incOps = [this]() { _opsApplied++; };
- }
+ _applyCmd = [](OperationContext* txn, const BSONObj& op) { return Status::OK(); };
+ _incOps = [this]() { _opsApplied++; };
+}
- void SyncTailTest::tearDown() {
- {
- Lock::GlobalWrite globalLock(_txn->lockState());
- BSONObjBuilder unused;
- invariant(mongo::dbHolder().closeAll(_txn.get(), unused, false));
- }
- _txn.reset();
- setGlobalReplicationCoordinator(nullptr);
+void SyncTailTest::tearDown() {
+ {
+ Lock::GlobalWrite globalLock(_txn->lockState());
+ BSONObjBuilder unused;
+ invariant(mongo::dbHolder().closeAll(_txn.get(), unused, false));
}
+ _txn.reset();
+ setGlobalReplicationCoordinator(nullptr);
+}
- TEST_F(SyncTailTest, Peek) {
- BackgroundSyncMock bgsync;
- SyncTail syncTail(&bgsync, [](const std::vector<BSONObj>& ops, SyncTail* st) { });
- BSONObj obj;
- ASSERT_FALSE(syncTail.peek(&obj));
- }
+TEST_F(SyncTailTest, Peek) {
+ BackgroundSyncMock bgsync;
+ SyncTail syncTail(&bgsync, [](const std::vector<BSONObj>& ops, SyncTail* st) {});
+ BSONObj obj;
+ ASSERT_FALSE(syncTail.peek(&obj));
+}
- TEST_F(SyncTailTest, SyncApplyNoNamespaceBadOp) {
- const BSONObj op = BSON("op" << "x");
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, _applyOp, _applyCmd, _incOps));
- ASSERT_EQUALS(0U, _opsApplied);
- }
+TEST_F(SyncTailTest, SyncApplyNoNamespaceBadOp) {
+ const BSONObj op = BSON("op"
+ << "x");
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, _applyOp, _applyCmd, _incOps));
+ ASSERT_EQUALS(0U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyNoNamespaceNoOp) {
- ASSERT_OK(SyncTail::syncApply(_txn.get(), BSON("op" << "n"), false));
- ASSERT_EQUALS(0U, _opsApplied);
- }
+TEST_F(SyncTailTest, SyncApplyNoNamespaceNoOp) {
+ ASSERT_OK(SyncTail::syncApply(_txn.get(),
+ BSON("op"
+ << "n"),
+ false));
+ ASSERT_EQUALS(0U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyBadOp) {
- const BSONObj op = BSON("op" << "x" << "ns" << "test.t");
- ASSERT_EQUALS(
- ErrorCodes::BadValue,
- SyncTail::syncApply(_txn.get(), op, false, _applyOp, _applyCmd, _incOps).code());
- ASSERT_EQUALS(0U, _opsApplied);
- }
+TEST_F(SyncTailTest, SyncApplyBadOp) {
+ const BSONObj op = BSON("op"
+ << "x"
+ << "ns"
+ << "test.t");
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ SyncTail::syncApply(_txn.get(), op, false, _applyOp, _applyCmd, _incOps).code());
+ ASSERT_EQUALS(0U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyNoOp) {
- const BSONObj op = BSON("op" << "n" << "ns" << "test.t");
- bool applyOpCalled = false;
- SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
- Database* db,
- const BSONObj& theOperation,
- bool convertUpdateToUpsert) {
- applyOpCalled = true;
- ASSERT_TRUE(txn);
- ASSERT_TRUE(txn->lockState()->isDbLockedForMode("test", MODE_X));
- ASSERT_FALSE(txn->writesAreReplicated());
- ASSERT_TRUE(documentValidationDisabled(txn));
- ASSERT_TRUE(db);
- ASSERT_EQUALS(op, theOperation);
- ASSERT_FALSE(convertUpdateToUpsert);
- return Status::OK();
- };
- SyncTail::ApplyCommandInLockFn applyCmd = [&](OperationContext* txn,
- const BSONObj& theOperation) {
+TEST_F(SyncTailTest, SyncApplyNoOp) {
+ const BSONObj op = BSON("op"
+ << "n"
+ << "ns"
+ << "test.t");
+ bool applyOpCalled = false;
+ SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
+ Database* db,
+ const BSONObj& theOperation,
+ bool convertUpdateToUpsert) {
+ applyOpCalled = true;
+ ASSERT_TRUE(txn);
+ ASSERT_TRUE(txn->lockState()->isDbLockedForMode("test", MODE_X));
+ ASSERT_FALSE(txn->writesAreReplicated());
+ ASSERT_TRUE(documentValidationDisabled(txn));
+ ASSERT_TRUE(db);
+ ASSERT_EQUALS(op, theOperation);
+ ASSERT_FALSE(convertUpdateToUpsert);
+ return Status::OK();
+ };
+ SyncTail::ApplyCommandInLockFn applyCmd =
+ [&](OperationContext* txn, const BSONObj& theOperation) {
FAIL("applyCommand unexpectedly invoked.");
return Status::OK();
};
- ASSERT_TRUE(_txn->writesAreReplicated());
- ASSERT_FALSE(documentValidationDisabled(_txn.get()));
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
- ASSERT_TRUE(applyOpCalled);
- ASSERT_EQUALS(1U, _opsApplied);
- }
+ ASSERT_TRUE(_txn->writesAreReplicated());
+ ASSERT_FALSE(documentValidationDisabled(_txn.get()));
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
+ ASSERT_TRUE(applyOpCalled);
+ ASSERT_EQUALS(1U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyNoOpApplyOpThrowsException) {
- const BSONObj op = BSON("op" << "n" << "ns" << "test.t");
- int applyOpCalled = 0;
- SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
- Database* db,
- const BSONObj& theOperation,
- bool convertUpdateToUpsert) {
- applyOpCalled++;
- if (applyOpCalled < 5) {
- throw WriteConflictException();
- }
- return Status::OK();
- };
- SyncTail::ApplyCommandInLockFn applyCmd = [&](OperationContext* txn,
- const BSONObj& theOperation) {
+TEST_F(SyncTailTest, SyncApplyNoOpApplyOpThrowsException) {
+ const BSONObj op = BSON("op"
+ << "n"
+ << "ns"
+ << "test.t");
+ int applyOpCalled = 0;
+ SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
+ Database* db,
+ const BSONObj& theOperation,
+ bool convertUpdateToUpsert) {
+ applyOpCalled++;
+ if (applyOpCalled < 5) {
+ throw WriteConflictException();
+ }
+ return Status::OK();
+ };
+ SyncTail::ApplyCommandInLockFn applyCmd =
+ [&](OperationContext* txn, const BSONObj& theOperation) {
FAIL("applyCommand unexpectedly invoked.");
return Status::OK();
};
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
- ASSERT_EQUALS(5, applyOpCalled);
- ASSERT_EQUALS(1U, _opsApplied);
- }
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
+ ASSERT_EQUALS(5, applyOpCalled);
+ ASSERT_EQUALS(1U, _opsApplied);
+}
- void SyncTailTest::_testSyncApplyInsertDocument(LockMode expectedMode) {
- const BSONObj op = BSON("op" << "i" << "ns" << "test.t");
- bool applyOpCalled = false;
- SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
- Database* db,
- const BSONObj& theOperation,
- bool convertUpdateToUpsert) {
- applyOpCalled = true;
- ASSERT_TRUE(txn);
- ASSERT_TRUE(txn->lockState()->isDbLockedForMode("test", expectedMode));
- ASSERT_TRUE(txn->lockState()->isCollectionLockedForMode("test.t", expectedMode));
- ASSERT_FALSE(txn->writesAreReplicated());
- ASSERT_TRUE(documentValidationDisabled(txn));
- ASSERT_TRUE(db);
- ASSERT_EQUALS(op, theOperation);
- ASSERT_TRUE(convertUpdateToUpsert);
- return Status::OK();
- };
- SyncTail::ApplyCommandInLockFn applyCmd = [&](OperationContext* txn,
- const BSONObj& theOperation) {
+void SyncTailTest::_testSyncApplyInsertDocument(LockMode expectedMode) {
+ const BSONObj op = BSON("op"
+ << "i"
+ << "ns"
+ << "test.t");
+ bool applyOpCalled = false;
+ SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
+ Database* db,
+ const BSONObj& theOperation,
+ bool convertUpdateToUpsert) {
+ applyOpCalled = true;
+ ASSERT_TRUE(txn);
+ ASSERT_TRUE(txn->lockState()->isDbLockedForMode("test", expectedMode));
+ ASSERT_TRUE(txn->lockState()->isCollectionLockedForMode("test.t", expectedMode));
+ ASSERT_FALSE(txn->writesAreReplicated());
+ ASSERT_TRUE(documentValidationDisabled(txn));
+ ASSERT_TRUE(db);
+ ASSERT_EQUALS(op, theOperation);
+ ASSERT_TRUE(convertUpdateToUpsert);
+ return Status::OK();
+ };
+ SyncTail::ApplyCommandInLockFn applyCmd =
+ [&](OperationContext* txn, const BSONObj& theOperation) {
FAIL("applyCommand unexpectedly invoked.");
return Status::OK();
};
- ASSERT_TRUE(_txn->writesAreReplicated());
- ASSERT_FALSE(documentValidationDisabled(_txn.get()));
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, true, applyOp, applyCmd, _incOps));
- ASSERT_TRUE(applyOpCalled);
- ASSERT_EQUALS(1U, _opsApplied);
- }
+ ASSERT_TRUE(_txn->writesAreReplicated());
+ ASSERT_FALSE(documentValidationDisabled(_txn.get()));
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, true, applyOp, applyCmd, _incOps));
+ ASSERT_TRUE(applyOpCalled);
+ ASSERT_EQUALS(1U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyInsertDocumentDatabaseMissing) {
- _testSyncApplyInsertDocument(MODE_X);
- }
+TEST_F(SyncTailTest, SyncApplyInsertDocumentDatabaseMissing) {
+ _testSyncApplyInsertDocument(MODE_X);
+}
- TEST_F(SyncTailTest, SyncApplyInsertDocumentCollectionMissing) {
- {
- Lock::GlobalWrite globalLock(_txn->lockState());
- bool justCreated = false;
- Database* db = dbHolder().openDb(_txn.get(), "test", &justCreated);
- ASSERT_TRUE(db);
- ASSERT_TRUE(justCreated);
- }
- _testSyncApplyInsertDocument(MODE_X);
+TEST_F(SyncTailTest, SyncApplyInsertDocumentCollectionMissing) {
+ {
+ Lock::GlobalWrite globalLock(_txn->lockState());
+ bool justCreated = false;
+ Database* db = dbHolder().openDb(_txn.get(), "test", &justCreated);
+ ASSERT_TRUE(db);
+ ASSERT_TRUE(justCreated);
}
+ _testSyncApplyInsertDocument(MODE_X);
+}
- TEST_F(SyncTailTest, SyncApplyInsertDocumentCollectionExists) {
- {
- Lock::GlobalWrite globalLock(_txn->lockState());
- bool justCreated = false;
- Database* db = dbHolder().openDb(_txn.get(), "test", &justCreated);
- ASSERT_TRUE(db);
- ASSERT_TRUE(justCreated);
- Collection* collection = db->createCollection(_txn.get(), "test.t");
- ASSERT_TRUE(collection);
- }
- _testSyncApplyInsertDocument(MODE_IX);
+TEST_F(SyncTailTest, SyncApplyInsertDocumentCollectionExists) {
+ {
+ Lock::GlobalWrite globalLock(_txn->lockState());
+ bool justCreated = false;
+ Database* db = dbHolder().openDb(_txn.get(), "test", &justCreated);
+ ASSERT_TRUE(db);
+ ASSERT_TRUE(justCreated);
+ Collection* collection = db->createCollection(_txn.get(), "test.t");
+ ASSERT_TRUE(collection);
}
+ _testSyncApplyInsertDocument(MODE_IX);
+}
- TEST_F(SyncTailTest, SyncApplyIndexBuild) {
- const BSONObj op = BSON("op" << "i" << "ns" << "test.system.indexes");
- bool applyOpCalled = false;
- SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
- Database* db,
- const BSONObj& theOperation,
- bool convertUpdateToUpsert) {
- applyOpCalled = true;
- ASSERT_TRUE(txn);
- ASSERT_TRUE(txn->lockState()->isDbLockedForMode("test", MODE_X));
- ASSERT_FALSE(txn->writesAreReplicated());
- ASSERT_TRUE(documentValidationDisabled(txn));
- ASSERT_TRUE(db);
- ASSERT_EQUALS(op, theOperation);
- ASSERT_FALSE(convertUpdateToUpsert);
- return Status::OK();
- };
- SyncTail::ApplyCommandInLockFn applyCmd = [&](OperationContext* txn,
- const BSONObj& theOperation) {
+TEST_F(SyncTailTest, SyncApplyIndexBuild) {
+ const BSONObj op = BSON("op"
+ << "i"
+ << "ns"
+ << "test.system.indexes");
+ bool applyOpCalled = false;
+ SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
+ Database* db,
+ const BSONObj& theOperation,
+ bool convertUpdateToUpsert) {
+ applyOpCalled = true;
+ ASSERT_TRUE(txn);
+ ASSERT_TRUE(txn->lockState()->isDbLockedForMode("test", MODE_X));
+ ASSERT_FALSE(txn->writesAreReplicated());
+ ASSERT_TRUE(documentValidationDisabled(txn));
+ ASSERT_TRUE(db);
+ ASSERT_EQUALS(op, theOperation);
+ ASSERT_FALSE(convertUpdateToUpsert);
+ return Status::OK();
+ };
+ SyncTail::ApplyCommandInLockFn applyCmd =
+ [&](OperationContext* txn, const BSONObj& theOperation) {
FAIL("applyCommand unexpectedly invoked.");
return Status::OK();
};
- ASSERT_TRUE(_txn->writesAreReplicated());
- ASSERT_FALSE(documentValidationDisabled(_txn.get()));
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
- ASSERT_TRUE(applyOpCalled);
- ASSERT_EQUALS(1U, _opsApplied);
- }
+ ASSERT_TRUE(_txn->writesAreReplicated());
+ ASSERT_FALSE(documentValidationDisabled(_txn.get()));
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
+ ASSERT_TRUE(applyOpCalled);
+ ASSERT_EQUALS(1U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyCommand) {
- const BSONObj op = BSON("op" << "c" << "ns" << "test.t");
- bool applyCmdCalled = false;
- SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
- Database* db,
- const BSONObj& theOperation,
- bool convertUpdateToUpsert) {
- FAIL("applyOperation unexpectedly invoked.");
- return Status::OK();
- };
- SyncTail::ApplyCommandInLockFn applyCmd = [&](OperationContext* txn,
- const BSONObj& theOperation) {
+TEST_F(SyncTailTest, SyncApplyCommand) {
+ const BSONObj op = BSON("op"
+ << "c"
+ << "ns"
+ << "test.t");
+ bool applyCmdCalled = false;
+ SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
+ Database* db,
+ const BSONObj& theOperation,
+ bool convertUpdateToUpsert) {
+ FAIL("applyOperation unexpectedly invoked.");
+ return Status::OK();
+ };
+ SyncTail::ApplyCommandInLockFn applyCmd =
+ [&](OperationContext* txn, const BSONObj& theOperation) {
applyCmdCalled = true;
ASSERT_TRUE(txn);
ASSERT_TRUE(txn->lockState()->isW());
@@ -305,34 +324,37 @@ namespace {
ASSERT_EQUALS(op, theOperation);
return Status::OK();
};
- ASSERT_TRUE(_txn->writesAreReplicated());
- ASSERT_FALSE(documentValidationDisabled(_txn.get()));
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
- ASSERT_TRUE(applyCmdCalled);
- ASSERT_EQUALS(1U, _opsApplied);
- }
+ ASSERT_TRUE(_txn->writesAreReplicated());
+ ASSERT_FALSE(documentValidationDisabled(_txn.get()));
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
+ ASSERT_TRUE(applyCmdCalled);
+ ASSERT_EQUALS(1U, _opsApplied);
+}
- TEST_F(SyncTailTest, SyncApplyCommandThrowsException) {
- const BSONObj op = BSON("op" << "c" << "ns" << "test.t");
- int applyCmdCalled = 0;
- SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
- Database* db,
- const BSONObj& theOperation,
- bool convertUpdateToUpsert) {
- FAIL("applyOperation unexpectedly invoked.");
- return Status::OK();
- };
- SyncTail::ApplyCommandInLockFn applyCmd = [&](OperationContext* txn,
- const BSONObj& theOperation) {
+TEST_F(SyncTailTest, SyncApplyCommandThrowsException) {
+ const BSONObj op = BSON("op"
+ << "c"
+ << "ns"
+ << "test.t");
+ int applyCmdCalled = 0;
+ SyncTail::ApplyOperationInLockFn applyOp = [&](OperationContext* txn,
+ Database* db,
+ const BSONObj& theOperation,
+ bool convertUpdateToUpsert) {
+ FAIL("applyOperation unexpectedly invoked.");
+ return Status::OK();
+ };
+ SyncTail::ApplyCommandInLockFn applyCmd =
+ [&](OperationContext* txn, const BSONObj& theOperation) {
applyCmdCalled++;
if (applyCmdCalled < 5) {
throw WriteConflictException();
}
return Status::OK();
};
- ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
- ASSERT_EQUALS(5, applyCmdCalled);
- ASSERT_EQUALS(1U, _opsApplied);
- }
+ ASSERT_OK(SyncTail::syncApply(_txn.get(), op, false, applyOp, applyCmd, _incOps));
+ ASSERT_EQUALS(5, applyCmdCalled);
+ ASSERT_EQUALS(1U, _opsApplied);
+}
-} // namespace
+} // namespace
diff --git a/src/mongo/db/repl/task_runner.cpp b/src/mongo/db/repl/task_runner.cpp
index fc0a594ac83..385a76207cd 100644
--- a/src/mongo/db/repl/task_runner.cpp
+++ b/src/mongo/db/repl/task_runner.cpp
@@ -46,165 +46,162 @@ namespace repl {
namespace {
- /**
- * Runs a single task runner task.
- * Any exceptions thrown by the task will be logged and converted into a
- * next action of kCancel.
- */
- TaskRunner::NextAction runSingleTask(const TaskRunner::Task& task,
- OperationContext* txn,
- const Status& status) {
- try {
- return task(txn, status);
+/**
+ * Runs a single task runner task.
+ * Any exceptions thrown by the task will be logged and converted into a
+ * next action of kCancel.
+ */
+TaskRunner::NextAction runSingleTask(const TaskRunner::Task& task,
+ OperationContext* txn,
+ const Status& status) {
+ try {
+ return task(txn, status);
+ } catch (...) {
+ log() << "Unhandled exception in task runner: " << exceptionToStatus();
+ }
+ return TaskRunner::NextAction::kCancel;
+}
+
+} // namespace
+
+// static
+TaskRunner::Task TaskRunner::makeCancelTask() {
+ return [](OperationContext* txn, const Status& status) { return NextAction::kCancel; };
+}
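
A hedged usage sketch of the schedule()/makeCancelTask() contract documented in task_runner.h; 'pool' is an invented OldThreadPool and the factory returns a raw OperationContext* as CreateOperationContextFn requires:

    TaskRunner runner(&pool, []() -> OperationContext* { return new OperationContextImpl(); });
    runner.schedule([](OperationContext* txn, const Status& status) {
        if (!status.isOK()) {
            // Canceled by an earlier task; txn must not be touched.
            return TaskRunner::NextAction::kCancel;
        }
        // ... do work with txn ...
        return TaskRunner::NextAction::kDisposeOperationContext;
    });
    runner.schedule(TaskRunner::makeCancelTask());  // drives the runner back to inactive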
+
+TaskRunner::TaskRunner(OldThreadPool* threadPool,
+ const CreateOperationContextFn& createOperationContext)
+ : _threadPool(threadPool),
+ _createOperationContext(createOperationContext),
+ _active(false),
+ _cancelRequested(false) {
+ uassert(ErrorCodes::BadValue, "null thread pool", threadPool);
+ uassert(ErrorCodes::BadValue, "null operation context factory", createOperationContext);
+}
+
+TaskRunner::~TaskRunner() {
+ try {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (!_active) {
+ return;
}
- catch (...) {
- log() << "Unhandled exception in task runner: " << exceptionToStatus();
+ _cancelRequested = true;
+ _condition.notify_all();
+ while (_active) {
+ _condition.wait(lk);
}
- return TaskRunner::NextAction::kCancel;
+ } catch (...) {
+ error() << "unexpected exception destroying task runner: " << exceptionToStatus();
}
+}
-} // namespace
+std::string TaskRunner::getDiagnosticString() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ str::stream output;
+ output << "TaskRunner";
+ output << " scheduled tasks: " << _tasks.size();
+ output << " active: " << _active;
+ output << " cancel requested: " << _cancelRequested;
+ return output;
+}
- // static
- TaskRunner::Task TaskRunner::makeCancelTask() {
- return [](OperationContext* txn, const Status& status) {
- return NextAction::kCancel;
- };
- }
+bool TaskRunner::isActive() const {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _active;
+}
- TaskRunner::TaskRunner(OldThreadPool* threadPool,
- const CreateOperationContextFn& createOperationContext)
- : _threadPool(threadPool),
- _createOperationContext(createOperationContext),
- _active(false),
- _cancelRequested(false) {
+void TaskRunner::schedule(const Task& task) {
+ invariant(task);
- uassert(ErrorCodes::BadValue, "null thread pool", threadPool);
- uassert(ErrorCodes::BadValue, "null operation context factory", createOperationContext);
- }
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
- TaskRunner::~TaskRunner() {
- try {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (!_active) {
- return;
- }
- _cancelRequested = true;
- _condition.notify_all();
- while (_active) {
- _condition.wait(lk);
- }
- }
- catch (...) {
- error() << "unexpected exception destroying task runner: " << exceptionToStatus();
- }
- }
+ _tasks.push_back(task);
+ _condition.notify_all();
- std::string TaskRunner::getDiagnosticString() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- str::stream output;
- output << "TaskRunner";
- output << " scheduled tasks: " << _tasks.size();
- output << " active: " << _active;
- output << " cancel requested: " << _cancelRequested;
- return output;
+ if (_active) {
+ return;
}
- bool TaskRunner::isActive() const {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- return _active;
- }
+ _threadPool->schedule(stdx::bind(&TaskRunner::_runTasks, this));
- void TaskRunner::schedule(const Task& task) {
- invariant(task);
+ _active = true;
+ _cancelRequested = false;
+}
- stdx::lock_guard<stdx::mutex> lk(_mutex);
+void TaskRunner::cancel() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _cancelRequested = true;
+ _condition.notify_all();
+}
- _tasks.push_back(task);
- _condition.notify_all();
+void TaskRunner::_runTasks() {
+ std::unique_ptr<OperationContext> txn;
- if (_active) {
- return;
+ while (Task task = _waitForNextTask()) {
+ if (!txn) {
+ txn.reset(_createOperationContext());
}
- _threadPool->schedule(stdx::bind(&TaskRunner::_runTasks, this));
-
- _active = true;
- _cancelRequested = false;
- }
+ NextAction nextAction = runSingleTask(task, txn.get(), Status::OK());
- void TaskRunner::cancel() {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- _cancelRequested = true;
- _condition.notify_all();
- }
-
- void TaskRunner::_runTasks() {
- std::unique_ptr<OperationContext> txn;
-
- while (Task task = _waitForNextTask()) {
- if (!txn) {
- txn.reset(_createOperationContext());
- }
-
- NextAction nextAction = runSingleTask(task, txn.get(), Status::OK());
-
- if (nextAction != NextAction::kKeepOperationContext) {
- txn.reset();
- }
-
- if (nextAction == NextAction::kCancel) {
- break;
- }
- // Release thread back to pool after disposing if no scheduled tasks in queue.
- if (nextAction == NextAction::kDisposeOperationContext ||
- nextAction == NextAction::kInvalid) {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- if (_tasks.empty()) {
- _finishRunTasks_inlock();
- return;
- }
- }
+ if (nextAction != NextAction::kKeepOperationContext) {
+ txn.reset();
}
- txn.reset();
- std::list<Task> tasks;
- {
- stdx::lock_guard<stdx::mutex> lk(_mutex);
- tasks.swap(_tasks);
+ if (nextAction == NextAction::kCancel) {
+ break;
}
-
- // Cancel remaining tasks with a CallbackCanceled status.
- for (auto task : tasks) {
- runSingleTask(task, nullptr, Status(ErrorCodes::CallbackCanceled,
- "this task has been canceled by a previously invoked task"));
+ // Release thread back to pool after disposing if no scheduled tasks in queue.
+ if (nextAction == NextAction::kDisposeOperationContext ||
+ nextAction == NextAction::kInvalid) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_tasks.empty()) {
+ _finishRunTasks_inlock();
+ return;
+ }
}
+ }
+ txn.reset();
+ std::list<Task> tasks;
+ {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- _finishRunTasks_inlock();
+ tasks.swap(_tasks);
}
- void TaskRunner::_finishRunTasks_inlock() {
- _active = false;
- _cancelRequested = false;
- _condition.notify_all();
+ // Cancel remaining tasks with a CallbackCanceled status.
+ for (auto task : tasks) {
+ runSingleTask(task,
+ nullptr,
+ Status(ErrorCodes::CallbackCanceled,
+ "this task has been canceled by a previously invoked task"));
}
- TaskRunner::Task TaskRunner::_waitForNextTask() {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _finishRunTasks_inlock();
+}
- while (_tasks.empty() && !_cancelRequested) {
- _condition.wait(lk);
- }
+void TaskRunner::_finishRunTasks_inlock() {
+ _active = false;
+ _cancelRequested = false;
+ _condition.notify_all();
+}
- if (_cancelRequested) {
- return Task();
- }
+TaskRunner::Task TaskRunner::_waitForNextTask() {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
- Task task = _tasks.front();
- _tasks.pop_front();
- return task;
+ while (_tasks.empty() && !_cancelRequested) {
+ _condition.wait(lk);
}
-} // namespace repl
-} // namespace mongo
+ if (_cancelRequested) {
+ return Task();
+ }
+
+ Task task = _tasks.front();
+ _tasks.pop_front();
+ return task;
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/task_runner.h b/src/mongo/db/repl/task_runner.h
index 33e879e08b8..f041174ffd9 100644
--- a/src/mongo/db/repl/task_runner.h
+++ b/src/mongo/db/repl/task_runner.h
@@ -37,129 +37,127 @@
namespace mongo {
- class OperationContext;
- class Status;
- class OldThreadPool;
+class OperationContext;
+class Status;
+class OldThreadPool;
namespace repl {
- class TaskRunner {
- MONGO_DISALLOW_COPYING(TaskRunner);
- public:
-
- /**
- * Represents next steps of task runner.
- */
- enum class NextAction {
- kInvalid=0,
- kDisposeOperationContext=1,
- kKeepOperationContext=2,
- kCancel=3,
- };
-
- using CreateOperationContextFn = stdx::function<OperationContext*()>;
- using Task = stdx::function<NextAction (OperationContext*, const Status&)>;
-
- /**
- * Creates a Task returning kCancel. This is useful in shutting down the task runner after
- * running a series of tasks.
- *
- * Without a cancellation task, the client would need to coordinate the completion of the
- * last task with calling cancel() on the task runner.
- */
- static Task makeCancelTask();
-
- TaskRunner(OldThreadPool* threadPool,
- const CreateOperationContextFn& createOperationContext);
-
- virtual ~TaskRunner();
-
- /**
- * Returns diagnostic information.
- */
- std::string getDiagnosticString() const;
-
- /**
- * Returns true if there are any scheduled or actively running tasks.
- */
- bool isActive() const;
-
- /**
- * Schedules a task to be run by the task runner. Tasks are run in the same order that they
- * are scheduled.
- *
- * This transitions the task runner to an active state.
- *
- * The task runner creates an operation context using '_createOperationContext'
- * prior to running a scheduled task. Depending on the NextAction returned from the
- * task, operation contexts may be shared between consecutive tasks invoked by the task
- * runner.
- *
- * On completion, each task is expected to return a NextAction to the task runner.
- *
- * If the task returns kDisposeOperationContext, the task runner destroys the operation
- * context. The next task to be invoked will receive a new operation context.
- *
- * If the task returns kKeepOperationContext, the task runner will retain the operation
- * context to pass to the next task in the queue.
- *
- * If the task returns kCancel, the task runner will destroy the operation context and
- * cancel the remaining tasks (each task will be invoked with a status containing the
- * code ErrorCodes::CallbackCanceled). After all the tasks have been canceled, the task
- * runner will become inactive.
- *
- * If the task returns kInvalid, this NextAction will be handled in the same way as
- * kDisposeOperationContext.
- *
- * If the status passed to the task is not OK, the task should not proceed and return
- * immediately. This is usually the case when the task runner is canceled. Accessing the
- * operation context in the task will result in undefined behavior.
- */
- void schedule(const Task& task);
-
- /**
- * If there is a task that is already running, allows the task to run to completion.
- * Cancels all scheduled tasks that have not been run. Canceled tasks will still be
- * invoked with a status containing the code ErrorCodes::CallbackCanceled.
- * After all active tasks have completed and unscheduled tasks have been canceled, the
- * task runner will go into an inactive state.
- *
- * It is a no-op to call cancel() before scheduling any tasks.
- */
- void cancel();
-
- private:
-
- /**
- * Runs tasks in a loop.
- * Loop exits when any of the tasks returns a non-kContinue next action.
- */
- void _runTasks();
- void _finishRunTasks_inlock();
-
- /**
- * Waits for next scheduled task to be added to queue.
- * Returns null task when task runner is stopped.
- */
- Task _waitForNextTask();
-
- OldThreadPool* _threadPool;
- CreateOperationContextFn _createOperationContext;
-
- // Protects member data of this TaskRunner.
- mutable stdx::mutex _mutex;
-
- stdx::condition_variable _condition;
-
- // _active is true when there are scheduled tasks in the task queue or
- // when a task is being run by the task runner.
- bool _active;
-
- bool _cancelRequested;
-
- // FIFO queue of scheduled tasks
- std::list<Task> _tasks;
+class TaskRunner {
+ MONGO_DISALLOW_COPYING(TaskRunner);
+
+public:
+ /**
+ * Represents next steps of task runner.
+ */
+ enum class NextAction {
+ kInvalid = 0,
+ kDisposeOperationContext = 1,
+ kKeepOperationContext = 2,
+ kCancel = 3,
};
-} // namespace repl
-} // namespace mongo
+ using CreateOperationContextFn = stdx::function<OperationContext*()>;
+ using Task = stdx::function<NextAction(OperationContext*, const Status&)>;
+
+ /**
+ * Creates a Task returning kCancel. This is useful in shutting down the task runner after
+ * running a series of tasks.
+ *
+ * Without a cancellation task, the client would need to coordinate the completion of the
+ * last task with calling cancel() on the task runner.
+ */
+ static Task makeCancelTask();
+
+ TaskRunner(OldThreadPool* threadPool, const CreateOperationContextFn& createOperationContext);
+
+ virtual ~TaskRunner();
+
+ /**
+ * Returns diagnostic information.
+ */
+ std::string getDiagnosticString() const;
+
+ /**
+ * Returns true if there are any scheduled or actively running tasks.
+ */
+ bool isActive() const;
+
+ /**
+ * Schedules a task to be run by the task runner. Tasks are run in the same order that they
+ * are scheduled.
+ *
+ * This transitions the task runner to an active state.
+ *
+ * The task runner creates an operation context using '_createOperationContext'
+ * prior to running a scheduled task. Depending on the NextAction returned from the
+ * task, operation contexts may be shared between consecutive tasks invoked by the task
+ * runner.
+ *
+ * On completion, each task is expected to return a NextAction to the task runner.
+ *
+ * If the task returns kDisposeOperationContext, the task runner destroys the operation
+ * context. The next task to be invoked will receive a new operation context.
+ *
+ * If the task returns kKeepOperationContext, the task runner will retain the operation
+ * context to pass to the next task in the queue.
+ *
+ * If the task returns kCancel, the task runner will destroy the operation context and
+ * cancel the remaining tasks (each task will be invoked with a status containing the
+ * code ErrorCodes::CallbackCanceled). After all the tasks have been canceled, the task
+ * runner will become inactive.
+ *
+ * If the task returns kInvalid, this NextAction will be handled in the same way as
+ * kDisposeOperationContext.
+ *
+ * If the status passed to the task is not OK, the task should not proceed and return
+ * immediately. This is usually the case when the task runner is canceled. Accessing the
+ * operation context in the task will result in undefined behavior.
+ */
+ void schedule(const Task& task);
+
+ /**
+ * If there is a task that is already running, allows the task to run to completion.
+ * Cancels all scheduled tasks that have not been run. Canceled tasks will still be
+ * invoked with a status containing the code ErrorCodes::CallbackCanceled.
+ * After all active tasks have completed and unscheduled tasks have been canceled, the
+ * task runner will go into an inactive state.
+ *
+ * It is a no-op to call cancel() before scheduling any tasks.
+ */
+ void cancel();
+
+private:
+ /**
+ * Runs tasks in a loop.
+     * Loop exits when the task runner is canceled, e.g. by a task returning NextAction::kCancel.
+ */
+ void _runTasks();
+ void _finishRunTasks_inlock();
+
+ /**
+     * Waits for the next scheduled task to be added to the queue.
+     * Returns a null task when the task runner is stopped.
+ */
+ Task _waitForNextTask();
+
+ OldThreadPool* _threadPool;
+ CreateOperationContextFn _createOperationContext;
+
+ // Protects member data of this TaskRunner.
+ mutable stdx::mutex _mutex;
+
+ stdx::condition_variable _condition;
+
+ // _active is true when there are scheduled tasks in the task queue or
+ // when a task is being run by the task runner.
+ bool _active;
+
+ bool _cancelRequested;
+
+ // FIFO queue of scheduled tasks
+ std::list<Task> _tasks;
+};
+
+} // namespace repl
+} // namespace mongo
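
As an illustration of the NextAction contract documented in this header, here is a sketch of how a caller might drive the interface. The thread pool 'pool' and context factory 'makeOpCtx' are assumed to exist in the caller's code; none of this is code from the patch.

    using namespace mongo::repl;

    void runTwoTasks(mongo::OldThreadPool* pool,
                     const TaskRunner::CreateOperationContextFn& makeOpCtx) {
        TaskRunner runner(pool, makeOpCtx);

        // First task keeps its operation context, so the second task
        // will be invoked with the same OperationContext*.
        runner.schedule([](mongo::OperationContext* txn, const mongo::Status& status) {
            if (!status.isOK()) {
                // Canceled: return immediately without touching txn.
                return TaskRunner::NextAction::kCancel;
            }
            // ... do work with txn ...
            return TaskRunner::NextAction::kKeepOperationContext;
        });

        runner.schedule([](mongo::OperationContext* txn, const mongo::Status& status) {
            // ... more work with the same txn ...
            return TaskRunner::NextAction::kDisposeOperationContext;
        });

        // Shut the runner down without having to coordinate a separate
        // cancel() call with the completion of the last task.
        runner.schedule(TaskRunner::makeCancelTask());
    }

Because tasks run in FIFO order, the cancellation task is guaranteed to run after both work tasks have completed.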
diff --git a/src/mongo/db/repl/task_runner_test.cpp b/src/mongo/db/repl/task_runner_test.cpp
index c1e4d10b731..3c6f9d29f45 100644
--- a/src/mongo/db/repl/task_runner_test.cpp
+++ b/src/mongo/db/repl/task_runner_test.cpp
@@ -40,312 +40,311 @@
namespace {
- using namespace mongo;
- using namespace mongo::repl;
-
- using Task = TaskRunner::Task;
-
- TEST_F(TaskRunnerTest, InvalidConstruction) {
- // Null thread pool.
- ASSERT_THROWS_CODE(TaskRunner(nullptr, []() -> OperationContext* { return nullptr; }),
- UserException,
- ErrorCodes::BadValue);
-
- // Null function for creating operation contexts.
- ASSERT_THROWS_CODE(TaskRunner(&getThreadPool(), TaskRunner::CreateOperationContextFn()),
- UserException,
- ErrorCodes::BadValue);
- }
-
- TEST_F(TaskRunnerTest, GetDiagnosticString) {
- ASSERT_FALSE(getTaskRunner().getDiagnosticString().empty());
- }
-
- TEST_F(TaskRunnerTest, CallbackValues) {
- stdx::mutex mutex;
- bool called = false;
- OperationContext* txn = nullptr;
- Status status = getDetectableErrorStatus();
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- called = true;
- txn = theTxn;
- status = theStatus;
- return TaskRunner::NextAction::kCancel;
- };
- getTaskRunner().schedule(task);
- getThreadPool().join();
- ASSERT_FALSE(getTaskRunner().isActive());
-
+using namespace mongo;
+using namespace mongo::repl;
+
+using Task = TaskRunner::Task;
+
+TEST_F(TaskRunnerTest, InvalidConstruction) {
+ // Null thread pool.
+ ASSERT_THROWS_CODE(TaskRunner(nullptr, []() -> OperationContext* { return nullptr; }),
+ UserException,
+ ErrorCodes::BadValue);
+
+ // Null function for creating operation contexts.
+ ASSERT_THROWS_CODE(TaskRunner(&getThreadPool(), TaskRunner::CreateOperationContextFn()),
+ UserException,
+ ErrorCodes::BadValue);
+}
+
+TEST_F(TaskRunnerTest, GetDiagnosticString) {
+ ASSERT_FALSE(getTaskRunner().getDiagnosticString().empty());
+}
+
+TEST_F(TaskRunnerTest, CallbackValues) {
+ stdx::mutex mutex;
+ bool called = false;
+ OperationContext* txn = nullptr;
+ Status status = getDetectableErrorStatus();
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_TRUE(called);
- ASSERT(txn);
- ASSERT_OK(status);
- }
-
- TEST_F(TaskRunnerTest, OperationContextFactoryReturnsNull) {
- resetTaskRunner(new TaskRunner(&getThreadPool(), []() -> OperationContext* {
- return nullptr;
- }));
- stdx::mutex mutex;
- bool called = false;
- OperationContextNoop opCtxNoop;
- OperationContext* txn = &opCtxNoop;
- Status status = getDetectableErrorStatus();
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- called = true;
- txn = theTxn;
- status = theStatus;
- return TaskRunner::NextAction::kCancel;
- };
- getTaskRunner().schedule(task);
- getThreadPool().join();
- ASSERT_FALSE(getTaskRunner().isActive());
-
+ called = true;
+ txn = theTxn;
+ status = theStatus;
+ return TaskRunner::NextAction::kCancel;
+ };
+ getTaskRunner().schedule(task);
+ getThreadPool().join();
+ ASSERT_FALSE(getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_TRUE(called);
+ ASSERT(txn);
+ ASSERT_OK(status);
+}
+
+TEST_F(TaskRunnerTest, OperationContextFactoryReturnsNull) {
+ resetTaskRunner(
+ new TaskRunner(&getThreadPool(), []() -> OperationContext* { return nullptr; }));
+ stdx::mutex mutex;
+ bool called = false;
+ OperationContextNoop opCtxNoop;
+ OperationContext* txn = &opCtxNoop;
+ Status status = getDetectableErrorStatus();
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_TRUE(called);
- ASSERT_FALSE(txn);
- ASSERT_OK(status);
- }
-
- std::vector<int> _testRunTaskTwice(TaskRunnerTest& test,
- TaskRunner::NextAction nextAction,
- stdx::function<void(const Task& task)> schedule) {
- unittest::Barrier barrier(2U);
- stdx::mutex mutex;
- int i = 0;
- OperationContext* txn[2] = {nullptr, nullptr};
- int txnId[2] = {-100, -100};
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- int j = i++;
- if (j >= 2) {
- return TaskRunner::NextAction::kInvalid;
- }
- txn[j] = theTxn;
- txnId[j] = TaskRunnerTest::getOperationContextId(txn[j]);
- TaskRunner::NextAction result = j == 0 ? nextAction : TaskRunner::NextAction::kCancel;
- barrier.countDownAndWait();
- return result;
- };
-
- schedule(task);
- ASSERT_TRUE(test.getTaskRunner().isActive());
- barrier.countDownAndWait();
-
- schedule(task);
- ASSERT_TRUE(test.getTaskRunner().isActive());
+ called = true;
+ txn = theTxn;
+ status = theStatus;
+ return TaskRunner::NextAction::kCancel;
+ };
+ getTaskRunner().schedule(task);
+ getThreadPool().join();
+ ASSERT_FALSE(getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_TRUE(called);
+ ASSERT_FALSE(txn);
+ ASSERT_OK(status);
+}
+
+std::vector<int> _testRunTaskTwice(TaskRunnerTest& test,
+ TaskRunner::NextAction nextAction,
+ stdx::function<void(const Task& task)> schedule) {
+ unittest::Barrier barrier(2U);
+ stdx::mutex mutex;
+ int i = 0;
+ OperationContext* txn[2] = {nullptr, nullptr};
+ int txnId[2] = {-100, -100};
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ int j = i++;
+ if (j >= 2) {
+ return TaskRunner::NextAction::kInvalid;
+ }
+ txn[j] = theTxn;
+ txnId[j] = TaskRunnerTest::getOperationContextId(txn[j]);
+ TaskRunner::NextAction result = j == 0 ? nextAction : TaskRunner::NextAction::kCancel;
barrier.countDownAndWait();
+ return result;
+ };
+
+ schedule(task);
+ ASSERT_TRUE(test.getTaskRunner().isActive());
+ barrier.countDownAndWait();
+
+ schedule(task);
+ ASSERT_TRUE(test.getTaskRunner().isActive());
+ barrier.countDownAndWait();
+
+ test.getThreadPool().join();
+ ASSERT_FALSE(test.getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_EQUALS(2, i);
+ ASSERT(txn[0]);
+ ASSERT(txn[1]);
+ ASSERT_NOT_LESS_THAN(txnId[0], 0);
+ ASSERT_NOT_LESS_THAN(txnId[1], 0);
+ return {txnId[0], txnId[1]};
+}
+
+std::vector<int> _testRunTaskTwice(TaskRunnerTest& test, TaskRunner::NextAction nextAction) {
+ auto schedule = [&](const Task& task) { test.getTaskRunner().schedule(task); };
+ return _testRunTaskTwice(test, nextAction, schedule);
+}
+
+TEST_F(TaskRunnerTest, RunTaskTwiceDisposeOperationContext) {
+ std::vector<int> txnId =
+ _testRunTaskTwice(*this, TaskRunner::NextAction::kDisposeOperationContext);
+ ASSERT_NOT_EQUALS(txnId[0], txnId[1]);
+}
+
+// Joining thread pool before scheduling first task has no effect.
+// Joining thread pool before scheduling second task ensures that task runner releases
+// thread back to pool after disposing of operation context.
+TEST_F(TaskRunnerTest, RunTaskTwiceDisposeOperationContextJoinThreadPoolBeforeScheduling) {
+ auto schedule = [this](const Task& task) {
+ getThreadPool().join();
+ getTaskRunner().schedule(task);
+ };
+ std::vector<int> txnId =
+ _testRunTaskTwice(*this, TaskRunner::NextAction::kDisposeOperationContext, schedule);
+ ASSERT_NOT_EQUALS(txnId[0], txnId[1]);
+}
+
+TEST_F(TaskRunnerTest, RunTaskTwiceKeepOperationContext) {
+ std::vector<int> txnId =
+ _testRunTaskTwice(*this, TaskRunner::NextAction::kKeepOperationContext);
+ ASSERT_EQUALS(txnId[0], txnId[1]);
+}
+
+TEST_F(TaskRunnerTest, SkipSecondTask) {
+ stdx::mutex mutex;
+ int i = 0;
+ OperationContext* txn[2] = {nullptr, nullptr};
+ Status status[2] = {getDetectableErrorStatus(), getDetectableErrorStatus()};
+ stdx::condition_variable condition;
+ bool schedulingDone = false;
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
+ stdx::unique_lock<stdx::mutex> lk(mutex);
+ int j = i++;
+ if (j >= 2) {
+ return TaskRunner::NextAction::kCancel;
+ }
+ txn[j] = theTxn;
+ status[j] = theStatus;
- test.getThreadPool().join();
- ASSERT_FALSE(test.getTaskRunner().isActive());
+ // Wait for the test code to schedule the second task.
+ while (!schedulingDone) {
+ condition.wait(lk);
+ }
+ return TaskRunner::NextAction::kCancel;
+ };
+ getTaskRunner().schedule(task);
+ ASSERT_TRUE(getTaskRunner().isActive());
+ getTaskRunner().schedule(task);
+ {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_EQUALS(2, i);
- ASSERT(txn[0]);
- ASSERT(txn[1]);
- ASSERT_NOT_LESS_THAN(txnId[0], 0);
- ASSERT_NOT_LESS_THAN(txnId[1], 0);
- return {txnId[0], txnId[1]};
+ schedulingDone = true;
+ condition.notify_all();
}
-
- std::vector<int> _testRunTaskTwice(TaskRunnerTest& test, TaskRunner::NextAction nextAction) {
- auto schedule = [&](const Task& task) { test.getTaskRunner().schedule(task); };
- return _testRunTaskTwice(test, nextAction, schedule);
- }
-
- TEST_F(TaskRunnerTest, RunTaskTwiceDisposeOperationContext) {
- std::vector<int> txnId =
- _testRunTaskTwice(*this, TaskRunner::NextAction::kDisposeOperationContext);
- ASSERT_NOT_EQUALS(txnId[0], txnId[1]);
- }
-
- // Joining thread pool before scheduling first task has no effect.
- // Joining thread pool before scheduling second task ensures that task runner releases
- // thread back to pool after disposing of operation context.
- TEST_F(TaskRunnerTest, RunTaskTwiceDisposeOperationContextJoinThreadPoolBeforeScheduling) {
- auto schedule = [this](const Task& task) {
- getThreadPool().join();
- getTaskRunner().schedule(task);
- };
- std::vector<int> txnId =
- _testRunTaskTwice(*this, TaskRunner::NextAction::kDisposeOperationContext, schedule);
- ASSERT_NOT_EQUALS(txnId[0], txnId[1]);
- }
-
- TEST_F(TaskRunnerTest, RunTaskTwiceKeepOperationContext) {
- std::vector<int> txnId =
- _testRunTaskTwice(*this, TaskRunner::NextAction::kKeepOperationContext);
- ASSERT_EQUALS(txnId[0], txnId[1]);
- }
-
- TEST_F(TaskRunnerTest, SkipSecondTask) {
- stdx::mutex mutex;
- int i = 0;
- OperationContext* txn[2] = {nullptr, nullptr};
- Status status[2] = {getDetectableErrorStatus(), getDetectableErrorStatus()};
- stdx::condition_variable condition;
- bool schedulingDone = false;
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::unique_lock<stdx::mutex> lk(mutex);
- int j = i++;
- if (j >= 2) {
- return TaskRunner::NextAction::kCancel;
- }
- txn[j] = theTxn;
- status[j] = theStatus;
-
- // Wait for the test code to schedule the second task.
- while (!schedulingDone) {
- condition.wait(lk);
- }
-
+ getThreadPool().join();
+ ASSERT_FALSE(getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_EQUALS(2, i);
+ ASSERT(txn[0]);
+ ASSERT_OK(status[0]);
+ ASSERT_FALSE(txn[1]);
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status[1].code());
+}
+
+TEST_F(TaskRunnerTest, FirstTaskThrowsException) {
+ stdx::mutex mutex;
+ int i = 0;
+ OperationContext* txn[2] = {nullptr, nullptr};
+ Status status[2] = {getDetectableErrorStatus(), getDetectableErrorStatus()};
+ stdx::condition_variable condition;
+ bool schedulingDone = false;
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
+ stdx::unique_lock<stdx::mutex> lk(mutex);
+ int j = i++;
+ if (j >= 2) {
return TaskRunner::NextAction::kCancel;
- };
- getTaskRunner().schedule(task);
- ASSERT_TRUE(getTaskRunner().isActive());
- getTaskRunner().schedule(task);
- {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- schedulingDone = true;
- condition.notify_all();
}
- getThreadPool().join();
- ASSERT_FALSE(getTaskRunner().isActive());
+ txn[j] = theTxn;
+ status[j] = theStatus;
- stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_EQUALS(2, i);
- ASSERT(txn[0]);
- ASSERT_OK(status[0]);
- ASSERT_FALSE(txn[1]);
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status[1].code());
- }
-
- TEST_F(TaskRunnerTest, FirstTaskThrowsException) {
- stdx::mutex mutex;
- int i = 0;
- OperationContext* txn[2] = {nullptr, nullptr};
- Status status[2] = {getDetectableErrorStatus(), getDetectableErrorStatus()};
- stdx::condition_variable condition;
- bool schedulingDone = false;
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::unique_lock<stdx::mutex> lk(mutex);
- int j = i++;
- if (j >= 2) {
- return TaskRunner::NextAction::kCancel;
- }
- txn[j] = theTxn;
- status[j] = theStatus;
-
- // Wait for the test code to schedule the second task.
- while (!schedulingDone) {
- condition.wait(lk);
- }
-
- // Throwing an exception from the first task should cancel
- // unscheduled tasks and make the task runner inactive.
- // When the second (canceled) task throws an exception, it should be ignored.
- uassert(ErrorCodes::OperationFailed, "task failure", false);
-
- // not reached.
- invariant(false);
- return TaskRunner::NextAction::kKeepOperationContext;
- };
- getTaskRunner().schedule(task);
- ASSERT_TRUE(getTaskRunner().isActive());
- getTaskRunner().schedule(task);
- {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- schedulingDone = true;
- condition.notify_all();
+ // Wait for the test code to schedule the second task.
+ while (!schedulingDone) {
+ condition.wait(lk);
}
- getThreadPool().join();
- ASSERT_FALSE(getTaskRunner().isActive());
+ // Throwing an exception from the first task should cancel
+ // unscheduled tasks and make the task runner inactive.
+ // When the second (canceled) task throws an exception, it should be ignored.
+ uassert(ErrorCodes::OperationFailed, "task failure", false);
+
+ // not reached.
+ invariant(false);
+ return TaskRunner::NextAction::kKeepOperationContext;
+ };
+ getTaskRunner().schedule(task);
+ ASSERT_TRUE(getTaskRunner().isActive());
+ getTaskRunner().schedule(task);
+ {
stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_EQUALS(2, i);
- ASSERT(txn[0]);
- ASSERT_OK(status[0]);
- ASSERT_FALSE(txn[1]);
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status[1].code());
+ schedulingDone = true;
+ condition.notify_all();
}
-
- TEST_F(TaskRunnerTest, Cancel) {
- stdx::mutex mutex;
- stdx::condition_variable condition;
- Status status = getDetectableErrorStatus();
- bool taskRunning = false;
-
- // Running this task causes the task runner to wait for another task that
- // is never scheduled.
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- status = theStatus;
- taskRunning = true;
- condition.notify_all();
- return TaskRunner::NextAction::kKeepOperationContext;
- };
-
- // Calling cancel() before schedule() has no effect.
- // The task should still be invoked with a successful status.
- getTaskRunner().cancel();
-
- getTaskRunner().schedule(task);
- ASSERT_TRUE(getTaskRunner().isActive());
- {
- stdx::unique_lock<stdx::mutex> lk(mutex);
- while (!taskRunning) {
- condition.wait(lk);
- }
+ getThreadPool().join();
+ ASSERT_FALSE(getTaskRunner().isActive());
+
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_EQUALS(2, i);
+ ASSERT(txn[0]);
+ ASSERT_OK(status[0]);
+ ASSERT_FALSE(txn[1]);
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status[1].code());
+}
+
+TEST_F(TaskRunnerTest, Cancel) {
+ stdx::mutex mutex;
+ stdx::condition_variable condition;
+ Status status = getDetectableErrorStatus();
+ bool taskRunning = false;
+
+ // Running this task causes the task runner to wait for another task that
+ // is never scheduled.
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ status = theStatus;
+ taskRunning = true;
+ condition.notify_all();
+ return TaskRunner::NextAction::kKeepOperationContext;
+ };
+
+ // Calling cancel() before schedule() has no effect.
+ // The task should still be invoked with a successful status.
+ getTaskRunner().cancel();
+
+ getTaskRunner().schedule(task);
+ ASSERT_TRUE(getTaskRunner().isActive());
+ {
+ stdx::unique_lock<stdx::mutex> lk(mutex);
+ while (!taskRunning) {
+ condition.wait(lk);
}
+ }
- // It is fine to call cancel() multiple times.
- getTaskRunner().cancel();
- getTaskRunner().cancel();
+ // It is fine to call cancel() multiple times.
+ getTaskRunner().cancel();
+ getTaskRunner().cancel();
- getThreadPool().join();
- ASSERT_FALSE(getTaskRunner().isActive());
+ getThreadPool().join();
+ ASSERT_FALSE(getTaskRunner().isActive());
- // This status will not be OK if canceling the task runner
- // before scheduling the task results in the task being canceled.
- stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_OK(status);
- }
+ // This status will not be OK if canceling the task runner
+ // before scheduling the task results in the task being canceled.
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_OK(status);
+}
- TEST_F(TaskRunnerTest, DestroyShouldWaitForTasksToComplete) {
- stdx::mutex mutex;
- stdx::condition_variable condition;
- Status status = getDetectableErrorStatus();
- bool taskRunning = false;
-
- // Running this task causes the task runner to wait for another task that
- // is never scheduled.
- auto task = [&](OperationContext* theTxn, const Status& theStatus) {
- stdx::lock_guard<stdx::mutex> lk(mutex);
- status = theStatus;
- taskRunning = true;
- condition.notify_all();
- return TaskRunner::NextAction::kKeepOperationContext;
- };
+TEST_F(TaskRunnerTest, DestroyShouldWaitForTasksToComplete) {
+ stdx::mutex mutex;
+ stdx::condition_variable condition;
+ Status status = getDetectableErrorStatus();
+ bool taskRunning = false;
- getTaskRunner().schedule(task);
- ASSERT_TRUE(getTaskRunner().isActive());
- {
- stdx::unique_lock<stdx::mutex> lk(mutex);
- while (!taskRunning) {
- condition.wait(lk);
- }
+ // Running this task causes the task runner to wait for another task that
+ // is never scheduled.
+ auto task = [&](OperationContext* theTxn, const Status& theStatus) {
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ status = theStatus;
+ taskRunning = true;
+ condition.notify_all();
+ return TaskRunner::NextAction::kKeepOperationContext;
+ };
+
+ getTaskRunner().schedule(task);
+ ASSERT_TRUE(getTaskRunner().isActive());
+ {
+ stdx::unique_lock<stdx::mutex> lk(mutex);
+ while (!taskRunning) {
+ condition.wait(lk);
}
+ }
- destroyTaskRunner();
+ destroyTaskRunner();
- getThreadPool().join();
+ getThreadPool().join();
- // This status will not be OK if canceling the task runner
- // before scheduling the task results in the task being canceled.
- stdx::lock_guard<stdx::mutex> lk(mutex);
- ASSERT_OK(status);
- }
+ // This status will not be OK if canceling the task runner
+ // before scheduling the task results in the task being canceled.
+ stdx::lock_guard<stdx::mutex> lk(mutex);
+ ASSERT_OK(status);
+}
-} // namespace
+} // namespace
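
The rendezvous in _testRunTaskTwice relies on unittest::Barrier's countDownAndWait(). As a reference for readers, a minimal reusable two-party barrier with the same call shape can be built from a mutex and condition variable; this sketch is illustrative and is not MongoDB's implementation.

    #include <condition_variable>
    #include <cstddef>
    #include <mutex>

    class Barrier {
    public:
        explicit Barrier(std::size_t count) : _count(count), _remaining(count) {}

        // Blocks until 'count' threads have called this method, then
        // releases them all. The barrier is reusable for the next round,
        // which the tests above depend on (one barrier, two rendezvous).
        void countDownAndWait() {
            std::unique_lock<std::mutex> lk(_mutex);
            const std::size_t generation = _generation;
            if (--_remaining == 0) {
                ++_generation;        // open the barrier for this round
                _remaining = _count;  // re-arm for the next rendezvous
                _condition.notify_all();
                return;
            }
            while (generation == _generation) {
                _condition.wait(lk);
            }
        }

    private:
        std::mutex _mutex;
        std::condition_variable _condition;
        const std::size_t _count;
        std::size_t _remaining;
        std::size_t _generation = 0;
    };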
diff --git a/src/mongo/db/repl/task_runner_test_fixture.cpp b/src/mongo/db/repl/task_runner_test_fixture.cpp
index d595fc4b9f3..ede8abb989f 100644
--- a/src/mongo/db/repl/task_runner_test_fixture.cpp
+++ b/src/mongo/db/repl/task_runner_test_fixture.cpp
@@ -39,69 +39,76 @@
namespace mongo {
namespace repl {
- using namespace mongo;
- using namespace mongo::repl;
+using namespace mongo;
+using namespace mongo::repl;
namespace {
- const int kNumThreads = 3;
+const int kNumThreads = 3;
- AtomicInt32 _nextId;
+AtomicInt32 _nextId;
- class TaskRunnerOperationContext : public OperationContextNoop {
- public:
- TaskRunnerOperationContext() : _id(_nextId.fetchAndAdd(1)) { }
- int getId() const { return _id; }
- private:
- int _id;
- };
-
-
-} // namespace
-
- Status TaskRunnerTest::getDetectableErrorStatus() {
- return Status(ErrorCodes::InternalError, "Not mutated");
+class TaskRunnerOperationContext : public OperationContextNoop {
+public:
+ TaskRunnerOperationContext() : _id(_nextId.fetchAndAdd(1)) {}
+ int getId() const {
+ return _id;
}
- int TaskRunnerTest::getOperationContextId(OperationContext* txn) {
- if (!txn) { return -1; }
- TaskRunnerOperationContext* taskRunnerTxn = dynamic_cast<TaskRunnerOperationContext*>(txn);
- if (!taskRunnerTxn) { return -2; }
- return taskRunnerTxn->getId();
- }
+private:
+ int _id;
+};
- OperationContext* TaskRunnerTest::createOperationContext() const {
- return new TaskRunnerOperationContext();
- }
- TaskRunner& TaskRunnerTest::getTaskRunner() const {
- ASSERT(_taskRunner.get());
- return *_taskRunner;
- }
+} // namespace
- OldThreadPool& TaskRunnerTest::getThreadPool() const {
- ASSERT(_threadPool.get());
- return *_threadPool;
- }
+Status TaskRunnerTest::getDetectableErrorStatus() {
+ return Status(ErrorCodes::InternalError, "Not mutated");
+}
- void TaskRunnerTest::resetTaskRunner(TaskRunner* taskRunner) {
- _taskRunner.reset(taskRunner);
+int TaskRunnerTest::getOperationContextId(OperationContext* txn) {
+ if (!txn) {
+ return -1;
}
-
- void TaskRunnerTest::destroyTaskRunner() {
- _taskRunner.reset();
- }
-
- void TaskRunnerTest::setUp() {
- _threadPool.reset(new OldThreadPool(kNumThreads, "TaskRunnerTest-"));
- resetTaskRunner(new TaskRunner(_threadPool.get(),
- stdx::bind(&TaskRunnerTest::createOperationContext, this)));
+ TaskRunnerOperationContext* taskRunnerTxn = dynamic_cast<TaskRunnerOperationContext*>(txn);
+ if (!taskRunnerTxn) {
+ return -2;
}
-
- void TaskRunnerTest::tearDown() {
- destroyTaskRunner();
- _threadPool.reset();
- }
-
-} // namespace repl
-} // namespace mongo
+ return taskRunnerTxn->getId();
+}
+
+OperationContext* TaskRunnerTest::createOperationContext() const {
+ return new TaskRunnerOperationContext();
+}
+
+TaskRunner& TaskRunnerTest::getTaskRunner() const {
+ ASSERT(_taskRunner.get());
+ return *_taskRunner;
+}
+
+OldThreadPool& TaskRunnerTest::getThreadPool() const {
+ ASSERT(_threadPool.get());
+ return *_threadPool;
+}
+
+void TaskRunnerTest::resetTaskRunner(TaskRunner* taskRunner) {
+ _taskRunner.reset(taskRunner);
+}
+
+void TaskRunnerTest::destroyTaskRunner() {
+ _taskRunner.reset();
+}
+
+void TaskRunnerTest::setUp() {
+ _threadPool.reset(new OldThreadPool(kNumThreads, "TaskRunnerTest-"));
+ resetTaskRunner(new TaskRunner(_threadPool.get(),
+ stdx::bind(&TaskRunnerTest::createOperationContext, this)));
+}
+
+void TaskRunnerTest::tearDown() {
+ destroyTaskRunner();
+ _threadPool.reset();
+}
+
+} // namespace repl
+} // namespace mongo
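
The fixture tells operation contexts apart by stamping each mock with an id drawn from an atomic counter (_nextId.fetchAndAdd(1)). The same idea expressed with std::atomic, as an illustrative aside rather than the fixture's actual code:

    #include <atomic>

    static std::atomic<int> nextId{0};

    struct MockContext {
        // fetch_add returns the pre-increment value, so ids are 0, 1, 2, ...
        // which is why getOperationContextId() can reserve -1 and -2 as
        // sentinels for "null" and "not a mock".
        MockContext() : id(nextId.fetch_add(1)) {}
        const int id;
    };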
diff --git a/src/mongo/db/repl/task_runner_test_fixture.h b/src/mongo/db/repl/task_runner_test_fixture.h
index 70b25923498..dcbc13bbc07 100644
--- a/src/mongo/db/repl/task_runner_test_fixture.h
+++ b/src/mongo/db/repl/task_runner_test_fixture.h
@@ -35,48 +35,47 @@
namespace mongo {
- class OldThreadPool;
- class OperationContext;
+class OldThreadPool;
+class OperationContext;
namespace repl {
- class TaskRunner;
+class TaskRunner;
+
+/**
+ * Test fixture for tests that require a TaskRunner and/or
+ * ThreadPool.
+ */
+class TaskRunnerTest : public unittest::Test {
+public:
+ static Status getDetectableErrorStatus();
/**
- * Test fixture for tests that require a TaskRunner and/or
- * ThreadPool.
+ * Returns ID of mock operation context returned from createOperationContext().
+ * Returns -1 if txn is null.
+ * Returns -2 if txn cannot be converted to a mock operation context containing an ID.
*/
- class TaskRunnerTest : public unittest::Test {
- public:
- static Status getDetectableErrorStatus();
-
- /**
- * Returns ID of mock operation context returned from createOperationContext().
- * Returns -1 if txn is null.
- * Returns -2 if txn cannot be converted to a mock operation context containing an ID.
- */
- static int getOperationContextId(OperationContext* txn);
+ static int getOperationContextId(OperationContext* txn);
- /**
- * Returns an noop operation context with an embedded numerical ID.
- */
- virtual OperationContext* createOperationContext() const;
-
- OldThreadPool& getThreadPool() const;
- TaskRunner& getTaskRunner() const;
+ /**
+     * Returns a noop operation context with an embedded numerical ID.
+ */
+ virtual OperationContext* createOperationContext() const;
- void resetTaskRunner(TaskRunner* taskRunner);
- void destroyTaskRunner();
+ OldThreadPool& getThreadPool() const;
+ TaskRunner& getTaskRunner() const;
- protected:
+ void resetTaskRunner(TaskRunner* taskRunner);
+ void destroyTaskRunner();
- void setUp() override;
- void tearDown() override;
+protected:
+ void setUp() override;
+ void tearDown() override;
- private:
- std::unique_ptr<OldThreadPool> _threadPool;
- std::unique_ptr<TaskRunner> _taskRunner;
- };
+private:
+ std::unique_ptr<OldThreadPool> _threadPool;
+ std::unique_ptr<TaskRunner> _taskRunner;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 99738a38421..7ca7ba6aa84 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -39,30 +39,30 @@
namespace mongo {
namespace repl {
namespace {
- static const int kLeaderValue = 0;
- static const int kFollowerValue = 1;
- static const int kCandidateValue = 2;
+static const int kLeaderValue = 0;
+static const int kFollowerValue = 1;
+static const int kCandidateValue = 2;
} // namespace
- const TopologyCoordinator::Role TopologyCoordinator::Role::leader(kLeaderValue);
- const TopologyCoordinator::Role TopologyCoordinator::Role::follower(kFollowerValue);
- const TopologyCoordinator::Role TopologyCoordinator::Role::candidate(kCandidateValue);
+const TopologyCoordinator::Role TopologyCoordinator::Role::leader(kLeaderValue);
+const TopologyCoordinator::Role TopologyCoordinator::Role::follower(kFollowerValue);
+const TopologyCoordinator::Role TopologyCoordinator::Role::candidate(kCandidateValue);
- TopologyCoordinator::Role::Role(int value) : _value(value) {}
+TopologyCoordinator::Role::Role(int value) : _value(value) {}
- std::string TopologyCoordinator::Role::toString() const {
- switch(_value) {
+std::string TopologyCoordinator::Role::toString() const {
+ switch (_value) {
case kLeaderValue:
return "leader";
case kFollowerValue:
return "follower";
case kCandidateValue:
return "candidate";
- }
- invariant(false);
}
+ invariant(false);
+}
- TopologyCoordinator::~TopologyCoordinator() {}
+TopologyCoordinator::~TopologyCoordinator() {}
} // namespace repl
} // namespace mongo
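
Role here is an int-backed value class with named constants rather than a plain enum, which keeps the set of roles closed and gives toString() an exhaustive switch backed by an unreachable guard (the invariant(false) that this patch moves outside the switch). A minimal standalone sketch of the pattern, with illustrative names and values:

    #include <cassert>
    #include <string>

    class Role {
    public:
        static const Role leader;
        static const Role follower;
        static const Role candidate;

        bool operator==(const Role& other) const {
            return _value == other._value;
        }

        std::string toString() const {
            switch (_value) {
                case 0:
                    return "leader";
                case 1:
                    return "follower";
                case 2:
                    return "candidate";
            }
            assert(false);  // unreachable: only the three constants exist
            return {};
        }

    private:
        explicit Role(int value) : _value(value) {}  // no other values possible
        int _value;
    };

    const Role Role::leader(0);
    const Role Role::follower(1);
    const Role Role::candidate(2);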
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 3af054b3010..0dbc81baac5 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -40,444 +40,444 @@
namespace mongo {
- class Timestamp;
+class Timestamp;
namespace repl {
- class HeartbeatResponseAction;
- class OpTime;
- class ReplSetHeartbeatArgs;
- class ReplicaSetConfig;
- class TagSubgroup;
- class LastVote;
- struct MemberState;
+class HeartbeatResponseAction;
+class OpTime;
+class ReplSetHeartbeatArgs;
+class ReplicaSetConfig;
+class TagSubgroup;
+class LastVote;
+struct MemberState;
+
+/**
+ * Replication Topology Coordinator interface.
+ *
+ * This object is responsible for managing the topology of the cluster.
+ * Tasks include consensus and leader election, chaining, and configuration management.
+ * Methods of this class should be non-blocking.
+ */
+class TopologyCoordinator {
+ MONGO_DISALLOW_COPYING(TopologyCoordinator);
+
+public:
+ class Role;
+
+ virtual ~TopologyCoordinator();
+
+ ////////////////////////////////////////////////////////////
+ //
+ // State inspection methods.
+ //
+ ////////////////////////////////////////////////////////////
+
+ /**
+ * Gets the role of this member in the replication protocol.
+ */
+ virtual Role getRole() const = 0;
+
+ /**
+ * Gets the MemberState of this member in the replica set.
+ */
+ virtual MemberState getMemberState() const = 0;
+
+ /**
+ * Returns the address of the current sync source, or an empty HostAndPort if there is no
+ * current sync source.
+ */
+ virtual HostAndPort getSyncSourceAddress() const = 0;
+
+ /**
+ * Retrieves a vector of HostAndPorts containing all nodes that are neither DOWN nor
+ * ourself.
+ */
+ virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const = 0;
/**
- * Replication Topology Coordinator interface.
+ * Gets the earliest time the current node will stand for election.
+ */
+ virtual Date_t getStepDownTime() const = 0;
+
+ /**
+ * Gets the current value of the maintenance mode counter.
+ */
+ virtual int getMaintenanceCount() const = 0;
+
+ /**
+ * Gets the latest term this member is aware of. If this member is the primary,
+ * it's the current term of the replica set.
+ */
+ virtual long long getTerm() const = 0;
+
+ /**
+ * Sets the latest term this member is aware of to the higher of its current value and
+ * the value passed in as "term".
+ * Returns true if the local term value is changed.
+ */
+ virtual bool updateTerm(long long term) = 0;
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Basic state manipulation methods.
+ //
+ ////////////////////////////////////////////////////////////
+
+ /**
+ * Sets the index into the config used when we next choose a sync source
+ */
+ virtual void setForceSyncSourceIndex(int index) = 0;
+
+ /**
+ * Chooses and sets a new sync source, based on our current knowledge of the world.
+ */
+ virtual HostAndPort chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied) = 0;
+
+ /**
+ * Suppresses selecting "host" as sync source until "until".
+ */
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
+
+ /**
+ * Removes a single entry "host" from the list of potential sync sources which we
+ * have blacklisted, if it is supposed to be unblacklisted by "now".
+ */
+ virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now) = 0;
+
+ /**
+ * Clears the list of potential sync sources we have blacklisted.
+ */
+ virtual void clearSyncSourceBlacklist() = 0;
+
+ /**
+ * Determines if a new sync source should be chosen, if a better candidate sync source is
+ * available. If the current sync source's last optime is more than _maxSyncSourceLagSecs
+ * behind any syncable source, this function returns true.
+ *
+ * "now" is used to skip over currently blacklisted sync sources.
+ */
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const = 0;
+
+ /**
+ * Checks whether we are a single node set and we are not in a stepdown period. If so,
+ * puts us into candidate mode, otherwise does nothing. This is used to ensure that
+ * nodes in a single node replset become primary again when their stepdown period ends.
+ */
+ virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) = 0;
+
+ /**
+ * Sets the earliest time the current node will stand for election to "newTime".
+ *
+ * Until this time, while the node may report itself as electable, it will not stand
+ * for election.
+ */
+ virtual void setElectionSleepUntil(Date_t newTime) = 0;
+
+ /**
+ * Sets the reported mode of this node to one of RS_SECONDARY, RS_STARTUP2, RS_ROLLBACK or
+ * RS_RECOVERING, when getRole() == Role::follower. This is the interface by which the
+ * applier changes the reported member state of the current node, and enables or suppresses
+ * electability of the current node. All modes but RS_SECONDARY indicate an unelectable
+ * follower state (one that cannot transition to candidate).
+ */
+ virtual void setFollowerMode(MemberState::MS newMode) = 0;
+
+ /**
+ * Adjusts the maintenance mode count by "inc".
+ *
+ * It is an error to call this method if getRole() does not return Role::follower.
+ * It is an error to allow the maintenance count to go negative.
+ */
+ virtual void adjustMaintenanceCountBy(int inc) = 0;
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Methods that prepare responses to command requests.
+ //
+ ////////////////////////////////////////////////////////////
+
+ // produces a reply to a replSetSyncFrom command
+ virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackArgs& data,
+ const HostAndPort& target,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to a replSetFresh command
+ virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
+ Date_t now,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to a received electCmd
+ virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
+ Date_t now,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to a heartbeat
+ virtual Status prepareHeartbeatResponse(Date_t now,
+ const ReplSetHeartbeatArgs& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response) = 0;
+
+ // produce a reply to a V1 heartbeat
+ virtual Status prepareHeartbeatResponseV1(Date_t now,
+ const ReplSetHeartbeatArgsV1& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response) = 0;
+
+ // produce a reply to a status request
+ virtual void prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data,
+ Date_t now,
+ unsigned uptime,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to an ismaster request. It is only valid to call this if we are a
+ // replset.
+ virtual void fillIsMasterForReplSet(IsMasterResponse* response) = 0;
+
+ // produce a reply to a freeze request
+ virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response) = 0;
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Methods for sending and receiving heartbeats,
+ // reconfiguring and handling the results of standing for
+ // election.
+ //
+ ////////////////////////////////////////////////////////////
+
+ /**
+ * Updates the topology coordinator's notion of the replica set configuration.
*
- * This object is responsible for managing the topology of the cluster.
- * Tasks include consensus and leader election, chaining, and configuration management.
- * Methods of this class should be non-blocking.
- */
- class TopologyCoordinator {
- MONGO_DISALLOW_COPYING(TopologyCoordinator);
- public:
- class Role;
-
- virtual ~TopologyCoordinator();
-
- ////////////////////////////////////////////////////////////
- //
- // State inspection methods.
- //
- ////////////////////////////////////////////////////////////
-
- /**
- * Gets the role of this member in the replication protocol.
- */
- virtual Role getRole() const = 0;
-
- /**
- * Gets the MemberState of this member in the replica set.
- */
- virtual MemberState getMemberState() const = 0;
-
- /**
- * Returns the address of the current sync source, or an empty HostAndPort if there is no
- * current sync source.
- */
- virtual HostAndPort getSyncSourceAddress() const = 0;
-
- /**
- * Retrieves a vector of HostAndPorts containing all nodes that are neither DOWN nor
- * ourself.
- */
- virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const = 0;
-
- /**
- * Gets the earliest time the current node will stand for election.
- */
- virtual Date_t getStepDownTime() const = 0;
-
- /**
- * Gets the current value of the maintenance mode counter.
- */
- virtual int getMaintenanceCount() const = 0;
-
- /**
- * Gets the latest term this member is aware of. If this member is the primary,
- * it's the current term of the replica set.
- */
- virtual long long getTerm() const = 0;
-
- /**
- * Sets the latest term this member is aware of to the higher of its current value and
- * the value passed in as "term".
- * Returns true if the local term value is changed.
- */
- virtual bool updateTerm(long long term) = 0;
-
- ////////////////////////////////////////////////////////////
- //
- // Basic state manipulation methods.
- //
- ////////////////////////////////////////////////////////////
-
- /**
- * Sets the index into the config used when we next choose a sync source
- */
- virtual void setForceSyncSourceIndex(int index) = 0;
-
- /**
- * Chooses and sets a new sync source, based on our current knowledge of the world.
- */
- virtual HostAndPort chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied) = 0;
-
- /**
- * Suppresses selecting "host" as sync source until "until".
- */
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
-
- /**
- * Removes a single entry "host" from the list of potential sync sources which we
- * have blacklisted, if it is supposed to be unblacklisted by "now".
- */
- virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now) = 0;
-
- /**
- * Clears the list of potential sync sources we have blacklisted.
- */
- virtual void clearSyncSourceBlacklist() = 0;
-
- /**
- * Determines if a new sync source should be chosen, if a better candidate sync source is
- * available. If the current sync source's last optime is more than _maxSyncSourceLagSecs
- * behind any syncable source, this function returns true.
- *
- * "now" is used to skip over currently blacklisted sync sources.
- */
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const = 0;
-
- /**
- * Checks whether we are a single node set and we are not in a stepdown period. If so,
- * puts us into candidate mode, otherwise does nothing. This is used to ensure that
- * nodes in a single node replset become primary again when their stepdown period ends.
- */
- virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) = 0;
-
- /**
- * Sets the earliest time the current node will stand for election to "newTime".
- *
- * Until this time, while the node may report itself as electable, it will not stand
- * for election.
- */
- virtual void setElectionSleepUntil(Date_t newTime) = 0;
-
- /**
- * Sets the reported mode of this node to one of RS_SECONDARY, RS_STARTUP2, RS_ROLLBACK or
- * RS_RECOVERING, when getRole() == Role::follower. This is the interface by which the
- * applier changes the reported member state of the current node, and enables or suppresses
- * electability of the current node. All modes but RS_SECONDARY indicate an unelectable
- * follower state (one that cannot transition to candidate).
- */
- virtual void setFollowerMode(MemberState::MS newMode) = 0;
-
- /**
- * Adjusts the maintenance mode count by "inc".
- *
- * It is an error to call this method if getRole() does not return Role::follower.
- * It is an error to allow the maintenance count to go negative.
- */
- virtual void adjustMaintenanceCountBy(int inc) = 0;
-
- ////////////////////////////////////////////////////////////
- //
- // Methods that prepare responses to command requests.
- //
- ////////////////////////////////////////////////////////////
-
- // produces a reply to a replSetSyncFrom command
- virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackArgs& data,
- const HostAndPort& target,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to a replSetFresh command
- virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
- Date_t now,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to a received electCmd
- virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
- Date_t now,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to a heartbeat
- virtual Status prepareHeartbeatResponse(Date_t now,
- const ReplSetHeartbeatArgs& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response) = 0;
-
- // produce a reply to a V1 heartbeat
- virtual Status prepareHeartbeatResponseV1(Date_t now,
- const ReplSetHeartbeatArgsV1& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response) = 0;
-
- // produce a reply to a status request
- virtual void prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data,
- Date_t now,
- unsigned uptime,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to an ismaster request. It is only valid to call this if we are a
- // replset.
- virtual void fillIsMasterForReplSet(IsMasterResponse* response) = 0;
-
- // produce a reply to a freeze request
- virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response) = 0;
-
- ////////////////////////////////////////////////////////////
- //
- // Methods for sending and receiving heartbeats,
- // reconfiguring and handling the results of standing for
- // election.
- //
- ////////////////////////////////////////////////////////////
-
- /**
- * Updates the topology coordinator's notion of the replica set configuration.
- *
- * "newConfig" is the new configuration, and "selfIndex" is the index of this
- * node's configuration information in "newConfig", or "selfIndex" is -1 to
- * indicate that this node is not a member of "newConfig".
- *
- * newConfig.isInitialized() should be true, though implementations may accept
- * configurations where this is not true, for testing purposes.
- */
- virtual void updateConfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now,
- const OpTime& lastOpApplied) = 0;
-
- /**
- * Prepares a heartbeat request appropriate for sending to "target", assuming the
- * current time is "now". "ourSetName" is used as the name for our replica set if
- * the topology coordinator does not have a valid configuration installed.
- *
- * The returned pair contains proper arguments for a replSetHeartbeat command, and
- * an amount of time to wait for the response.
- *
- * This call should be paired (with intervening network communication) with a call to
- * processHeartbeatResponse for the same "target".
- */
- virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target) = 0;
- virtual std::pair<ReplSetHeartbeatArgsV1, Milliseconds> prepareHeartbeatRequestV1(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target) = 0;
-
- /**
- * Processes a heartbeat response from "target" that arrived around "now", having
- * spent "networkRoundTripTime" millis on the network.
- *
- * Updates internal topology coordinator state, and returns instructions about what action
- * to take next.
- *
- * If the next action indicates StartElection, the topology coordinator has transitioned to
- * the "candidate" role, and will remain there until processWinElection or
- * processLoseElection are called.
- *
- * If the next action indicates "StepDownSelf", the topology coordinator has transitioned
- * to the "follower" role from "leader", and the caller should take any necessary actions
- * to become a follower.
- *
- * If the next action indicates "StepDownRemotePrimary", the caller should take steps to
- * cause the specified remote host to step down from primary to secondary.
- *
- * If the next action indicates "Reconfig", the caller should verify the configuration in
- * hbResponse is acceptable, perform any other reconfiguration actions it must, and call
- * updateConfig with the new configuration and the appropriate value for "selfIndex". It
- * must also wrap up any outstanding elections (by calling processLoseElection or
- * processWinElection) before calling updateConfig.
- *
- * This call should be paired (with intervening network communication) with a call to
- * prepareHeartbeatRequest for the same "target".
- */
- virtual HeartbeatResponseAction processHeartbeatResponse(
- Date_t now,
- Milliseconds networkRoundTripTime,
- const HostAndPort& target,
- const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
- const OpTime& myLastOpApplied) = 0;
-
- /**
- * If getRole() == Role::candidate and this node has not voted too recently, updates the
- * lastVote tracker and returns true. Otherwise, returns false.
- */
- virtual bool voteForMyself(Date_t now) = 0;
-
- /**
- * Increase the term.
- */
- virtual void incrementTerm() = 0;
-
- /**
- * Set lastVote to be for ourself in this term.
- */
- virtual void voteForMyselfV1() = 0;
-
- /**
- * Performs state updates associated with winning an election.
- *
- * It is an error to call this if the topology coordinator is not in candidate mode.
- *
- * Exactly one of either processWinElection or processLoseElection must be called if
- * processHeartbeatResponse returns StartElection, to exit candidate mode.
- */
- virtual void processWinElection(OID electionId, Timestamp electionOpTime) = 0;
-
- /**
- * Performs state updates associated with losing an election.
- *
- * It is an error to call this if the topology coordinator is not in candidate mode.
- *
- * Exactly one of either processWinElection or processLoseElection must be called if
- * processHeartbeatResponse returns StartElection, to exit candidate mode.
- */
- virtual void processLoseElection() = 0;
-
- /**
- * Tries to transition the coordinator from the leader role to the follower role.
- *
- * Fails if "force" is not set and no follower is known to be up. It is illegal
- * to call this method if the node is not leader.
- *
- * Returns whether or not the step down succeeded.
- */
- virtual bool stepDown(Date_t until, bool force, const OpTime& lastOpApplied) = 0;
-
- /**
- * Sometimes a request to step down comes in (like via a heartbeat), but we don't have the
- * global exclusive lock so we can't actually stepdown at that moment. When that happens
- * we record that a stepdown request is pending and schedule work to stepdown in the global
- * lock. This method is called after holding the global lock to perform the actual
- * stepdown, but only if the node hasn't already stepped down another way since the work was
- * scheduled. Returns true if it actually steps down, and false otherwise.
- */
- virtual bool stepDownIfPending() = 0;
-
- /**
- * Considers whether or not this node should stand for election, and returns true
- * if the node has transitioned to candidate role as a result of the call.
- */
- virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) = 0;
-
- /**
- * Set the outgoing heartbeat message from self
- */
- virtual void setMyHeartbeatMessage(const Date_t now, const std::string& s) = 0;
-
- /**
- * Prepares a BSONObj describing the current term, primary, and lastOp information.
- */
- virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder,
- const OpTime& lastCommittedOpTime) const = 0;
-
- /**
- * Writes into 'output' all the information needed to generate a summary of the current
- * replication state for use by the web interface.
- */
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
-
- /**
- * Prepares a ReplSetRequestVotesResponse.
- */
- virtual void processReplSetRequestVotes(const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response,
- const OpTime& lastAppliedOpTime) = 0;
-
- /**
- * Determines whether or not the newly elected primary is valid from our perspective.
- * If it is, sets the _currentPrimaryIndex and term to the received values.
- * If it is not, return ErrorCode::BadValue and the current term from our perspective.
- * Populate responseTerm with the current term from our perspective.
- */
- virtual Status processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm) = 0;
-
- /**
- * Loads an initial LastVote document, which was read from local storage.
- *
- * Called only during replication startup. All other updates are done internally.
- */
- virtual void loadLastVote(const LastVote& lastVote) = 0;
-
- /**
- * Returns the most recent term this node is aware of.
- */
- virtual long long getTerm() = 0;
-
- /**
- * Readies the TopologyCoordinator for stepdown.
- */
- virtual void prepareForStepDown() = 0;
-
- protected:
- TopologyCoordinator() {}
- };
-
- /**
- * Type that denotes the role of a node in the replication protocol.
+ * "newConfig" is the new configuration, and "selfIndex" is the index of this
+ * node's configuration information in "newConfig", or "selfIndex" is -1 to
+ * indicate that this node is not a member of "newConfig".
*
- * The role is distinct from MemberState, in that it only deals with the
- * roles a node plays in the basic protocol -- leader, follower and candidate.
- * The mapping between MemberState and Role is complex -- several MemberStates
- * map to the follower role, and MemberState::RS_SECONDARY maps to either
- * follower or candidate roles, e.g.
+ * newConfig.isInitialized() should be true, though implementations may accept
+ * configurations where this is not true, for testing purposes.
*/
- class TopologyCoordinator::Role {
- public:
- /**
- * Constant indicating leader role.
- */
- static const Role leader;
+ virtual void updateConfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now,
+ const OpTime& lastOpApplied) = 0;
- /**
- * Constant indicating follower role.
- */
- static const Role follower;
+ /**
+ * Prepares a heartbeat request appropriate for sending to "target", assuming the
+ * current time is "now". "ourSetName" is used as the name for our replica set if
+ * the topology coordinator does not have a valid configuration installed.
+ *
+ * The returned pair contains proper arguments for a replSetHeartbeat command, and
+ * an amount of time to wait for the response.
+ *
+ * This call should be paired (with intervening network communication) with a call to
+ * processHeartbeatResponse for the same "target".
+ */
+ virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target) = 0;
+ virtual std::pair<ReplSetHeartbeatArgsV1, Milliseconds> prepareHeartbeatRequestV1(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target) = 0;
- /**
- * Constant indicating candidate role
- */
- static const Role candidate;
+ /**
+ * Processes a heartbeat response from "target" that arrived around "now", having
+ * spent "networkRoundTripTime" millis on the network.
+ *
+ * Updates internal topology coordinator state, and returns instructions about what action
+ * to take next.
+ *
+ * If the next action indicates StartElection, the topology coordinator has transitioned to
+ * the "candidate" role, and will remain there until processWinElection or
+ * processLoseElection are called.
+ *
+ * If the next action indicates "StepDownSelf", the topology coordinator has transitioned
+ * to the "follower" role from "leader", and the caller should take any necessary actions
+ * to become a follower.
+ *
+ * If the next action indicates "StepDownRemotePrimary", the caller should take steps to
+ * cause the specified remote host to step down from primary to secondary.
+ *
+ * If the next action indicates "Reconfig", the caller should verify the configuration in
+ * hbResponse is acceptable, perform any other reconfiguration actions it must, and call
+ * updateConfig with the new configuration and the appropriate value for "selfIndex". It
+ * must also wrap up any outstanding elections (by calling processLoseElection or
+ * processWinElection) before calling updateConfig.
+ *
+ * This call should be paired (with intervening network communication) with a call to
+ * prepareHeartbeatRequest for the same "target".
+ */
+ virtual HeartbeatResponseAction processHeartbeatResponse(
+ Date_t now,
+ Milliseconds networkRoundTripTime,
+ const HostAndPort& target,
+ const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
+ const OpTime& myLastOpApplied) = 0;
+
+ /**
+ * If getRole() == Role::candidate and this node has not voted too recently, updates the
+ * lastVote tracker and returns true. Otherwise, returns false.
+ */
+ virtual bool voteForMyself(Date_t now) = 0;
+
+ /**
+     * Increases the term.
+ */
+ virtual void incrementTerm() = 0;
+
+ /**
+     * Sets lastVote to be for ourself in this term.
+ */
+ virtual void voteForMyselfV1() = 0;
+
+ /**
+ * Performs state updates associated with winning an election.
+ *
+ * It is an error to call this if the topology coordinator is not in candidate mode.
+ *
+ * Exactly one of either processWinElection or processLoseElection must be called if
+ * processHeartbeatResponse returns StartElection, to exit candidate mode.
+ */
+ virtual void processWinElection(OID electionId, Timestamp electionOpTime) = 0;
+
+ /**
+ * Performs state updates associated with losing an election.
+ *
+ * It is an error to call this if the topology coordinator is not in candidate mode.
+ *
+ * Exactly one of either processWinElection or processLoseElection must be called if
+ * processHeartbeatResponse returns StartElection, to exit candidate mode.
+ */
+ virtual void processLoseElection() = 0;
+
+ /**
+ * Tries to transition the coordinator from the leader role to the follower role.
+ *
+ * Fails if "force" is not set and no follower is known to be up. It is illegal
+ * to call this method if the node is not leader.
+ *
+ * Returns whether or not the step down succeeded.
+ */
+ virtual bool stepDown(Date_t until, bool force, const OpTime& lastOpApplied) = 0;
+
+ /**
+ * Sometimes a request to step down comes in (like via a heartbeat), but we don't have the
+     * global exclusive lock, so we can't actually step down at that moment. When that happens,
+     * we record that a stepdown request is pending and schedule work to step down while
+     * holding the global lock. This method is called once the global lock is held, to perform
+     * the actual stepdown, but only if the node hasn't already stepped down another way since
+ * scheduled. Returns true if it actually steps down, and false otherwise.
+ */
+ virtual bool stepDownIfPending() = 0;
+
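A minimal sketch of the deferred-stepdown sequence described above, assuming prepareForStepDown() (declared below) is what records the pending request; the caller-side scheduling is hypothetical:

    // Heartbeat thread: asked to step down without the global exclusive lock.
    topoCoord->prepareForStepDown();  // mark the stepdown as pending
    // ...schedule work that runs while holding the global exclusive lock...

    // Scheduled work, now under the global exclusive lock:
    if (topoCoord->stepDownIfPending()) {
        // We actually stepped down here; perform follower transition work.
    }  // false: the node already stepped down some other way
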
+ /**
+ * Considers whether or not this node should stand for election, and returns true
+ * if the node has transitioned to candidate role as a result of the call.
+ */
+ virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) = 0;
+
+ /**
+     * Sets the outgoing heartbeat message from self.
+ */
+ virtual void setMyHeartbeatMessage(const Date_t now, const std::string& s) = 0;
+
+ /**
+ * Prepares a BSONObj describing the current term, primary, and lastOp information.
+ */
+ virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder,
+ const OpTime& lastCommittedOpTime) const = 0;
+
+ /**
+ * Writes into 'output' all the information needed to generate a summary of the current
+ * replication state for use by the web interface.
+ */
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
+
+ /**
+ * Prepares a ReplSetRequestVotesResponse.
+ */
+ virtual void processReplSetRequestVotes(const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response,
+ const OpTime& lastAppliedOpTime) = 0;
+
+ /**
+ * Determines whether or not the newly elected primary is valid from our perspective.
+ * If it is, sets the _currentPrimaryIndex and term to the received values.
+     * If it is not, returns ErrorCodes::BadValue and the current term from our perspective.
+     * Populates responseTerm with the current term from our perspective.
+ */
+ virtual Status processReplSetDeclareElectionWinner(const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm) = 0;
+
+ /**
+ * Loads an initial LastVote document, which was read from local storage.
+ *
+ * Called only during replication startup. All other updates are done internally.
+ */
+ virtual void loadLastVote(const LastVote& lastVote) = 0;
+
+ /**
+ * Returns the most recent term this node is aware of.
+ */
+ virtual long long getTerm() = 0;
+
+ /**
+ * Readies the TopologyCoordinator for stepdown.
+ */
+ virtual void prepareForStepDown() = 0;
+
+protected:
+ TopologyCoordinator() {}
+};
+
+/**
+ * Type that denotes the role of a node in the replication protocol.
+ *
+ * The role is distinct from MemberState, in that it only deals with the
+ * roles a node plays in the basic protocol -- leader, follower and candidate.
+ * The mapping between MemberState and Role is complex -- several MemberStates
+ * map to the follower role, and MemberState::RS_SECONDARY, for example, maps
+ * to either the follower or the candidate role.
+ */
+class TopologyCoordinator::Role {
+public:
+ /**
+ * Constant indicating leader role.
+ */
+ static const Role leader;
+
+ /**
+ * Constant indicating follower role.
+ */
+ static const Role follower;
+
+ /**
+     * Constant indicating candidate role.
+ */
+ static const Role candidate;
- Role() {}
+ Role() {}
- bool operator==(Role other) const { return _value == other._value; }
- bool operator!=(Role other) const { return _value != other._value; }
+ bool operator==(Role other) const {
+ return _value == other._value;
+ }
+ bool operator!=(Role other) const {
+ return _value != other._value;
+ }
- std::string toString() const;
+ std::string toString() const;
- private:
- explicit Role(int value);
+private:
+ explicit Role(int value);
- int _value;
- };
+ int _value;
+};
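A small usage sketch for Role, assuming toString() renders the role name; the surrounding caller is hypothetical:

    // Roles are value-compared constants on TopologyCoordinator::Role.
    if (topoCoord->getRole() == TopologyCoordinator::Role::candidate) {
        // e.g. election handling; toString() is handy for log lines.
        std::string roleName = topoCoord->getRole().toString();
    }
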
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 08d7ac7198c..a2905c1eacb 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -55,1308 +55,1232 @@
namespace mongo {
namespace repl {
- using std::vector;
+using std::vector;
- const Seconds TopologyCoordinatorImpl::VoteLease::leaseTime = Seconds(30);
+const Seconds TopologyCoordinatorImpl::VoteLease::leaseTime = Seconds(30);
namespace {
- template <typename T>
- int indexOfIterator(const std::vector<T>& vec,
- typename std::vector<T>::const_iterator& it) {
- return static_cast<int>(it - vec.begin());
- }
-
- // Interval between the time the last heartbeat from a node was received successfully, or
- // the time when we gave up retrying, and when the next heartbeat should be sent to a target.
- const auto kHeartbeatInterval = Seconds{2};
+template <typename T>
+int indexOfIterator(const std::vector<T>& vec, typename std::vector<T>::const_iterator& it) {
+ return static_cast<int>(it - vec.begin());
+}
- // Maximum number of retries for a failed heartbeat.
- const int kMaxHeartbeatRetries = 2;
+// Interval between the time the last heartbeat from a node was received successfully, or
+// the time when we gave up retrying, and when the next heartbeat should be sent to a target.
+const auto kHeartbeatInterval = Seconds{2};
- /**
- * Returns true if the only up heartbeats are auth errors.
- */
- bool _hasOnlyAuthErrorUpHeartbeats(const std::vector<MemberHeartbeatData>& hbdata,
- const int selfIndex) {
- bool foundAuthError = false;
- for (std::vector<MemberHeartbeatData>::const_iterator it = hbdata.begin();
- it != hbdata.end();
- ++it) {
- if (indexOfIterator(hbdata, it) == selfIndex) {
- continue;
- }
+// Maximum number of retries for a failed heartbeat.
+const int kMaxHeartbeatRetries = 2;
- if (it->up()) {
- return false;
- }
+/**
+ * Returns true if the only up heartbeats are auth errors.
+ */
+bool _hasOnlyAuthErrorUpHeartbeats(const std::vector<MemberHeartbeatData>& hbdata,
+ const int selfIndex) {
+ bool foundAuthError = false;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = hbdata.begin(); it != hbdata.end();
+ ++it) {
+ if (indexOfIterator(hbdata, it) == selfIndex) {
+ continue;
+ }
- if (it->hasAuthIssue()) {
- foundAuthError = true;
- }
+ if (it->up()) {
+ return false;
}
- return foundAuthError;
+ if (it->hasAuthIssue()) {
+ foundAuthError = true;
+ }
}
-} // namespace
+ return foundAuthError;
+}
- PingStats::PingStats() :
- count(0),
- value(std::numeric_limits<unsigned int>::max()),
- _numFailuresSinceLastStart(std::numeric_limits<int>::max()) {
- }
+} // namespace
- void PingStats::start(Date_t now) {
- _lastHeartbeatStartDate = now;
- _numFailuresSinceLastStart = 0;
+PingStats::PingStats()
+ : count(0),
+ value(std::numeric_limits<unsigned int>::max()),
+ _numFailuresSinceLastStart(std::numeric_limits<int>::max()) {}
+
+void PingStats::start(Date_t now) {
+ _lastHeartbeatStartDate = now;
+ _numFailuresSinceLastStart = 0;
+}
+
+void PingStats::hit(int millis) {
+ _numFailuresSinceLastStart = std::numeric_limits<int>::max();
+ ++count;
+ value = value == std::numeric_limits<unsigned int>::max()
+ ? millis
+ : static_cast<unsigned long>((value * .8) + (millis * .2));
+}
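hit() seeds the smoothed ping with the first sample (value starts at the unsigned max sentinel) and then folds each new sample in as an exponentially weighted moving average: 80% old value, 20% new sample. A standalone sketch of the arithmetic with hypothetical numbers:

    #include <iostream>

    int main() {
        unsigned long value = 40;  // previously smoothed ping, in millis
        int millis = 90;           // new round-trip sample, in millis
        // Same smoothing as PingStats::hit: 0.8 * old + 0.2 * new.
        value = static_cast<unsigned long>((value * .8) + (millis * .2));
        std::cout << value << std::endl;  // 40*0.8 + 90*0.2 = 50
        return 0;
    }
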
+
+void PingStats::miss() {
+ ++_numFailuresSinceLastStart;
+}
+
+TopologyCoordinatorImpl::TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs)
+ : _role(Role::follower),
+ _term(0),
+ _currentPrimaryIndex(-1),
+ _forceSyncSourceIndex(-1),
+ _maxSyncSourceLagSecs(maxSyncSourceLagSecs),
+ _selfIndex(-1),
+ _stepDownPending(false),
+ _maintenanceModeCalls(0),
+ _followerMode(MemberState::RS_STARTUP2) {
+ invariant(getMemberState() == MemberState::RS_STARTUP);
+}
+
+TopologyCoordinator::Role TopologyCoordinatorImpl::getRole() const {
+ return _role;
+}
+
+void TopologyCoordinatorImpl::setForceSyncSourceIndex(int index) {
+ invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
+ _forceSyncSourceIndex = index;
+}
+
+HostAndPort TopologyCoordinatorImpl::getSyncSourceAddress() const {
+ return _syncSource;
+}
+
+HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied) {
+ // If we are primary, then we aren't syncing from anyone (else).
+ if (_iAmPrimary()) {
+ return HostAndPort();
+ }
+
+ // If we are not a member of the current replica set configuration, no sync source is valid.
+ if (_selfIndex == -1) {
+ LOG(2) << "Cannot sync from any members because we are not in the replica set config";
+ return HostAndPort();
+ }
+
+ // if we have a target we've requested to sync from, use it
+ if (_forceSyncSourceIndex != -1) {
+ invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
+ _syncSource = _rsConfig.getMemberAt(_forceSyncSourceIndex).getHostAndPort();
+ _forceSyncSourceIndex = -1;
+ std::string msg(str::stream() << "syncing from: " << _syncSource.toString()
+ << " by request");
+ log() << msg << rsLog;
+ setMyHeartbeatMessage(now, msg);
+ return _syncSource;
}
- void PingStats::hit(int millis) {
- _numFailuresSinceLastStart = std::numeric_limits<int>::max();
- ++count;
- value = value == std::numeric_limits<unsigned int>::max() ? millis :
- static_cast<unsigned long>((value * .8) + (millis * .2));
- }
+ // wait for 2N pings (not counting ourselves) before choosing a sync target
+ int needMorePings = (_hbdata.size() - 1) * 2 - _getTotalPings();
- void PingStats::miss() {
- ++_numFailuresSinceLastStart;
+ if (needMorePings > 0) {
+ OCCASIONALLY log() << "waiting for " << needMorePings
+ << " pings from other members before syncing";
+ _syncSource = HostAndPort();
+ return _syncSource;
}
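For example, in a five-member set _hbdata.size() is 5, so the node waits for (5 - 1) * 2 = 8 successful pings from the other members; if only 5 pings have been recorded so far, needMorePings is 3 and no sync source is chosen on this pass.
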
- TopologyCoordinatorImpl::TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs) :
- _role(Role::follower),
- _term(0),
- _currentPrimaryIndex(-1),
- _forceSyncSourceIndex(-1),
- _maxSyncSourceLagSecs(maxSyncSourceLagSecs),
- _selfIndex(-1),
- _stepDownPending(false),
- _maintenanceModeCalls(0),
- _followerMode(MemberState::RS_STARTUP2)
- {
- invariant(getMemberState() == MemberState::RS_STARTUP);
+ // If we are only allowed to sync from the primary, set that
+ if (!_rsConfig.isChainingAllowed()) {
+ if (_currentPrimaryIndex == -1) {
+ LOG(1) << "Cannot select sync source because chaining is"
+ " not allowed and primary is unknown/down";
+ _syncSource = HostAndPort();
+ return _syncSource;
+ } else if (_memberIsBlacklisted(*_currentPrimaryMember(), now)) {
+ LOG(1) << "Cannot select sync source because chaining is"
+ "not allowed and primary is not currently accepting our updates";
+ _syncSource = HostAndPort();
+ return _syncSource;
+ } else {
+ _syncSource = _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort();
+ std::string msg(str::stream() << "syncing from primary: " << _syncSource.toString());
+ log() << msg << rsLog;
+ setMyHeartbeatMessage(now, msg);
+ return _syncSource;
+ }
}
- TopologyCoordinator::Role TopologyCoordinatorImpl::getRole() const {
- return _role;
- }
+ // find the member with the lowest ping time that is ahead of me
- void TopologyCoordinatorImpl::setForceSyncSourceIndex(int index) {
- invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
- _forceSyncSourceIndex = index;
+ // Find primary's oplog time. Reject sync candidates that are more than
+ // maxSyncSourceLagSecs seconds behind.
+ OpTime primaryOpTime;
+ if (_currentPrimaryIndex != -1) {
+ primaryOpTime = _hbdata[_currentPrimaryIndex].getOpTime();
+ } else {
+ // choose a time that will exclude no candidates, since we don't see a primary
+ primaryOpTime = OpTime(Timestamp(_maxSyncSourceLagSecs, 0), 0);
}
- HostAndPort TopologyCoordinatorImpl::getSyncSourceAddress() const {
- return _syncSource;
+ if (primaryOpTime.getSecs() < static_cast<unsigned int>(_maxSyncSourceLagSecs.count())) {
+ // erh - I think this means there was just a new election
+ // and we don't yet know the new primary's optime
+ primaryOpTime = OpTime(Timestamp(_maxSyncSourceLagSecs, 0), 0);
}
- HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now,
- const OpTime& lastOpApplied) {
- // If we are primary, then we aren't syncing from anyone (else).
- if (_iAmPrimary()) {
- return HostAndPort();
- }
+ OpTime oldestSyncOpTime(Timestamp(primaryOpTime.getSecs() - _maxSyncSourceLagSecs.count(), 0),
+ primaryOpTime.getTerm());
- // If we are not a member of the current replica set configuration, no sync source is valid.
- if (_selfIndex == -1) {
- LOG(2) << "Cannot sync from any members because we are not in the replica set config";
- return HostAndPort();
- }
+ int closestIndex = -1;
- // if we have a target we've requested to sync from, use it
- if (_forceSyncSourceIndex != -1) {
- invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
- _syncSource = _rsConfig.getMemberAt(_forceSyncSourceIndex).getHostAndPort();
- _forceSyncSourceIndex = -1;
- std::string msg(str::stream() << "syncing from: "
- << _syncSource.toString() << " by request");
- log() << msg << rsLog;
- setMyHeartbeatMessage(now, msg);
- return _syncSource;
- }
-
- // wait for 2N pings (not counting ourselves) before choosing a sync target
- int needMorePings = (_hbdata.size() - 1) * 2 - _getTotalPings();
-
- if (needMorePings > 0) {
- OCCASIONALLY log() << "waiting for " << needMorePings
- << " pings from other members before syncing";
- _syncSource = HostAndPort();
- return _syncSource;
- }
-
- // If we are only allowed to sync from the primary, set that
- if (!_rsConfig.isChainingAllowed()) {
- if (_currentPrimaryIndex == -1) {
- LOG(1) << "Cannot select sync source because chaining is"
- " not allowed and primary is unknown/down";
- _syncSource = HostAndPort();
- return _syncSource;
+ // Make two attempts. The first attempt, we ignore those nodes with
+ // slave delay higher than our own, hidden nodes, and nodes that are excessively lagged.
+ // The second attempt includes such nodes, in case those are the only ones we can reach.
+ // This loop attempts to set 'closestIndex'.
+ for (int attempts = 0; attempts < 2; ++attempts) {
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
+ it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ // Don't consider ourselves.
+ if (itIndex == _selfIndex) {
+ continue;
}
- else if (_memberIsBlacklisted(*_currentPrimaryMember(), now)) {
- LOG(1) << "Cannot select sync source because chaining is"
- "not allowed and primary is not currently accepting our updates";
- _syncSource = HostAndPort();
- return _syncSource;
+ // Candidate must be up to be considered.
+ if (!it->up()) {
+ continue;
}
- else {
- _syncSource = _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort();
- std::string msg(str::stream() << "syncing from primary: "
- << _syncSource.toString());
- log() << msg << rsLog;
- setMyHeartbeatMessage(now, msg);
- return _syncSource;
+ // Candidate must be PRIMARY or SECONDARY state to be considered.
+ if (!it->getState().readable()) {
+ continue;
}
- }
- // find the member with the lowest ping time that is ahead of me
+ const MemberConfig& itMemberConfig(_rsConfig.getMemberAt(itIndex));
- // Find primary's oplog time. Reject sync candidates that are more than
- // maxSyncSourceLagSecs seconds behind.
- OpTime primaryOpTime;
- if (_currentPrimaryIndex != -1) {
- primaryOpTime = _hbdata[_currentPrimaryIndex].getOpTime();
- }
- else {
- // choose a time that will exclude no candidates, since we don't see a primary
- primaryOpTime = OpTime(Timestamp(_maxSyncSourceLagSecs, 0), 0);
- }
-
- if (primaryOpTime.getSecs() <
- static_cast<unsigned int>(_maxSyncSourceLagSecs.count())) {
- // erh - I think this means there was just a new election
- // and we don't yet know the new primary's optime
- primaryOpTime = OpTime(Timestamp(_maxSyncSourceLagSecs, 0), 0);
- }
-
- OpTime oldestSyncOpTime(
- Timestamp(primaryOpTime.getSecs() - _maxSyncSourceLagSecs.count(), 0),
- primaryOpTime.getTerm());
-
- int closestIndex = -1;
-
- // Make two attempts. The first attempt, we ignore those nodes with
- // slave delay higher than our own, hidden nodes, and nodes that are excessively lagged.
- // The second attempt includes such nodes, in case those are the only ones we can reach.
- // This loop attempts to set 'closestIndex'.
- for (int attempts = 0; attempts < 2; ++attempts) {
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- // Don't consider ourselves.
- if (itIndex == _selfIndex) {
- continue;
- }
- // Candidate must be up to be considered.
- if (!it->up()) {
- continue;
- }
- // Candidate must be PRIMARY or SECONDARY state to be considered.
- if (!it->getState().readable()) {
- continue;
- }
-
- const MemberConfig& itMemberConfig(_rsConfig.getMemberAt(itIndex));
-
- // Candidate must build indexes if we build indexes, to be considered.
- if (_selfConfig().shouldBuildIndexes()) {
- if (!itMemberConfig.shouldBuildIndexes()) {
- continue;
- }
- }
-
- // only consider candidates that are ahead of where we are
- if (it->getOpTime() <= lastOpApplied) {
+ // Candidate must build indexes if we build indexes, to be considered.
+ if (_selfConfig().shouldBuildIndexes()) {
+ if (!itMemberConfig.shouldBuildIndexes()) {
continue;
}
+ }
- // omit candidates that are excessively behind, on the first attempt at least.
- if (attempts == 0 &&
- it->getOpTime() < oldestSyncOpTime) {
- continue;
- }
+ // only consider candidates that are ahead of where we are
+ if (it->getOpTime() <= lastOpApplied) {
+ continue;
+ }
- // omit nodes that are more latent than anything we've already considered
- if ((closestIndex != -1) &&
- (_getPing(itMemberConfig.getHostAndPort())
- > _getPing(_rsConfig.getMemberAt(closestIndex).getHostAndPort()))) {
- continue;
- }
+ // omit candidates that are excessively behind, on the first attempt at least.
+ if (attempts == 0 && it->getOpTime() < oldestSyncOpTime) {
+ continue;
+ }
- if (attempts == 0) {
- if (_selfConfig().getSlaveDelay() < itMemberConfig.getSlaveDelay()
- || itMemberConfig.isHidden()) {
- continue; // skip this one in the first attempt
- }
- }
+ // omit nodes that are more latent than anything we've already considered
+ if ((closestIndex != -1) &&
+ (_getPing(itMemberConfig.getHostAndPort()) >
+ _getPing(_rsConfig.getMemberAt(closestIndex).getHostAndPort()))) {
+ continue;
+ }
- if (_memberIsBlacklisted(itMemberConfig, now)) {
- continue;
+ if (attempts == 0) {
+ if (_selfConfig().getSlaveDelay() < itMemberConfig.getSlaveDelay() ||
+ itMemberConfig.isHidden()) {
+ continue; // skip this one in the first attempt
}
-
- // This candidate has passed all tests; set 'closestIndex'
- closestIndex = itIndex;
}
- if (closestIndex != -1) break; // no need for second attempt
- }
- if (closestIndex == -1) {
- // Did not find any members to sync from
- std::string msg("could not find member to sync from");
- // Only log when we had a valid sync source before
- if (!_syncSource.empty()) {
- log() << msg << rsLog;
+ if (_memberIsBlacklisted(itMemberConfig, now)) {
+ continue;
}
- setMyHeartbeatMessage(now, msg);
- _syncSource = HostAndPort();
- return _syncSource;
+ // This candidate has passed all tests; set 'closestIndex'
+ closestIndex = itIndex;
}
- _syncSource = _rsConfig.getMemberAt(closestIndex).getHostAndPort();
- std::string msg(str::stream() << "syncing from: " << _syncSource.toString(), 0);
- log() << msg << rsLog;
- setMyHeartbeatMessage(now, msg);
- return _syncSource;
+ if (closestIndex != -1)
+ break; // no need for second attempt
}
- bool TopologyCoordinatorImpl::_memberIsBlacklisted(const MemberConfig& memberConfig,
- Date_t now) const {
- std::map<HostAndPort,Date_t>::const_iterator blacklisted =
- _syncSourceBlacklist.find(memberConfig.getHostAndPort());
- if (blacklisted != _syncSourceBlacklist.end()) {
- if (blacklisted->second > now) {
- return true;
- }
+ if (closestIndex == -1) {
+ // Did not find any members to sync from
+ std::string msg("could not find member to sync from");
+ // Only log when we had a valid sync source before
+ if (!_syncSource.empty()) {
+ log() << msg << rsLog;
}
- return false;
- }
+ setMyHeartbeatMessage(now, msg);
- void TopologyCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
- LOG(2) << "blacklisting " << host << " until " << until.toString();
- _syncSourceBlacklist[host] = until;
+ _syncSource = HostAndPort();
+ return _syncSource;
}
-
- void TopologyCoordinatorImpl::unblacklistSyncSource(const HostAndPort& host, Date_t now) {
- std::map<HostAndPort, Date_t>::iterator hostItr = _syncSourceBlacklist.find(host);
- if (hostItr != _syncSourceBlacklist.end() && now >= hostItr->second) {
- LOG(2) << "unblacklisting " << host;
- _syncSourceBlacklist.erase(hostItr);
+ _syncSource = _rsConfig.getMemberAt(closestIndex).getHostAndPort();
+    std::string msg(str::stream() << "syncing from: " << _syncSource.toString());
+ log() << msg << rsLog;
+ setMyHeartbeatMessage(now, msg);
+ return _syncSource;
+}
+
+bool TopologyCoordinatorImpl::_memberIsBlacklisted(const MemberConfig& memberConfig,
+ Date_t now) const {
+ std::map<HostAndPort, Date_t>::const_iterator blacklisted =
+ _syncSourceBlacklist.find(memberConfig.getHostAndPort());
+ if (blacklisted != _syncSourceBlacklist.end()) {
+ if (blacklisted->second > now) {
+ return true;
}
}
+ return false;
+}
- void TopologyCoordinatorImpl::clearSyncSourceBlacklist() {
- _syncSourceBlacklist.clear();
- }
-
- void TopologyCoordinatorImpl::prepareSyncFromResponse(
- const ReplicationExecutor::CallbackArgs& data,
- const HostAndPort& target,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
- if (data.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- response->append("syncFromRequested", target.toString());
-
- if (_selfIndex == -1) {
- *result = Status(ErrorCodes::NotSecondary,
- "Removed and uninitialized nodes do not sync");
- return;
- }
-
- const MemberConfig& selfConfig = _selfConfig();
- if (selfConfig.isArbiter()) {
- *result = Status(ErrorCodes::NotSecondary, "arbiters don't sync");
- return;
- }
- if (_selfIndex == _currentPrimaryIndex) {
- *result = Status(ErrorCodes::NotSecondary, "primaries don't sync");
- return;
- }
+void TopologyCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
+ LOG(2) << "blacklisting " << host << " until " << until.toString();
+ _syncSourceBlacklist[host] = until;
+}
- ReplicaSetConfig::MemberIterator targetConfig = _rsConfig.membersEnd();
- int targetIndex = 0;
- for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
- it != _rsConfig.membersEnd(); ++it) {
- if (it->getHostAndPort() == target) {
- targetConfig = it;
- break;
- }
- ++targetIndex;
- }
- if (targetConfig == _rsConfig.membersEnd()) {
- *result = Status(ErrorCodes::NodeNotFound,
- str::stream() << "Could not find member \"" << target.toString() <<
- "\" in replica set");
- return;
- }
- if (targetIndex == _selfIndex) {
- *result = Status(ErrorCodes::InvalidOptions, "I cannot sync from myself");
- return;
- }
- if (targetConfig->isArbiter()) {
- *result = Status(ErrorCodes::InvalidOptions,
- str::stream() << "Cannot sync from \"" << target.toString() <<
- "\" because it is an arbiter");
- return;
- }
- if (!targetConfig->shouldBuildIndexes() && selfConfig.shouldBuildIndexes()) {
- *result = Status(ErrorCodes::InvalidOptions,
- str::stream() << "Cannot sync from \"" << target.toString() <<
- "\" because it does not build indexes");
- return;
- }
-
- const MemberHeartbeatData& hbdata = _hbdata[targetIndex];
- if (hbdata.hasAuthIssue()) {
- *result = Status(ErrorCodes::Unauthorized,
- str::stream() << "not authorized to communicate with " <<
- target.toString());
- return;
- }
- if (hbdata.getHealth() == 0) {
- *result = Status(ErrorCodes::HostUnreachable,
- str::stream() << "I cannot reach the requested member: " <<
- target.toString());
- return;
- }
- if (hbdata.getOpTime().getSecs()+10 < lastOpApplied.getSecs()) {
- warning() << "attempting to sync from " << target
- << ", but its latest opTime is " << hbdata.getOpTime().getSecs()
- << " and ours is " << lastOpApplied.getSecs() << " so this may not work";
- response->append("warning",
- str::stream() << "requested member \"" << target.toString() <<
- "\" is more than 10 seconds behind us");
- // not returning bad Status, just warning
- }
-
- HostAndPort prevSyncSource = getSyncSourceAddress();
- if (!prevSyncSource.empty()) {
- response->append("prevSyncTarget", prevSyncSource.toString());
- }
-
- setForceSyncSourceIndex(targetIndex);
- *result = Status::OK();
+void TopologyCoordinatorImpl::unblacklistSyncSource(const HostAndPort& host, Date_t now) {
+ std::map<HostAndPort, Date_t>::iterator hostItr = _syncSourceBlacklist.find(host);
+ if (hostItr != _syncSourceBlacklist.end() && now >= hostItr->second) {
+ LOG(2) << "unblacklisting " << host;
+ _syncSourceBlacklist.erase(hostItr);
}
+}
- void TopologyCoordinatorImpl::prepareFreshResponse(
- const ReplicationCoordinator::ReplSetFreshArgs& args,
- const Date_t now,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
-
- if (_selfIndex == -1) {
- *result = Status(ErrorCodes::ReplicaSetNotFound,
- "Cannot participate in elections because not initialized");
- return;
- }
+void TopologyCoordinatorImpl::clearSyncSourceBlacklist() {
+ _syncSourceBlacklist.clear();
+}
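A sketch of the blacklist life cycle these methods implement; the host variable and times are hypothetical:

    // Refuse "host" as a sync source until the given deadline.
    topoCoord->blacklistSyncSource(host, now + Seconds(60));
    // Until then, _memberIsBlacklisted() is true for that member and
    // chooseNewSyncSource() will skip it.
    topoCoord->unblacklistSyncSource(host, later);  // erases only if later >= deadline
    topoCoord->clearSyncSourceBlacklist();          // or drop every entry at once
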
- if (args.setName != _rsConfig.getReplSetName()) {
- *result = Status(ErrorCodes::ReplicaSetNotFound,
- str::stream() << "Wrong repl set name. Expected: " <<
- _rsConfig.getReplSetName() <<
- ", received: " << args.setName);
- return;
- }
+void TopologyCoordinatorImpl::prepareSyncFromResponse(const ReplicationExecutor::CallbackArgs& data,
+ const HostAndPort& target,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (data.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
- if (args.id == static_cast<unsigned>(_selfConfig().getId())) {
- *result = Status(ErrorCodes::BadValue,
- str::stream() << "Received replSetFresh command from member with the "
- "same member ID as ourself: " << args.id);
- return;
- }
+ response->append("syncFromRequested", target.toString());
- bool weAreFresher = false;
- if( _rsConfig.getConfigVersion() > args.cfgver ) {
- log() << "replSet member " << args.who << " is not yet aware its cfg version "
- << args.cfgver << " is stale";
- response->append("info", "config version stale");
- weAreFresher = true;
- }
- // check not only our own optime, but any other member we can reach
- else if (OpTime(args.opTime, _term) < _latestKnownOpTime(lastOpApplied)) {
- weAreFresher = true;
- }
- response->appendDate("opTime",
- Date_t::fromMillisSinceEpoch(lastOpApplied.getTimestamp().asLL()));
- response->append("fresher", weAreFresher);
-
- std::string errmsg;
- bool doVeto = _shouldVetoMember(args, now, lastOpApplied, &errmsg);
- response->append("veto", doVeto);
- if (doVeto) {
- response->append("errmsg", errmsg);
- }
- *result = Status::OK();
+ if (_selfIndex == -1) {
+ *result = Status(ErrorCodes::NotSecondary, "Removed and uninitialized nodes do not sync");
+ return;
}
- bool TopologyCoordinatorImpl::_shouldVetoMember(
- const ReplicationCoordinator::ReplSetFreshArgs& args,
- const Date_t& now,
- const OpTime& lastOpApplied,
- std::string* errmsg) const {
+ const MemberConfig& selfConfig = _selfConfig();
+ if (selfConfig.isArbiter()) {
+ *result = Status(ErrorCodes::NotSecondary, "arbiters don't sync");
+ return;
+ }
+ if (_selfIndex == _currentPrimaryIndex) {
+ *result = Status(ErrorCodes::NotSecondary, "primaries don't sync");
+ return;
+ }
- if (_rsConfig.getConfigVersion() < args.cfgver) {
- // We are stale; do not veto.
- return false;
+ ReplicaSetConfig::MemberIterator targetConfig = _rsConfig.membersEnd();
+ int targetIndex = 0;
+ for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
+ it != _rsConfig.membersEnd();
+ ++it) {
+ if (it->getHostAndPort() == target) {
+ targetConfig = it;
+ break;
}
+ ++targetIndex;
+ }
+ if (targetConfig == _rsConfig.membersEnd()) {
+ *result = Status(ErrorCodes::NodeNotFound,
+ str::stream() << "Could not find member \"" << target.toString()
+ << "\" in replica set");
+ return;
+ }
+ if (targetIndex == _selfIndex) {
+ *result = Status(ErrorCodes::InvalidOptions, "I cannot sync from myself");
+ return;
+ }
+ if (targetConfig->isArbiter()) {
+ *result = Status(ErrorCodes::InvalidOptions,
+ str::stream() << "Cannot sync from \"" << target.toString()
+ << "\" because it is an arbiter");
+ return;
+ }
+ if (!targetConfig->shouldBuildIndexes() && selfConfig.shouldBuildIndexes()) {
+ *result = Status(ErrorCodes::InvalidOptions,
+ str::stream() << "Cannot sync from \"" << target.toString()
+ << "\" because it does not build indexes");
+ return;
+ }
+
+ const MemberHeartbeatData& hbdata = _hbdata[targetIndex];
+ if (hbdata.hasAuthIssue()) {
+ *result =
+ Status(ErrorCodes::Unauthorized,
+ str::stream() << "not authorized to communicate with " << target.toString());
+ return;
+ }
+ if (hbdata.getHealth() == 0) {
+ *result =
+ Status(ErrorCodes::HostUnreachable,
+ str::stream() << "I cannot reach the requested member: " << target.toString());
+ return;
+ }
+ if (hbdata.getOpTime().getSecs() + 10 < lastOpApplied.getSecs()) {
+ warning() << "attempting to sync from " << target << ", but its latest opTime is "
+ << hbdata.getOpTime().getSecs() << " and ours is " << lastOpApplied.getSecs()
+ << " so this may not work";
+ response->append("warning",
+ str::stream() << "requested member \"" << target.toString()
+ << "\" is more than 10 seconds behind us");
+ // not returning bad Status, just warning
+ }
+
+ HostAndPort prevSyncSource = getSyncSourceAddress();
+ if (!prevSyncSource.empty()) {
+ response->append("prevSyncTarget", prevSyncSource.toString());
+ }
+
+ setForceSyncSourceIndex(targetIndex);
+ *result = Status::OK();
+}
+
+void TopologyCoordinatorImpl::prepareFreshResponse(
+ const ReplicationCoordinator::ReplSetFreshArgs& args,
+ const Date_t now,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (_selfIndex == -1) {
+ *result = Status(ErrorCodes::ReplicaSetNotFound,
+ "Cannot participate in elections because not initialized");
+ return;
+ }
+
+ if (args.setName != _rsConfig.getReplSetName()) {
+ *result =
+ Status(ErrorCodes::ReplicaSetNotFound,
+ str::stream() << "Wrong repl set name. Expected: " << _rsConfig.getReplSetName()
+ << ", received: " << args.setName);
+ return;
+ }
+
+ if (args.id == static_cast<unsigned>(_selfConfig().getId())) {
+ *result = Status(ErrorCodes::BadValue,
+ str::stream() << "Received replSetFresh command from member with the "
+ "same member ID as ourself: " << args.id);
+ return;
+ }
+
+ bool weAreFresher = false;
+ if (_rsConfig.getConfigVersion() > args.cfgver) {
+ log() << "replSet member " << args.who << " is not yet aware its cfg version "
+ << args.cfgver << " is stale";
+ response->append("info", "config version stale");
+ weAreFresher = true;
+ }
+ // check not only our own optime, but any other member we can reach
+ else if (OpTime(args.opTime, _term) < _latestKnownOpTime(lastOpApplied)) {
+ weAreFresher = true;
+ }
+ response->appendDate("opTime",
+ Date_t::fromMillisSinceEpoch(lastOpApplied.getTimestamp().asLL()));
+ response->append("fresher", weAreFresher);
+
+ std::string errmsg;
+ bool doVeto = _shouldVetoMember(args, now, lastOpApplied, &errmsg);
+ response->append("veto", doVeto);
+ if (doVeto) {
+ response->append("errmsg", errmsg);
+ }
+ *result = Status::OK();
+}
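Sketch of a reply this method can assemble for a caller whose config version is stale; field values are hypothetical:

    // {
    //     info: "config version stale",
    //     opTime: <our last applied optime, as a Date>,
    //     fresher: true,
    //     veto: true,
    //     errmsg: "I don't think <host> is electable because the <reason>"
    // }
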
+
+bool TopologyCoordinatorImpl::_shouldVetoMember(
+ const ReplicationCoordinator::ReplSetFreshArgs& args,
+ const Date_t& now,
+ const OpTime& lastOpApplied,
+ std::string* errmsg) const {
+ if (_rsConfig.getConfigVersion() < args.cfgver) {
+ // We are stale; do not veto.
+ return false;
+ }
- const unsigned int memberID = args.id;
- const int hopefulIndex = _getMemberIndex(memberID);
- invariant(hopefulIndex != _selfIndex);
- const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
+ const unsigned int memberID = args.id;
+ const int hopefulIndex = _getMemberIndex(memberID);
+ invariant(hopefulIndex != _selfIndex);
+ const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
- if (hopefulIndex == -1) {
- *errmsg = str::stream() << "replSet couldn't find member with id " << memberID;
- return true;
- }
+ if (hopefulIndex == -1) {
+ *errmsg = str::stream() << "replSet couldn't find member with id " << memberID;
+ return true;
+ }
- if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getOpTime()) {
- // hbinfo is not updated for ourself, so if we are primary we have to check the
- // primary's last optime separately
- *errmsg = str::stream() << "I am already primary, " <<
- _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString() <<
- " can try again once I've stepped down";
- return true;
- }
+ if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getOpTime()) {
+ // hbinfo is not updated for ourself, so if we are primary we have to check the
+ // primary's last optime separately
+ *errmsg = str::stream() << "I am already primary, "
+ << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
+ << " can try again once I've stepped down";
+ return true;
+ }
- if (_currentPrimaryIndex != -1 &&
- (hopefulIndex != _currentPrimaryIndex) &&
- (_hbdata[_currentPrimaryIndex].getOpTime() >=
- _hbdata[hopefulIndex].getOpTime())) {
- // other members might be aware of more up-to-date nodes
- *errmsg = str::stream() <<
- _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString() <<
- " is trying to elect itself but " <<
- _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort().toString() <<
- " is already primary and more up-to-date";
- return true;
- }
+ if (_currentPrimaryIndex != -1 && (hopefulIndex != _currentPrimaryIndex) &&
+ (_hbdata[_currentPrimaryIndex].getOpTime() >= _hbdata[hopefulIndex].getOpTime())) {
+ // other members might be aware of more up-to-date nodes
+ *errmsg =
+ str::stream() << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
+ << " is trying to elect itself but "
+ << _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort().toString()
+ << " is already primary and more up-to-date";
+ return true;
+ }
- if ((highestPriorityIndex != -1)) {
- const MemberConfig& hopefulMember = _rsConfig.getMemberAt(hopefulIndex);
- const MemberConfig& priorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
-
- if (priorityMember.getPriority() > hopefulMember.getPriority()) {
- *errmsg = str::stream()
- << hopefulMember.getHostAndPort().toString()
- << " has lower priority of " << hopefulMember.getPriority() << " than "
- << priorityMember.getHostAndPort().toString()
- << " which has a priority of " << priorityMember.getPriority();
- return true;
- }
- }
+ if ((highestPriorityIndex != -1)) {
+ const MemberConfig& hopefulMember = _rsConfig.getMemberAt(hopefulIndex);
+ const MemberConfig& priorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
- UnelectableReasonMask reason = _getUnelectableReason(hopefulIndex, lastOpApplied);
- reason &= ~RefusesToStand;
- if (reason) {
- *errmsg = str::stream()
- << "I don't think "
- << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
- << " is electable because the " << _getUnelectableReasonString(reason);
+ if (priorityMember.getPriority() > hopefulMember.getPriority()) {
+ *errmsg = str::stream() << hopefulMember.getHostAndPort().toString()
+ << " has lower priority of " << hopefulMember.getPriority()
+ << " than " << priorityMember.getHostAndPort().toString()
+ << " which has a priority of " << priorityMember.getPriority();
return true;
}
-
- return false;
}
- // produce a reply to a received electCmd
- void TopologyCoordinatorImpl::prepareElectResponse(
- const ReplicationCoordinator::ReplSetElectArgs& args,
- const Date_t now,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
-
- if (_selfIndex == -1) {
- *result = Status(ErrorCodes::ReplicaSetNotFound,
- "Cannot participate in election because not initialized");
- return;
- }
-
- const long long myver = _rsConfig.getConfigVersion();
- const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
-
- const MemberConfig* primary = _currentPrimaryMember();
- const MemberConfig* hopeful = _rsConfig.findMemberByID(args.whoid);
- const MemberConfig* highestPriority = highestPriorityIndex == -1 ? NULL :
- &_rsConfig.getMemberAt(highestPriorityIndex);
-
- int vote = 0;
- if (args.set != _rsConfig.getReplSetName()) {
- log() << "replSet error received an elect request for '" << args.set
- << "' but our set name is '" <<
- _rsConfig.getReplSetName() << "'";
- }
- else if ( myver < args.cfgver ) {
- // we are stale. don't vote
- log() << "replSetElect not voting because our config version is stale. Our version: " <<
- myver << ", their version: " << args.cfgver;
- }
- else if ( myver > args.cfgver ) {
- // they are stale!
- log() << "replSetElect command received stale config version # during election. "
- "Our version: " << myver << ", their version: " << args.cfgver;
- vote = -10000;
- }
- else if (!hopeful) {
- log() << "replSetElect couldn't find member with id " << args.whoid;
- vote = -10000;
- }
- else if (_iAmPrimary()) {
- log() << "I am already primary, " << hopeful->getHostAndPort().toString()
- << " can try again once I've stepped down";
- vote = -10000;
- }
- else if (primary) {
- log() << hopeful->getHostAndPort().toString() << " is trying to elect itself but "
- << primary->getHostAndPort().toString() << " is already primary";
- vote = -10000;
- }
- else if (highestPriority && highestPriority->getPriority() > hopeful->getPriority()) {
- // TODO(spencer): What if the lower-priority member is more up-to-date?
- log() << hopeful->getHostAndPort().toString() << " has lower priority than "
- << highestPriority->getHostAndPort().toString();
- vote = -10000;
- }
- else if (_voteLease.when + VoteLease::leaseTime >= now &&
- _voteLease.whoId != args.whoid) {
- log() << "replSet voting no for "
- << hopeful->getHostAndPort().toString()
- << "; voted for " << _voteLease.whoHostAndPort.toString() << ' '
- << durationCount<Seconds>(now - _voteLease.when) << " secs ago";
- }
- else {
- _voteLease.when = now;
- _voteLease.whoId = args.whoid;
- _voteLease.whoHostAndPort = hopeful->getHostAndPort();
- vote = _selfConfig().getNumVotes();
- invariant(hopeful->getId() == args.whoid);
- if (vote > 0) {
- log() << "replSetElect voting yea for " << hopeful->getHostAndPort().toString()
- << " (" << args.whoid << ')';
- }
- }
-
- response->append("vote", vote);
- response->append("round", args.round);
- *result = Status::OK();
+ UnelectableReasonMask reason = _getUnelectableReason(hopefulIndex, lastOpApplied);
+ reason &= ~RefusesToStand;
+ if (reason) {
+ *errmsg = str::stream() << "I don't think "
+ << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
+ << " is electable because the "
+ << _getUnelectableReasonString(reason);
+ return true;
}
- // produce a reply to a heartbeat
- Status TopologyCoordinatorImpl::prepareHeartbeatResponse(
- Date_t now,
- const ReplSetHeartbeatArgs& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response) {
-
- if (args.getProtocolVersion() != 1) {
+ return false;
+}
+
+// produce a reply to a received electCmd
+void TopologyCoordinatorImpl::prepareElectResponse(
+ const ReplicationCoordinator::ReplSetElectArgs& args,
+ const Date_t now,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (_selfIndex == -1) {
+ *result = Status(ErrorCodes::ReplicaSetNotFound,
+ "Cannot participate in election because not initialized");
+ return;
+ }
+
+ const long long myver = _rsConfig.getConfigVersion();
+ const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
+
+ const MemberConfig* primary = _currentPrimaryMember();
+ const MemberConfig* hopeful = _rsConfig.findMemberByID(args.whoid);
+ const MemberConfig* highestPriority =
+ highestPriorityIndex == -1 ? NULL : &_rsConfig.getMemberAt(highestPriorityIndex);
+
+ int vote = 0;
+ if (args.set != _rsConfig.getReplSetName()) {
+ log() << "replSet error received an elect request for '" << args.set
+ << "' but our set name is '" << _rsConfig.getReplSetName() << "'";
+ } else if (myver < args.cfgver) {
+ // we are stale. don't vote
+ log() << "replSetElect not voting because our config version is stale. Our version: "
+ << myver << ", their version: " << args.cfgver;
+ } else if (myver > args.cfgver) {
+ // they are stale!
+ log() << "replSetElect command received stale config version # during election. "
+ "Our version: " << myver << ", their version: " << args.cfgver;
+ vote = -10000;
+ } else if (!hopeful) {
+ log() << "replSetElect couldn't find member with id " << args.whoid;
+ vote = -10000;
+ } else if (_iAmPrimary()) {
+ log() << "I am already primary, " << hopeful->getHostAndPort().toString()
+ << " can try again once I've stepped down";
+ vote = -10000;
+ } else if (primary) {
+ log() << hopeful->getHostAndPort().toString() << " is trying to elect itself but "
+ << primary->getHostAndPort().toString() << " is already primary";
+ vote = -10000;
+ } else if (highestPriority && highestPriority->getPriority() > hopeful->getPriority()) {
+ // TODO(spencer): What if the lower-priority member is more up-to-date?
+ log() << hopeful->getHostAndPort().toString() << " has lower priority than "
+ << highestPriority->getHostAndPort().toString();
+ vote = -10000;
+ } else if (_voteLease.when + VoteLease::leaseTime >= now && _voteLease.whoId != args.whoid) {
+ log() << "replSet voting no for " << hopeful->getHostAndPort().toString() << "; voted for "
+ << _voteLease.whoHostAndPort.toString() << ' '
+ << durationCount<Seconds>(now - _voteLease.when) << " secs ago";
+ } else {
+ _voteLease.when = now;
+ _voteLease.whoId = args.whoid;
+ _voteLease.whoHostAndPort = hopeful->getHostAndPort();
+ vote = _selfConfig().getNumVotes();
+ invariant(hopeful->getId() == args.whoid);
+ if (vote > 0) {
+ log() << "replSetElect voting yea for " << hopeful->getHostAndPort().toString() << " ("
+ << args.whoid << ')';
+ }
+ }
+
+ response->append("vote", vote);
+ response->append("round", args.round);
+ *result = Status::OK();
+}
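The vote lease above grants each yea vote 30 seconds of exclusivity (VoteLease::leaseTime). A worked timeline with hypothetical times and member ids:

    // t =  0s: vote yea for member 3 -> _voteLease = {when: 0s, whoId: 3}
    // t = 20s: member 5 asks for a vote: 0s + 30s >= 20s and whoId != 5,
    //          so the node logs "replSet voting no for ..." and vote stays 0.
    // t = 31s: the lease has lapsed (0s + 30s < 31s), so a yea vote for
    //          member 5 is allowed again.
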
+
+// produce a reply to a heartbeat
+Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now,
+ const ReplSetHeartbeatArgs& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response) {
+ if (args.getProtocolVersion() != 1) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "replset: incompatible replset protocol version: "
+ << args.getProtocolVersion());
+ }
+
+ // Verify that replica set names match
+ const std::string rshb = args.getSetName();
+ if (ourSetName != rshb) {
+ log() << "replSet set names do not match, ours: " << ourSetName
+ << "; remote node's: " << rshb;
+ response->noteMismatched();
+ return Status(ErrorCodes::InconsistentReplicaSetNames,
+ str::stream() << "Our set name of " << ourSetName << " does not match name "
+ << rshb << " reported by remote node");
+ }
+
+ const MemberState myState = getMemberState();
+ if (_selfIndex == -1) {
+ if (myState.removed()) {
+ return Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Our replica set configuration is invalid or does not include us");
+ }
+ } else {
+ invariant(_rsConfig.getReplSetName() == args.getSetName());
+ if (args.getSenderId() == _selfConfig().getId()) {
return Status(ErrorCodes::BadValue,
- str::stream() << "replset: incompatible replset protocol version: "
- << args.getProtocolVersion());
- }
-
- // Verify that replica set names match
- const std::string rshb = args.getSetName();
- if (ourSetName != rshb) {
- log() << "replSet set names do not match, ours: " << ourSetName <<
- "; remote node's: " << rshb;
- response->noteMismatched();
- return Status(ErrorCodes::InconsistentReplicaSetNames, str::stream() <<
- "Our set name of " << ourSetName << " does not match name " << rshb <<
- " reported by remote node");
- }
-
- const MemberState myState = getMemberState();
- if (_selfIndex == -1) {
- if (myState.removed()) {
- return Status(ErrorCodes::InvalidReplicaSetConfig,
- "Our replica set configuration is invalid or does not include us");
- }
+ str::stream() << "Received heartbeat from member with the same "
+ "member ID as ourself: " << args.getSenderId());
}
- else {
- invariant(_rsConfig.getReplSetName() == args.getSetName());
- if (args.getSenderId() == _selfConfig().getId()) {
- return Status(ErrorCodes::BadValue,
- str::stream() << "Received heartbeat from member with the same "
- "member ID as ourself: " << args.getSenderId());
- }
- }
-
- // This is a replica set
- response->noteReplSet();
-
- response->setSetName(ourSetName);
- response->setState(myState.s);
- if (myState.primary()) {
- response->setElectionTime(_electionTime);
- }
-
- // Are we electable
- response->setElectable(!_getMyUnelectableReason(now, lastOpApplied));
+ }
- // Heartbeat status message
- response->setHbMsg(_getHbmsg(now));
- response->setTime(duration_cast<Seconds>(now - Date_t{}));
- response->setOpTime(lastOpApplied);
+ // This is a replica set
+ response->noteReplSet();
- if (!_syncSource.empty()) {
- response->setSyncingTo(_syncSource);
- }
-
- if (!_rsConfig.isInitialized()) {
- response->setConfigVersion(-2);
- return Status::OK();
- }
+ response->setSetName(ourSetName);
+ response->setState(myState.s);
+ if (myState.primary()) {
+ response->setElectionTime(_electionTime);
+ }
- const long long v = _rsConfig.getConfigVersion();
- response->setConfigVersion(v);
- // Deliver new config if caller's version is older than ours
- if (v > args.getConfigVersion()) {
- response->setConfig(_rsConfig);
- }
+ // Are we electable
+ response->setElectable(!_getMyUnelectableReason(now, lastOpApplied));
- // Resolve the caller's id in our Member list
- int from = -1;
- if (v == args.getConfigVersion() && args.getSenderId() != -1) {
- from = _getMemberIndex(args.getSenderId());
- }
- if (from == -1) {
- // Can't find the member, so we leave out the stateDisagreement field
- return Status::OK();
- }
- invariant(from != _selfIndex);
+ // Heartbeat status message
+ response->setHbMsg(_getHbmsg(now));
+ response->setTime(duration_cast<Seconds>(now - Date_t{}));
+ response->setOpTime(lastOpApplied);
- // if we thought that this node is down, let it know
- if (!_hbdata[from].up()) {
- response->noteStateDisagreement();
- }
+ if (!_syncSource.empty()) {
+ response->setSyncingTo(_syncSource);
+ }
- // note that we got a heartbeat from this node
- _hbdata[from].setLastHeartbeatRecv(now);
+ if (!_rsConfig.isInitialized()) {
+ response->setConfigVersion(-2);
return Status::OK();
}
- Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(
- Date_t now,
- const ReplSetHeartbeatArgsV1& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response) {
-
- // Verify that replica set names match
- const std::string rshb = args.getSetName();
- if (ourSetName != rshb) {
- log() << "replSet set names do not match, ours: " << ourSetName <<
- "; remote node's: " << rshb;
- return Status(ErrorCodes::InconsistentReplicaSetNames, str::stream() <<
- "Our set name of " << ourSetName << " does not match name " << rshb <<
- " reported by remote node");
- }
+ const long long v = _rsConfig.getConfigVersion();
+ response->setConfigVersion(v);
+ // Deliver new config if caller's version is older than ours
+ if (v > args.getConfigVersion()) {
+ response->setConfig(_rsConfig);
+ }
- const MemberState myState = getMemberState();
- if (_selfIndex == -1) {
- if (myState.removed()) {
- return Status(ErrorCodes::InvalidReplicaSetConfig,
- "Our replica set configuration is invalid or does not include us");
- }
- }
- else {
- if (args.getSenderId() == _selfConfig().getId()) {
- return Status(ErrorCodes::BadValue,
- str::stream() << "Received heartbeat from member with the same "
- "member ID as ourself: " << args.getSenderId());
- }
+ // Resolve the caller's id in our Member list
+ int from = -1;
+ if (v == args.getConfigVersion() && args.getSenderId() != -1) {
+ from = _getMemberIndex(args.getSenderId());
+ }
+ if (from == -1) {
+ // Can't find the member, so we leave out the stateDisagreement field
+ return Status::OK();
+ }
+ invariant(from != _selfIndex);
+
+ // if we thought that this node is down, let it know
+ if (!_hbdata[from].up()) {
+ response->noteStateDisagreement();
+ }
+
+ // note that we got a heartbeat from this node
+ _hbdata[from].setLastHeartbeatRecv(now);
+ return Status::OK();
+}
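For example (versions hypothetical): if the caller sent configVersion 5 and ours is 7, the response reports version 7 and includes the full config so the caller can reconfigure; sender resolution and the stateDisagreement/lastHeartbeatRecv bookkeeping above only happen when the versions match. An uninitialized node short-circuits with the sentinel version -2 and no config.
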
+
+Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now,
+ const ReplSetHeartbeatArgsV1& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response) {
+ // Verify that replica set names match
+ const std::string rshb = args.getSetName();
+ if (ourSetName != rshb) {
+ log() << "replSet set names do not match, ours: " << ourSetName
+ << "; remote node's: " << rshb;
+ return Status(ErrorCodes::InconsistentReplicaSetNames,
+ str::stream() << "Our set name of " << ourSetName << " does not match name "
+ << rshb << " reported by remote node");
+ }
+
+ const MemberState myState = getMemberState();
+ if (_selfIndex == -1) {
+ if (myState.removed()) {
+ return Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Our replica set configuration is invalid or does not include us");
+ }
+ } else {
+ if (args.getSenderId() == _selfConfig().getId()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Received heartbeat from member with the same "
+ "member ID as ourself: " << args.getSenderId());
}
+ }
- response->setSetName(ourSetName);
-
- response->setState(myState.s);
-
- response->setOpTime(lastOpApplied);
+ response->setSetName(ourSetName);
- if (_currentPrimaryIndex != -1) {
- response->setPrimaryId(_rsConfig.getMemberAt(_currentPrimaryIndex).getId());
- }
+ response->setState(myState.s);
- response->setTerm(_term);
+ response->setOpTime(lastOpApplied);
- if (!_syncSource.empty()) {
- response->setSyncingTo(_syncSource);
- }
-
- if (!_rsConfig.isInitialized()) {
- response->setConfigVersion(-2);
- return Status::OK();
- }
+ if (_currentPrimaryIndex != -1) {
+ response->setPrimaryId(_rsConfig.getMemberAt(_currentPrimaryIndex).getId());
+ }
- const long long v = _rsConfig.getConfigVersion();
- response->setConfigVersion(v);
- // Deliver new config if caller's version is older than ours
- if (v > args.getConfigVersion()) {
- response->setConfig(_rsConfig);
- }
+ response->setTerm(_term);
- // Resolve the caller's id in our Member list
- int from = -1;
- if (v == args.getConfigVersion() && args.getSenderId() != -1) {
- from = _getMemberIndex(args.getSenderId());
- }
- if (from == -1) {
- return Status::OK();
- }
- invariant(from != _selfIndex);
+ if (!_syncSource.empty()) {
+ response->setSyncingTo(_syncSource);
+ }
- // note that we got a heartbeat from this node
- _hbdata[from].setLastHeartbeatRecv(now);
+ if (!_rsConfig.isInitialized()) {
+ response->setConfigVersion(-2);
return Status::OK();
}
- int TopologyCoordinatorImpl::_getMemberIndex(int id) const {
- int index = 0;
- for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
- it != _rsConfig.membersEnd();
- ++it, ++index) {
- if (it->getId() == id) {
- return index;
- }
- }
- return -1;
+ const long long v = _rsConfig.getConfigVersion();
+ response->setConfigVersion(v);
+ // Deliver new config if caller's version is older than ours
+ if (v > args.getConfigVersion()) {
+ response->setConfig(_rsConfig);
}
- std::pair<ReplSetHeartbeatArgs, Milliseconds> TopologyCoordinatorImpl::prepareHeartbeatRequest(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target) {
-
- PingStats& hbStats = _pings[target];
- Milliseconds alreadyElapsed = now - hbStats.getLastHeartbeatStartDate();
- if (!_rsConfig.isInitialized() ||
- (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries) ||
- (alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
-
- // This is either the first request ever for "target", or the heartbeat timeout has
- // passed, so we're starting a "new" heartbeat.
- hbStats.start(now);
- alreadyElapsed = Milliseconds(0);
- }
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setProtocolVersion(1);
- hbArgs.setCheckEmpty(false);
- if (_rsConfig.isInitialized()) {
- hbArgs.setSetName(_rsConfig.getReplSetName());
- hbArgs.setConfigVersion(_rsConfig.getConfigVersion());
- if (_selfIndex >= 0) {
- const MemberConfig& me = _selfConfig();
- hbArgs.setSenderHost(me.getHostAndPort());
- hbArgs.setSenderId(me.getId());
- }
- }
- else {
- hbArgs.setSetName(ourSetName);
- hbArgs.setConfigVersion(-2);
- }
-
- const Milliseconds timeoutPeriod(
- _rsConfig.isInitialized() ?
- _rsConfig.getHeartbeatTimeoutPeriodMillis() :
- ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod);
- const Milliseconds timeout = timeoutPeriod - alreadyElapsed;
- return std::make_pair(hbArgs, timeout);
- }
-
- std::pair<ReplSetHeartbeatArgsV1, Milliseconds>
- TopologyCoordinatorImpl::prepareHeartbeatRequestV1(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target) {
-
- PingStats& hbStats = _pings[target];
- Milliseconds alreadyElapsed(now.asInt64() - hbStats.getLastHeartbeatStartDate().asInt64());
- if (!_rsConfig.isInitialized() ||
- (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries) ||
- (alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
-
- // This is either the first request ever for "target", or the heartbeat timeout has
- // passed, so we're starting a "new" heartbeat.
- hbStats.start(now);
- alreadyElapsed = Milliseconds(0);
- }
- ReplSetHeartbeatArgsV1 hbArgs;
+ // Resolve the caller's id in our Member list
+ int from = -1;
+ if (v == args.getConfigVersion() && args.getSenderId() != -1) {
+ from = _getMemberIndex(args.getSenderId());
+ }
+ if (from == -1) {
+ return Status::OK();
+ }
+ invariant(from != _selfIndex);
+
+ // note that we got a heartbeat from this node
+ _hbdata[from].setLastHeartbeatRecv(now);
+ return Status::OK();
+}
+
+int TopologyCoordinatorImpl::_getMemberIndex(int id) const {
+ int index = 0;
+ for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
+ it != _rsConfig.membersEnd();
+ ++it, ++index) {
+ if (it->getId() == id) {
+ return index;
+ }
+ }
+ return -1;
+}
+
+std::pair<ReplSetHeartbeatArgs, Milliseconds> TopologyCoordinatorImpl::prepareHeartbeatRequest(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target) {
+ PingStats& hbStats = _pings[target];
+ Milliseconds alreadyElapsed = now - hbStats.getLastHeartbeatStartDate();
+ if (!_rsConfig.isInitialized() ||
+ (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries) ||
+ (alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
+ // This is either the first request ever for "target", or the heartbeat timeout has
+ // passed, so we're starting a "new" heartbeat.
+ hbStats.start(now);
+ alreadyElapsed = Milliseconds(0);
+ }
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setCheckEmpty(false);
+ if (_rsConfig.isInitialized()) {
hbArgs.setSetName(_rsConfig.getReplSetName());
hbArgs.setConfigVersion(_rsConfig.getConfigVersion());
if (_selfIndex >= 0) {
const MemberConfig& me = _selfConfig();
- hbArgs.setSenderId(me.getId());
hbArgs.setSenderHost(me.getHostAndPort());
+ hbArgs.setSenderId(me.getId());
}
- hbArgs.setTerm(_term);
-
- const Milliseconds timeoutPeriod(
- _rsConfig.isInitialized() ?
- _rsConfig.getHeartbeatTimeoutPeriodMillis() :
- Milliseconds(
- ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.count()));
- const Milliseconds timeout(timeoutPeriod.count() - alreadyElapsed.count());
- return std::make_pair(hbArgs, timeout);
- }
-
- HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse(
- Date_t now,
- Milliseconds networkRoundTripTime,
- const HostAndPort& target,
- const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
- const OpTime& myLastOpApplied) {
-
- const MemberState originalState = getMemberState();
- PingStats& hbStats = _pings[target];
- invariant(hbStats.getLastHeartbeatStartDate() != Date_t());
- if (!hbResponse.isOK()) {
- hbStats.miss();
- }
- else {
- hbStats.hit(networkRoundTripTime.count());
- // Log diagnostics.
- if (hbResponse.getValue().isStateDisagreement()) {
- LOG(1) << target <<
- " thinks that we are down because they cannot send us heartbeats.";
- }
- }
-
- const bool isUnauthorized =
- (hbResponse.getStatus().code() == ErrorCodes::Unauthorized) ||
- (hbResponse.getStatus().code() == ErrorCodes::AuthenticationFailed);
-
- const Milliseconds alreadyElapsed = now - hbStats.getLastHeartbeatStartDate();
- Date_t nextHeartbeatStartDate;
- // determine next start time
- if (_rsConfig.isInitialized() &&
- (hbStats.getNumFailuresSinceLastStart() <= kMaxHeartbeatRetries) &&
- (alreadyElapsed < _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
-
- if (isUnauthorized) {
- nextHeartbeatStartDate = now + kHeartbeatInterval;
- } else {
- nextHeartbeatStartDate = now;
- }
- }
- else {
+ } else {
+ hbArgs.setSetName(ourSetName);
+ hbArgs.setConfigVersion(-2);
+ }
+
+ const Milliseconds timeoutPeriod(_rsConfig.isInitialized()
+ ? _rsConfig.getHeartbeatTimeoutPeriodMillis()
+ : ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod);
+ const Milliseconds timeout = timeoutPeriod - alreadyElapsed;
+ return std::make_pair(hbArgs, timeout);
+}
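The arithmetic above gives each logical heartbeat a fixed time budget: retries reuse whatever remains of the original timeout window rather than getting a fresh one, and a new window starts only once the retry count or the window itself is exhausted. A self-contained sketch of that budget calculation (constants invented; the real values come from the replica set config):

    #include <chrono>
    #include <cstdio>

    int main() {
        using std::chrono::milliseconds;
        const milliseconds timeoutPeriod(10000);  // assumed heartbeat timeout
        const int kMaxRetries = 2;                // mirrors kMaxHeartbeatRetries

        milliseconds alreadyElapsed(0);
        int failuresSinceStart = 0;

        for (int attempt = 0; attempt < 4; ++attempt) {
            if (failuresSinceStart > kMaxRetries || alreadyElapsed >= timeoutPeriod) {
                // Budget exhausted: start a fresh window, as hbStats.start() does.
                alreadyElapsed = milliseconds(0);
                failuresSinceStart = 0;
            }
            const milliseconds timeout = timeoutPeriod - alreadyElapsed;
            std::printf("attempt %d gets a %lld ms budget\n", attempt,
                        static_cast<long long>(timeout.count()));
            // Pretend each attempt fails after burning 4 seconds of the window.
            alreadyElapsed += milliseconds(4000);
            ++failuresSinceStart;
        }
        return 0;
    }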
+
+std::pair<ReplSetHeartbeatArgsV1, Milliseconds> TopologyCoordinatorImpl::prepareHeartbeatRequestV1(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target) {
+ PingStats& hbStats = _pings[target];
+ Milliseconds alreadyElapsed(now.asInt64() - hbStats.getLastHeartbeatStartDate().asInt64());
+ if (!_rsConfig.isInitialized() ||
+ (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries) ||
+ (alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
+ // This is either the first request ever for "target", or the heartbeat timeout has
+ // passed, so we're starting a "new" heartbeat.
+ hbStats.start(now);
+ alreadyElapsed = Milliseconds(0);
+ }
+ ReplSetHeartbeatArgsV1 hbArgs;
+ hbArgs.setSetName(_rsConfig.getReplSetName());
+ hbArgs.setConfigVersion(_rsConfig.getConfigVersion());
+ if (_selfIndex >= 0) {
+ const MemberConfig& me = _selfConfig();
+ hbArgs.setSenderId(me.getId());
+ hbArgs.setSenderHost(me.getHostAndPort());
+ }
+ hbArgs.setTerm(_term);
+
+ const Milliseconds timeoutPeriod(
+ _rsConfig.isInitialized()
+ ? _rsConfig.getHeartbeatTimeoutPeriodMillis()
+ : Milliseconds(ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.count()));
+ const Milliseconds timeout(timeoutPeriod.count() - alreadyElapsed.count());
+ return std::make_pair(hbArgs, timeout);
+}
+
+HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse(
+ Date_t now,
+ Milliseconds networkRoundTripTime,
+ const HostAndPort& target,
+ const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
+ const OpTime& myLastOpApplied) {
+ const MemberState originalState = getMemberState();
+ PingStats& hbStats = _pings[target];
+ invariant(hbStats.getLastHeartbeatStartDate() != Date_t());
+ if (!hbResponse.isOK()) {
+ hbStats.miss();
+ } else {
+ hbStats.hit(networkRoundTripTime.count());
+ // Log diagnostics.
+ if (hbResponse.getValue().isStateDisagreement()) {
+ LOG(1) << target << " thinks that we are down because they cannot send us heartbeats.";
+ }
+ }
+
+ const bool isUnauthorized = (hbResponse.getStatus().code() == ErrorCodes::Unauthorized) ||
+ (hbResponse.getStatus().code() == ErrorCodes::AuthenticationFailed);
+
+ const Milliseconds alreadyElapsed = now - hbStats.getLastHeartbeatStartDate();
+ Date_t nextHeartbeatStartDate;
+ // determine next start time
+ if (_rsConfig.isInitialized() &&
+ (hbStats.getNumFailuresSinceLastStart() <= kMaxHeartbeatRetries) &&
+ (alreadyElapsed < _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
+ if (isUnauthorized) {
nextHeartbeatStartDate = now + kHeartbeatInterval;
+ } else {
+ nextHeartbeatStartDate = now;
}
+ } else {
+ nextHeartbeatStartDate = now + kHeartbeatInterval;
+ }
- if (hbResponse.isOK() && hbResponse.getValue().hasConfig()) {
- const long long currentConfigVersion =
- _rsConfig.isInitialized() ? _rsConfig.getConfigVersion() : -2;
- const ReplicaSetConfig& newConfig = hbResponse.getValue().getConfig();
- if (newConfig.getConfigVersion() > currentConfigVersion) {
- HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeReconfigAction();
- nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
- return nextAction;
- }
- else {
- // Could be we got the newer version before we got the response, or the
- // target erroneously sent us one, even through it isn't newer.
- if (newConfig.getConfigVersion() < currentConfigVersion) {
- LOG(1) << "Config version from heartbeat was older than ours.";
- }
- else {
- LOG(2) << "Config from heartbeat response was same as ours.";
- }
- if (logger::globalLogDomain()->shouldLog(
- MongoLogDefaultComponent_component,
- ::mongo::LogstreamBuilder::severityCast(2))) {
- LogstreamBuilder lsb = log();
- if (_rsConfig.isInitialized()) {
- lsb << "Current config: " << _rsConfig.toBSON() << "; ";
- }
- lsb << "Config in heartbeat: " << newConfig.toBSON();
- }
- }
- }
-
- // Check if the heartbeat target is in our config. If it isn't, there's nothing left to do,
- // so return early.
- if (!_rsConfig.isInitialized()) {
- HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
+ if (hbResponse.isOK() && hbResponse.getValue().hasConfig()) {
+ const long long currentConfigVersion =
+ _rsConfig.isInitialized() ? _rsConfig.getConfigVersion() : -2;
+ const ReplicaSetConfig& newConfig = hbResponse.getValue().getConfig();
+ if (newConfig.getConfigVersion() > currentConfigVersion) {
+ HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeReconfigAction();
nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
return nextAction;
- }
- const int memberIndex = _rsConfig.findMemberIndexByHostAndPort(target);
- if (memberIndex == -1) {
- LOG(1) << "Could not find " << target << " in current config so ignoring --"
- " current config: " << _rsConfig.toBSON();
- HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
- nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
- return nextAction;
- }
- invariant(memberIndex != _selfIndex);
-
- MemberHeartbeatData& hbData = _hbdata[memberIndex];
- const MemberConfig member = _rsConfig.getMemberAt(memberIndex);
- if (!hbResponse.isOK()) {
- if (isUnauthorized) {
- LOG(1) << "setAuthIssue: heartbeat response failed due to authentication"
- " issue for member _id:" << member.getId();
- hbData.setAuthIssue(now);
- }
- else if (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries ||
- alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis()) {
-
- LOG(1) << "setDownValues: heartbeat response failed for member _id:"
- << member.getId() << ", msg: "
- << hbResponse.getStatus().reason();
-
- hbData.setDownValues(now, hbResponse.getStatus().reason());
+ } else {
+ // Could be we got the newer version before we got the response, or the
+ // target erroneously sent us one, even though it isn't newer.
+ if (newConfig.getConfigVersion() < currentConfigVersion) {
+ LOG(1) << "Config version from heartbeat was older than ours.";
+ } else {
+ LOG(2) << "Config from heartbeat response was same as ours.";
}
- else {
- LOG(3) << "Bad heartbeat response from " << target <<
- "; trying again; Retries left: " <<
- (kMaxHeartbeatRetries - hbStats.getNumFailuresSinceLastStart()) <<
- "; " << alreadyElapsed.count() << "ms have already elapsed";
+ if (logger::globalLogDomain()->shouldLog(MongoLogDefaultComponent_component,
+ ::mongo::LogstreamBuilder::severityCast(2))) {
+ LogstreamBuilder lsb = log();
+ if (_rsConfig.isInitialized()) {
+ lsb << "Current config: " << _rsConfig.toBSON() << "; ";
+ }
+ lsb << "Config in heartbeat: " << newConfig.toBSON();
}
}
- else {
- ReplSetHeartbeatResponse hbr = hbResponse.getValue();
- LOG(3) << "setUpValues: heartbeat response good for member _id:"
- << member.getId() << ", msg: "
- << hbr.getHbMsg();
- hbData.setUpValues(now, member.getHostAndPort(), hbr);
- }
- HeartbeatResponseAction nextAction = _updateHeartbeatDataImpl(
- memberIndex,
- originalState,
- now,
- myLastOpApplied);
+ }
+ // Check if the heartbeat target is in our config. If it isn't, there's nothing left to do,
+ // so return early.
+ if (!_rsConfig.isInitialized()) {
+ HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
return nextAction;
}
+ const int memberIndex = _rsConfig.findMemberIndexByHostAndPort(target);
+ if (memberIndex == -1) {
+ LOG(1) << "Could not find " << target << " in current config so ignoring --"
+ " current config: " << _rsConfig.toBSON();
+ HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
+ nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
+ return nextAction;
+ }
+ invariant(memberIndex != _selfIndex);
+
+ MemberHeartbeatData& hbData = _hbdata[memberIndex];
+ const MemberConfig member = _rsConfig.getMemberAt(memberIndex);
+ if (!hbResponse.isOK()) {
+ if (isUnauthorized) {
+ LOG(1) << "setAuthIssue: heartbeat response failed due to authentication"
+ " issue for member _id:" << member.getId();
+ hbData.setAuthIssue(now);
+ } else if (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries ||
+ alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis()) {
+ LOG(1) << "setDownValues: heartbeat response failed for member _id:" << member.getId()
+ << ", msg: " << hbResponse.getStatus().reason();
+
+ hbData.setDownValues(now, hbResponse.getStatus().reason());
+ } else {
+ LOG(3) << "Bad heartbeat response from " << target << "; trying again; Retries left: "
+ << (kMaxHeartbeatRetries - hbStats.getNumFailuresSinceLastStart()) << "; "
+ << alreadyElapsed.count() << "ms have already elapsed";
+ }
+ } else {
+ ReplSetHeartbeatResponse hbr = hbResponse.getValue();
+ LOG(3) << "setUpValues: heartbeat response good for member _id:" << member.getId()
+ << ", msg: " << hbr.getHbMsg();
+ hbData.setUpValues(now, member.getHostAndPort(), hbr);
+ }
+ HeartbeatResponseAction nextAction =
+ _updateHeartbeatDataImpl(memberIndex, originalState, now, myLastOpApplied);
+
+ nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
+ return nextAction;
+}
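The scheduling rule above, in isolation: while a heartbeat still has retry budget, failures are retried immediately, except authentication failures, which always wait a full interval so we do not hammer a node that will keep rejecting us. A condensed sketch (names illustrative):

    #include <chrono>

    using Clock = std::chrono::steady_clock;

    // Mirrors the next-heartbeat decision in processHeartbeatResponse.
    Clock::time_point nextHeartbeatStart(Clock::time_point now,
                                         bool retryBudgetLeft,
                                         bool isUnauthorized,
                                         Clock::duration heartbeatInterval) {
        if (retryBudgetLeft && !isUnauthorized) {
            return now;  // retry right away within the same window
        }
        return now + heartbeatInterval;  // otherwise back off for a full interval
    }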
+
+HeartbeatResponseAction TopologyCoordinatorImpl::_updateHeartbeatDataImpl(
+ int updatedConfigIndex,
+ const MemberState& originalState,
+ Date_t now,
+ const OpTime& lastOpApplied) {
+ // This method has two interrelated responsibilities, performed in two phases.
+ //
+ // First, it updates the local notion of which remote node, if any, is primary. In the
+ // process, it may request a remote primary to step down because there is a higher priority
+ // node waiting, or because the local node thinks it is primary and that it has a more
+ // recent electionTime. It may instead decide that the local node should step down itself,
+ // because a remote has a more recent election time.
+ //
+ // Second, if there is no remote primary, and the local node is not primary, it considers
+ // whether or not to stand for election.
+ invariant(updatedConfigIndex != _selfIndex);
+
+ // We are missing from the config, so do not participate in primary maintenance or election.
+ if (_selfIndex == -1) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+
+ ////////////////////
+ // Phase 1
+ ////////////////////
+
+ // If we believe the node whose data was just updated is primary, confirm that
+ // the updated data supports that notion. If not, erase our notion of who is primary.
+ if (updatedConfigIndex == _currentPrimaryIndex) {
+ const MemberHeartbeatData& updatedHBData = _hbdata[updatedConfigIndex];
+ if (!updatedHBData.up() || !updatedHBData.getState().primary()) {
+ _currentPrimaryIndex = -1;
+ }
+ }
+
+ // If the current primary is not highest priority and up to date (within 10s),
+ // have them/me stepdown.
+ if (_currentPrimaryIndex != -1) {
+ // check if we should ask the primary (possibly ourselves) to step down
+ const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
+ if (highestPriorityIndex != -1) {
+ const MemberConfig& currentPrimaryMember = _rsConfig.getMemberAt(_currentPrimaryIndex);
+ const MemberConfig& highestPriorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
+ const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex
+ ? lastOpApplied
+ : _hbdata[highestPriorityIndex].getOpTime();
- HeartbeatResponseAction TopologyCoordinatorImpl::_updateHeartbeatDataImpl(
- int updatedConfigIndex,
- const MemberState& originalState,
- Date_t now,
- const OpTime& lastOpApplied) {
-
- // This method has two interrelated responsibilities, performed in two phases.
- //
- // First, it updates the local notion of which remote node, if any is primary. In the
- // process, it may request a remote primary to step down because there is a higher priority
- // node waiting, or because the local node thinks it is primary and that it has a more
- // recent electionTime. It may instead decide that the local node should step down itself,
- // because a remote has a more recent election time.
- //
- // Second, if there is no remote primary, and the local node is not primary, it considers
- // whether or not to stand for election.
- invariant(updatedConfigIndex != _selfIndex);
-
- // We are missing from the config, so do not participate in primary maintenance or election.
- if (_selfIndex == -1) {
- return HeartbeatResponseAction::makeNoAction();
- }
-
- ////////////////////
- // Phase 1
- ////////////////////
-
- // If we believe the node whose data was just updated is primary, confirm that
- // the updated data supports that notion. If not, erase our notion of who is primary.
- if (updatedConfigIndex == _currentPrimaryIndex) {
- const MemberHeartbeatData& updatedHBData = _hbdata[updatedConfigIndex];
- if (!updatedHBData.up() || !updatedHBData.getState().primary()) {
- _currentPrimaryIndex = -1;
- }
- }
+ if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) &&
+ _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime, lastOpApplied)) {
+ const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied);
- // If the current primary is not highest priority and up to date (within 10s),
- // have them/me stepdown.
- if (_currentPrimaryIndex != -1) {
- // check if we should ask the primary (possibly ourselves) to step down
- const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
- if (highestPriorityIndex != -1) {
- const MemberConfig& currentPrimaryMember =
- _rsConfig.getMemberAt(_currentPrimaryIndex);
- const MemberConfig& highestPriorityMember =
- _rsConfig.getMemberAt(highestPriorityIndex);
- const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex ?
- lastOpApplied : _hbdata[highestPriorityIndex].getOpTime();
-
- if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) &&
- _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime,
- lastOpApplied)) {
- const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied);
-
- if (_iAmPrimary()) {
- if (_stepDownPending) {
- return HeartbeatResponseAction::makeNoAction();
- }
- _stepDownPending = true;
- log() << "Stepping down self (priority "
- << currentPrimaryMember.getPriority() << ") because "
- << highestPriorityMember.getHostAndPort() << " has higher priority "
- << highestPriorityMember.getPriority() << " and is only "
- << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
- << " seconds behind me";
- const Date_t until = now + VoteLease::leaseTime + kHeartbeatInterval;
- if (_electionSleepUntil < until) {
- _electionSleepUntil = until;
- }
- return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
+ if (_iAmPrimary()) {
+ if (_stepDownPending) {
+ return HeartbeatResponseAction::makeNoAction();
}
- else if ((highestPriorityIndex == _selfIndex) &&
- (_electionSleepUntil <= now)) {
- // If this node is the highest priority node, and it is not in
- // an inter-election sleep period, ask the current primary to step down.
- // This is an optimization, because the remote primary will almost certainly
- // notice this node's electability promptly, via its own heartbeat process.
- log() << "Requesting that " << currentPrimaryMember.getHostAndPort()
- << " (priority " << currentPrimaryMember.getPriority()
- << ") step down because I have higher priority "
- << highestPriorityMember.getPriority() << " and am only "
- << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
- << " seconds behind it";
- int primaryIndex = _currentPrimaryIndex;
- _currentPrimaryIndex = -1;
- return HeartbeatResponseAction::makeStepDownRemoteAction(primaryIndex);
+ _stepDownPending = true;
+ log() << "Stepping down self (priority " << currentPrimaryMember.getPriority()
+ << ") because " << highestPriorityMember.getHostAndPort()
+ << " has higher priority " << highestPriorityMember.getPriority()
+ << " and is only "
+ << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
+ << " seconds behind me";
+ const Date_t until = now + VoteLease::leaseTime + kHeartbeatInterval;
+ if (_electionSleepUntil < until) {
+ _electionSleepUntil = until;
}
+ return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
+ } else if ((highestPriorityIndex == _selfIndex) && (_electionSleepUntil <= now)) {
+ // If this node is the highest priority node, and it is not in
+ // an inter-election sleep period, ask the current primary to step down.
+ // This is an optimization, because the remote primary will almost certainly
+ // notice this node's electability promptly, via its own heartbeat process.
+ log() << "Requesting that " << currentPrimaryMember.getHostAndPort()
+ << " (priority " << currentPrimaryMember.getPriority()
+ << ") step down because I have higher priority "
+ << highestPriorityMember.getPriority() << " and am only "
+ << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
+ << " seconds behind it";
+ int primaryIndex = _currentPrimaryIndex;
+ _currentPrimaryIndex = -1;
+ return HeartbeatResponseAction::makeStepDownRemoteAction(primaryIndex);
}
}
}
+ }
- // Scan the member list's heartbeat data for who is primary, and update
- // _currentPrimaryIndex and _role, or request a remote to step down, as necessary.
- {
- int remotePrimaryIndex = -1;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- continue;
- }
-
- if( it->getState().primary() && it->up() ) {
- if (remotePrimaryIndex != -1) {
- // two other nodes think they are primary (asynchronously polled)
- // -- wait for things to settle down.
- warning() << "two remote primaries (transiently)";
- return HeartbeatResponseAction::makeNoAction();
- }
- remotePrimaryIndex = itIndex;
- }
+ // Scan the member list's heartbeat data for who is primary, and update
+ // _currentPrimaryIndex and _role, or request a remote to step down, as necessary.
+ {
+ int remotePrimaryIndex = -1;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
+ it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ continue;
}
- if (remotePrimaryIndex != -1) {
- // If it's the same as last time, don't do anything further.
- if (_currentPrimaryIndex == remotePrimaryIndex) {
+ if (it->getState().primary() && it->up()) {
+ if (remotePrimaryIndex != -1) {
+ // two other nodes think they are primary (asynchronously polled)
+ // -- wait for things to settle down.
+ warning() << "two remote primaries (transiently)";
return HeartbeatResponseAction::makeNoAction();
}
- // Clear last heartbeat message on ourselves (why?)
- setMyHeartbeatMessage(now, "");
-
- // If we are also primary, this is a problem. Determine who should step down.
- if (_iAmPrimary()) {
- Timestamp remoteElectionTime = _hbdata[remotePrimaryIndex].getElectionTime();
- log() << "another primary seen with election time "
- << remoteElectionTime << " my election time is " << _electionTime;
-
- // Step down whomever has the older election time.
- if (remoteElectionTime > _electionTime) {
- if (_stepDownPending) {
- return HeartbeatResponseAction::makeNoAction();
- }
- _stepDownPending = true;
- log() << "stepping down; another primary was elected more recently";
- return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
- }
- else {
- log() << "another PRIMARY detected and it should step down"
- " since it was elected earlier than me";
- return HeartbeatResponseAction::makeStepDownRemoteAction(
- remotePrimaryIndex);
- }
- }
-
- _currentPrimaryIndex = remotePrimaryIndex;
- return HeartbeatResponseAction::makeNoAction();
+ remotePrimaryIndex = itIndex;
}
}
- ////////////////////
- // Phase 2
- ////////////////////
+ if (remotePrimaryIndex != -1) {
+ // If it's the same as last time, don't do anything further.
+ if (_currentPrimaryIndex == remotePrimaryIndex) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+ // Clear last heartbeat message on ourselves (why?)
+ setMyHeartbeatMessage(now, "");
- // We do not believe any remote to be primary.
+ // If we are also primary, this is a problem. Determine who should step down.
+ if (_iAmPrimary()) {
+ Timestamp remoteElectionTime = _hbdata[remotePrimaryIndex].getElectionTime();
+ log() << "another primary seen with election time " << remoteElectionTime
+ << " my election time is " << _electionTime;
- // If we are primary, check if we can still see majority of the set;
- // stepdown if we can't.
- if (_iAmPrimary()) {
- if (CannotSeeMajority & _getMyUnelectableReason(now, lastOpApplied)) {
- if (_stepDownPending) {
- return HeartbeatResponseAction::makeNoAction();
+ // Step down whoever has the older election time.
+ if (remoteElectionTime > _electionTime) {
+ if (_stepDownPending) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+ _stepDownPending = true;
+ log() << "stepping down; another primary was elected more recently";
+ return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
+ } else {
+ log() << "another PRIMARY detected and it should step down"
+ " since it was elected earlier than me";
+ return HeartbeatResponseAction::makeStepDownRemoteAction(remotePrimaryIndex);
}
- _stepDownPending = true;
- log() << "can't see a majority of the set, relinquishing primary";
- return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
}
- LOG(2) << "Choosing to remain primary";
+ _currentPrimaryIndex = remotePrimaryIndex;
return HeartbeatResponseAction::makeNoAction();
}
+ }
- fassert(18505, _currentPrimaryIndex == -1);
-
- const MemberState currentState = getMemberState();
- if (originalState.recovering() && currentState.secondary()) {
- // We just transitioned from RECOVERING to SECONDARY, this can only happen if we
- // received a heartbeat with an auth error when previously all the heartbeats we'd
- // received had auth errors. In this case, don't return makeElectAction() because
- // that could cause the election to start before the ReplicationCoordinator has updated
- // its notion of the member state to SECONDARY. Instead return noAction so that the
- // ReplicationCooridinator knows to update its tracking of the member state off of the
- // TopologyCoordinator, and leave starting the election until the next heartbeat comes
- // back.
- return HeartbeatResponseAction::makeNoAction();
- }
+ ////////////////////
+ // Phase 2
+ ////////////////////
- // At this point, there is no primary anywhere. Check to see if we should become a
- // candidate.
- if (!checkShouldStandForElection(now, lastOpApplied)) {
- return HeartbeatResponseAction::makeNoAction();
+ // We do not believe any remote to be primary.
+
+ // If we are primary, check if we can still see majority of the set;
+ // stepdown if we can't.
+ if (_iAmPrimary()) {
+ if (CannotSeeMajority & _getMyUnelectableReason(now, lastOpApplied)) {
+ if (_stepDownPending) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+ _stepDownPending = true;
+ log() << "can't see a majority of the set, relinquishing primary";
+ return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
}
- return HeartbeatResponseAction::makeElectAction();
+
+ LOG(2) << "Choosing to remain primary";
+ return HeartbeatResponseAction::makeNoAction();
}
- bool TopologyCoordinatorImpl::checkShouldStandForElection(
- Date_t now, const OpTime& lastOpApplied) {
- if (_currentPrimaryIndex != -1) {
- return false;
- }
- invariant (_role != Role::leader);
+ fassert(18505, _currentPrimaryIndex == -1);
- if (_role == Role::candidate) {
- LOG(2) << "Not standing for election again; already candidate";
- return false;
- }
+ const MemberState currentState = getMemberState();
+ if (originalState.recovering() && currentState.secondary()) {
+ // We just transitioned from RECOVERING to SECONDARY; this can only happen if we
+ // received a heartbeat with an auth error when previously all the heartbeats we'd
+ // received had auth errors. In this case, don't return makeElectAction() because
+ // that could cause the election to start before the ReplicationCoordinator has updated
+ // its notion of the member state to SECONDARY. Instead return noAction so that the
+ // ReplicationCoordinator knows to update its tracking of the member state off of the
+ // TopologyCoordinator, and leave starting the election until the next heartbeat comes
+ // back.
+ return HeartbeatResponseAction::makeNoAction();
+ }
- const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, lastOpApplied);
- if (NotCloseEnoughToLatestOptime & unelectableReason) {
- LOG(2) << "Not standing for election because " <<
- _getUnelectableReasonString(unelectableReason) << "; my last optime is " <<
- lastOpApplied << " and the newest is " << _latestKnownOpTime(lastOpApplied);
- return false;
- }
- if (unelectableReason) {
- LOG(2) << "Not standing for election because " <<
- _getUnelectableReasonString(unelectableReason);
- return false;
- }
- if (_electionSleepUntil > now) {
- LOG(2) << "Not standing for election before " <<
- dateToISOStringLocal(_electionSleepUntil) << " because I stood too recently";
- return false;
- }
- // All checks passed, become a candidate and start election proceedings.
- _role = Role::candidate;
- return true;
+ // At this point, there is no primary anywhere. Check to see if we should become a
+ // candidate.
+ if (!checkShouldStandForElection(now, lastOpApplied)) {
+ return HeartbeatResponseAction::makeNoAction();
}
+ return HeartbeatResponseAction::makeElectAction();
+}
- bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const {
- int vUp = 0;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex || it->up()) {
- vUp += _rsConfig.getMemberAt(itIndex).getNumVotes();
- }
- }
+bool TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) {
+ if (_currentPrimaryIndex != -1) {
+ return false;
+ }
+ invariant(_role != Role::leader);
- return vUp * 2 > _rsConfig.getTotalVotingMembers();
+ if (_role == Role::candidate) {
+ LOG(2) << "Not standing for election again; already candidate";
+ return false;
}
- bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(
- const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const {
- const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied);
- // Use addition instead of subtraction to avoid overflow.
- return otherOpTime.getSecs() + 10 >= (latestKnownOpTime.getSecs());
+ const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, lastOpApplied);
+ if (NotCloseEnoughToLatestOptime & unelectableReason) {
+ LOG(2) << "Not standing for election because "
+ << _getUnelectableReasonString(unelectableReason) << "; my last optime is "
+ << lastOpApplied << " and the newest is " << _latestKnownOpTime(lastOpApplied);
+ return false;
+ }
+ if (unelectableReason) {
+ LOG(2) << "Not standing for election because "
+ << _getUnelectableReasonString(unelectableReason);
+ return false;
}
+ if (_electionSleepUntil > now) {
+ LOG(2) << "Not standing for election before " << dateToISOStringLocal(_electionSleepUntil)
+ << " because I stood too recently";
+ return false;
+ }
+ // All checks passed, become a candidate and start election proceedings.
+ _role = Role::candidate;
+ return true;
+}
- bool TopologyCoordinatorImpl::_iAmPrimary() const {
- if (_role == Role::leader) {
- invariant(_currentPrimaryIndex == _selfIndex);
- return true;
+bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const {
+ int vUp = 0;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex || it->up()) {
+ vUp += _rsConfig.getMemberAt(itIndex).getNumVotes();
}
- return false;
}
- OpTime TopologyCoordinatorImpl::_latestKnownOpTime(const OpTime& ourLastOpApplied) const {
- OpTime latest = ourLastOpApplied;
+ return vUp * 2 > _rsConfig.getTotalVotingMembers();
+}
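Doubling the up-vote count keeps the strict-majority test in integer arithmetic, and an exact tie never passes. A few worked cases, assuming the same rule:

    #include <cassert>

    bool majoritySeemsUp(int votesUp, int totalVotes) {
        return votesUp * 2 > totalVotes;  // strict majority, no floating point
    }

    int main() {
        assert(majoritySeemsUp(3, 5));   // 6 > 5: majority of a 5-vote set
        assert(!majoritySeemsUp(2, 5));  // 4 > 5 is false
        assert(!majoritySeemsUp(2, 4));  // an exact 2-2 split is not a majority
        return 0;
    }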
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
+bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(
+ const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const {
+ const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied);
+ // Use addition instead of subtraction to avoid overflow.
+ return otherOpTime.getSecs() + 10 >= (latestKnownOpTime.getSecs());
+}
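The overflow comment above matters because optime seconds are unsigned: written as a subtraction, the freshness test wraps to a huge value whenever the other node is actually ahead of the latest optime we computed. A sketch of the failure mode (assuming 32-bit unsigned seconds, as in a BSON Timestamp):

    #include <cassert>
    #include <cstdint>

    bool closeEnoughSafe(uint32_t otherSecs, uint32_t latestSecs) {
        return otherSecs + 10 >= latestSecs;  // addition: no wraparound
    }

    bool closeEnoughBuggy(uint32_t otherSecs, uint32_t latestSecs) {
        return latestSecs - otherSecs <= 10;  // wraps when otherSecs > latestSecs
    }

    int main() {
        // The other node is one second ahead of the latest optime we know of.
        assert(closeEnoughSafe(101, 100));    // correctly "close enough"
        assert(!closeEnoughBuggy(101, 100));  // 100 - 101 wraps to 4294967295
        return 0;
    }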
- if (indexOfIterator(_hbdata, it) == _selfIndex) {
- continue;
- }
- if (!it->up()) {
- continue;
- }
+bool TopologyCoordinatorImpl::_iAmPrimary() const {
+ if (_role == Role::leader) {
+ invariant(_currentPrimaryIndex == _selfIndex);
+ return true;
+ }
+ return false;
+}
- OpTime optime = it->getOpTime();
+OpTime TopologyCoordinatorImpl::_latestKnownOpTime(const OpTime& ourLastOpApplied) const {
+ OpTime latest = ourLastOpApplied;
- if (optime > latest) {
- latest = optime;
- }
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ if (indexOfIterator(_hbdata, it) == _selfIndex) {
+ continue;
}
+ if (!it->up()) {
+ continue;
+ }
+
+ OpTime optime = it->getOpTime();
- return latest;
+ if (optime > latest) {
+ latest = optime;
+ }
}
- bool TopologyCoordinatorImpl::_isMemberHigherPriority(int memberOneIndex,
- int memberTwoIndex) const {
- if (memberOneIndex == -1)
- return false;
+ return latest;
+}
- if (memberTwoIndex == -1)
- return true;
+bool TopologyCoordinatorImpl::_isMemberHigherPriority(int memberOneIndex,
+ int memberTwoIndex) const {
+ if (memberOneIndex == -1)
+ return false;
- return _rsConfig.getMemberAt(memberOneIndex).getPriority() >
- _rsConfig.getMemberAt(memberTwoIndex).getPriority();
- }
+ if (memberTwoIndex == -1)
+ return true;
- int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(
- Date_t now, const OpTime& lastOpApplied) const {
- int maxIndex = -1;
- for (int currentIndex = 0; currentIndex < _rsConfig.getNumMembers(); currentIndex++) {
- UnelectableReasonMask reason = currentIndex == _selfIndex ?
- _getMyUnelectableReason(now, lastOpApplied) :
- _getUnelectableReason(currentIndex, lastOpApplied);
- if (None == reason && _isMemberHigherPriority(currentIndex, maxIndex)) {
- maxIndex = currentIndex;
- }
- }
+ return _rsConfig.getMemberAt(memberOneIndex).getPriority() >
+ _rsConfig.getMemberAt(memberTwoIndex).getPriority();
+}
- return maxIndex;
+int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(Date_t now,
+ const OpTime& lastOpApplied) const {
+ int maxIndex = -1;
+ for (int currentIndex = 0; currentIndex < _rsConfig.getNumMembers(); currentIndex++) {
+ UnelectableReasonMask reason = currentIndex == _selfIndex
+ ? _getMyUnelectableReason(now, lastOpApplied)
+ : _getUnelectableReason(currentIndex, lastOpApplied);
+ if (None == reason && _isMemberHigherPriority(currentIndex, maxIndex)) {
+ maxIndex = currentIndex;
+ }
}
- void TopologyCoordinatorImpl::prepareForStepDown() {
- _stepDownPending = true;
- }
+ return maxIndex;
+}
- void TopologyCoordinatorImpl::changeMemberState_forTest(const MemberState& newMemberState,
- const Timestamp& electionTime) {
- invariant(_selfIndex != -1);
- if (newMemberState == getMemberState())
- return;
- switch(newMemberState.s) {
+void TopologyCoordinatorImpl::prepareForStepDown() {
+ _stepDownPending = true;
+}
+
+void TopologyCoordinatorImpl::changeMemberState_forTest(const MemberState& newMemberState,
+ const Timestamp& electionTime) {
+ invariant(_selfIndex != -1);
+ if (newMemberState == getMemberState())
+ return;
+ switch (newMemberState.s) {
case MemberState::RS_PRIMARY:
_role = Role::candidate;
processWinElection(OID(), electionTime);
@@ -1374,728 +1298,692 @@ namespace {
}
break;
case MemberState::RS_STARTUP:
- updateConfig(
- ReplicaSetConfig(),
- -1,
- Date_t(),
- OpTime());
+ updateConfig(ReplicaSetConfig(), -1, Date_t(), OpTime());
break;
default:
severe() << "Cannot switch to state " << newMemberState;
invariant(false);
- }
- if (getMemberState() != newMemberState.s) {
- severe() << "Expected to enter state " << newMemberState << " but am now in " <<
- getMemberState();
- invariant(false);
- }
- log() << newMemberState;
}
-
- void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) {
- if (primaryIndex == _selfIndex) {
- changeMemberState_forTest(MemberState::RS_PRIMARY);
- }
- else {
- if (_iAmPrimary()) {
- changeMemberState_forTest(MemberState::RS_SECONDARY);
- }
- if (primaryIndex != -1) {
- ReplSetHeartbeatResponse hbResponse;
- hbResponse.setState(MemberState::RS_PRIMARY);
- hbResponse.setElectionTime(Timestamp());
- hbResponse.setOpTime(_hbdata[primaryIndex].getOpTime());
- hbResponse.setSyncingTo(HostAndPort());
- hbResponse.setHbMsg("");
- _hbdata[primaryIndex].setUpValues(
- _hbdata[primaryIndex].getLastHeartbeat(),
- _rsConfig.getMemberAt(primaryIndex).getHostAndPort(),
- hbResponse);
- }
- _currentPrimaryIndex = primaryIndex;
- }
+ if (getMemberState() != newMemberState.s) {
+ severe() << "Expected to enter state " << newMemberState << " but am now in "
+ << getMemberState();
+ invariant(false);
}
+ log() << newMemberState;
+}
- const MemberConfig* TopologyCoordinatorImpl::_currentPrimaryMember() const {
- if (_currentPrimaryIndex == -1)
- return NULL;
-
- return &(_rsConfig.getMemberAt(_currentPrimaryIndex));
- }
-
- void TopologyCoordinatorImpl::prepareStatusResponse(
- const ReplicationExecutor::CallbackArgs& data,
- Date_t now,
- unsigned selfUptime,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
- if (data.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- // output for each member
- vector<BSONObj> membersOut;
- const MemberState myState = getMemberState();
+void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) {
+ if (primaryIndex == _selfIndex) {
+ changeMemberState_forTest(MemberState::RS_PRIMARY);
+ } else {
+ if (_iAmPrimary()) {
+ changeMemberState_forTest(MemberState::RS_SECONDARY);
+ }
+ if (primaryIndex != -1) {
+ ReplSetHeartbeatResponse hbResponse;
+ hbResponse.setState(MemberState::RS_PRIMARY);
+ hbResponse.setElectionTime(Timestamp());
+ hbResponse.setOpTime(_hbdata[primaryIndex].getOpTime());
+ hbResponse.setSyncingTo(HostAndPort());
+ hbResponse.setHbMsg("");
+ _hbdata[primaryIndex].setUpValues(_hbdata[primaryIndex].getLastHeartbeat(),
+ _rsConfig.getMemberAt(primaryIndex).getHostAndPort(),
+ hbResponse);
+ }
+ _currentPrimaryIndex = primaryIndex;
+ }
+}
+
+const MemberConfig* TopologyCoordinatorImpl::_currentPrimaryMember() const {
+ if (_currentPrimaryIndex == -1)
+ return NULL;
+
+ return &(_rsConfig.getMemberAt(_currentPrimaryIndex));
+}
+
+void TopologyCoordinatorImpl::prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data,
+ Date_t now,
+ unsigned selfUptime,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (data.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ // output for each member
+ vector<BSONObj> membersOut;
+ const MemberState myState = getMemberState();
+
+ if (_selfIndex == -1) {
+ // We're REMOVED or have an invalid config
+ response->append("state", static_cast<int>(myState.s));
+ response->append("stateStr", myState.toString());
+ response->append("uptime", selfUptime);
+
+ BSONObjBuilder opTime(response->subobjStart("optime"));
+ opTime.append("ts", lastOpApplied.getTimestamp());
+ opTime.append("term", lastOpApplied.getTerm());
+ opTime.done();
+
+ response->appendDate("optimeDate",
+ Date_t::fromDurationSinceEpoch(Seconds(lastOpApplied.getSecs())));
+ if (_maintenanceModeCalls) {
+ response->append("maintenanceMode", _maintenanceModeCalls);
+ }
+ std::string s = _getHbmsg(now);
+ if (!s.empty())
+ response->append("infoMessage", s);
+ *result = Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Our replica set config is invalid or we are not a member of it");
+ return;
+ }
+
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ // add self
+ BSONObjBuilder bb;
+ bb.append("_id", _selfConfig().getId());
+ bb.append("name", _selfConfig().getHostAndPort().toString());
+ bb.append("health", 1.0);
+ bb.append("state", static_cast<int>(myState.s));
+ bb.append("stateStr", myState.toString());
+ bb.append("uptime", selfUptime);
+ if (!_selfConfig().isArbiter()) {
+ BSONObjBuilder opTime(bb.subobjStart("optime"));
+ opTime.append("ts", lastOpApplied.getTimestamp());
+ opTime.append("term", lastOpApplied.getTerm());
+ opTime.done();
+
+ bb.appendDate("optimeDate",
+ Date_t::fromDurationSinceEpoch(Seconds(lastOpApplied.getSecs())));
+ }
+
+ if (!_syncSource.empty() && !_iAmPrimary()) {
+ bb.append("syncingTo", _syncSource.toString());
+ }
- if (_selfIndex == -1) {
- // We're REMOVED or have an invalid config
- response->append("state", static_cast<int>(myState.s));
- response->append("stateStr", myState.toString());
- response->append("uptime", selfUptime);
-
- BSONObjBuilder opTime(response->subobjStart("optime"));
- opTime.append("ts", lastOpApplied.getTimestamp());
- opTime.append("term", lastOpApplied.getTerm());
- opTime.done();
-
- response->appendDate("optimeDate",
- Date_t::fromDurationSinceEpoch(Seconds(lastOpApplied.getSecs())));
if (_maintenanceModeCalls) {
- response->append("maintenanceMode", _maintenanceModeCalls);
+ bb.append("maintenanceMode", _maintenanceModeCalls);
}
- std::string s = _getHbmsg(now);
- if( !s.empty() )
- response->append("infoMessage", s);
- *result = Status(ErrorCodes::InvalidReplicaSetConfig,
- "Our replica set config is invalid or we are not a member of it");
- return;
- }
-
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- // add self
- BSONObjBuilder bb;
- bb.append("_id", _selfConfig().getId());
- bb.append("name", _selfConfig().getHostAndPort().toString());
- bb.append("health", 1.0);
- bb.append("state", static_cast<int>(myState.s));
- bb.append("stateStr", myState.toString());
- bb.append("uptime", selfUptime);
- if (!_selfConfig().isArbiter()) {
- BSONObjBuilder opTime(bb.subobjStart("optime"));
- opTime.append("ts", lastOpApplied.getTimestamp());
- opTime.append("term", lastOpApplied.getTerm());
- opTime.done();
-
- bb.appendDate("optimeDate",
- Date_t::fromDurationSinceEpoch(Seconds(lastOpApplied.getSecs())));
- }
-
- if (!_syncSource.empty() && !_iAmPrimary()) {
- bb.append("syncingTo", _syncSource.toString());
- }
-
- if (_maintenanceModeCalls) {
- bb.append("maintenanceMode", _maintenanceModeCalls);
- }
-
- std::string s = _getHbmsg(now);
- if( !s.empty() )
- bb.append("infoMessage", s);
- if (myState.primary()) {
- bb.append("electionTime", _electionTime);
- bb.appendDate("electionDate",
- Date_t::fromDurationSinceEpoch(Seconds(_electionTime.getSecs())));
- }
- bb.appendIntOrLL("configVersion", _rsConfig.getConfigVersion());
- bb.append("self", true);
- membersOut.push_back(bb.obj());
+ std::string s = _getHbmsg(now);
+ if (!s.empty())
+ bb.append("infoMessage", s);
+
+ if (myState.primary()) {
+ bb.append("electionTime", _electionTime);
+ bb.appendDate("electionDate",
+ Date_t::fromDurationSinceEpoch(Seconds(_electionTime.getSecs())));
+ }
+ bb.appendIntOrLL("configVersion", _rsConfig.getConfigVersion());
+ bb.append("self", true);
+ membersOut.push_back(bb.obj());
+ } else {
+ // add non-self member
+ const MemberConfig& itConfig = _rsConfig.getMemberAt(itIndex);
+ BSONObjBuilder bb;
+ bb.append("_id", itConfig.getId());
+ bb.append("name", itConfig.getHostAndPort().toString());
+ double h = it->getHealth();
+ bb.append("health", h);
+ const MemberState state = it->getState();
+ bb.append("state", static_cast<int>(state.s));
+ if (h == 0) {
+ // if we can't connect the state info is from the past
+ // and could be confusing to show
+ bb.append("stateStr", "(not reachable/healthy)");
+ } else {
+ bb.append("stateStr", it->getState().toString());
+ }
+
+ const unsigned int uptime = static_cast<unsigned int>((
+ it->getUpSince() != Date_t() ? durationCount<Seconds>(now - it->getUpSince()) : 0));
+ bb.append("uptime", uptime);
+ if (!itConfig.isArbiter()) {
+ BSONObjBuilder opTime(bb.subobjStart("optime"));
+ opTime.append("ts", it->getOpTime().getTimestamp());
+ opTime.append("term", it->getOpTime().getTerm());
+ opTime.done();
+
+ bb.appendDate("optimeDate",
+ Date_t::fromDurationSinceEpoch(Seconds(it->getOpTime().getSecs())));
+ }
+ bb.appendDate("lastHeartbeat", it->getLastHeartbeat());
+ bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv());
+ const int ping = _getPing(itConfig.getHostAndPort());
+ if (ping != -1) {
+ bb.append("pingMs", ping);
+ std::string s = it->getLastHeartbeatMsg();
+ if (!s.empty())
+ bb.append("lastHeartbeatMessage", s);
+ }
+ if (it->hasAuthIssue()) {
+ bb.append("authenticated", false);
+ }
+ const HostAndPort& syncSource = it->getSyncSource();
+ if (!syncSource.empty() && !state.primary()) {
+ bb.append("syncingTo", syncSource.toString());
}
- else {
- // add non-self member
- const MemberConfig& itConfig = _rsConfig.getMemberAt(itIndex);
- BSONObjBuilder bb;
- bb.append("_id", itConfig.getId());
- bb.append("name", itConfig.getHostAndPort().toString());
- double h = it->getHealth();
- bb.append("health", h);
- const MemberState state = it->getState();
- bb.append("state", static_cast<int>(state.s));
- if( h == 0 ) {
- // if we can't connect the state info is from the past
- // and could be confusing to show
- bb.append("stateStr", "(not reachable/healthy)");
- }
- else {
- bb.append("stateStr", it->getState().toString());
- }
-
- const unsigned int uptime = static_cast<unsigned int>(
- (it->getUpSince() != Date_t()?
- durationCount<Seconds>(now - it->getUpSince()) :
- 0));
- bb.append("uptime", uptime);
- if (!itConfig.isArbiter()) {
- BSONObjBuilder opTime(bb.subobjStart("optime"));
- opTime.append("ts", it->getOpTime().getTimestamp());
- opTime.append("term", it->getOpTime().getTerm());
- opTime.done();
-
- bb.appendDate("optimeDate",
- Date_t::fromDurationSinceEpoch(Seconds(it->getOpTime().getSecs())));
- }
- bb.appendDate("lastHeartbeat", it->getLastHeartbeat());
- bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv());
- const int ping = _getPing(itConfig.getHostAndPort());
- if (ping != -1) {
- bb.append("pingMs", ping);
- std::string s = it->getLastHeartbeatMsg();
- if( !s.empty() )
- bb.append("lastHeartbeatMessage", s);
- }
- if (it->hasAuthIssue()) {
- bb.append("authenticated", false);
- }
- const HostAndPort& syncSource = it->getSyncSource();
- if (!syncSource.empty() && !state.primary()) {
- bb.append("syncingTo", syncSource.toString());
- }
- if (state == MemberState::RS_PRIMARY) {
- bb.append("electionTime", it->getElectionTime());
- bb.appendDate("electionDate",
- Date_t::fromDurationSinceEpoch(
- Seconds(it->getElectionTime().getSecs())));
- }
- bb.appendIntOrLL("configVersion", it->getConfigVersion());
- membersOut.push_back(bb.obj());
+ if (state == MemberState::RS_PRIMARY) {
+ bb.append("electionTime", it->getElectionTime());
+ bb.appendDate(
+ "electionDate",
+ Date_t::fromDurationSinceEpoch(Seconds(it->getElectionTime().getSecs())));
}
+ bb.appendIntOrLL("configVersion", it->getConfigVersion());
+ membersOut.push_back(bb.obj());
}
+ }
- // sort members bson
- sort(membersOut.begin(), membersOut.end());
-
- response->append("set",
- _rsConfig.isInitialized() ? _rsConfig.getReplSetName() : "");
- response->append("date", now);
- response->append("myState", myState.s);
+ // sort members bson
+ sort(membersOut.begin(), membersOut.end());
- // Add sync source info
- if (!_syncSource.empty() && !myState.primary() && !myState.removed()) {
- response->append("syncingTo", _syncSource.toString());
- }
+ response->append("set", _rsConfig.isInitialized() ? _rsConfig.getReplSetName() : "");
+ response->append("date", now);
+ response->append("myState", myState.s);
- response->append("members", membersOut);
- *result = Status::OK();
+ // Add sync source info
+ if (!_syncSource.empty() && !myState.primary() && !myState.removed()) {
+ response->append("syncingTo", _syncSource.toString());
}
- void TopologyCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
+ response->append("members", membersOut);
+ *result = Status::OK();
+}
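For orientation, a non-self member entry assembled by the loop above comes out roughly like this (all values invented; optional fields such as electionTime or lastHeartbeatMessage appear only when applicable):

    {
        "_id" : 1,
        "name" : "node2.example.net:27017",
        "health" : 1.0,
        "state" : 2,
        "stateStr" : "SECONDARY",
        "uptime" : 4123,
        "optime" : { "ts" : Timestamp(1436465565, 1), "term" : 3 },
        "optimeDate" : ISODate("2015-07-09T18:12:45Z"),
        "lastHeartbeat" : ISODate("2015-07-09T18:13:01Z"),
        "lastHeartbeatRecv" : ISODate("2015-07-09T18:13:02Z"),
        "pingMs" : 1,
        "syncingTo" : "node1.example.net:27017",
        "configVersion" : 7
    }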
- const MemberState myState = getMemberState();
- if (!_rsConfig.isInitialized() || myState.removed()) {
- response->markAsNoConfig();
- return;
- }
-
- response->setReplSetName(_rsConfig.getReplSetName());
- response->setReplSetVersion(_rsConfig.getConfigVersion());
- response->setIsMaster(myState.primary());
- response->setIsSecondary(myState.secondary());
+void TopologyCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
+ const MemberState myState = getMemberState();
+ if (!_rsConfig.isInitialized() || myState.removed()) {
+ response->markAsNoConfig();
+ return;
+ }
- {
- for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
- it != _rsConfig.membersEnd(); ++it) {
- if (it->isHidden() || it->getSlaveDelay() > Seconds{0}) {
- continue;
- }
+ response->setReplSetName(_rsConfig.getReplSetName());
+ response->setReplSetVersion(_rsConfig.getConfigVersion());
+ response->setIsMaster(myState.primary());
+ response->setIsSecondary(myState.secondary());
- if (it->isElectable()) {
- response->addHost(it->getHostAndPort());
- }
- else if (it->isArbiter()) {
- response->addArbiter(it->getHostAndPort());
- }
- else {
- response->addPassive(it->getHostAndPort());
- }
+ {
+ for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
+ it != _rsConfig.membersEnd();
+ ++it) {
+ if (it->isHidden() || it->getSlaveDelay() > Seconds{0}) {
+ continue;
}
- }
-
- const MemberConfig* curPrimary = _currentPrimaryMember();
- if (curPrimary) {
- response->setPrimary(curPrimary->getHostAndPort());
- }
- const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
- if (selfConfig.isArbiter()) {
- response->setIsArbiterOnly(true);
- }
- else if (selfConfig.getPriority() == 0) {
- response->setIsPassive(true);
- }
- if (selfConfig.getSlaveDelay().count()) {
- response->setSlaveDelay(selfConfig.getSlaveDelay());
- }
- if (selfConfig.isHidden()) {
- response->setIsHidden(true);
- }
- if (!selfConfig.shouldBuildIndexes()) {
- response->setShouldBuildIndexes(false);
- }
- const ReplicaSetTagConfig tagConfig = _rsConfig.getTagConfig();
- if (selfConfig.hasTags(tagConfig)) {
- for (MemberConfig::TagIterator tag = selfConfig.tagsBegin();
- tag != selfConfig.tagsEnd(); ++tag) {
- std::string tagKey = tagConfig.getTagKey(*tag);
- if (tagKey[0] == '$') {
- // Filter out internal tags
- continue;
- }
- response->addTag(tagKey, tagConfig.getTagValue(*tag));
+ if (it->isElectable()) {
+ response->addHost(it->getHostAndPort());
+ } else if (it->isArbiter()) {
+ response->addArbiter(it->getHostAndPort());
+ } else {
+ response->addPassive(it->getHostAndPort());
}
}
- response->setMe(selfConfig.getHostAndPort());
- if (_iAmPrimary()) {
- response->setElectionId(_electionId);
- }
}
- void TopologyCoordinatorImpl::prepareFreezeResponse(
- Date_t now, int secs, BSONObjBuilder* response) {
-
- if (secs == 0) {
- _stepDownUntil = now;
- log() << "'unfreezing'";
- response->append("info", "unfreezing");
-
- if (_followerMode == MemberState::RS_SECONDARY &&
- _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- // If we are a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate now that our stepdown period
- // is no longer active, in leiu of heartbeats.
- _role = Role::candidate;
- }
- }
- else {
- if ( secs == 1 )
- response->append("warning", "you really want to freeze for only 1 second?");
+ const MemberConfig* curPrimary = _currentPrimaryMember();
+ if (curPrimary) {
+ response->setPrimary(curPrimary->getHostAndPort());
+ }
- if (!_iAmPrimary()) {
- _stepDownUntil = std::max(_stepDownUntil, now + Seconds(secs));
- log() << "'freezing' for " << secs << " seconds";
- }
- else {
- log() << "received freeze command but we are primary";
+ const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
+ if (selfConfig.isArbiter()) {
+ response->setIsArbiterOnly(true);
+ } else if (selfConfig.getPriority() == 0) {
+ response->setIsPassive(true);
+ }
+ if (selfConfig.getSlaveDelay().count()) {
+ response->setSlaveDelay(selfConfig.getSlaveDelay());
+ }
+ if (selfConfig.isHidden()) {
+ response->setIsHidden(true);
+ }
+ if (!selfConfig.shouldBuildIndexes()) {
+ response->setShouldBuildIndexes(false);
+ }
+ const ReplicaSetTagConfig tagConfig = _rsConfig.getTagConfig();
+ if (selfConfig.hasTags(tagConfig)) {
+ for (MemberConfig::TagIterator tag = selfConfig.tagsBegin(); tag != selfConfig.tagsEnd();
+ ++tag) {
+ std::string tagKey = tagConfig.getTagKey(*tag);
+ if (tagKey[0] == '$') {
+ // Filter out internal tags
+ continue;
}
+ response->addTag(tagKey, tagConfig.getTagValue(*tag));
}
}
+ response->setMe(selfConfig.getHostAndPort());
+ if (_iAmPrimary()) {
+ response->setElectionId(_electionId);
+ }
+}
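The host bucketing above can be read as a small pure function: hidden and slave-delayed members are never advertised, electable members go in hosts, arbiters in arbiters, and the remaining priority-0 data members in passives. A standalone sketch with a hypothetical Member type:

    #include <string>
    #include <vector>

    struct Member {
        std::string hostAndPort;
        bool hidden = false;
        bool slaveDelayed = false;
        bool electable = false;
        bool arbiter = false;
    };

    struct IsMasterHosts {
        std::vector<std::string> hosts;     // electable data members
        std::vector<std::string> arbiters;  // vote-only members
        std::vector<std::string> passives;  // priority-0 data members
    };

    IsMasterHosts bucketMembers(const std::vector<Member>& members) {
        IsMasterHosts out;
        for (const Member& m : members) {
            if (m.hidden || m.slaveDelayed) {
                continue;  // never shown to clients
            }
            if (m.electable) {
                out.hosts.push_back(m.hostAndPort);
            } else if (m.arbiter) {
                out.arbiters.push_back(m.hostAndPort);
            } else {
                out.passives.push_back(m.hostAndPort);
            }
        }
        return out;
    }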
- bool TopologyCoordinatorImpl::becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) {
- if (_stepDownUntil > now) {
- return false;
- }
+void TopologyCoordinatorImpl::prepareFreezeResponse(Date_t now,
+ int secs,
+ BSONObjBuilder* response) {
+ if (secs == 0) {
+ _stepDownUntil = now;
+ log() << "'unfreezing'";
+ response->append("info", "unfreezing");
- if (_followerMode == MemberState::RS_SECONDARY &&
- _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- // If the new config describes a one-node replica set, we're the one member,
+ if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ // If we are a one-node replica set, we're the one member,
// we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate, in leiu of heartbeats.
+ // we must transition to candidate now that our stepdown period
+ // is no longer active, in lieu of heartbeats.
_role = Role::candidate;
- return true;
}
- return false;
- }
+ } else {
+ if (secs == 1)
+ response->append("warning", "you really want to freeze for only 1 second?");
- void TopologyCoordinatorImpl::setElectionSleepUntil(Date_t newTime) {
- if (_electionSleepUntil < newTime) {
- _electionSleepUntil = newTime;
+ if (!_iAmPrimary()) {
+ _stepDownUntil = std::max(_stepDownUntil, now + Seconds(secs));
+ log() << "'freezing' for " << secs << " seconds";
+ } else {
+ log() << "received freeze command but we are primary";
}
}
+}
- Timestamp TopologyCoordinatorImpl::getElectionTime() const {
- return _electionTime;
- }
-
- OID TopologyCoordinatorImpl::getElectionId() const {
- return _electionId;
- }
-
- int TopologyCoordinatorImpl::getCurrentPrimaryIndex() const {
- return _currentPrimaryIndex;
+bool TopologyCoordinatorImpl::becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) {
+ if (_stepDownUntil > now) {
+ return false;
}
- Date_t TopologyCoordinatorImpl::getStepDownTime() const {
- return _stepDownUntil;
+ if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ // If the new config describes a one-node replica set, we're the one member,
+ // we're electable, and we are currently in followerMode SECONDARY,
+ // we must transition to candidate, in lieu of heartbeats.
+ _role = Role::candidate;
+ return true;
}
-
- void TopologyCoordinatorImpl::_updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now) {
- std::vector<MemberHeartbeatData> oldHeartbeats;
- _hbdata.swap(oldHeartbeats);
-
- int index = 0;
- for (ReplicaSetConfig::MemberIterator it = newConfig.membersBegin();
- it != newConfig.membersEnd();
- ++it, ++index) {
- const MemberConfig& newMemberConfig = *it;
- // TODO: C++11: use emplace_back()
- if (index == selfIndex) {
- // Insert placeholder for ourself, though we will never consult it.
- _hbdata.push_back(MemberHeartbeatData());
- }
- else {
- MemberHeartbeatData newHeartbeatData;
- for (int oldIndex = 0; oldIndex < _rsConfig.getNumMembers(); ++oldIndex) {
- const MemberConfig& oldMemberConfig = _rsConfig.getMemberAt(oldIndex);
- if (oldMemberConfig.getId() == newMemberConfig.getId() &&
- oldMemberConfig.getHostAndPort() == newMemberConfig.getHostAndPort()) {
- // This member existed in the old config with the same member ID and
- // HostAndPort, so copy its heartbeat data over.
- newHeartbeatData = oldHeartbeats[oldIndex];
- break;
- }
+ return false;
+}
+
+void TopologyCoordinatorImpl::setElectionSleepUntil(Date_t newTime) {
+ if (_electionSleepUntil < newTime) {
+ _electionSleepUntil = newTime;
+ }
+}
+
+Timestamp TopologyCoordinatorImpl::getElectionTime() const {
+ return _electionTime;
+}
+
+OID TopologyCoordinatorImpl::getElectionId() const {
+ return _electionId;
+}
+
+int TopologyCoordinatorImpl::getCurrentPrimaryIndex() const {
+ return _currentPrimaryIndex;
+}
+
+Date_t TopologyCoordinatorImpl::getStepDownTime() const {
+ return _stepDownUntil;
+}
+
+void TopologyCoordinatorImpl::_updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now) {
+ std::vector<MemberHeartbeatData> oldHeartbeats;
+ _hbdata.swap(oldHeartbeats);
+
+ int index = 0;
+ for (ReplicaSetConfig::MemberIterator it = newConfig.membersBegin();
+ it != newConfig.membersEnd();
+ ++it, ++index) {
+ const MemberConfig& newMemberConfig = *it;
+ // TODO: C++11: use emplace_back()
+ if (index == selfIndex) {
+ // Insert placeholder for ourself, though we will never consult it.
+ _hbdata.push_back(MemberHeartbeatData());
+ } else {
+ MemberHeartbeatData newHeartbeatData;
+ for (int oldIndex = 0; oldIndex < _rsConfig.getNumMembers(); ++oldIndex) {
+ const MemberConfig& oldMemberConfig = _rsConfig.getMemberAt(oldIndex);
+ if (oldMemberConfig.getId() == newMemberConfig.getId() &&
+ oldMemberConfig.getHostAndPort() == newMemberConfig.getHostAndPort()) {
+ // This member existed in the old config with the same member ID and
+ // HostAndPort, so copy its heartbeat data over.
+ newHeartbeatData = oldHeartbeats[oldIndex];
+ break;
}
- _hbdata.push_back(newHeartbeatData);
}
+ _hbdata.push_back(newHeartbeatData);
}
}
+}
- // This function installs a new config object and recreates MemberHeartbeatData objects
- // that reflect the new config.
- void TopologyCoordinatorImpl::updateConfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now,
- const OpTime& lastOpApplied) {
- invariant(_role != Role::candidate);
- invariant(selfIndex < newConfig.getNumMembers());
+// This function installs a new config object and recreates MemberHeartbeatData objects
+// that reflect the new config.
+void TopologyCoordinatorImpl::updateConfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now,
+ const OpTime& lastOpApplied) {
+ invariant(_role != Role::candidate);
+ invariant(selfIndex < newConfig.getNumMembers());
- _updateHeartbeatDataForReconfig(newConfig, selfIndex, now);
- _rsConfig = newConfig;
- _selfIndex = selfIndex;
- _forceSyncSourceIndex = -1;
+ _updateHeartbeatDataForReconfig(newConfig, selfIndex, now);
+ _rsConfig = newConfig;
+ _selfIndex = selfIndex;
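+    // A reconfig also clears any sync source override requested via replSetSyncFrom.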
+ _forceSyncSourceIndex = -1;
- if (_role == Role::leader) {
- if (_selfIndex == -1) {
- log() << "Could not remain primary because no longer a member of the replica set";
- }
- else if (!_selfConfig().isElectable()) {
- log() <<" Could not remain primary because no longer electable";
- }
- else {
- // Don't stepdown if you don't have to.
- _currentPrimaryIndex = _selfIndex;
- return;
- }
- _role = Role::follower;
+ if (_role == Role::leader) {
+ if (_selfIndex == -1) {
+ log() << "Could not remain primary because no longer a member of the replica set";
+ } else if (!_selfConfig().isElectable()) {
+            log() << "Could not remain primary because no longer electable";
+ } else {
+ // Don't stepdown if you don't have to.
+ _currentPrimaryIndex = _selfIndex;
+ return;
}
+ _role = Role::follower;
+ }
- // By this point we know we are in Role::follower
- _currentPrimaryIndex = -1; // force secondaries to re-detect who the primary is
- _stepDownPending = false;
-
- if (_followerMode == MemberState::RS_SECONDARY &&
- _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- // If the new config describes a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate, in leiu of heartbeats.
- _role = Role::candidate;
- }
+ // By this point we know we are in Role::follower
+ _currentPrimaryIndex = -1; // force secondaries to re-detect who the primary is
+ _stepDownPending = false;
+
+ if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ // If the new config describes a one-node replica set, we're the one member,
+ // we're electable, and we are currently in followerMode SECONDARY,
+        // we must transition to candidate, in lieu of heartbeats.
+ _role = Role::candidate;
}
- std::string TopologyCoordinatorImpl::_getHbmsg(Date_t now) const {
- // ignore messages over 2 minutes old
- if ((now - _hbmsgTime) > Seconds{120}) {
- return "";
- }
- return _hbmsg;
+}
+
+std::string TopologyCoordinatorImpl::_getHbmsg(Date_t now) const {
+ // ignore messages over 2 minutes old
+ if ((now - _hbmsgTime) > Seconds{120}) {
+ return "";
}
+ return _hbmsg;
+}
- void TopologyCoordinatorImpl::setMyHeartbeatMessage(const Date_t now,
- const std::string& message) {
- _hbmsgTime = now;
- _hbmsg = message;
- }
+void TopologyCoordinatorImpl::setMyHeartbeatMessage(const Date_t now, const std::string& message) {
+ _hbmsgTime = now;
+ _hbmsg = message;
+}
+
+const MemberConfig& TopologyCoordinatorImpl::_selfConfig() const {
+ return _rsConfig.getMemberAt(_selfIndex);
+}
- const MemberConfig& TopologyCoordinatorImpl::_selfConfig() const {
- return _rsConfig.getMemberAt(_selfIndex);
+TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnelectableReason(
+ int index, const OpTime& lastOpApplied) const {
+ invariant(index != _selfIndex);
+ const MemberConfig& memberConfig = _rsConfig.getMemberAt(index);
+ const MemberHeartbeatData& hbData = _hbdata[index];
+ UnelectableReasonMask result = None;
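+    // Accumulate every applicable reason as a bit in the mask so callers can
+    // report all of them at once.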
+ if (memberConfig.isArbiter()) {
+ result |= ArbiterIAm;
+ }
+ if (memberConfig.getPriority() <= 0) {
+ result |= NoPriority;
+ }
+ if (hbData.getState() != MemberState::RS_SECONDARY) {
+ result |= NotSecondary;
+ }
+ if (!_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) {
+ result |= NotCloseEnoughToLatestOptime;
+ }
+ if (hbData.up() && hbData.isUnelectable()) {
+ result |= RefusesToStand;
}
+ invariant(result || memberConfig.isElectable());
+ return result;
+}
- TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnelectableReason(
- int index,
- const OpTime& lastOpApplied) const {
- invariant(index != _selfIndex);
- const MemberConfig& memberConfig = _rsConfig.getMemberAt(index);
- const MemberHeartbeatData& hbData = _hbdata[index];
- UnelectableReasonMask result = None;
- if (memberConfig.isArbiter()) {
- result |= ArbiterIAm;
- }
- if (memberConfig.getPriority() <= 0) {
- result |= NoPriority;
- }
- if (hbData.getState() != MemberState::RS_SECONDARY) {
- result |= NotSecondary;
- }
- if (!_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) {
- result |= NotCloseEnoughToLatestOptime;
- }
- if (hbData.up() && hbData.isUnelectable()) {
- result |= RefusesToStand;
- }
- invariant(result || memberConfig.isElectable());
+TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUnelectableReason(
+ const Date_t now, const OpTime& lastApplied) const {
+ UnelectableReasonMask result = None;
+ if (lastApplied.isNull()) {
+ result |= NoData;
+ }
+ if (!_aMajoritySeemsToBeUp()) {
+ result |= CannotSeeMajority;
+ }
+ if (_selfIndex == -1) {
+ result |= NotInitialized;
return result;
}
+ if (_selfConfig().isArbiter()) {
+ result |= ArbiterIAm;
+ }
+ if (_selfConfig().getPriority() <= 0) {
+ result |= NoPriority;
+ }
+ if (_stepDownUntil > now) {
+ result |= StepDownPeriodActive;
+ }
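+    // A vote lease recently granted to another member is still live, which
+    // blocks our own candidacy until it expires.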
+ if (_voteLease.whoId != -1 && _voteLease.whoId != _rsConfig.getMemberAt(_selfIndex).getId() &&
+ _voteLease.when + VoteLease::leaseTime >= now) {
+ result |= VotedTooRecently;
+ }
- TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUnelectableReason(
- const Date_t now,
- const OpTime& lastApplied) const {
-
- UnelectableReasonMask result = None;
- if (lastApplied.isNull()) {
- result |= NoData;
- }
- if (!_aMajoritySeemsToBeUp()) {
- result |= CannotSeeMajority;
- }
- if (_selfIndex == -1) {
- result |= NotInitialized;
- return result;
- }
- if (_selfConfig().isArbiter()) {
- result |= ArbiterIAm;
- }
- if (_selfConfig().getPriority() <= 0) {
- result |= NoPriority;
- }
- if (_stepDownUntil > now) {
- result |= StepDownPeriodActive;
- }
- if (_voteLease.whoId != -1 &&
- _voteLease.whoId !=_rsConfig.getMemberAt(_selfIndex).getId() &&
- _voteLease.when + VoteLease::leaseTime >= now) {
- result |= VotedTooRecently;
- }
-
- // Cannot be electable unless secondary or already primary
- if (!getMemberState().secondary() && !_iAmPrimary()) {
- result |= NotSecondary;
- }
- if (!_isOpTimeCloseEnoughToLatestToElect(lastApplied, lastApplied)) {
- result |= NotCloseEnoughToLatestOptime;
- }
- return result;
+ // Cannot be electable unless secondary or already primary
+ if (!getMemberState().secondary() && !_iAmPrimary()) {
+ result |= NotSecondary;
+ }
+ if (!_isOpTimeCloseEnoughToLatestToElect(lastApplied, lastApplied)) {
+ result |= NotCloseEnoughToLatestOptime;
}
+ return result;
+}
- std::string TopologyCoordinatorImpl::_getUnelectableReasonString(
- const UnelectableReasonMask ur) const {
- invariant(ur);
- str::stream ss;
- bool hasWrittenToStream = false;
- if (ur & NoData) {
- ss << "node has no applied oplog entries";
- hasWrittenToStream = true;
- }
- if (ur & VotedTooRecently) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "I recently voted for " << _voteLease.whoHostAndPort.toString();
- }
- if (ur & CannotSeeMajority) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "I cannot see a majority";
- }
- if (ur & ArbiterIAm) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member is an arbiter";
+std::string TopologyCoordinatorImpl::_getUnelectableReasonString(
+ const UnelectableReasonMask ur) const {
+ invariant(ur);
+ str::stream ss;
+ bool hasWrittenToStream = false;
+ if (ur & NoData) {
+ ss << "node has no applied oplog entries";
+ hasWrittenToStream = true;
+ }
+ if (ur & VotedTooRecently) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NoPriority) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member has zero priority";
+ hasWrittenToStream = true;
+ ss << "I recently voted for " << _voteLease.whoHostAndPort.toString();
+ }
+ if (ur & CannotSeeMajority) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & StepDownPeriodActive) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "I am still waiting for stepdown period to end at " <<
- dateToISOStringLocal(_stepDownUntil);
+ hasWrittenToStream = true;
+ ss << "I cannot see a majority";
+ }
+ if (ur & ArbiterIAm) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NotSecondary) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member is not currently a secondary";
+ hasWrittenToStream = true;
+ ss << "member is an arbiter";
+ }
+ if (ur & NoPriority) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NotCloseEnoughToLatestOptime) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member is more than 10 seconds behind the most up-to-date member";
+ hasWrittenToStream = true;
+ ss << "member has zero priority";
+ }
+ if (ur & StepDownPeriodActive) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NotInitialized) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "node is not a member of a valid replica set configuration";
+ hasWrittenToStream = true;
+ ss << "I am still waiting for stepdown period to end at "
+ << dateToISOStringLocal(_stepDownUntil);
+ }
+ if (ur & NotSecondary) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & RefusesToStand) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "most recent heartbeat indicates node will not stand for election";
+ hasWrittenToStream = true;
+ ss << "member is not currently a secondary";
+ }
+ if (ur & NotCloseEnoughToLatestOptime) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (!hasWrittenToStream) {
- severe() << "Invalid UnelectableReasonMask value 0x" << integerToHex(ur);
- fassertFailed(26011);
+ hasWrittenToStream = true;
+ ss << "member is more than 10 seconds behind the most up-to-date member";
+ }
+ if (ur & NotInitialized) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- ss << " (mask 0x" << integerToHex(ur) << ")";
- return ss;
+ hasWrittenToStream = true;
+ ss << "node is not a member of a valid replica set configuration";
}
-
- int TopologyCoordinatorImpl::_getPing(const HostAndPort& host) {
- return _pings[host].getMillis();
+ if (ur & RefusesToStand) {
+ if (hasWrittenToStream) {
+ ss << "; ";
+ }
+ hasWrittenToStream = true;
+ ss << "most recent heartbeat indicates node will not stand for election";
}
-
- void TopologyCoordinatorImpl::_setElectionTime(const Timestamp& newElectionTime) {
- _electionTime = newElectionTime;
+ if (!hasWrittenToStream) {
+ severe() << "Invalid UnelectableReasonMask value 0x" << integerToHex(ur);
+ fassertFailed(26011);
}
+ ss << " (mask 0x" << integerToHex(ur) << ")";
+ return ss;
+}
- int TopologyCoordinatorImpl::_getTotalPings() {
- PingMap::iterator it = _pings.begin();
- PingMap::iterator end = _pings.end();
- int totalPings = 0;
- while (it != end) {
- totalPings += it->second.getCount();
- it++;
- }
- return totalPings;
- }
+int TopologyCoordinatorImpl::_getPing(const HostAndPort& host) {
+ return _pings[host].getMillis();
+}
- std::vector<HostAndPort> TopologyCoordinatorImpl::getMaybeUpHostAndPorts() const {
- std::vector<HostAndPort> upHosts;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- continue; // skip ourselves
- }
- if (!it->maybeUp()) {
- continue; // skip DOWN nodes
- }
+void TopologyCoordinatorImpl::_setElectionTime(const Timestamp& newElectionTime) {
+ _electionTime = newElectionTime;
+}
- upHosts.push_back(_rsConfig.getMemberAt(itIndex).getHostAndPort());
- }
- return upHosts;
+int TopologyCoordinatorImpl::_getTotalPings() {
+ PingMap::iterator it = _pings.begin();
+ PingMap::iterator end = _pings.end();
+ int totalPings = 0;
+ while (it != end) {
+ totalPings += it->second.getCount();
+ it++;
}
+ return totalPings;
+}
- bool TopologyCoordinatorImpl::voteForMyself(Date_t now) {
- if (_role != Role::candidate) {
- return false;
+std::vector<HostAndPort> TopologyCoordinatorImpl::getMaybeUpHostAndPorts() const {
+ std::vector<HostAndPort> upHosts;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ continue; // skip ourselves
}
- int selfId = _selfConfig().getId();
- if ((_voteLease.when + VoteLease::leaseTime >= now)
- && (_voteLease.whoId != selfId)) {
- log() << "not voting yea for " << selfId <<
- " voted for " << _voteLease.whoHostAndPort.toString() << ' ' <<
- durationCount<Seconds>(now - _voteLease.when) << " secs ago";
- return false;
+ if (!it->maybeUp()) {
+ continue; // skip DOWN nodes
}
- _voteLease.when = now;
- _voteLease.whoId = selfId;
- _voteLease.whoHostAndPort = _selfConfig().getHostAndPort();
- return true;
+
+ upHosts.push_back(_rsConfig.getMemberAt(itIndex).getHostAndPort());
}
+ return upHosts;
+}
- MemberState TopologyCoordinatorImpl::getMemberState() const {
- if (_selfIndex == -1) {
- if (_rsConfig.isInitialized()) {
- return MemberState::RS_REMOVED;
- }
- return MemberState::RS_STARTUP;
- }
- if (_role == Role::leader) {
- invariant(_currentPrimaryIndex == _selfIndex);
- return MemberState::RS_PRIMARY;
- }
- const MemberConfig& myConfig = _selfConfig();
- if (myConfig.isArbiter()) {
- return MemberState::RS_ARBITER;
- }
- if (((_maintenanceModeCalls > 0) || (_hasOnlyAuthErrorUpHeartbeats(_hbdata, _selfIndex)))
- && (_followerMode == MemberState::RS_SECONDARY)) {
- return MemberState::RS_RECOVERING;
- }
- return _followerMode;
+bool TopologyCoordinatorImpl::voteForMyself(Date_t now) {
+ if (_role != Role::candidate) {
+ return false;
}
+ int selfId = _selfConfig().getId();
+ if ((_voteLease.when + VoteLease::leaseTime >= now) && (_voteLease.whoId != selfId)) {
+ log() << "not voting yea for " << selfId << " voted for "
+ << _voteLease.whoHostAndPort.toString() << ' '
+ << durationCount<Seconds>(now - _voteLease.when) << " secs ago";
+ return false;
+ }
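+    // Record the self-vote in the lease; this also prevents us from voting for
+    // anyone else until the lease expires.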
+ _voteLease.when = now;
+ _voteLease.whoId = selfId;
+ _voteLease.whoHostAndPort = _selfConfig().getHostAndPort();
+ return true;
+}
- void TopologyCoordinatorImpl::processWinElection(
- OID electionId,
- Timestamp electionOpTime) {
- invariant(_role == Role::candidate);
- _electionTime = electionOpTime;
- _electionId = electionId;
- _role = Role::leader;
- _currentPrimaryIndex = _selfIndex;
- _syncSource = HostAndPort();
- _forceSyncSourceIndex = -1;
+MemberState TopologyCoordinatorImpl::getMemberState() const {
+ if (_selfIndex == -1) {
+ if (_rsConfig.isInitialized()) {
+ return MemberState::RS_REMOVED;
+ }
+ return MemberState::RS_STARTUP;
+ }
+ if (_role == Role::leader) {
+ invariant(_currentPrimaryIndex == _selfIndex);
+ return MemberState::RS_PRIMARY;
+ }
+ const MemberConfig& myConfig = _selfConfig();
+ if (myConfig.isArbiter()) {
+ return MemberState::RS_ARBITER;
}
+ if (((_maintenanceModeCalls > 0) || (_hasOnlyAuthErrorUpHeartbeats(_hbdata, _selfIndex))) &&
+ (_followerMode == MemberState::RS_SECONDARY)) {
+ return MemberState::RS_RECOVERING;
+ }
+ return _followerMode;
+}
- void TopologyCoordinatorImpl::processLoseElection() {
- invariant(_role == Role::candidate);
- const HostAndPort syncSourceAddress = getSyncSourceAddress();
- _electionTime = Timestamp(0, 0);
- _electionId = OID();
- _role = Role::follower;
+void TopologyCoordinatorImpl::processWinElection(OID electionId, Timestamp electionOpTime) {
+ invariant(_role == Role::candidate);
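+    // Winning turns the candidacy into leadership and clears the sync source,
+    // since a primary does not sync from anyone.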
+ _electionTime = electionOpTime;
+ _electionId = electionId;
+ _role = Role::leader;
+ _currentPrimaryIndex = _selfIndex;
+ _syncSource = HostAndPort();
+ _forceSyncSourceIndex = -1;
+}
- // Clear voteLease time, if we voted for ourselves in this election.
- // This will allow us to vote for others.
- if (_voteLease.whoId == _selfConfig().getId()) {
- _voteLease.when = Date_t();
- }
+void TopologyCoordinatorImpl::processLoseElection() {
+ invariant(_role == Role::candidate);
+ const HostAndPort syncSourceAddress = getSyncSourceAddress();
+ _electionTime = Timestamp(0, 0);
+ _electionId = OID();
+ _role = Role::follower;
+
+ // Clear voteLease time, if we voted for ourselves in this election.
+ // This will allow us to vote for others.
+ if (_voteLease.whoId == _selfConfig().getId()) {
+ _voteLease.when = Date_t();
}
+}
- bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, const OpTime& lastOpApplied) {
- bool canStepDown = force;
- for (int i = 0; !canStepDown && i < _rsConfig.getNumMembers(); ++i) {
- if (i == _selfIndex) {
- continue;
- }
- UnelectableReasonMask reason = _getUnelectableReason(i, lastOpApplied);
- if (!reason && _hbdata[i].getOpTime() >= lastOpApplied) {
- canStepDown = true;
- }
+bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, const OpTime& lastOpApplied) {
+ bool canStepDown = force;
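+    // Unless forced, only step down if some electable member has caught up to
+    // our last applied op; otherwise remain primary.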
+ for (int i = 0; !canStepDown && i < _rsConfig.getNumMembers(); ++i) {
+ if (i == _selfIndex) {
+ continue;
}
-
- if (!canStepDown) {
- return false;
+ UnelectableReasonMask reason = _getUnelectableReason(i, lastOpApplied);
+ if (!reason && _hbdata[i].getOpTime() >= lastOpApplied) {
+ canStepDown = true;
}
- _stepDownUntil = until;
- _stepDownSelfAndReplaceWith(-1);
- return true;
}
- void TopologyCoordinatorImpl::setFollowerMode(MemberState::MS newMode) {
- invariant(_role == Role::follower);
- switch (newMode) {
+ if (!canStepDown) {
+ return false;
+ }
+ _stepDownUntil = until;
+ _stepDownSelfAndReplaceWith(-1);
+ return true;
+}
+
+void TopologyCoordinatorImpl::setFollowerMode(MemberState::MS newMode) {
+ invariant(_role == Role::follower);
+ switch (newMode) {
case MemberState::RS_RECOVERING:
case MemberState::RS_ROLLBACK:
case MemberState::RS_SECONDARY:
@@ -2104,223 +1992,209 @@ namespace {
break;
default:
invariant(false);
- }
-
- if (_followerMode != MemberState::RS_SECONDARY) {
- return;
- }
-
- // When a single node replica set transitions to SECONDARY, we must check if we should
- // be a candidate here. This is necessary because a single node replica set has no
- // heartbeats that would normally change the role to candidate.
-
- if (_rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- _role = Role::candidate;
- }
}
- bool TopologyCoordinatorImpl::stepDownIfPending() {
- if (!_stepDownPending) {
- return false;
- }
-
- int remotePrimaryIndex = -1;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end(); ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- continue;
- }
-
- if (it->getState().primary() && it->up()) {
- if (remotePrimaryIndex != -1) {
- // two other nodes think they are primary (asynchronously polled)
- // -- wait for things to settle down.
- remotePrimaryIndex = -1;
- warning() << "two remote primaries (transiently)";
- break;
- }
- remotePrimaryIndex = itIndex;
- }
- }
- _stepDownSelfAndReplaceWith(remotePrimaryIndex);
- return true;
- }
-
- void TopologyCoordinatorImpl::_stepDownSelfAndReplaceWith(int newPrimary) {
- invariant(_role == Role::leader);
- invariant(_selfIndex != -1);
- invariant(_selfIndex != newPrimary);
- invariant(_selfIndex == _currentPrimaryIndex);
- _currentPrimaryIndex = newPrimary;
- _role = Role::follower;
- _stepDownPending = false;
+ if (_followerMode != MemberState::RS_SECONDARY) {
+ return;
}
- void TopologyCoordinatorImpl::adjustMaintenanceCountBy(int inc) {
- invariant(_role == Role::follower);
- _maintenanceModeCalls += inc;
- invariant(_maintenanceModeCalls >= 0);
- }
-
- int TopologyCoordinatorImpl::getMaintenanceCount() const {
- return _maintenanceModeCalls;
- }
+ // When a single node replica set transitions to SECONDARY, we must check if we should
+ // be a candidate here. This is necessary because a single node replica set has no
+ // heartbeats that would normally change the role to candidate.
- bool TopologyCoordinatorImpl::updateTerm(long long term) {
- if (term <= _term) {
- return false;
- }
- _term = term;
- return true;
+ if (_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
+ _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ _role = Role::candidate;
}
+}
- long long TopologyCoordinatorImpl::getTerm() const {
- return _term;
+bool TopologyCoordinatorImpl::stepDownIfPending() {
+ if (!_stepDownPending) {
+ return false;
}
- bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource,
- Date_t now) const {
- // Methodology:
- // If there exists a viable sync source member other than currentSource, whose oplog has
- // reached an optime greater than _maxSyncSourceLagSecs later than currentSource's, return
- // true.
-
- // If the user requested a sync source change, return true.
- if (_forceSyncSourceIndex != -1) {
- return true;
- }
-
- const int currentMemberIndex = _rsConfig.findMemberIndexByHostAndPort(currentSource);
- if (currentMemberIndex == -1) {
- return true;
- }
- invariant(currentMemberIndex != _selfIndex);
-
- OpTime currentOpTime = _hbdata[currentMemberIndex].getOpTime();
- if (currentOpTime.isNull()) {
- // Haven't received a heartbeat from the sync source yet, so can't tell if we should
- // change.
- return false;
+ int remotePrimaryIndex = -1;
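+    // Scan heartbeats for a single remote member that is up and claims to be
+    // primary; if two claim primacy, wait for the picture to settle.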
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ continue;
}
- unsigned int currentSecs = currentOpTime.getSecs();
- unsigned int goalSecs = currentSecs + _maxSyncSourceLagSecs.count();
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
- if (it->up() &&
- (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
- it->getState().readable() &&
- !_memberIsBlacklisted(candidateConfig, now) &&
- goalSecs < it->getOpTime().getSecs()) {
- log() << "changing sync target because current sync target's most recent OpTime is "
- << currentOpTime.toString() << " which is more than "
- << _maxSyncSourceLagSecs.count() << " seconds behind member "
- << candidateConfig.getHostAndPort().toString()
- << " whose most recent OpTime is " << it->getOpTime().toString();
- invariant(itIndex != _selfIndex);
- return true;
+ if (it->getState().primary() && it->up()) {
+ if (remotePrimaryIndex != -1) {
+                // two remote nodes think they are primary (asynchronously polled)
+                // -- wait for things to settle down.
+ remotePrimaryIndex = -1;
+ warning() << "two remote primaries (transiently)";
+ break;
}
+ remotePrimaryIndex = itIndex;
}
- return false;
}
+ _stepDownSelfAndReplaceWith(remotePrimaryIndex);
+ return true;
+}
- void TopologyCoordinatorImpl::prepareCursorResponseInfo(
- BSONObjBuilder* objBuilder,
- const OpTime& lastCommittedOpTime) const {
- objBuilder->append("term", _term);
- objBuilder->append("lastOpCommittedTimestamp", lastCommittedOpTime.getTimestamp());
- objBuilder->append("lastOpCommittedTerm", lastCommittedOpTime.getTerm());
- objBuilder->append("configVersion", _rsConfig.getConfigVersion());
- objBuilder->append("primaryId", _rsConfig.getMemberAt(_currentPrimaryIndex).getId());
- }
+void TopologyCoordinatorImpl::_stepDownSelfAndReplaceWith(int newPrimary) {
+ invariant(_role == Role::leader);
+ invariant(_selfIndex != -1);
+ invariant(_selfIndex != newPrimary);
+ invariant(_selfIndex == _currentPrimaryIndex);
+ _currentPrimaryIndex = newPrimary;
+ _role = Role::follower;
+ _stepDownPending = false;
+}
- void TopologyCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
- output->setConfig(_rsConfig);
- output->setHBData(_hbdata);
- output->setSelfIndex(_selfIndex);
- output->setPrimaryIndex(_currentPrimaryIndex);
- output->setSelfState(getMemberState());
- output->setSelfHeartbeatMessage(_hbmsg);
- }
+void TopologyCoordinatorImpl::adjustMaintenanceCountBy(int inc) {
+ invariant(_role == Role::follower);
+ _maintenanceModeCalls += inc;
+ invariant(_maintenanceModeCalls >= 0);
+}
- void TopologyCoordinatorImpl::processReplSetRequestVotes(
- const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response,
- const OpTime& lastAppliedOpTime) {
- response->setOk(true);
- response->setTerm(_term);
-
- if (args.getTerm() < _term) {
- response->setVoteGranted(false);
- response->setReason("candidate's term is lower than mine");
- }
- else if (args.getConfigVersion() != _rsConfig.getConfigVersion()) {
- response->setVoteGranted(false);
- response->setReason("candidate's config version differs from mine");
- }
- else if (args.getSetName() != _rsConfig.getReplSetName()) {
- response->setVoteGranted(false);
- response->setReason("candidate's set name differs from mine");
- }
- else if (args.getLastCommittedOp() < lastAppliedOpTime) {
- response->setVoteGranted(false);
- response->setReason("candidate's data is staler than mine");
- }
- else if (!args.isADryRun() && _lastVote.getTerm() == args.getTerm()) {
- response->setVoteGranted(false);
- response->setReason("already voted for another candidate this term");
- }
- else {
- if (!args.isADryRun()) {
- _lastVote.setTerm(args.getTerm());
- _lastVote.setCandidateId(args.getCandidateId());
- }
- response->setVoteGranted(true);
- }
+int TopologyCoordinatorImpl::getMaintenanceCount() const {
+ return _maintenanceModeCalls;
+}
+
+bool TopologyCoordinatorImpl::updateTerm(long long term) {
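+    // Terms only move forward; an equal or stale term is ignored.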
+ if (term <= _term) {
+ return false;
}
+ _term = term;
+ return true;
+}
- Status TopologyCoordinatorImpl::processReplSetDeclareElectionWinner(
- const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm) {
- *responseTerm = _term;
- if (args.getReplSetName() != _rsConfig.getReplSetName()) {
- return {ErrorCodes::BadValue, "replSet name does not match"};
- }
- else if (args.getTerm() < _term) {
- return {ErrorCodes::BadValue, "term has already passed"};
- }
- else if (args.getTerm() == _term && _currentPrimaryIndex > -1 &&
- args.getWinnerId() != _rsConfig.getMemberAt(_currentPrimaryIndex).getId()) {
- return {ErrorCodes::BadValue, "term already has a primary"};
- }
+long long TopologyCoordinatorImpl::getTerm() const {
+ return _term;
+}
- _currentPrimaryIndex = _rsConfig.findMemberIndexByConfigId(args.getWinnerId());
- return Status::OK();
- }
+bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource,
+ Date_t now) const {
+ // Methodology:
+ // If there exists a viable sync source member other than currentSource, whose oplog has
+    // reached an optime more than _maxSyncSourceLagSecs later than currentSource's, return
+ // true.
- void TopologyCoordinatorImpl::loadLastVote(const LastVote& lastVote) {
- _lastVote = lastVote;
+ // If the user requested a sync source change, return true.
+ if (_forceSyncSourceIndex != -1) {
+ return true;
}
- long long TopologyCoordinatorImpl::getTerm() {
- return _term;
+ const int currentMemberIndex = _rsConfig.findMemberIndexByHostAndPort(currentSource);
+ if (currentMemberIndex == -1) {
+ return true;
}
+ invariant(currentMemberIndex != _selfIndex);
- void TopologyCoordinatorImpl::incrementTerm() {
- _term++;
+ OpTime currentOpTime = _hbdata[currentMemberIndex].getOpTime();
+ if (currentOpTime.isNull()) {
+ // Haven't received a heartbeat from the sync source yet, so can't tell if we should
+ // change.
+ return false;
}
-
- void TopologyCoordinatorImpl::voteForMyselfV1() {
- _lastVote.setTerm(_term);
- _lastVote.setCandidateId(_selfConfig().getId());
+ unsigned int currentSecs = currentOpTime.getSecs();
+ unsigned int goalSecs = currentSecs + _maxSyncSourceLagSecs.count();
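+    // e.g. with a 30-second _maxSyncSourceLagSecs, a source whose newest optime
+    // is at second t is abandoned once an eligible member reports one past t + 30.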
+
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
+ if (it->up() &&
+ (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
+ it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) &&
+ goalSecs < it->getOpTime().getSecs()) {
+ log() << "changing sync target because current sync target's most recent OpTime is "
+ << currentOpTime.toString() << " which is more than "
+ << _maxSyncSourceLagSecs.count() << " seconds behind member "
+ << candidateConfig.getHostAndPort().toString() << " whose most recent OpTime is "
+ << it->getOpTime().toString();
+ invariant(itIndex != _selfIndex);
+ return true;
+ }
}
-
-} // namespace repl
-} // namespace mongo
+ return false;
+}
+
+void TopologyCoordinatorImpl::prepareCursorResponseInfo(BSONObjBuilder* objBuilder,
+ const OpTime& lastCommittedOpTime) const {
+ objBuilder->append("term", _term);
+ objBuilder->append("lastOpCommittedTimestamp", lastCommittedOpTime.getTimestamp());
+ objBuilder->append("lastOpCommittedTerm", lastCommittedOpTime.getTerm());
+ objBuilder->append("configVersion", _rsConfig.getConfigVersion());
+ objBuilder->append("primaryId", _rsConfig.getMemberAt(_currentPrimaryIndex).getId());
+}
+
+void TopologyCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
+ output->setConfig(_rsConfig);
+ output->setHBData(_hbdata);
+ output->setSelfIndex(_selfIndex);
+ output->setPrimaryIndex(_currentPrimaryIndex);
+ output->setSelfState(getMemberState());
+ output->setSelfHeartbeatMessage(_hbmsg);
+}
+
+void TopologyCoordinatorImpl::processReplSetRequestVotes(const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response,
+ const OpTime& lastAppliedOpTime) {
+ response->setOk(true);
+ response->setTerm(_term);
+
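+    // Grant the vote only when the candidate's term, config version, set name,
+    // and data freshness all check out and we have not already voted this term;
+    // dry runs skip the already-voted check and record nothing.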
+ if (args.getTerm() < _term) {
+ response->setVoteGranted(false);
+ response->setReason("candidate's term is lower than mine");
+ } else if (args.getConfigVersion() != _rsConfig.getConfigVersion()) {
+ response->setVoteGranted(false);
+ response->setReason("candidate's config version differs from mine");
+ } else if (args.getSetName() != _rsConfig.getReplSetName()) {
+ response->setVoteGranted(false);
+ response->setReason("candidate's set name differs from mine");
+ } else if (args.getLastCommittedOp() < lastAppliedOpTime) {
+ response->setVoteGranted(false);
+ response->setReason("candidate's data is staler than mine");
+ } else if (!args.isADryRun() && _lastVote.getTerm() == args.getTerm()) {
+ response->setVoteGranted(false);
+ response->setReason("already voted for another candidate this term");
+ } else {
+ if (!args.isADryRun()) {
+ _lastVote.setTerm(args.getTerm());
+ _lastVote.setCandidateId(args.getCandidateId());
+ }
+ response->setVoteGranted(true);
+ }
+}
+
+Status TopologyCoordinatorImpl::processReplSetDeclareElectionWinner(
+ const ReplSetDeclareElectionWinnerArgs& args, long long* responseTerm) {
+ *responseTerm = _term;
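+    // Reject declarations from other sets, stale terms, or a term that already
+    // has a different primary; otherwise record the winner as the current primary.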
+ if (args.getReplSetName() != _rsConfig.getReplSetName()) {
+ return {ErrorCodes::BadValue, "replSet name does not match"};
+ } else if (args.getTerm() < _term) {
+ return {ErrorCodes::BadValue, "term has already passed"};
+ } else if (args.getTerm() == _term && _currentPrimaryIndex > -1 &&
+ args.getWinnerId() != _rsConfig.getMemberAt(_currentPrimaryIndex).getId()) {
+ return {ErrorCodes::BadValue, "term already has a primary"};
+ }
+
+ _currentPrimaryIndex = _rsConfig.findMemberIndexByConfigId(args.getWinnerId());
+ return Status::OK();
+}
+
+void TopologyCoordinatorImpl::loadLastVote(const LastVote& lastVote) {
+ _lastVote = lastVote;
+}
+
+long long TopologyCoordinatorImpl::getTerm() {
+ return _term;
+}
+
+void TopologyCoordinatorImpl::incrementTerm() {
+ _term++;
+}
+
+void TopologyCoordinatorImpl::voteForMyselfV1() {
+ _lastVote.setTerm(_term);
+ _lastVote.setCandidateId(_selfConfig().getId());
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index cfc8e88e5f1..c6dd3e26533 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -43,389 +43,385 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
+/**
+ * Represents a latency measurement for each replica set member based on heartbeat requests.
+ * The measurement is an average weighted 80% to the old value, and 20% to the new value.
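+ * (In effect each successful heartbeat folds in as value = 0.8 * value + 0.2 * millis.)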
+ *
+ * Also stores information about heartbeat progress and retries.
+ */
+class PingStats {
+public:
+ PingStats();
+
/**
- * Represents a latency measurement for each replica set member based on heartbeat requests.
- * The measurement is an average weighted 80% to the old value, and 20% to the new value.
+ * Records that a new heartbeat request started at "now".
*
- * Also stores information about heartbeat progress and retries.
+ * This resets the failure count used in determining whether the next request to a target
+ * should be a retry or a regularly scheduled heartbeat message.
*/
- class PingStats {
- public:
- PingStats();
-
- /**
- * Records that a new heartbeat request started at "now".
- *
- * This resets the failure count used in determining whether the next request to a target
- * should be a retry or a regularly scheduled heartbeat message.
- */
- void start(Date_t now);
-
- /**
- * Records that a heartbeat request completed successfully, and that "millis" milliseconds
- * were spent for a single network roundtrip plus remote processing time.
- */
- void hit(int millis);
-
- /**
- * Records that a heartbeat request failed.
- */
- void miss();
-
- /**
- * Gets the number of hit() calls.
- */
- unsigned int getCount() const { return count; }
-
- /**
- * Gets the weighted average round trip time for heartbeat messages to the target.
- */
- unsigned int getMillis() const { return value; }
-
- /**
- * Gets the date at which start() was last called, which is used to determine if
- * a heartbeat should be retried or if the time limit has expired.
- */
- Date_t getLastHeartbeatStartDate() const { return _lastHeartbeatStartDate; }
-
- /**
- * Gets the number of failures since start() was last called.
- *
- * This value is incremented by calls to miss(), cleared by calls to start() and
- * set to the maximum possible value by calls to hit().
- */
- int getNumFailuresSinceLastStart() const { return _numFailuresSinceLastStart; }
-
- private:
- unsigned int count;
- unsigned int value;
- Date_t _lastHeartbeatStartDate;
- int _numFailuresSinceLastStart;
- };
+ void start(Date_t now);
- class TopologyCoordinatorImpl : public TopologyCoordinator {
- public:
- /**
- * Constructs a Topology Coordinator object.
- * @param maxSyncSourceLagSecs a sync source is re-evaluated after it lags behind further
- * than this amount.
- **/
- TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs);
-
- ////////////////////////////////////////////////////////////
- //
- // Implementation of TopologyCoordinator interface
- //
- ////////////////////////////////////////////////////////////
-
- virtual Role getRole() const;
- virtual MemberState getMemberState() const;
- virtual HostAndPort getSyncSourceAddress() const;
- virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const;
- virtual int getMaintenanceCount() const;
- virtual long long getTerm() const;
- virtual bool updateTerm(long long term);
- virtual void setForceSyncSourceIndex(int index);
- virtual HostAndPort chooseNewSyncSource(Date_t now,
- const OpTime& lastOpApplied);
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
- virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now);
- virtual void clearSyncSourceBlacklist();
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const;
- virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now);
- virtual void setElectionSleepUntil(Date_t newTime);
- virtual void setFollowerMode(MemberState::MS newMode);
- virtual void adjustMaintenanceCountBy(int inc);
- virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackArgs& data,
- const HostAndPort& target,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
- Date_t now,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
- Date_t now,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual Status prepareHeartbeatResponse(Date_t now,
- const ReplSetHeartbeatArgs& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response);
- virtual Status prepareHeartbeatResponseV1(Date_t now,
- const ReplSetHeartbeatArgsV1& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response);
- virtual void prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data,
- Date_t now,
- unsigned uptime,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual void fillIsMasterForReplSet(IsMasterResponse* response);
- virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response);
- virtual void updateConfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now,
- const OpTime& lastOpApplied);
- virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target);
- virtual std::pair<ReplSetHeartbeatArgsV1, Milliseconds> prepareHeartbeatRequestV1(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target);
- virtual HeartbeatResponseAction processHeartbeatResponse(
- Date_t now,
- Milliseconds networkRoundTripTime,
- const HostAndPort& target,
- const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
- const OpTime& myLastOpApplied);
- virtual bool voteForMyself(Date_t now);
- virtual void processWinElection(OID electionId, Timestamp electionOpTime);
- virtual void processLoseElection();
- virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied);
- virtual void setMyHeartbeatMessage(const Date_t now, const std::string& message);
- virtual bool stepDown(Date_t until, bool force, const OpTime& lastOpApplied);
- virtual bool stepDownIfPending();
- virtual Date_t getStepDownTime() const;
- virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder,
- const OpTime& lastCommitttedOpTime) const;
- Status processReplSetDeclareElectionWinner(const ReplSetDeclareElectionWinnerArgs& args,
- long long* responseTerm);
- virtual void processReplSetRequestVotes(const ReplSetRequestVotesArgs& args,
- ReplSetRequestVotesResponse* response,
- const OpTime& lastAppliedOpTime);
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
- virtual void loadLastVote(const LastVote& lastVote);
- virtual void incrementTerm();
- virtual void voteForMyselfV1();
- virtual long long getTerm();
- virtual void prepareForStepDown();
-
- ////////////////////////////////////////////////////////////
- //
- // Test support methods
- //
- ////////////////////////////////////////////////////////////
-
- // Changes _memberState to newMemberState. Only for testing.
- void changeMemberState_forTest(const MemberState& newMemberState,
- const Timestamp& electionTime = Timestamp(0,0));
-
- // Sets "_electionTime" to "newElectionTime". Only for testing.
- void _setElectionTime(const Timestamp& newElectionTime);
-
- // Sets _currentPrimaryIndex to the given index. Should only be used in unit tests!
- // TODO(spencer): Remove this once we can easily call for an election in unit tests to
- // set the current primary.
- void _setCurrentPrimaryForTest(int primaryIndex);
-
- // Returns _electionTime. Only used in unittests.
- Timestamp getElectionTime() const;
-
- // Returns _electionId. Only used in unittests.
- OID getElectionId() const;
-
- // Returns _currentPrimaryIndex. Only used in unittests.
- int getCurrentPrimaryIndex() const;
-
- private:
-
- enum UnelectableReason {
- None = 0,
- CannotSeeMajority = 1 << 0,
- NotCloseEnoughToLatestOptime = 1 << 1,
- ArbiterIAm = 1 << 2,
- NotSecondary = 1 << 3,
- NoPriority = 1 << 4,
- StepDownPeriodActive = 1 << 5,
- NoData = 1 << 6,
- NotInitialized = 1 << 7,
- VotedTooRecently = 1 << 8,
- RefusesToStand = 1 << 9
- };
- typedef int UnelectableReasonMask;
-
- // Returns the number of heartbeat pings which have occurred.
- int _getTotalPings();
-
- // Returns the current "ping" value for the given member by their address
- int _getPing(const HostAndPort& host);
-
- // Determines if we will veto the member specified by "args.id", given that the last op
- // we have applied locally is "lastOpApplied".
- // If we veto, the errmsg will be filled in with a reason
- bool _shouldVetoMember(const ReplicationCoordinator::ReplSetFreshArgs& args,
- const Date_t& now,
- const OpTime& lastOpApplied,
- std::string* errmsg) const;
-
- // Returns the index of the member with the matching id, or -1 if none match.
- int _getMemberIndex(int id) const;
-
- // Sees if a majority number of votes are held by members who are currently "up"
- bool _aMajoritySeemsToBeUp() const;
-
- // Is otherOpTime close enough (within 10 seconds) to the latest known optime to qualify
- // for an election
- bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime,
- const OpTime& ourLastOpApplied) const;
-
- // Returns reason why "self" member is unelectable
- UnelectableReasonMask _getMyUnelectableReason(
- const Date_t now,
- const OpTime& lastOpApplied) const;
-
- // Returns reason why memberIndex is unelectable
- UnelectableReasonMask _getUnelectableReason(
- int memberIndex,
- const OpTime& lastOpApplied) const;
-
- // Returns the nice text of why the node is unelectable
- std::string _getUnelectableReasonString(UnelectableReasonMask ur) const;
-
- // Return true if we are currently primary
- bool _iAmPrimary() const;
-
- // Scans through all members that are 'up' and return the latest known optime.
- OpTime _latestKnownOpTime(const OpTime& ourLastOpApplied) const;
-
- // Scans the electable set and returns the highest priority member index
- int _getHighestPriorityElectableIndex(Date_t now, const OpTime& lastOpApplied) const;
-
- // Returns true if "one" member is higher priority than "two" member
- bool _isMemberHigherPriority(int memberOneIndex, int memberTwoIndex) const;
-
- // Helper shortcut to self config
- const MemberConfig& _selfConfig() const;
-
- // Returns NULL if there is no primary, or the MemberConfig* for the current primary
- const MemberConfig* _currentPrimaryMember() const;
-
- /**
- * Performs updating "_hbdata" and "_currentPrimaryIndex" for processHeartbeatResponse().
- */
- HeartbeatResponseAction _updateHeartbeatDataImpl(
- int updatedConfigIndex,
- const MemberState& originalState,
- Date_t now,
- const OpTime& lastOpApplied);
-
- /**
- * Updates _hbdata based on the newConfig, ensuring that every member in the newConfig
- * has an entry in _hbdata. If any nodes in the newConfig are also present in
- * _currentConfig, copies their heartbeat info into the corresponding entry in the updated
- * _hbdata vector.
- */
- void _updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now);
-
- void _stepDownSelfAndReplaceWith(int newPrimary);
-
- MemberState _getMyState() const;
-
- /**
- * Looks up the provided member in the blacklist and returns true if the member's blacklist
- * expire time is after 'now'. If the member is found but the expire time is before 'now',
- * the function returns false. If the member is not found in the blacklist, the function
- * returns false.
- **/
- bool _memberIsBlacklisted(const MemberConfig& memberConfig, Date_t now) const;
-
- // This node's role in the replication protocol.
- Role _role;
-
- // This is a unique id that is generated and set each time we transition to PRIMARY, as the
- // result of an election.
- OID _electionId;
- // The time at which the current PRIMARY was elected.
- Timestamp _electionTime;
-
- // This node's election term. The term is used as part of the consensus algorithm to elect
- // and maintain one primary (leader) node in the cluster.
- long long _term = 0;
-
- // the index of the member we currently believe is primary, if one exists, otherwise -1
- int _currentPrimaryIndex;
-
- // the hostandport we are currently syncing from
- // empty if no sync source (we are primary, or we cannot connect to anyone yet)
- HostAndPort _syncSource;
- // These members are not chosen as sync sources for a period of time, due to connection
- // issues with them
- std::map<HostAndPort, Date_t> _syncSourceBlacklist;
- // The next sync source to be chosen, requested via a replSetSyncFrom command
- int _forceSyncSourceIndex;
- // How far this node must fall behind before considering switching sync sources
- Seconds _maxSyncSourceLagSecs;
+ /**
+ * Records that a heartbeat request completed successfully, and that "millis" milliseconds
+ * were spent for a single network roundtrip plus remote processing time.
+ */
+ void hit(int millis);
- // "heartbeat message"
- // sent in requestHeartbeat respond in field "hbm"
- std::string _hbmsg;
- Date_t _hbmsgTime; // when it was logged
-
- // heartbeat msg to send to others; descriptive diagnostic info
- std::string _getHbmsg(Date_t now) const;
-
- int _selfIndex; // this node's index in _members and _currentConfig
-
- ReplicaSetConfig _rsConfig; // The current config, including a vector of MemberConfigs
-
- // heartbeat data for each member. It is guaranteed that this vector will be maintained
- // in the same order as the MemberConfigs in _currentConfig, therefore the member config
- // index can be used to index into this vector as well.
- std::vector<MemberHeartbeatData> _hbdata;
-
- // Indicates that we've received a request to stepdown from PRIMARY (likely via a heartbeat)
- bool _stepDownPending;
+ /**
+ * Records that a heartbeat request failed.
+ */
+ void miss();
- // Time when stepDown command expires
- Date_t _stepDownUntil;
-
- // A time before which this node will not stand for election.
- Date_t _electionSleepUntil;
-
- // The number of calls we have had to enter maintenance mode
- int _maintenanceModeCalls;
-
- // The sub-mode of follower that we are in. Legal values are RS_SECONDARY, RS_RECOVERING,
- // RS_STARTUP2 (initial sync) and RS_ROLLBACK. Only meaningful if _role == Role::follower.
- // Configured via setFollowerMode(). If the sub-mode is RS_SECONDARY, then the effective
- // sub-mode is either RS_SECONDARY or RS_RECOVERING, depending on _maintenanceModeCalls.
- // Rather than accesing this variable direclty, one should use the getMemberState() method,
- // which computes the replica set node state on the fly.
- MemberState::MS _followerMode;
-
- typedef std::map<HostAndPort, PingStats> PingMap;
- // Ping stats for each member by HostAndPort;
- PingMap _pings;
-
- // Last vote info from the election
- struct VoteLease {
-
- static const Seconds leaseTime;
-
- Date_t when;
- int whoId = -1;
- HostAndPort whoHostAndPort;
- } _voteLease;
-
- // V1 last vote info for elections
- LastVote _lastVote;
+ /**
+ * Gets the number of hit() calls.
+ */
+ unsigned int getCount() const {
+ return count;
+    }
+
+ /**
+ * Gets the weighted average round trip time for heartbeat messages to the target.
+ */
+ unsigned int getMillis() const {
+ return value;
+ }
+
+ /**
+ * Gets the date at which start() was last called, which is used to determine if
+ * a heartbeat should be retried or if the time limit has expired.
+ */
+ Date_t getLastHeartbeatStartDate() const {
+ return _lastHeartbeatStartDate;
+ }
+
+ /**
+ * Gets the number of failures since start() was last called.
+ *
+ * This value is incremented by calls to miss(), cleared by calls to start() and
+ * set to the maximum possible value by calls to hit().
+ */
+ int getNumFailuresSinceLastStart() const {
+ return _numFailuresSinceLastStart;
+ }
+
+private:
+ unsigned int count;
+ unsigned int value;
+ Date_t _lastHeartbeatStartDate;
+ int _numFailuresSinceLastStart;
+};
+
+class TopologyCoordinatorImpl : public TopologyCoordinator {
+public:
+ /**
+ * Constructs a Topology Coordinator object.
+ * @param maxSyncSourceLagSecs a sync source is re-evaluated after it lags behind further
+ * than this amount.
+ **/
+ TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs);
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Implementation of TopologyCoordinator interface
+ //
+ ////////////////////////////////////////////////////////////
+
+ virtual Role getRole() const;
+ virtual MemberState getMemberState() const;
+ virtual HostAndPort getSyncSourceAddress() const;
+ virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const;
+ virtual int getMaintenanceCount() const;
+ virtual long long getTerm() const;
+ virtual bool updateTerm(long long term);
+ virtual void setForceSyncSourceIndex(int index);
+ virtual HostAndPort chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied);
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
+ virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now);
+ virtual void clearSyncSourceBlacklist();
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const;
+ virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now);
+ virtual void setElectionSleepUntil(Date_t newTime);
+ virtual void setFollowerMode(MemberState::MS newMode);
+ virtual void adjustMaintenanceCountBy(int inc);
+ virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackArgs& data,
+ const HostAndPort& target,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
+ Date_t now,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
+ Date_t now,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual Status prepareHeartbeatResponse(Date_t now,
+ const ReplSetHeartbeatArgs& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response);
+ virtual Status prepareHeartbeatResponseV1(Date_t now,
+ const ReplSetHeartbeatArgsV1& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response);
+ virtual void prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data,
+ Date_t now,
+ unsigned uptime,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual void fillIsMasterForReplSet(IsMasterResponse* response);
+ virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response);
+ virtual void updateConfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now,
+ const OpTime& lastOpApplied);
+ virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target);
+ virtual std::pair<ReplSetHeartbeatArgsV1, Milliseconds> prepareHeartbeatRequestV1(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target);
+ virtual HeartbeatResponseAction processHeartbeatResponse(
+ Date_t now,
+ Milliseconds networkRoundTripTime,
+ const HostAndPort& target,
+ const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
+ const OpTime& myLastOpApplied);
+ virtual bool voteForMyself(Date_t now);
+ virtual void processWinElection(OID electionId, Timestamp electionOpTime);
+ virtual void processLoseElection();
+ virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied);
+ virtual void setMyHeartbeatMessage(const Date_t now, const std::string& message);
+ virtual bool stepDown(Date_t until, bool force, const OpTime& lastOpApplied);
+ virtual bool stepDownIfPending();
+ virtual Date_t getStepDownTime() const;
+ virtual void prepareCursorResponseInfo(BSONObjBuilder* objBuilder,
+                                           const OpTime& lastCommittedOpTime) const;
+ Status processReplSetDeclareElectionWinner(const ReplSetDeclareElectionWinnerArgs& args,
+ long long* responseTerm);
+ virtual void processReplSetRequestVotes(const ReplSetRequestVotesArgs& args,
+ ReplSetRequestVotesResponse* response,
+ const OpTime& lastAppliedOpTime);
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
+ virtual void loadLastVote(const LastVote& lastVote);
+ virtual void incrementTerm();
+ virtual void voteForMyselfV1();
+ virtual long long getTerm();
+ virtual void prepareForStepDown();
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Test support methods
+ //
+ ////////////////////////////////////////////////////////////
+
+ // Changes _memberState to newMemberState. Only for testing.
+ void changeMemberState_forTest(const MemberState& newMemberState,
+ const Timestamp& electionTime = Timestamp(0, 0));
+
+ // Sets "_electionTime" to "newElectionTime". Only for testing.
+ void _setElectionTime(const Timestamp& newElectionTime);
+
+ // Sets _currentPrimaryIndex to the given index. Should only be used in unit tests!
+ // TODO(spencer): Remove this once we can easily call for an election in unit tests to
+ // set the current primary.
+ void _setCurrentPrimaryForTest(int primaryIndex);
+
+ // Returns _electionTime. Only used in unittests.
+ Timestamp getElectionTime() const;
+
+ // Returns _electionId. Only used in unittests.
+ OID getElectionId() const;
+
+ // Returns _currentPrimaryIndex. Only used in unittests.
+ int getCurrentPrimaryIndex() const;
+
+private:
+ enum UnelectableReason {
+ None = 0,
+ CannotSeeMajority = 1 << 0,
+ NotCloseEnoughToLatestOptime = 1 << 1,
+ ArbiterIAm = 1 << 2,
+ NotSecondary = 1 << 3,
+ NoPriority = 1 << 4,
+ StepDownPeriodActive = 1 << 5,
+ NoData = 1 << 6,
+ NotInitialized = 1 << 7,
+ VotedTooRecently = 1 << 8,
+ RefusesToStand = 1 << 9
};
+ typedef int UnelectableReasonMask;
+
+ // Returns the number of heartbeat pings which have occurred.
+ int _getTotalPings();
+
+ // Returns the current "ping" value for the given member by their address
+ int _getPing(const HostAndPort& host);
-} // namespace repl
-} // namespace mongo
+ // Determines if we will veto the member specified by "args.id", given that the last op
+ // we have applied locally is "lastOpApplied".
+ // If we veto, the errmsg will be filled in with a reason
+ bool _shouldVetoMember(const ReplicationCoordinator::ReplSetFreshArgs& args,
+ const Date_t& now,
+ const OpTime& lastOpApplied,
+ std::string* errmsg) const;
+
+ // Returns the index of the member with the matching id, or -1 if none match.
+ int _getMemberIndex(int id) const;
+
+ // Sees if a majority number of votes are held by members who are currently "up"
+ bool _aMajoritySeemsToBeUp() const;
+
+ // Is otherOpTime close enough (within 10 seconds) to the latest known optime to qualify
+ // for an election
+ bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime,
+ const OpTime& ourLastOpApplied) const;
+
+ // Returns reason why "self" member is unelectable
+ UnelectableReasonMask _getMyUnelectableReason(const Date_t now,
+ const OpTime& lastOpApplied) const;
+
+ // Returns reason why memberIndex is unelectable
+ UnelectableReasonMask _getUnelectableReason(int memberIndex, const OpTime& lastOpApplied) const;
+
+    // Returns human-readable text explaining why the node is unelectable
+ std::string _getUnelectableReasonString(UnelectableReasonMask ur) const;
+
+ // Return true if we are currently primary
+ bool _iAmPrimary() const;
+
+    // Scans through all members that are 'up' and returns the latest known optime.
+ OpTime _latestKnownOpTime(const OpTime& ourLastOpApplied) const;
+
+ // Scans the electable set and returns the highest priority member index
+ int _getHighestPriorityElectableIndex(Date_t now, const OpTime& lastOpApplied) const;
+
+ // Returns true if "one" member is higher priority than "two" member
+ bool _isMemberHigherPriority(int memberOneIndex, int memberTwoIndex) const;
+
+ // Helper shortcut to self config
+ const MemberConfig& _selfConfig() const;
+
+ // Returns the MemberConfig* for the current primary, or NULL if there is no primary.
+ const MemberConfig* _currentPrimaryMember() const;
+
+ /**
+ * Updates "_hbdata" and "_currentPrimaryIndex" on behalf of processHeartbeatResponse().
+ */
+ HeartbeatResponseAction _updateHeartbeatDataImpl(int updatedConfigIndex,
+ const MemberState& originalState,
+ Date_t now,
+ const OpTime& lastOpApplied);
+
+ /**
+ * Updates _hbdata based on the newConfig, ensuring that every member in the newConfig
+ * has an entry in _hbdata. If any nodes in the newConfig are also present in the
+ * current config (_rsConfig), copies their heartbeat info into the corresponding entry
+ * in the updated _hbdata vector.
+ */
+ void _updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now);
+
+ void _stepDownSelfAndReplaceWith(int newPrimary);
+
+ MemberState _getMyState() const;
+
+ /**
+ * Looks up the provided member in the blacklist and returns true if the member's
+ * blacklist expire time is after 'now'. Returns false if the member's expire time
+ * is before 'now', or if the member is not in the blacklist at all.
+ */
+ bool _memberIsBlacklisted(const MemberConfig& memberConfig, Date_t now) const;
+
+ // This node's role in the replication protocol.
+ Role _role;
+
+ // This is a unique id that is generated and set each time we transition to PRIMARY, as the
+ // result of an election.
+ OID _electionId;
+ // The time at which the current PRIMARY was elected.
+ Timestamp _electionTime;
+
+ // This node's election term. The term is used as part of the consensus algorithm to elect
+ // and maintain one primary (leader) node in the cluster.
+ long long _term = 0;
+
+ // The index of the member we currently believe is primary, or -1 if none exists.
+ int _currentPrimaryIndex;
+
+ // The HostAndPort we are currently syncing from; empty if there is no sync source
+ // (we are primary, or we cannot connect to anyone yet).
+ HostAndPort _syncSource;
+ // These members are not chosen as sync sources for a period of time, due to
+ // connection issues with them.
+ std::map<HostAndPort, Date_t> _syncSourceBlacklist;
+ // The next sync source to be chosen, requested via a replSetSyncFrom command.
+ int _forceSyncSourceIndex;
+ // How far this node must fall behind before it considers switching sync sources.
+ Seconds _maxSyncSourceLagSecs;
+
+ // "heartbeat message"
+ // sent in requestHeartbeat respond in field "hbm"
+ std::string _hbmsg;
+ Date_t _hbmsgTime; // when it was logged
+
+ // Returns the heartbeat msg to send to others; descriptive diagnostic info.
+ std::string _getHbmsg(Date_t now) const;
+
+ int _selfIndex; // this node's index in the _rsConfig member list and in _hbdata
+
+ ReplicaSetConfig _rsConfig; // The current config, including a vector of MemberConfigs
+
+ // Heartbeat data for each member. It is guaranteed that this vector will be
+ // maintained in the same order as the MemberConfigs in _rsConfig, therefore the
+ // member config index can be used to index into this vector as well.
+ std::vector<MemberHeartbeatData> _hbdata;
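+ // For example (illustrative): _hbdata[i] always describes the same member as the
+ // i-th MemberConfig in _rsConfig, so heartbeat state and member config can be
+ // correlated by a single index.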
+
+ // Indicates that we've received a request to step down from PRIMARY (likely via a
+ // heartbeat).
+ bool _stepDownPending;
+
+ // Time when stepDown command expires
+ Date_t _stepDownUntil;
+
+ // A time before which this node will not stand for election.
+ Date_t _electionSleepUntil;
+
+ // The number of outstanding calls to enter maintenance mode.
+ int _maintenanceModeCalls;
+
+ // The sub-mode of follower that we are in. Legal values are RS_SECONDARY, RS_RECOVERING,
+ // RS_STARTUP2 (initial sync) and RS_ROLLBACK. Only meaningful if _role == Role::follower.
+ // Configured via setFollowerMode(). If the sub-mode is RS_SECONDARY, then the effective
+ // sub-mode is either RS_SECONDARY or RS_RECOVERING, depending on _maintenanceModeCalls.
+ // Rather than accessing this variable directly, one should use the getMemberState()
+ // method, which computes the replica set node state on the fly.
+ MemberState::MS _followerMode;
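+
+ // Illustrative sketch (assumed, not the actual implementation) of the effective
+ // follower state computation described above:
+ //   MemberState effective = (_followerMode == MemberState::RS_SECONDARY &&
+ //                            _maintenanceModeCalls > 0)
+ //       ? MemberState(MemberState::RS_RECOVERING)
+ //       : MemberState(_followerMode);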
+
+ typedef std::map<HostAndPort, PingStats> PingMap;
+ // Ping stats for each member, keyed by HostAndPort.
+ PingMap _pings;
+
+ // Last vote info from the election
+ struct VoteLease {
+ static const Seconds leaseTime;
+
+ Date_t when;
+ int whoId = -1;
+ HostAndPort whoHostAndPort;
+ } _voteLease;
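+
+ // Illustrative (assumed) lease check: while the lease is active, a vote for a
+ // different candidate is refused, e.g.
+ //   bool mustWait = now - _voteLease.when < VoteLease::leaseTime &&
+ //                   _voteLease.whoId != candidateId;
+ // which is the condition behind the VotedTooRecently unelectable reason.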
+
+ // V1 last vote info for elections
+ LastVote _lastVote;
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index 7378feb4067..b791ade3b96 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -53,1628 +53,1372 @@ namespace mongo {
namespace repl {
namespace {
- Date_t operator++(Date_t& d, int) {
- Date_t result = d;
- d += Milliseconds(1);
- return result;
+Date_t operator++(Date_t& d, int) {
+ Date_t result = d;
+ d += Milliseconds(1);
+ return result;
+}
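+
+// Usage note: tests below advance the fixture clock with the post-increment above,
+// e.g. chooseNewSyncSource(now()++, OpTime()) passes in the current time and then
+// moves the clock forward one millisecond.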
+
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+class TopoCoordTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ _topo.reset(new TopologyCoordinatorImpl(Seconds(100)));
+ _now = Date_t();
+ _selfIndex = -1;
+ _cbData.reset(new ReplicationExecutor::CallbackArgs(
+ NULL, ReplicationExecutor::CallbackHandle(), Status::OK()));
}
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
+ virtual void tearDown() {
+ _topo.reset(NULL);
+ _cbData.reset(NULL);
}
- class TopoCoordTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- _topo.reset(new TopologyCoordinatorImpl(Seconds(100)));
- _now = Date_t();
- _selfIndex = -1;
- _cbData.reset(new ReplicationExecutor::CallbackArgs(
- NULL, ReplicationExecutor::CallbackHandle(), Status::OK()));
- }
-
- virtual void tearDown() {
- _topo.reset(NULL);
- _cbData.reset(NULL);
- }
-
- protected:
- TopologyCoordinatorImpl& getTopoCoord() {return *_topo;}
- ReplicationExecutor::CallbackArgs cbData() {return *_cbData;}
- Date_t& now() {return _now;}
-
- int64_t countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
-
- void makeSelfPrimary(const Timestamp& electionOpTime = Timestamp(0,0)) {
- getTopoCoord().changeMemberState_forTest(MemberState::RS_PRIMARY, electionOpTime);
- getTopoCoord()._setCurrentPrimaryForTest(_selfIndex);
- }
-
- void setSelfMemberState(const MemberState& newState) {
- getTopoCoord().changeMemberState_forTest(newState);
- }
-
- int getCurrentPrimaryIndex() {
- return getTopoCoord().getCurrentPrimaryIndex();
- }
- // Update config and set selfIndex
- // If "now" is passed in, set _now to now+1
- void updateConfig(BSONObj cfg,
- int selfIndex,
- Date_t now = Date_t::fromMillisSinceEpoch(-1),
- const OpTime& lastOp = OpTime()) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(cfg));
- ASSERT_OK(config.validate());
-
- _selfIndex = selfIndex;
-
- if (now == Date_t::fromMillisSinceEpoch(-1)) {
- getTopoCoord().updateConfig(config, selfIndex, _now, lastOp);
- _now += Milliseconds(1);
- }
- else {
- invariant(now > _now);
- getTopoCoord().updateConfig(config, selfIndex, now, lastOp);
- _now = now + Milliseconds(1);
- }
- }
-
- HeartbeatResponseAction receiveUpHeartbeat(
- const HostAndPort& member,
- const std::string& setName,
- MemberState memberState,
- const OpTime& electionTime,
- const OpTime& lastOpTimeSender,
- const OpTime& lastOpTimeReceiver) {
- return _receiveHeartbeatHelper(Status::OK(),
- member,
- setName,
- memberState,
- electionTime.getTimestamp(),
- lastOpTimeSender,
- lastOpTimeReceiver,
- Milliseconds(1));
- }
-
- HeartbeatResponseAction receiveDownHeartbeat(
- const HostAndPort& member,
- const std::string& setName,
- const OpTime& lastOpTimeReceiver,
- ErrorCodes::Error errcode = ErrorCodes::HostUnreachable) {
- // timed out heartbeat to mark a node as down
-
- Milliseconds roundTripTime{ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod};
- return _receiveHeartbeatHelper(Status(errcode, ""),
- member,
- setName,
- MemberState::RS_UNKNOWN,
- Timestamp(),
- OpTime(),
- lastOpTimeReceiver,
- roundTripTime);
- }
-
- HeartbeatResponseAction heartbeatFromMember(const HostAndPort& member,
- const std::string& setName,
- MemberState memberState,
- const OpTime& lastOpTimeSender,
- Milliseconds roundTripTime = Milliseconds(1)) {
- return _receiveHeartbeatHelper(Status::OK(),
- member,
- setName,
- memberState,
- Timestamp(),
- lastOpTimeSender,
- OpTime(),
- roundTripTime);
- }
-
- private:
-
- HeartbeatResponseAction _receiveHeartbeatHelper(Status responseStatus,
- const HostAndPort& member,
- const std::string& setName,
- MemberState memberState,
- Timestamp electionTime,
- const OpTime& lastOpTimeSender,
- const OpTime& lastOpTimeReceiver,
- Milliseconds roundTripTime) {
-
- ReplSetHeartbeatResponse hb;
- hb.setConfigVersion(1);
- hb.setState(memberState);
- hb.setOpTime(lastOpTimeSender);
- hb.setElectionTime(electionTime);
-
- StatusWith<ReplSetHeartbeatResponse> hbResponse =
- responseStatus.isOK() ?
- StatusWith<ReplSetHeartbeatResponse>(hb) :
- StatusWith<ReplSetHeartbeatResponse>(responseStatus);
-
- getTopoCoord().prepareHeartbeatRequest(now(),
- setName,
- member);
- now() += roundTripTime;
- return getTopoCoord().processHeartbeatResponse(now(),
- roundTripTime,
- member,
- hbResponse,
- lastOpTimeReceiver);
- }
-
- private:
- unique_ptr<TopologyCoordinatorImpl> _topo;
- unique_ptr<ReplicationExecutor::CallbackArgs> _cbData;
- Date_t _now;
- int _selfIndex;
- };
-
- TEST_F(TopoCoordTest, ChooseSyncSourceBasic) {
- // if we do not have an index in the config, we should get an empty syncsource
- HostAndPort newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_TRUE(newSyncSource.empty());
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // member h2 is the furthest ahead
- heartbeatFromMember(HostAndPort("h2"),
- "rs0",
- MemberState::RS_SECONDARY,
- OpTime(Timestamp(1,0), 0));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
-
- // We start with no sync source
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // Fail due to insufficient number of pings
- newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // Record 2nd round of pings to allow choosing a new sync source; all members equidistant
- heartbeatFromMember(HostAndPort("h2"),
- "rs0",
- MemberState::RS_SECONDARY,
- OpTime(Timestamp(1,0), 0));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
-
- // Should choose h2, since it is furthest ahead
- newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // h3 becomes further ahead, so it should be chosen
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2,0), 0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- // h3 becomes an invalid candidate for sync source; should choose h2 again
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING,
- OpTime(Timestamp(2,0), 0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // h3 back in SECONDARY and ahead
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2,0), 0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- // h3 goes down
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // h3 back up and ahead
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2,0), 0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
+protected:
+ TopologyCoordinatorImpl& getTopoCoord() {
+ return *_topo;
}
-
- TEST_F(TopoCoordTest, ChooseSyncSourceCandidates) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself") <<
- BSON("_id" << 10 << "host" << "h1") <<
- BSON("_id" << 20 << "host" << "h2" <<
- "buildIndexes" << false << "priority" << 0) <<
- BSON("_id" << 30 << "host" << "h3" <<
- "hidden" << true << "priority" << 0 << "votes" << 0) <<
- BSON("_id" << 40 << "host" << "h4" <<"arbiterOnly" << true) <<
- BSON("_id" << 50 << "host" << "h5" <<
- "slaveDelay" << 1 << "priority" << 0) <<
- BSON("_id" << 60 << "host" << "h6") <<
- BSON("_id" << 70 << "host" << "hprimary"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
- OpTime lastOpTimeWeApplied = OpTime(Timestamp(100,0), 0);
-
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(700));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(600));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(500));
- heartbeatFromMember(HostAndPort("h4"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(400));
- heartbeatFromMember(HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(300));
-
- // This node is lagged further than maxSyncSourceLagSeconds.
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(499, 0), 0), Milliseconds(200));
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY,
- OpTime(Timestamp(600, 0), 0), Milliseconds(100));
- ASSERT_EQUALS(7, getCurrentPrimaryIndex());
-
- // Record 2nd round of pings to allow choosing a new sync source
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(700));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(600));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(500));
- heartbeatFromMember(HostAndPort("h4"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(400));
- heartbeatFromMember(HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(501, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(499, 0), 0), Milliseconds(200));
- heartbeatFromMember(HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY,
- OpTime(Timestamp(600, 0), 0), Milliseconds(100));
-
- // Should choose primary first; it's closest
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("hprimary"), getTopoCoord().getSyncSourceAddress());
-
- // Primary goes far far away
- heartbeatFromMember(HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY,
- OpTime(Timestamp(600, 0), 0), Milliseconds(100000000));
-
- // Should choose h4 (if an arbiter has an oplog, it's a valid sync source).
- // h6 is not considered because it is outside the maxSyncSourceLagSeconds window.
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h4"), getTopoCoord().getSyncSourceAddress());
-
- // h4 goes down; should choose h1
- receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress());
-
- // Primary and h1 go down; should choose h6
- receiveDownHeartbeat(HostAndPort("h1"), "rs0", OpTime());
- receiveDownHeartbeat(HostAndPort("hprimary"), "rs0", OpTime());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
-
- // h6 goes down; should choose h5
- receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
-
- // h5 goes down; should choose h3
- receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- // h3 goes down; no sync source candidates remain
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+ ReplicationExecutor::CallbackArgs cbData() {
+ return *_cbData;
}
-
-
- TEST_F(TopoCoordTest, ChooseSyncSourceChainingNotAllowed) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "settings" << BSON("chainingAllowed" << false) <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(0, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(0, 0), 0), Milliseconds(300));
-
- // No primary situation: should choose no sync source.
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // Add primary
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY,
- OpTime(Timestamp(0, 0), 0), Milliseconds(300));
- ASSERT_EQUALS(2, getCurrentPrimaryIndex());
-
- // h3 is primary and should be chosen as sync source, despite being further away than h2
- // and the primary (h3) being behind our most recently applied optime
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(Timestamp(10,0), 0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
+ Date_t& now() {
+ return _now;
}
- TEST_F(TopoCoordTest, EmptySyncSourceOnPrimary) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(0, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(0, 0), 0), Milliseconds(300));
-
- // No primary situation: should choose h2 sync source.
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // Become primary
- makeSelfPrimary(Timestamp(3, 0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // Check sync source
- ASSERT_EQUALS(HostAndPort(), getTopoCoord().getSyncSourceAddress());
+ int64_t countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
}
- TEST_F(TopoCoordTest, ForceSyncSource) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // two rounds of heartbeat pings from each member
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
-
- // force should overrule other defaults
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
- getTopoCoord().setForceSyncSourceIndex(1);
- // force should cause shouldChangeSyncSource() to return true
- // even if the currentSource is the force target
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h2"), now()));
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h3"), now()));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // force should only work for one call to chooseNewSyncSource
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+ void makeSelfPrimary(const Timestamp& electionOpTime = Timestamp(0, 0)) {
+ getTopoCoord().changeMemberState_forTest(MemberState::RS_PRIMARY, electionOpTime);
+ getTopoCoord()._setCurrentPrimaryForTest(_selfIndex);
}
- TEST_F(TopoCoordTest, BlacklistSyncSource) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
-
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- Date_t expireTime = Date_t::fromMillisSinceEpoch(1000);
- getTopoCoord().blacklistSyncSource(HostAndPort("h3"), expireTime);
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- // Should choose second best choice now that h3 is blacklisted.
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // After time has passed, should go back to original sync source
- getTopoCoord().chooseNewSyncSource(expireTime, OpTime());
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+ void setSelfMemberState(const MemberState& newState) {
+ getTopoCoord().changeMemberState_forTest(newState);
}
- TEST_F(TopoCoordTest, BlacklistSyncSourceNoChaining) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "settings" << BSON("chainingAllowed" << false) <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
-
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- Date_t expireTime = Date_t::fromMillisSinceEpoch(1000);
- getTopoCoord().blacklistSyncSource(HostAndPort("h2"), expireTime);
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- // Can't choose any sync source now.
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // After time has passed, should go back to the primary
- getTopoCoord().chooseNewSyncSource(expireTime, OpTime());
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+ int getCurrentPrimaryIndex() {
+ return getTopoCoord().getCurrentPrimaryIndex();
}
-
- TEST_F(TopoCoordTest, OnlyUnauthorizedUpCausesRecovering) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // Generate enough heartbeats to select a sync source below
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(2, 0), 0), Milliseconds(100));
-
- ASSERT_EQUALS(HostAndPort("h3"),
- getTopoCoord().chooseNewSyncSource(now()++, OpTime()));
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- // Good state setup done
-
- // Mark nodes down, ensure that we have no source and are secondary
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
- ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime()).empty());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
-
- // Mark nodes down + unauth, ensure that we have no source and are secondary
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::Unauthorized);
- ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime()).empty());
- ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s);
-
- // Having an auth error but with another node up should bring us out of RECOVERING
- HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("h2"),
- "rs0",
- MemberState::RS_SECONDARY,
- OpTime(),
- OpTime(Timestamp(2, 0), 0),
- OpTime(Timestamp(2, 0), 0));
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- // Test that the heartbeat that brings us from RECOVERING to SECONDARY doesn't initiate
- // an election (SERVER-17164)
- ASSERT_NO_ACTION(action.getAction());
+ // Updates the config and sets _selfIndex. If "now" is passed in, sets _now to
+ // now + 1ms; otherwise advances _now by 1ms.
+ void updateConfig(BSONObj cfg,
+ int selfIndex,
+ Date_t now = Date_t::fromMillisSinceEpoch(-1),
+ const OpTime& lastOp = OpTime()) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(cfg));
+ ASSERT_OK(config.validate());
+
+ _selfIndex = selfIndex;
+
+ if (now == Date_t::fromMillisSinceEpoch(-1)) {
+ getTopoCoord().updateConfig(config, selfIndex, _now, lastOp);
+ _now += Milliseconds(1);
+ } else {
+ invariant(now > _now);
+ getTopoCoord().updateConfig(config, selfIndex, now, lastOp);
+ _now = now + Milliseconds(1);
+ }
}
- TEST_F(TopoCoordTest, ReceiveHeartbeatWhileAbsentFromConfig) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "h1") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- -1);
- ASSERT_NO_ACTION(heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(Timestamp(1, 0), 0), Milliseconds(300)).getAction());
+ HeartbeatResponseAction receiveUpHeartbeat(const HostAndPort& member,
+ const std::string& setName,
+ MemberState memberState,
+ const OpTime& electionTime,
+ const OpTime& lastOpTimeSender,
+ const OpTime& lastOpTimeReceiver) {
+ return _receiveHeartbeatHelper(Status::OK(),
+ member,
+ setName,
+ memberState,
+ electionTime.getTimestamp(),
+ lastOpTimeSender,
+ lastOpTimeReceiver,
+ Milliseconds(1));
}
- TEST_F(TopoCoordTest, PrepareSyncFromResponse) {
- OpTime staleOpTime(Timestamp(1, 1), 0);
- OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0);
-
- Status result = Status::OK();
- BSONObjBuilder response;
-
- // if we do not have an index in the config, we should get ErrorCodes::NotSecondary
- getTopoCoord().prepareSyncFromResponse(cbData(), HostAndPort("h1"),
- ourOpTime, &response, &result);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
- ASSERT_EQUALS("Removed and uninitialized nodes do not sync", result.reason());
-
- // Test trying to sync from another node when we are an arbiter
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "hself" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 1 <<
- "host" << "h1"))),
- 0);
-
- getTopoCoord().prepareSyncFromResponse(cbData(), HostAndPort("h1"),
- ourOpTime, &response, &result);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
- ASSERT_EQUALS("arbiters don't sync", result.reason());
-
- // Set up config for the rest of the tests
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself") <<
- BSON("_id" << 1 << "host" << "h1" << "arbiterOnly" << true) <<
- BSON("_id" << 2 << "host" << "h2" <<
- "priority" << 0 << "buildIndexes" << false) <<
- BSON("_id" << 3 << "host" << "h3") <<
- BSON("_id" << 4 << "host" << "h4") <<
- BSON("_id" << 5 << "host" << "h5") <<
- BSON("_id" << 6 << "host" << "h6"))),
- 0);
-
- // Try to sync while PRIMARY
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary();
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- getTopoCoord()._setCurrentPrimaryForTest(0);
- BSONObjBuilder response1;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h3"), ourOpTime, &response1, &result);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
- ASSERT_EQUALS("primaries don't sync", result.reason());
- ASSERT_EQUALS("h3:27017", response1.obj()["syncFromRequested"].String());
-
- // Try to sync from non-existent member
- setSelfMemberState(MemberState::RS_SECONDARY);
- getTopoCoord()._setCurrentPrimaryForTest(-1);
- BSONObjBuilder response2;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("fakemember"), ourOpTime, &response2, &result);
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, result);
- ASSERT_EQUALS("Could not find member \"fakemember:27017\" in replica set", result.reason());
-
- // Try to sync from self
- BSONObjBuilder response3;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("hself"), ourOpTime, &response3, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
- ASSERT_EQUALS("I cannot sync from myself", result.reason());
-
- // Try to sync from an arbiter
- BSONObjBuilder response4;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h1"), ourOpTime, &response4, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
- ASSERT_EQUALS("Cannot sync from \"h1:27017\" because it is an arbiter", result.reason());
-
- // Try to sync from a node that doesn't build indexes
- BSONObjBuilder response5;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h2"), ourOpTime, &response5, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
- ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it does not build indexes",
- result.reason());
-
- // Try to sync from a member that is down
- receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
-
- BSONObjBuilder response7;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h4"), ourOpTime, &response7, &result);
- ASSERT_EQUALS(ErrorCodes::HostUnreachable, result);
- ASSERT_EQUALS("I cannot reach the requested member: h4:27017", result.reason());
-
- // Sync successfully from a member that is stale
- heartbeatFromMember(HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY,
- staleOpTime, Milliseconds(100));
-
- BSONObjBuilder response8;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h5"), ourOpTime, &response8, &result);
- ASSERT_OK(result);
- ASSERT_EQUALS("requested member \"h5:27017\" is more than 10 seconds behind us",
- response8.obj()["warning"].String());
- getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
- ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
-
- // Sync successfully from an up-to-date member
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- ourOpTime, Milliseconds(100));
-
- BSONObjBuilder response9;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h6"), ourOpTime, &response9, &result);
- ASSERT_OK(result);
- BSONObj response9Obj = response9.obj();
- ASSERT_FALSE(response9Obj.hasField("warning"));
- ASSERT_EQUALS(HostAndPort("h5").toString(), response9Obj["prevSyncTarget"].String());
- getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
- ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
-
- // node goes down between forceSync and chooseNewSyncSource
- BSONObjBuilder response10;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h6"), ourOpTime, &response10, &result);
- BSONObj response10Obj = response10.obj();
- ASSERT_FALSE(response10Obj.hasField("warning"));
- ASSERT_EQUALS(HostAndPort("h6").toString(), response10Obj["prevSyncTarget"].String());
- receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
- HostAndPort syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h6"), syncSource);
-
- // Try to sync from a member that is unauth'd
- receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime(), ErrorCodes::Unauthorized);
-
- BSONObjBuilder response11;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h5"), ourOpTime, &response11, &result);
- ASSERT_NOT_OK(result);
- ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
- ASSERT_EQUALS("not authorized to communicate with h5:27017",
- result.reason());
-
- // Sync successfully from an up-to-date member.
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- ourOpTime, Milliseconds(100));
- BSONObjBuilder response12;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h6"), ourOpTime, &response12, &result);
- ASSERT_OK(result);
- syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
- ASSERT_EQUALS(HostAndPort("h6"), syncSource);
+ HeartbeatResponseAction receiveDownHeartbeat(
+ const HostAndPort& member,
+ const std::string& setName,
+ const OpTime& lastOpTimeReceiver,
+ ErrorCodes::Error errcode = ErrorCodes::HostUnreachable) {
+ // Use a timed-out heartbeat to mark the node as down.
+
+ Milliseconds roundTripTime{ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod};
+ return _receiveHeartbeatHelper(Status(errcode, ""),
+ member,
+ setName,
+ MemberState::RS_UNKNOWN,
+ Timestamp(),
+ OpTime(),
+ lastOpTimeReceiver,
+ roundTripTime);
}
- TEST_F(TopoCoordTest, ReplSetGetStatus) {
- // This test starts by configuring a TopologyCoordinator as a member of a 4 node replica
- // set, with each node in a different state.
- // The first node is DOWN, as if we tried heartbeating them and it failed in some way.
- // The second node is in state SECONDARY, as if we've received a valid heartbeat from them.
- // The third node is in state UNKNOWN, as if we've not yet had any heartbeating activity
- // with them yet. The fourth node is PRIMARY and corresponds to ourself, which gets its
- // information for replSetGetStatus from a different source than the nodes that aren't
- // ourself. After this setup, we call prepareStatusResponse and make sure that the fields
- // returned for each member match our expectations.
- Date_t startupTime = Date_t::fromMillisSinceEpoch(100);
- Date_t heartbeatTime = Date_t::fromMillisSinceEpoch(5000);
- Seconds uptimeSecs(10);
- Date_t curTime = heartbeatTime + uptimeSecs;
- Timestamp electionTime(1, 2);
- OpTime oplogProgress(Timestamp(3, 4), 0);
- std::string setName = "mySet";
+ HeartbeatResponseAction heartbeatFromMember(const HostAndPort& member,
+ const std::string& setName,
+ MemberState memberState,
+ const OpTime& lastOpTimeSender,
+ Milliseconds roundTripTime = Milliseconds(1)) {
+ return _receiveHeartbeatHelper(Status::OK(),
+ member,
+ setName,
+ memberState,
+ Timestamp(),
+ lastOpTimeSender,
+ OpTime(),
+ roundTripTime);
+ }
+
+private:
+ HeartbeatResponseAction _receiveHeartbeatHelper(Status responseStatus,
+ const HostAndPort& member,
+ const std::string& setName,
+ MemberState memberState,
+ Timestamp electionTime,
+ const OpTime& lastOpTimeSender,
+ const OpTime& lastOpTimeReceiver,
+ Milliseconds roundTripTime) {
ReplSetHeartbeatResponse hb;
hb.setConfigVersion(1);
- hb.setState(MemberState::RS_SECONDARY);
+ hb.setState(memberState);
+ hb.setOpTime(lastOpTimeSender);
hb.setElectionTime(electionTime);
- hb.setHbMsg("READY");
- hb.setOpTime(oplogProgress);
- StatusWith<ReplSetHeartbeatResponse> hbResponseGood =
- StatusWith<ReplSetHeartbeatResponse>(hb);
-
- updateConfig(BSON("_id" << setName <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test0:1234") <<
- BSON("_id" << 1 << "host" << "test1:1234") <<
- BSON("_id" << 2 << "host" << "test2:1234") <<
- BSON("_id" << 3 << "host" << "test3:1234"))),
- 3,
- startupTime + Milliseconds(1));
-
- // Now that the replica set is setup, put the members into the states we want them in.
- HostAndPort member = HostAndPort("test0:1234");
- getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(1), setName, member);
- getTopoCoord().processHeartbeatResponse(startupTime + Milliseconds(2),
- Milliseconds(1),
- member,
- hbResponseGood,
- OpTime());
- getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(3), setName, member);
- Date_t timeoutTime = startupTime + Milliseconds(3) +
- ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod;
-
- StatusWith<ReplSetHeartbeatResponse> hbResponseDown =
- StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, ""));
-
- getTopoCoord().processHeartbeatResponse(timeoutTime,
- Milliseconds(5000),
- member,
- hbResponseDown,
- OpTime());
-
- member = HostAndPort("test1:1234");
- getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(2),
- setName,
- member);
- getTopoCoord().processHeartbeatResponse(heartbeatTime,
- Milliseconds(4000),
- member,
- hbResponseGood,
- OpTime());
- makeSelfPrimary();
-
- // Now node 0 is down, node 1 is up, and for node 2 we have no heartbeat data yet.
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- curTime,
- uptimeSecs.count(),
- oplogProgress,
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
-
- // Test results for all non-self members
- ASSERT_EQUALS(setName, rsStatus["set"].String());
- ASSERT_EQUALS(curTime.asInt64(), rsStatus["date"].Date().asInt64());
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- ASSERT_EQUALS(4U, memberArray.size());
- BSONObj member0Status = memberArray[0].Obj();
- BSONObj member1Status = memberArray[1].Obj();
- BSONObj member2Status = memberArray[2].Obj();
-
- // Test member 0, the node that's DOWN
- ASSERT_EQUALS(0, member0Status["_id"].numberInt());
- ASSERT_EQUALS("test0:1234", member0Status["name"].str());
- ASSERT_EQUALS(0, member0Status["health"].numberDouble());
- ASSERT_EQUALS(MemberState::RS_DOWN, member0Status["state"].numberInt());
- ASSERT_EQUALS("(not reachable/healthy)", member0Status["stateStr"].str());
- ASSERT_EQUALS(0, member0Status["uptime"].numberInt());
- ASSERT_EQUALS(Timestamp(), Timestamp(member0Status["optime"]["ts"].timestampValue()));
- ASSERT_TRUE(member0Status.hasField("optimeDate"));
- ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(Timestamp().getSecs() * 1000ULL),
- member0Status["optimeDate"].Date());
- ASSERT_EQUALS(timeoutTime, member0Status["lastHeartbeat"].date());
- ASSERT_EQUALS(Date_t(), member0Status["lastHeartbeatRecv"].date());
-
- // Test member 1, the node that's SECONDARY
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS("test1:1234", member1Status["name"].String());
- ASSERT_EQUALS(1, member1Status["health"].Double());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, member1Status["state"].numberInt());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- member1Status["stateStr"].String());
- ASSERT_EQUALS(uptimeSecs.count(), member1Status["uptime"].numberInt());
- ASSERT_EQUALS(oplogProgress.getTimestamp(),
- Timestamp(member1Status["optime"]["ts"].timestampValue()));
- ASSERT_TRUE(member1Status.hasField("optimeDate"));
- ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(oplogProgress.getSecs() * 1000ULL),
- member1Status["optimeDate"].Date());
- ASSERT_EQUALS(heartbeatTime, member1Status["lastHeartbeat"].date());
- ASSERT_EQUALS(Date_t(), member1Status["lastHeartbeatRecv"].date());
- ASSERT_EQUALS("READY", member1Status["lastHeartbeatMessage"].str());
-
- // Test member 2, the node that's UNKNOWN
- ASSERT_EQUALS(2, member2Status["_id"].numberInt());
- ASSERT_EQUALS("test2:1234", member2Status["name"].str());
- ASSERT_EQUALS(-1, member2Status["health"].numberDouble());
- ASSERT_EQUALS(MemberState::RS_UNKNOWN, member2Status["state"].numberInt());
- ASSERT_EQUALS(MemberState(MemberState::RS_UNKNOWN).toString(),
- member2Status["stateStr"].str());
- ASSERT_TRUE(member2Status.hasField("uptime"));
- ASSERT_TRUE(member2Status.hasField("optime"));
- ASSERT_TRUE(member2Status.hasField("optimeDate"));
- ASSERT_FALSE(member2Status.hasField("lastHearbeat"));
- ASSERT_FALSE(member2Status.hasField("lastHearbeatRecv"));
-
- // Now test results for ourself, the PRIMARY
- ASSERT_EQUALS(MemberState::RS_PRIMARY, rsStatus["myState"].numberInt());
- BSONObj selfStatus = memberArray[3].Obj();
- ASSERT_TRUE(selfStatus["self"].boolean());
- ASSERT_EQUALS(3, selfStatus["_id"].numberInt());
- ASSERT_EQUALS("test3:1234", selfStatus["name"].str());
- ASSERT_EQUALS(1, selfStatus["health"].numberDouble());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, selfStatus["state"].numberInt());
- ASSERT_EQUALS(MemberState(MemberState::RS_PRIMARY).toString(),
- selfStatus["stateStr"].str());
- ASSERT_EQUALS(uptimeSecs.count(), selfStatus["uptime"].numberInt());
- ASSERT_EQUALS(oplogProgress.getTimestamp(),
- Timestamp(selfStatus["optime"]["ts"].timestampValue()));
- ASSERT_TRUE(selfStatus.hasField("optimeDate"));
- ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(oplogProgress.getSecs() * 1000ULL),
- selfStatus["optimeDate"].Date());
-
- // TODO(spencer): Test electionTime and pingMs are set properly
- }
-
- TEST_F(TopoCoordTest, ReplSetGetStatusFails) {
- // This test starts by configuring a TopologyCoordinator to NOT be a member of a 3 node
- // replica set. Then running prepareStatusResponse should fail.
- Date_t startupTime = Date_t::fromMillisSinceEpoch(100);
- Date_t heartbeatTime = Date_t::fromMillisSinceEpoch(5000);
- Seconds uptimeSecs(10);
- Date_t curTime = heartbeatTime + uptimeSecs;
- OpTime oplogProgress(Timestamp(3, 4), 0);
- std::string setName = "mySet";
-
- updateConfig(BSON("_id" << setName <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test0:1234") <<
- BSON("_id" << 1 << "host" << "test1:1234") <<
- BSON("_id" << 2 << "host" << "test2:1234"))),
- -1, // This one is not part of the replica set.
- startupTime + Milliseconds(1));
-
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- curTime,
- uptimeSecs.count(),
- oplogProgress,
- &statusBuilder,
- &resultStatus);
- ASSERT_NOT_OK(resultStatus);
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, resultStatus);
- }
-
- TEST_F(TopoCoordTest, PrepareFreshResponse) {
- ReplicationCoordinator::ReplSetFreshArgs args;
- OpTime freshestOpTime(Timestamp(15, 10), 0);
- OpTime ourOpTime(Timestamp(10, 10), 0);
- OpTime staleOpTime(Timestamp(1, 1), 0);
- Status internalErrorStatus(ErrorCodes::InternalError, "didn't set status");
-
- // if we do not have an index in the config, we should get ErrorCodes::ReplicaSetNotFound
- BSONObjBuilder responseBuilder;
- Status status = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status);
- ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
- ASSERT_EQUALS("Cannot participate in elections because not initialized", status.reason());
- ASSERT_TRUE(responseBuilder.obj().isEmpty());
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 10 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 <<
- "host" << "hself" <<
- "priority" << 10) <<
- BSON("_id" << 20 << "host" << "h1") <<
- BSON("_id" << 30 << "host" << "h2") <<
- BSON("_id" << 40 <<
- "host" << "h3" <<
- "priority" << 10))),
- 0);
- // Test with incorrect replset name
- args.setName = "fakeset";
-
- BSONObjBuilder responseBuilder0;
- Status status0 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder0, &status0);
- ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status0);
- ASSERT_TRUE(responseBuilder0.obj().isEmpty());
-
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
-
- // Test with old config version
- args.setName = "rs0";
- args.cfgver = 5;
- args.id = 20;
- args.who = HostAndPort("h1");
- args.opTime = ourOpTime.getTimestamp();
-
- BSONObjBuilder responseBuilder1;
- Status status1 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder1, &status1);
- ASSERT_OK(status1);
- BSONObj response1 = responseBuilder1.obj();
- ASSERT_EQUALS("config version stale", response1["info"].String());
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response1["opTime"].timestampValue()));
- ASSERT_TRUE(response1["fresher"].Bool());
- ASSERT_FALSE(response1["veto"].Bool());
- ASSERT_FALSE(response1.hasField("errmsg"));
-
- // Test with non-existent node.
- args.cfgver = 10;
- args.id = 0;
- args.who = HostAndPort("fakenode");
-
- BSONObjBuilder responseBuilder2;
- Status status2 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder2, &status2);
- ASSERT_OK(status2);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response2["opTime"].timestampValue()));
- ASSERT_FALSE(response2["fresher"].Bool());
- ASSERT_TRUE(response2["veto"].Bool());
- ASSERT_EQUALS("replSet couldn't find member with id 0", response2["errmsg"].String());
-
-
- // Test when we are primary.
- args.id = 20;
- args.who = HostAndPort("h1");
-
- makeSelfPrimary();
-
- BSONObjBuilder responseBuilder3;
- Status status3 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder3, &status3);
- ASSERT_OK(status3);
- BSONObj response3 = responseBuilder3.obj();
- ASSERT_FALSE(response3.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response3["opTime"].timestampValue()));
- ASSERT_FALSE(response3["fresher"].Bool());
- ASSERT_TRUE(response3["veto"].Bool());
- ASSERT_EQUALS("I am already primary, h1:27017 can try again once I've stepped down",
- response3["errmsg"].String());
-
-
- // Test when someone else is primary.
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
- setSelfMemberState(MemberState::RS_SECONDARY);
- getTopoCoord()._setCurrentPrimaryForTest(2);
-
- BSONObjBuilder responseBuilder4;
- Status status4 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder4, &status4);
- ASSERT_OK(status4);
- BSONObj response4 = responseBuilder4.obj();
- ASSERT_FALSE(response4.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response4["opTime"].timestampValue()));
- ASSERT_FALSE(response4["fresher"].Bool());
- ASSERT_TRUE(response4["veto"].Bool());
- ASSERT_EQUALS(
- "h1:27017 is trying to elect itself but h2:27017 is already primary and more "
- "up-to-date",
- response4["errmsg"].String());
-
-
- // Test trying to elect a node that is caught up but isn't the highest priority node.
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
-
- BSONObjBuilder responseBuilder5;
- Status status5 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder5, &status5);
- ASSERT_OK(status5);
- BSONObj response5 = responseBuilder5.obj();
- ASSERT_FALSE(response5.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response5["opTime"].timestampValue()));
- ASSERT_FALSE(response5["fresher"].Bool());
- ASSERT_TRUE(response5["veto"].Bool());
- ASSERT(response5["errmsg"].String().find("h1:27017 has lower priority of 1 than") !=
- std::string::npos) << response5["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because its down
- args.id = 40;
- args.who = HostAndPort("h3");
-
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
-
- BSONObjBuilder responseBuilder6;
- Status status6 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder6, &status6);
- ASSERT_OK(status6);
- BSONObj response6 = responseBuilder6.obj();
- ASSERT_FALSE(response6.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response6["opTime"].timestampValue()));
- ASSERT_FALSE(response6["fresher"].Bool());
- ASSERT_TRUE(response6["veto"].Bool());
- ASSERT_NE(std::string::npos, response6["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response6["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because it's PRIMARY
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
- ASSERT_EQUALS(3, getCurrentPrimaryIndex());
-
- BSONObjBuilder responseBuilder7;
- Status status7 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder7, &status7);
- ASSERT_OK(status7);
- BSONObj response7 = responseBuilder7.obj();
- ASSERT_FALSE(response7.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response7["opTime"].timestampValue()));
- ASSERT_FALSE(response7["fresher"].Bool());
- ASSERT_TRUE(response7["veto"].Bool());
- ASSERT_NE(std::string::npos, response7["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response7["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because it's STARTUP
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_STARTUP, ourOpTime);
-
- BSONObjBuilder responseBuilder8;
- Status status8 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder8, &status8);
- ASSERT_OK(status8);
- BSONObj response8 = responseBuilder8.obj();
- ASSERT_FALSE(response8.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response8["opTime"].timestampValue()));
- ASSERT_FALSE(response8["fresher"].Bool());
- ASSERT_TRUE(response8["veto"].Bool());
- ASSERT_NE(std::string::npos, response8["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response8["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because it's RECOVERING
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, ourOpTime);
-
- BSONObjBuilder responseBuilder9;
- Status status9 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder9, &status9);
- ASSERT_OK(status9);
- BSONObj response9 = responseBuilder9.obj();
- ASSERT_FALSE(response9.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response9["opTime"].timestampValue()));
- ASSERT_FALSE(response9["fresher"].Bool());
- ASSERT_TRUE(response9["veto"].Bool());
- ASSERT_NE(std::string::npos, response9["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response9["errmsg"].String();
-
- // Test trying to elect a node that is fresher but lower priority than the existing primary
- args.id = 30;
- args.who = HostAndPort("h2");
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
- ASSERT_EQUALS(3, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, freshestOpTime);
-
- BSONObjBuilder responseBuilder10;
- Status status10 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder10, &status10);
- ASSERT_OK(status10);
- BSONObj response10 = responseBuilder10.obj();
- ASSERT_FALSE(response10.hasField("info"));
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response10["opTime"].timestampValue()));
- ASSERT_TRUE(response10["fresher"].Bool());
- ASSERT_TRUE(response10["veto"].Bool());
- ASSERT_TRUE(response10.hasField("errmsg"));
-
-
- // Test trying to elect a valid node
- args.id = 40;
- args.who = HostAndPort("h3");
-
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
-
- BSONObjBuilder responseBuilder11;
- Status status11 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(
- args, Date_t(), ourOpTime, &responseBuilder11, &status11);
- ASSERT_OK(status11);
- BSONObj response11 = responseBuilder11.obj();
- ASSERT_FALSE(response11.hasField("info")) << response11.toString();
- ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response11["opTime"].timestampValue()));
- ASSERT_FALSE(response11["fresher"].Bool()) << response11.toString();
- ASSERT_FALSE(response11["veto"].Bool()) << response11.toString();
- ASSERT_FALSE(response11.hasField("errmsg")) << response11.toString();
-
- // Test with our id
- args.id = 10;
- BSONObjBuilder responseBuilder12;
- Status status12 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(
- args, Date_t(), ourOpTime, &responseBuilder12, &status12);
- ASSERT_EQUALS(ErrorCodes::BadValue, status12);
- ASSERT_EQUALS(
- "Received replSetFresh command from member with the same member ID as ourself: 10",
- status12.reason());
- ASSERT_TRUE(responseBuilder12.obj().isEmpty());
+ StatusWith<ReplSetHeartbeatResponse> hbResponse = responseStatus.isOK()
+ ? StatusWith<ReplSetHeartbeatResponse>(hb)
+ : StatusWith<ReplSetHeartbeatResponse>(responseStatus);
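+ // Simulate a full heartbeat exchange with the given member: prepare the request,
+ // advance the mock clock by the round-trip time, then process the response (or
+ // the error status, if one was supplied).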
+ getTopoCoord().prepareHeartbeatRequest(now(), setName, member);
+ now() += roundTripTime;
+ return getTopoCoord().processHeartbeatResponse(
+ now(), roundTripTime, member, hbResponse, lastOpTimeReceiver);
}
- class HeartbeatResponseTest : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- }
-
- };
-
- class HeartbeatResponseTestOneRetry : public HeartbeatResponseTest {
- public:
- virtual void setUp() {
- HeartbeatResponseTest::setUp();
-
- // Bring up the node we are heartbeating.
- _target = HostAndPort("host2", 27017);
- Date_t _upRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T12:55Z"));
- std::pair<ReplSetHeartbeatArgs, Milliseconds> uppingRequest =
- getTopoCoord().prepareHeartbeatRequest(_upRequestDate,
- "rs0",
- _target);
- HeartbeatResponseAction upAction =
- getTopoCoord().processHeartbeatResponse(
- _upRequestDate,
- Milliseconds(0),
- _target,
- makeStatusWith<ReplSetHeartbeatResponse>(),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
-
-
- // Time of first request for this heartbeat period
- _firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
-
- // Initial heartbeat attempt prepared, at t + 0.
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(_firstRequestDate,
- "rs0",
- _target);
- // 5 seconds to successfully complete the heartbeat before the timeout expires.
- ASSERT_EQUALS(5000, request.second.count());
-
- // Initial heartbeat request fails at t + 4000ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- _firstRequestDate + Seconds(4), // 4 seconds elapsed, retry allowed.
- Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network.
- _target,
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit,
- "Took too long"),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
-
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat failed without timing out, we expect to retry immediately.
- ASSERT_EQUALS(_firstRequestDate + Seconds(4), action.getNextHeartbeatStartDate());
-
- // First heartbeat retry prepared, at t + 4000ms.
- request =
- getTopoCoord().prepareHeartbeatRequest(
- _firstRequestDate + Milliseconds(4000),
+private:
+ unique_ptr<TopologyCoordinatorImpl> _topo;
+ unique_ptr<ReplicationExecutor::CallbackArgs> _cbData;
+ Date_t _now;
+ int _selfIndex;
+};
+
+TEST_F(TopoCoordTest, ChooseSyncSourceBasic) {
+ // if we do not have an index in the config, we should get an empty sync source
+ HostAndPort newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_TRUE(newSyncSource.empty());
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // member h2 is the furthest ahead
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+
+ // We start with no sync source
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+
+ // Fail due to insufficient number of pings
+ newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
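+ // (A candidate needs a second heartbeat ping before it becomes eligible, so the
+ // single round recorded above is not enough.)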
+
+ // Record 2nd round of pings to allow choosing a new sync source; all members equidistant
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+
+ // Should choose h2, since it is furthest ahead
+ newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 becomes further ahead, so it should be chosen
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(2, 0), 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 becomes an invalid candidate for sync source; should choose h2 again
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, OpTime(Timestamp(2, 0), 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 back in SECONDARY and ahead
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(2, 0), 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 goes down
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 back up and ahead
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(2, 0), 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, ChooseSyncSourceCandidates) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself")
+ << BSON("_id" << 10 << "host"
+ << "h1")
+ << BSON("_id" << 20 << "host"
+ << "h2"
+ << "buildIndexes" << false << "priority" << 0)
+ << BSON("_id" << 30 << "host"
+ << "h3"
+ << "hidden" << true << "priority" << 0 << "votes"
+ << 0) << BSON("_id" << 40 << "host"
+ << "h4"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 50 << "host"
+ << "h5"
+ << "slaveDelay" << 1 << "priority" << 0)
+ << BSON("_id" << 60 << "host"
+ << "h6") << BSON("_id" << 70 << "host"
+ << "hprimary"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ OpTime lastOpTimeWeApplied = OpTime(Timestamp(100, 0), 0);
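+ // Ping times below decrease from h1 (700ms) down to hprimary (100ms); among valid
+ // candidates, selection should prefer the closest (lowest-ping) member.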
+
+ heartbeatFromMember(HostAndPort("h1"),
"rs0",
- _target);
- // One second left to complete the heartbeat.
- ASSERT_EQUALS(1000, request.second.count());
-
- // Ensure a single failed heartbeat did not cause the node to be marked down
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- _firstRequestDate + Milliseconds(4000),
- 10,
- OpTime(Timestamp(100,0), 0),
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- BSONObj member1Status = memberArray[1].Obj();
-
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS(1, member1Status["health"].Double());
-
- }
-
- Date_t firstRequestDate() {
- return _firstRequestDate;
- }
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(700));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(600));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(500));
+ heartbeatFromMember(HostAndPort("h4"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(400));
+ heartbeatFromMember(HostAndPort("h5"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(300));
- HostAndPort target() {
- return _target;
- }
+ // This node is lagged further than maxSyncSourceLagSeconds.
+ heartbeatFromMember(HostAndPort("h6"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(499, 0), 0),
+ Milliseconds(200));
- private:
- Date_t _firstRequestDate;
- HostAndPort _target;
-
- };
-
- class HeartbeatResponseTestTwoRetries : public HeartbeatResponseTestOneRetry {
- public:
- virtual void setUp() {
- HeartbeatResponseTestOneRetry::setUp();
- // First retry fails at t + 4500ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4500), // 4.5 of the 5 seconds elapsed;
- // could retry.
- Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the first retry failed without timing out, we expect to retry immediately.
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(4500),
- action.getNextHeartbeatStartDate());
-
- // Second retry prepared at t + 4500ms.
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(
- firstRequestDate() + Milliseconds(4500),
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("hprimary"),
"rs0",
- target());
- // 500ms left to complete the heartbeat.
- ASSERT_EQUALS(500, request.second.count());
-
- // Ensure a second failed heartbeat did not cause the node to be marked down
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- firstRequestDate() + Seconds(4),
- 10,
- OpTime(Timestamp(100,0), 0),
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- BSONObj member1Status = memberArray[1].Obj();
-
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS(1, member1Status["health"].Double());
- }
- };
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(600, 0), 0),
+ Milliseconds(100));
+ ASSERT_EQUALS(7, getCurrentPrimaryIndex());
- class HeartbeatResponseHighVerbosityTest : public HeartbeatResponseTest {
- public:
+ // Record 2nd round of pings to allow choosing a new sync source
+ heartbeatFromMember(HostAndPort("h1"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(700));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(600));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(500));
+ heartbeatFromMember(HostAndPort("h4"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(400));
+ heartbeatFromMember(HostAndPort("h5"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(501, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h6"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(499, 0), 0),
+ Milliseconds(200));
+ heartbeatFromMember(HostAndPort("hprimary"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(600, 0), 0),
+ Milliseconds(100));
- virtual void setUp() {
- HeartbeatResponseTest::setUp();
- // set verbosity as high as the highest verbosity log message we'd like to check for
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- }
+ // Should choose primary first; it's closest
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("hprimary"), getTopoCoord().getSyncSourceAddress());
- virtual void tearDown() {
- HeartbeatResponseTest::tearDown();
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
- }
+ // Primary goes far far away
+ heartbeatFromMember(HostAndPort("hprimary"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(600, 0), 0),
+ Milliseconds(100000000));
+
+ // Should choose h4; if an arbiter has an oplog, it is a valid sync source.
+ // h6 is not considered because it is outside the maxSyncSourceLagSeconds window.
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h4"), getTopoCoord().getSyncSourceAddress());
+
+ // h4 goes down; should choose h1
+ receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress());
+
+ // Primary and h1 go down; should choose h6
+ receiveDownHeartbeat(HostAndPort("h1"), "rs0", OpTime());
+ receiveDownHeartbeat(HostAndPort("hprimary"), "rs0", OpTime());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
+
+ // h6 goes down; should choose h5
+ receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
+
+ // h5 goes down; should choose h3
+ receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 goes down; no sync source candidates remain
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+}
+
+
+TEST_F(TopoCoordTest, ChooseSyncSourceChainingNotAllowed) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "settings" << BSON("chainingAllowed" << false)
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
- };
+ // No primary situation: should choose no sync source.
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataNodeBelivesWeAreDown) {
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
+ // Add primary
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
+ ASSERT_EQUALS(2, getCurrentPrimaryIndex());
+
+ // h3 is primary and should be chosen as the sync source, despite being further away
+ // than h2 and despite being behind our most recently applied optime
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(Timestamp(10, 0), 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, EmptySyncSourceOnPrimary) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(0, 0), 0),
+ Milliseconds(300));
+
+ // No primary situation: should choose h2 as the sync source.
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // Become primary
+ makeSelfPrimary(Timestamp(3.0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // Becoming primary should clear the sync source
+ ASSERT_EQUALS(HostAndPort(), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, ForceSyncSource) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // two rounds of heartbeat pings from each member
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+
+ // force should overrule other defaults
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+ getTopoCoord().setForceSyncSourceIndex(1);
+ // force should cause shouldChangeSyncSource() to return true
+ // even if the currentSource is the force target
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h2"), now()));
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h3"), now()));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // force should only work for one call to chooseNewSyncSource
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, BlacklistSyncSource) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ Date_t expireTime = Date_t::fromMillisSinceEpoch(1000);
+ getTopoCoord().blacklistSyncSource(HostAndPort("h3"), expireTime);
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ // Should choose second best choice now that h3 is blacklisted.
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // After time has passed, should go back to original sync source
+ getTopoCoord().chooseNewSyncSource(expireTime, OpTime());
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, BlacklistSyncSourceNoChaining) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "settings" << BSON("chainingAllowed" << false)
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
-
- ReplSetHeartbeatResponse believesWeAreDownResponse;
- believesWeAreDownResponse.noteReplSet();
- believesWeAreDownResponse.setSetName("rs0");
- believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
- believesWeAreDownResponse.setElectable(true);
- believesWeAreDownResponse.noteStateDisagreement();
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host2"),
- StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
-
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ Date_t expireTime = Date_t::fromMillisSinceEpoch(1000);
+ getTopoCoord().blacklistSyncSource(HostAndPort("h2"), expireTime);
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ // Can't choose any sync source now.
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+
+ // After time has passed, should go back to the primary
+ getTopoCoord().chooseNewSyncSource(expireTime, OpTime());
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, OnlyUnauthorizedUpCausesRecovering) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // Generate enough heartbeats to select a sync source below
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+ heartbeatFromMember(HostAndPort("h3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(2, 0), 0),
+ Milliseconds(100));
+
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().chooseNewSyncSource(now()++, OpTime()));
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+ // Good state setup done
+
+ // Mark nodes down, ensure that we have no source and are secondary
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
+ ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime()).empty());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // Mark nodes down + unauth, ensure that we have no source and are secondary
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::Unauthorized);
+ ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime()).empty());
+ ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s);
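+ // Note the contrast with the plain NetworkTimeout case above: only the Unauthorized
+ // error drives the SECONDARY -> RECOVERING transition.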
+
+ // Having an auth error but with another node up should bring us out of RECOVERING
+ HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(),
+ OpTime(Timestamp(2, 0), 0),
+ OpTime(Timestamp(2, 0), 0));
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+ // Test that the heartbeat that brings us from RECOVERING to SECONDARY doesn't initiate
+ // an election (SERVER-17164)
+ ASSERT_NO_ACTION(action.getAction());
+}
+
+TEST_F(TopoCoordTest, ReceiveHeartbeatWhileAbsentFromConfig) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "h1")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ -1);
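+ // With selfIndex -1 we are not a member of this config, so the heartbeat should
+ // produce no action.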
+ ASSERT_NO_ACTION(heartbeatFromMember(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(Timestamp(1, 0), 0),
+ Milliseconds(300)).getAction());
+}
+
+TEST_F(TopoCoordTest, PrepareSyncFromResponse) {
+ OpTime staleOpTime(Timestamp(1, 1), 0);
+ OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0);
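+ // ourOpTime is 11 seconds ahead of staleOpTime, enough to trigger the
+ // "more than 10 seconds behind" warning exercised below.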
+
+ Status result = Status::OK();
+ BSONObjBuilder response;
+
+ // if we do not have an index in the config, we should get ErrorCodes::NotSecondary
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h1"), ourOpTime, &response, &result);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
+ ASSERT_EQUALS("Removed and uninitialized nodes do not sync", result.reason());
+
+ // Test trying to sync from another node when we are an arbiter
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 1 << "host"
+ << "h1"))),
+ 0);
+
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h1"), ourOpTime, &response, &result);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
+ ASSERT_EQUALS("arbiters don't sync", result.reason());
+
+ // Set up config for the rest of the tests
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself")
+ << BSON("_id" << 1 << "host"
+ << "h1"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 0 << "buildIndexes" << false)
+ << BSON("_id" << 3 << "host"
+ << "h3") << BSON("_id" << 4 << "host"
+ << "h4")
+ << BSON("_id" << 5 << "host"
+ << "h5") << BSON("_id" << 6 << "host"
+ << "h6"))),
+ 0);
+
+ // Try to sync while PRIMARY
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary();
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ getTopoCoord()._setCurrentPrimaryForTest(0);
+ BSONObjBuilder response1;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h3"), ourOpTime, &response1, &result);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
+ ASSERT_EQUALS("primaries don't sync", result.reason());
+ ASSERT_EQUALS("h3:27017", response1.obj()["syncFromRequested"].String());
+
+ // Try to sync from non-existent member
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ getTopoCoord()._setCurrentPrimaryForTest(-1);
+ BSONObjBuilder response2;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("fakemember"), ourOpTime, &response2, &result);
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, result);
+ ASSERT_EQUALS("Could not find member \"fakemember:27017\" in replica set", result.reason());
+
+ // Try to sync from self
+ BSONObjBuilder response3;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("hself"), ourOpTime, &response3, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
+ ASSERT_EQUALS("I cannot sync from myself", result.reason());
+
+ // Try to sync from an arbiter
+ BSONObjBuilder response4;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h1"), ourOpTime, &response4, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
+ ASSERT_EQUALS("Cannot sync from \"h1:27017\" because it is an arbiter", result.reason());
+
+ // Try to sync from a node that doesn't build indexes
+ BSONObjBuilder response5;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h2"), ourOpTime, &response5, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
+ ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it does not build indexes",
+ result.reason());
+
+ // Try to sync from a member that is down
+ receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
+
+ BSONObjBuilder response7;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h4"), ourOpTime, &response7, &result);
+ ASSERT_EQUALS(ErrorCodes::HostUnreachable, result);
+ ASSERT_EQUALS("I cannot reach the requested member: h4:27017", result.reason());
+
+ // Sync successfully from a member that is stale
+ heartbeatFromMember(
+ HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, staleOpTime, Milliseconds(100));
+
+ BSONObjBuilder response8;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h5"), ourOpTime, &response8, &result);
+ ASSERT_OK(result);
+ ASSERT_EQUALS("requested member \"h5:27017\" is more than 10 seconds behind us",
+ response8.obj()["warning"].String());
+ getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
+ ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
+
+ // Sync successfully from an up-to-date member
+ heartbeatFromMember(
+ HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100));
+
+ BSONObjBuilder response9;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h6"), ourOpTime, &response9, &result);
+ ASSERT_OK(result);
+ BSONObj response9Obj = response9.obj();
+ ASSERT_FALSE(response9Obj.hasField("warning"));
+ ASSERT_EQUALS(HostAndPort("h5").toString(), response9Obj["prevSyncTarget"].String());
+ getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
+ ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
+
+ // Node goes down between the replSetSyncFrom request and chooseNewSyncSource
+ BSONObjBuilder response10;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h6"), ourOpTime, &response10, &result);
+ BSONObj response10Obj = response10.obj();
+ ASSERT_FALSE(response10Obj.hasField("warning"));
+ ASSERT_EQUALS(HostAndPort("h6").toString(), response10Obj["prevSyncTarget"].String());
+ receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
+ HostAndPort syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h6"), syncSource);
+
+ // Try to sync from a member that is unauth'd
+ receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime(), ErrorCodes::Unauthorized);
+
+ BSONObjBuilder response11;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h5"), ourOpTime, &response11, &result);
+ ASSERT_NOT_OK(result);
+ ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
+ ASSERT_EQUALS("not authorized to communicate with h5:27017", result.reason());
+
+ // Sync successfully from an up-to-date member.
+ heartbeatFromMember(
+ HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100));
+ BSONObjBuilder response12;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h6"), ourOpTime, &response12, &result);
+ ASSERT_OK(result);
+ syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+ ASSERT_EQUALS(HostAndPort("h6"), syncSource);
+}
+
+TEST_F(TopoCoordTest, ReplSetGetStatus) {
+ // This test starts by configuring a TopologyCoordinator as a member of a 4 node replica
+ // set, with each node in a different state.
+ // The first node is DOWN, as if we tried heartbeating them and it failed in some way.
+ // The second node is in state SECONDARY, as if we've received a valid heartbeat from them.
+ // The third node is in state UNKNOWN, as if we've had no heartbeating activity
+ // with them yet. The fourth node is PRIMARY and corresponds to ourself, which gets its
+ // information for replSetGetStatus from a different source than the nodes that aren't
+ // ourself. After this setup, we call prepareStatusResponse and make sure that the fields
+ // returned for each member match our expectations.
+ Date_t startupTime = Date_t::fromMillisSinceEpoch(100);
+ Date_t heartbeatTime = Date_t::fromMillisSinceEpoch(5000);
+ Seconds uptimeSecs(10);
+ Date_t curTime = heartbeatTime + uptimeSecs;
+ Timestamp electionTime(1, 2);
+ OpTime oplogProgress(Timestamp(3, 4), 0);
+ std::string setName = "mySet";
+
+ ReplSetHeartbeatResponse hb;
+ hb.setConfigVersion(1);
+ hb.setState(MemberState::RS_SECONDARY);
+ hb.setElectionTime(electionTime);
+ hb.setHbMsg("READY");
+ hb.setOpTime(oplogProgress);
+ StatusWith<ReplSetHeartbeatResponse> hbResponseGood = StatusWith<ReplSetHeartbeatResponse>(hb);
+
+ updateConfig(
+ BSON("_id" << setName << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test0:1234")
+ << BSON("_id" << 1 << "host"
+ << "test1:1234") << BSON("_id" << 2 << "host"
+ << "test2:1234")
+ << BSON("_id" << 3 << "host"
+ << "test3:1234"))),
+ 3,
+ startupTime + Milliseconds(1));
+
+ // Now that the replica set is setup, put the members into the states we want them in.
+ HostAndPort member = HostAndPort("test0:1234");
+ getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(1), setName, member);
+ getTopoCoord().processHeartbeatResponse(
+ startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood, OpTime());
+ getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(3), setName, member);
+ Date_t timeoutTime =
+ startupTime + Milliseconds(3) + ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod;
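+ // timeoutTime is when the outstanding heartbeat to test0 is considered to have
+ // timed out.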
+
+ StatusWith<ReplSetHeartbeatResponse> hbResponseDown =
+ StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, ""));
+
+ getTopoCoord().processHeartbeatResponse(
+ timeoutTime, Milliseconds(5000), member, hbResponseDown, OpTime());
+
+ member = HostAndPort("test1:1234");
+ getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(2), setName, member);
+ getTopoCoord().processHeartbeatResponse(
+ heartbeatTime, Milliseconds(4000), member, hbResponseGood, OpTime());
+ makeSelfPrimary();
+
+ // Now node 0 is down, node 1 is up, and for node 2 we have no heartbeat data yet.
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(
+ cbData(), curTime, uptimeSecs.count(), oplogProgress, &statusBuilder, &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+
+ // Test results for all non-self members
+ ASSERT_EQUALS(setName, rsStatus["set"].String());
+ ASSERT_EQUALS(curTime.asInt64(), rsStatus["date"].Date().asInt64());
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ ASSERT_EQUALS(4U, memberArray.size());
+ BSONObj member0Status = memberArray[0].Obj();
+ BSONObj member1Status = memberArray[1].Obj();
+ BSONObj member2Status = memberArray[2].Obj();
+
+ // Test member 0, the node that's DOWN
+ ASSERT_EQUALS(0, member0Status["_id"].numberInt());
+ ASSERT_EQUALS("test0:1234", member0Status["name"].str());
+ ASSERT_EQUALS(0, member0Status["health"].numberDouble());
+ ASSERT_EQUALS(MemberState::RS_DOWN, member0Status["state"].numberInt());
+ ASSERT_EQUALS("(not reachable/healthy)", member0Status["stateStr"].str());
+ ASSERT_EQUALS(0, member0Status["uptime"].numberInt());
+ ASSERT_EQUALS(Timestamp(), Timestamp(member0Status["optime"]["ts"].timestampValue()));
+ ASSERT_TRUE(member0Status.hasField("optimeDate"));
+ ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(Timestamp().getSecs() * 1000ULL),
+ member0Status["optimeDate"].Date());
+ ASSERT_EQUALS(timeoutTime, member0Status["lastHeartbeat"].date());
+ ASSERT_EQUALS(Date_t(), member0Status["lastHeartbeatRecv"].date());
+
+ // Test member 1, the node that's SECONDARY
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS("test1:1234", member1Status["name"].String());
+ ASSERT_EQUALS(1, member1Status["health"].Double());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, member1Status["state"].numberInt());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ member1Status["stateStr"].String());
+ ASSERT_EQUALS(uptimeSecs.count(), member1Status["uptime"].numberInt());
+ ASSERT_EQUALS(oplogProgress.getTimestamp(),
+ Timestamp(member1Status["optime"]["ts"].timestampValue()));
+ ASSERT_TRUE(member1Status.hasField("optimeDate"));
+ ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(oplogProgress.getSecs() * 1000ULL),
+ member1Status["optimeDate"].Date());
+ ASSERT_EQUALS(heartbeatTime, member1Status["lastHeartbeat"].date());
+ ASSERT_EQUALS(Date_t(), member1Status["lastHeartbeatRecv"].date());
+ ASSERT_EQUALS("READY", member1Status["lastHeartbeatMessage"].str());
+
+ // Test member 2, the node that's UNKNOWN
+ ASSERT_EQUALS(2, member2Status["_id"].numberInt());
+ ASSERT_EQUALS("test2:1234", member2Status["name"].str());
+ ASSERT_EQUALS(-1, member2Status["health"].numberDouble());
+ ASSERT_EQUALS(MemberState::RS_UNKNOWN, member2Status["state"].numberInt());
+ ASSERT_EQUALS(MemberState(MemberState::RS_UNKNOWN).toString(), member2Status["stateStr"].str());
+ ASSERT_TRUE(member2Status.hasField("uptime"));
+ ASSERT_TRUE(member2Status.hasField("optime"));
+ ASSERT_TRUE(member2Status.hasField("optimeDate"));
+ ASSERT_FALSE(member2Status.hasField("lastHearbeat"));
+ ASSERT_FALSE(member2Status.hasField("lastHearbeatRecv"));
+
+ // Now test results for ourself, the PRIMARY
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, rsStatus["myState"].numberInt());
+ BSONObj selfStatus = memberArray[3].Obj();
+ ASSERT_TRUE(selfStatus["self"].boolean());
+ ASSERT_EQUALS(3, selfStatus["_id"].numberInt());
+ ASSERT_EQUALS("test3:1234", selfStatus["name"].str());
+ ASSERT_EQUALS(1, selfStatus["health"].numberDouble());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, selfStatus["state"].numberInt());
+ ASSERT_EQUALS(MemberState(MemberState::RS_PRIMARY).toString(), selfStatus["stateStr"].str());
+ ASSERT_EQUALS(uptimeSecs.count(), selfStatus["uptime"].numberInt());
+ ASSERT_EQUALS(oplogProgress.getTimestamp(),
+ Timestamp(selfStatus["optime"]["ts"].timestampValue()));
+ ASSERT_TRUE(selfStatus.hasField("optimeDate"));
+ ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(oplogProgress.getSecs() * 1000ULL),
+ selfStatus["optimeDate"].Date());
+
+ // TODO(spencer): Test electionTime and pingMs are set properly
+}
+
+TEST_F(TopoCoordTest, ReplSetGetStatusFails) {
+ // This test starts by configuring a TopologyCoordinator to NOT be a member of a 3 node
+ // replica set. Running prepareStatusResponse should then fail.
+ Date_t startupTime = Date_t::fromMillisSinceEpoch(100);
+ Date_t heartbeatTime = Date_t::fromMillisSinceEpoch(5000);
+ Seconds uptimeSecs(10);
+ Date_t curTime = heartbeatTime + uptimeSecs;
+ OpTime oplogProgress(Timestamp(3, 4), 0);
+ std::string setName = "mySet";
+
+ updateConfig(
+ BSON("_id" << setName << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test0:1234")
+ << BSON("_id" << 1 << "host"
+ << "test1:1234") << BSON("_id" << 2 << "host"
+ << "test2:1234"))),
+ -1, // This one is not part of the replica set.
+ startupTime + Milliseconds(1));
+
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(
+ cbData(), curTime, uptimeSecs.count(), oplogProgress, &statusBuilder, &resultStatus);
+ ASSERT_NOT_OK(resultStatus);
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, resultStatus);
+}
+
+TEST_F(TopoCoordTest, PrepareFreshResponse) {
+ ReplicationCoordinator::ReplSetFreshArgs args;
+ OpTime freshestOpTime(Timestamp(15, 10), 0);
+ OpTime ourOpTime(Timestamp(10, 10), 0);
+ OpTime staleOpTime(Timestamp(1, 1), 0);
+ Status internalErrorStatus(ErrorCodes::InternalError, "didn't set status");
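+ // Each case below seeds its out-status with internalErrorStatus so the test can
+ // detect whether prepareFreshResponse actually set it.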
+
+ // if we do not have an index in the config, we should get ErrorCodes::ReplicaSetNotFound
+ BSONObjBuilder responseBuilder;
+ Status status = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status);
+ ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
+ ASSERT_EQUALS("Cannot participate in elections because not initialized", status.reason());
+ ASSERT_TRUE(responseBuilder.obj().isEmpty());
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 10 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself"
+ << "priority" << 10)
+ << BSON("_id" << 20 << "host"
+ << "h1") << BSON("_id" << 30 << "host"
+ << "h2")
+ << BSON("_id" << 40 << "host"
+ << "h3"
+ << "priority" << 10))),
+ 0);
+
+ // Test with incorrect replset name
+ args.setName = "fakeset";
+
+ BSONObjBuilder responseBuilder0;
+ Status status0 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder0, &status0);
+ ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status0);
+ ASSERT_TRUE(responseBuilder0.obj().isEmpty());
+
+ heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+ // Test with old config version
+ args.setName = "rs0";
+ args.cfgver = 5;
+ args.id = 20;
+ args.who = HostAndPort("h1");
+ args.opTime = ourOpTime.getTimestamp();
+
+ BSONObjBuilder responseBuilder1;
+ Status status1 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder1, &status1);
+ ASSERT_OK(status1);
+ BSONObj response1 = responseBuilder1.obj();
+ ASSERT_EQUALS("config version stale", response1["info"].String());
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response1["opTime"].timestampValue()));
+ ASSERT_TRUE(response1["fresher"].Bool());
+ ASSERT_FALSE(response1["veto"].Bool());
+ ASSERT_FALSE(response1.hasField("errmsg"));
+
+ // Test with non-existent node.
+ args.cfgver = 10;
+ args.id = 0;
+ args.who = HostAndPort("fakenode");
+
+ BSONObjBuilder responseBuilder2;
+ Status status2 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder2, &status2);
+ ASSERT_OK(status2);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response2["opTime"].timestampValue()));
+ ASSERT_FALSE(response2["fresher"].Bool());
+ ASSERT_TRUE(response2["veto"].Bool());
+ ASSERT_EQUALS("replSet couldn't find member with id 0", response2["errmsg"].String());
+
+
+ // Test when we are primary.
+ args.id = 20;
+ args.who = HostAndPort("h1");
+
+ makeSelfPrimary();
+
+ BSONObjBuilder responseBuilder3;
+ Status status3 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder3, &status3);
+ ASSERT_OK(status3);
+ BSONObj response3 = responseBuilder3.obj();
+ ASSERT_FALSE(response3.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response3["opTime"].timestampValue()));
+ ASSERT_FALSE(response3["fresher"].Bool());
+ ASSERT_TRUE(response3["veto"].Bool());
+ ASSERT_EQUALS("I am already primary, h1:27017 can try again once I've stepped down",
+ response3["errmsg"].String());
+
+
+ // Test when someone else is primary.
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ getTopoCoord()._setCurrentPrimaryForTest(2);
+
+ BSONObjBuilder responseBuilder4;
+ Status status4 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder4, &status4);
+ ASSERT_OK(status4);
+ BSONObj response4 = responseBuilder4.obj();
+ ASSERT_FALSE(response4.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response4["opTime"].timestampValue()));
+ ASSERT_FALSE(response4["fresher"].Bool());
+ ASSERT_TRUE(response4["veto"].Bool());
+ ASSERT_EQUALS(
+ "h1:27017 is trying to elect itself but h2:27017 is already primary and more "
+ "up-to-date",
+ response4["errmsg"].String());
+
+
+ // Test trying to elect a node that is caught up but isn't the highest priority node.
+ heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+ BSONObjBuilder responseBuilder5;
+ Status status5 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder5, &status5);
+ ASSERT_OK(status5);
+ BSONObj response5 = responseBuilder5.obj();
+ ASSERT_FALSE(response5.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response5["opTime"].timestampValue()));
+ ASSERT_FALSE(response5["fresher"].Bool());
+ ASSERT_TRUE(response5["veto"].Bool());
+ ASSERT(response5["errmsg"].String().find("h1:27017 has lower priority of 1 than") !=
+ std::string::npos)
+ << response5["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's down
+ args.id = 40;
+ args.who = HostAndPort("h3");
+
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+
+ BSONObjBuilder responseBuilder6;
+ Status status6 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder6, &status6);
+ ASSERT_OK(status6);
+ BSONObj response6 = responseBuilder6.obj();
+ ASSERT_FALSE(response6.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response6["opTime"].timestampValue()));
+ ASSERT_FALSE(response6["fresher"].Bool());
+ ASSERT_TRUE(response6["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response6["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response6["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's PRIMARY
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
+ ASSERT_EQUALS(3, getCurrentPrimaryIndex());
+
+ BSONObjBuilder responseBuilder7;
+ Status status7 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder7, &status7);
+ ASSERT_OK(status7);
+ BSONObj response7 = responseBuilder7.obj();
+ ASSERT_FALSE(response7.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response7["opTime"].timestampValue()));
+ ASSERT_FALSE(response7["fresher"].Bool());
+ ASSERT_TRUE(response7["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response7["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response7["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's STARTUP
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_STARTUP, ourOpTime);
+
+ BSONObjBuilder responseBuilder8;
+ Status status8 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder8, &status8);
+ ASSERT_OK(status8);
+ BSONObj response8 = responseBuilder8.obj();
+ ASSERT_FALSE(response8.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response8["opTime"].timestampValue()));
+ ASSERT_FALSE(response8["fresher"].Bool());
+ ASSERT_TRUE(response8["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response8["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response8["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's RECOVERING
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, ourOpTime);
+
+ BSONObjBuilder responseBuilder9;
+ Status status9 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder9, &status9);
+ ASSERT_OK(status9);
+ BSONObj response9 = responseBuilder9.obj();
+ ASSERT_FALSE(response9.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response9["opTime"].timestampValue()));
+ ASSERT_FALSE(response9["fresher"].Bool());
+ ASSERT_TRUE(response9["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response9["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response9["errmsg"].String();
+
+ // Test trying to elect a node that is fresher but lower priority than the existing primary
+ args.id = 30;
+ args.who = HostAndPort("h2");
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
+ ASSERT_EQUALS(3, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, freshestOpTime);
+
+ BSONObjBuilder responseBuilder10;
+ Status status10 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder10, &status10);
+ ASSERT_OK(status10);
+ BSONObj response10 = responseBuilder10.obj();
+ ASSERT_FALSE(response10.hasField("info"));
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response10["opTime"].timestampValue()));
+ ASSERT_TRUE(response10["fresher"].Bool());
+ ASSERT_TRUE(response10["veto"].Bool());
+ ASSERT_TRUE(response10.hasField("errmsg"));
+
+
+ // Test trying to elect a valid node
+ args.id = 40;
+ args.who = HostAndPort("h3");
+
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+ BSONObjBuilder responseBuilder11;
+ Status status11 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder11, &status11);
+ ASSERT_OK(status11);
+ BSONObj response11 = responseBuilder11.obj();
+ ASSERT_FALSE(response11.hasField("info")) << response11.toString();
+ ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response11["opTime"].timestampValue()));
+ ASSERT_FALSE(response11["fresher"].Bool()) << response11.toString();
+ ASSERT_FALSE(response11["veto"].Bool()) << response11.toString();
+ ASSERT_FALSE(response11.hasField("errmsg")) << response11.toString();
+
+ // Test with our id
+ args.id = 10;
+ BSONObjBuilder responseBuilder12;
+ Status status12 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder12, &status12);
+ ASSERT_EQUALS(ErrorCodes::BadValue, status12);
+ ASSERT_EQUALS(
+ "Received replSetFresh command from member with the same member ID as ourself: 10",
+ status12.reason());
+ ASSERT_TRUE(responseBuilder12.obj().isEmpty());
+}
+
+class HeartbeatResponseTest : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
}
+};
+
+class HeartbeatResponseTestOneRetry : public HeartbeatResponseTest {
+public:
+ virtual void setUp() {
+ HeartbeatResponseTest::setUp();
+
+ // Bring up the node we are heartbeating.
+ _target = HostAndPort("host2", 27017);
+ Date_t _upRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T12:55Z"));
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> uppingRequest =
+ getTopoCoord().prepareHeartbeatRequest(_upRequestDate, "rs0", _target);
+ HeartbeatResponseAction upAction = getTopoCoord().processHeartbeatResponse(
+ _upRequestDate,
+ Milliseconds(0),
+ _target,
+ makeStatusWith<ReplSetHeartbeatResponse>(),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataMemberNotInConfig) {
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host5"));
-
- ReplSetHeartbeatResponse memberMissingResponse;
- memberMissingResponse.noteReplSet();
- memberMissingResponse.setSetName("rs0");
- memberMissingResponse.setState(MemberState::RS_SECONDARY);
- memberMissingResponse.setElectable(true);
- memberMissingResponse.noteStateDisagreement();
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host5"),
- StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config"));
- }
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) {
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
+ // Time of first request for this heartbeat period
+ _firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
- // request heartbeat
+ // Initial heartbeat attempt prepared, at t + 0.
std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
-
- // construct a copy of the original config for log message checking later
- // see HeartbeatResponseTest for the origin of the original config
- ReplicaSetConfig originalConfig;
- originalConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)));
-
- ReplSetHeartbeatResponse sameConfigResponse;
- sameConfigResponse.noteReplSet();
- sameConfigResponse.setSetName("rs0");
- sameConfigResponse.setState(MemberState::RS_SECONDARY);
- sameConfigResponse.setElectable(true);
- sameConfigResponse.noteStateDisagreement();
- sameConfigResponse.setConfigVersion(2);
- sameConfigResponse.setConfig(originalConfig);
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host2"),
- StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("Config from heartbeat response was "
- "same as ours."));
- }
-
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataOldConfig) {
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
+ getTopoCoord().prepareHeartbeatRequest(_firstRequestDate, "rs0", _target);
+ // 5 seconds to successfully complete the heartbeat before the timeout expires.
+ ASSERT_EQUALS(5000, request.second.count());
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
-
- ReplSetHeartbeatResponse believesWeAreDownResponse;
- believesWeAreDownResponse.noteReplSet();
- believesWeAreDownResponse.setSetName("rs0");
- believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
- believesWeAreDownResponse.setElectable(true);
- believesWeAreDownResponse.noteStateDisagreement();
- startCapturingLogMessages();
+ // Initial heartbeat request fails at t + 4000ms
HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host2"),
- StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
-
- }
+ _firstRequestDate + Seconds(4), // 4 seconds elapsed, retry allowed.
+ Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network.
+ _target,
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
- TEST_F(HeartbeatResponseTestOneRetry, DecideToReconfig) {
- // Confirm that action responses can come back from retries; in this, expect a Reconfig
- // action.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 7 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017") <<
- BSON("_id" << 3 << "host" << "host4:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5))));
- ASSERT_OK(newConfig.validate());
-
- ReplSetHeartbeatResponse reconfigResponse;
- reconfigResponse.noteReplSet();
- reconfigResponse.setSetName("rs0");
- reconfigResponse.setState(MemberState::RS_SECONDARY);
- reconfigResponse.setElectable(true);
- reconfigResponse.setConfigVersion(7);
- reconfigResponse.setConfig(newConfig);
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4500), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(reconfigResponse),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::Reconfig, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownRemotePrimary) {
- // Confirm that action responses can come back from retries; in this, expect a
- // StepDownRemotePrimary action.
-
- // make self primary
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(Timestamp(5,0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(true);
- electedMoreRecentlyResponse.setElectionTime(Timestamp(3,0));
- electedMoreRecentlyResponse.setConfigVersion(5);
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4500), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime()); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
- ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownSelf) {
- // Confirm that action responses can come back from retries; in this, expect a StepDownSelf
- // action.
-
- // acknowledge the other member so that we see a majority
- HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"),
- "rs0",
- OpTime(Timestamp(100, 0), 0));
- ASSERT_NO_ACTION(action.getAction());
-
- // make us PRIMARY
- makeSelfPrimary();
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(false);
- electedMoreRecentlyResponse.setElectionTime(Timestamp(10,0));
- electedMoreRecentlyResponse.setConfigVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4500), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
- ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToStartElection) {
- // Confirm that action responses can come back from retries; in this, expect a StartElection
- // action.
-
- // acknowledge the other member so that we see a majority
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
- HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(action.getAction());
-
- // make sure we are electable
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- ReplSetHeartbeatResponse startElectionResponse;
- startElectionResponse.noteReplSet();
- startElectionResponse.setSetName("rs0");
- startElectionResponse.setState(MemberState::RS_SECONDARY);
- startElectionResponse.setElectable(true);
- startElectionResponse.setConfigVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4500), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
- election);
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatRetriesAtMostTwice) {
- // Confirm that the topology coordinator attempts to retry a failed heartbeat two times
- // after initial failure, assuming that the heartbeat timeout (set to 5 seconds in the
- // fixture) has not expired.
- //
- // Failed heartbeats propose taking no action, other than scheduling the next heartbeat. We
- // can detect a retry vs the next regularly scheduled heartbeat because retries are
- // scheduled immediately, while subsequent heartbeats are scheduled after the hard-coded
- // heartbeat interval of 2 seconds.
-
- // Second retry fails at t + 4800ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4800), // 4.8 of the 5 seconds elapsed;
- // could still retry.
- Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because this is the second retry, rather than retry again, we expect to wait for the
- // heartbeat interval of 2 seconds to elapse.
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(6800), action.getNextHeartbeatStartDate());
+ // Because the heartbeat failed without timing out, we expect to retry immediately.
+ ASSERT_EQUALS(_firstRequestDate + Seconds(4), action.getNextHeartbeatStartDate());
- // Ensure a third failed heartbeat caused the node to be marked down
+ // First heartbeat retry prepared, at t + 4000ms.
+ request = getTopoCoord().prepareHeartbeatRequest(
+ _firstRequestDate + Milliseconds(4000), "rs0", _target);
+ // One second left to complete the heartbeat.
+ ASSERT_EQUALS(1000, request.second.count());
+
+ // Ensure a single failed heartbeat did not cause the node to be marked down
BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
getTopoCoord().prepareStatusResponse(cbData(),
- firstRequestDate() + Milliseconds(4900),
+ _firstRequestDate + Milliseconds(4000),
10,
- OpTime(Timestamp(100,0), 0),
+ OpTime(Timestamp(100, 0), 0),
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -1683,209 +1427,53 @@ namespace {
BSONObj member1Status = memberArray[1].Obj();
ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS(0, member1Status["health"].Double());
- }
-
- TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownRemotePrimary) {
- // Confirm that action responses can come back from retries; in this, expect a
- // StepDownRemotePrimary action.
-
- // make self primary
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(Timestamp(5,0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(true);
- electedMoreRecentlyResponse.setElectionTime(Timestamp(3,0));
- electedMoreRecentlyResponse.setConfigVersion(5);
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(5000), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime()); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
- ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownSelf) {
- // Confirm that action responses can come back from retries; in this, expect a StepDownSelf
- // action.
-
- // acknowledge the other member so that we see a majority
- HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"),
- "rs0",
- OpTime(Timestamp(100, 0), 0));
- ASSERT_NO_ACTION(action.getAction());
-
- // make us PRIMARY
- makeSelfPrimary();
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(false);
- electedMoreRecentlyResponse.setElectionTime(Timestamp(10,0));
- electedMoreRecentlyResponse.setConfigVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(5000), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
- ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTestTwoRetries, DecideToStartElection) {
- // Confirm that action responses can come back from retries; in this, expect a StartElection
- // action.
-
- // acknowledge the other member so that we see a majority
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
- HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(action.getAction());
-
- // make sure we are electable
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- ReplSetHeartbeatResponse startElectionResponse;
- startElectionResponse.noteReplSet();
- startElectionResponse.setSetName("rs0");
- startElectionResponse.setState(MemberState::RS_SECONDARY);
- startElectionResponse.setElectable(true);
- startElectionResponse.setConfigVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(5000), // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
- election);
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate());
+ ASSERT_EQUALS(1, member1Status["health"].Double());
}
- TEST_F(HeartbeatResponseTest, HeartbeatTimeoutSuppressesFirstRetry) {
- // Confirm that the topology coordinator does not schedule an immediate heartbeat retry if
- // the heartbeat timeout period expired before the initial request completed.
-
- HostAndPort target("host2", 27017);
- Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
-
- // Initial heartbeat request prepared, at t + 0.
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(firstRequestDate,
- "rs0",
- target);
- // 5 seconds to successfully complete the heartbeat before the timeout expires.
- ASSERT_EQUALS(5000, request.second.count());
-
- // Initial heartbeat request fails at t + 5000ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate + Milliseconds(5000), // Entire heartbeat period elapsed;
- // no retry allowed.
- Milliseconds(4990), // Spent 4.99 of the 4 seconds in the network.
- target,
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit,
- "Took too long"),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
-
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat timed out, we'll retry in 2 seconds.
- ASSERT_EQUALS(firstRequestDate + Milliseconds(7000), action.getNextHeartbeatStartDate());
+ Date_t firstRequestDate() {
+ return _firstRequestDate;
}
- TEST_F(HeartbeatResponseTestOneRetry, HeartbeatTimeoutSuppressesSecondRetry) {
- // Confirm that the topology coordinator does not schedule an second heartbeat retry if
- // the heartbeat timeout period expired before the first retry completed.
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(5010), // Entire heartbeat period elapsed;
- // no retry allowed.
- Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit,
- "Took too long"),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
-
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat timed out, we'll retry in 2 seconds.
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(7010), action.getNextHeartbeatStartDate());
+ HostAndPort target() {
+ return _target;
}
- TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatThreeNonconsecutiveFailures) {
- // Confirm that the topology coordinator does not mark a node down on three
- // nonconsecutive heartbeat failures.
- ReplSetHeartbeatResponse response;
- response.noteReplSet();
- response.setSetName("rs0");
- response.setState(MemberState::RS_SECONDARY);
- response.setElectable(true);
- response.setConfigVersion(5);
-
- // successful response (third response due to the two failures in setUp())
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(4500),
- Milliseconds(400),
- target(),
- StatusWith<ReplSetHeartbeatResponse>(response),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+private:
+ Date_t _firstRequestDate;
+ HostAndPort _target;
+};
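+
+// The OneRetry fixture above leaves the coordinator mid-period. Assuming the
+// 5-second heartbeatTimeoutSecs from HeartbeatResponseTest, the timeline is:
+//   t + 0ms:    initial heartbeat prepared with a 5000ms budget
+//   t + 4000ms: initial attempt fails (ExceededTimeLimit); because time remains
+//               in the period, a retry is scheduled immediately
+//   t + 4000ms: first retry prepared with the remaining 1000ms budget
+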
+class HeartbeatResponseTestTwoRetries : public HeartbeatResponseTestOneRetry {
+public:
+ virtual void setUp() {
+ HeartbeatResponseTestOneRetry::setUp();
+ // First retry fails at t + 4500ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4500), // 4.5 of the 5 seconds elapsed;
+ // could retry.
+ Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat succeeded, we'll retry in 2 seconds.
- ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
-
- // request next heartbeat
- getTopoCoord().prepareHeartbeatRequest(
- firstRequestDate() + Milliseconds(6500), "rs0", target());
- // third failed response
- action = getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + Milliseconds(7100),
- Milliseconds(400),
- target(),
- StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""}),
- OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+ // Because the first retry failed without timing out, we expect to retry immediately.
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(4500), action.getNextHeartbeatStartDate());
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ // Second retry prepared at t + 4500ms.
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(
+ firstRequestDate() + Milliseconds(4500), "rs0", target());
+ // 500ms left to complete the heartbeat.
+ ASSERT_EQUALS(500, request.second.count());
- // Ensure a third nonconsecutive heartbeat failure did not cause the node to be marked down
+ // Ensure a second failed heartbeat did not cause the node to be marked down
BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
getTopoCoord().prepareStatusResponse(cbData(),
- firstRequestDate() + Milliseconds(7000),
- 600,
- OpTime(Timestamp(100,0), 0),
+ firstRequestDate() + Seconds(4),
+ 10,
+ OpTime(Timestamp(100, 0), 0),
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -1895,3057 +1483,3534 @@ namespace {
ASSERT_EQUALS(1, member1Status["_id"].Int());
ASSERT_EQUALS(1, member1Status["health"].Double());
-
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataNewPrimary) {
- OpTime election = OpTime(Timestamp(5,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneOlder) {
- OpTime election = OpTime(Timestamp(5,0), 0);
- OpTime election2 = OpTime(Timestamp(4,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election2,
- election,
- lastOpTimeApplied);
- // second primary does not change primary index
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
}
+};
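+
+// The per-attempt budgets asserted above are simply what remains of the 5000ms
+// timeout. A minimal sketch of that arithmetic (sketchRemainingBudget is a
+// hypothetical helper, not TopologyCoordinator code):
+namespace {
+Milliseconds sketchRemainingBudget(Milliseconds elapsedSinceFirstRequest) {
+    const Milliseconds kTimeout(5000);  // heartbeatTimeoutSecs in the fixture
+    return Milliseconds(kTimeout.count() - elapsedSinceFirstRequest.count());
+}
+}  // namespace
+// e.g. 1000ms left at t + 4000ms (first retry), 500ms left at t + 4500ms (second).
+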
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneNewer) {
- OpTime election = OpTime(Timestamp(4,0), 0);
- OpTime election2 = OpTime(Timestamp(5,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election2,
- election,
- lastOpTimeApplied);
- // second primary does not change primary index
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneOlder) {
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(Timestamp(5,0));
-
- OpTime election = OpTime(Timestamp(4,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction());
- ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityFreshNode) {
- // In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher
- // priority and similarly fresh node ("host3"). However, since the coordinator's node
- // (host1) is not the higher priority node, it takes no action.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(13,0), 0);
- OpTime slightlyLessFreshLastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- slightlyLessFreshLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownSelfForHighPriorityFreshNode) {
- // In this test, the Topology coordinator becomes PRIMARY and then sees a higher priority
- // and equally fresh node ("host3"). As a result it responds with a StepDownSelf action.
- //
- // Despite having stepped down, we should remain electable, in order to dissuade lower
- // priority nodes from standing for election.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- OpTime election = OpTime(Timestamp(1000,0), 0);
-
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(election.getTimestamp());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- election);
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
- ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
-
- // Process a heartbeat response to confirm that this node, which is no longer primary,
- // still tells other nodes that it is electable. This will stop lower priority nodes
- // from standing for election.
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName("rs0");
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(6);
- hbArgs.setSenderId(1);
- hbArgs.setSenderHost(HostAndPort("host3", 27017));
- ReplSetHeartbeatResponse hbResp;
- ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(),
- hbArgs,
- "rs0",
- election,
- &hbResp));
- ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toString();
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownSelfForHighPriorityStaleNode) {
- // In this test, the Topology coordinator becomes PRIMARY and then sees a higher priority
- // and stale node ("host3"). As a result it responds with NoAction.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- OpTime election = OpTime(Timestamp(1000,0), 0);
- OpTime staleTime = OpTime();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(election.getTimestamp());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- staleTime,
- election);
- ASSERT_NO_ACTION(nextAction.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownPrimaryForHighPriorityStaleNode) {
- // In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher
- // priority and stale node ("host3"). As a result it responds with NoAction.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(Timestamp(1000,0), 0);
- OpTime stale = OpTime();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- election);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- stale,
- election);
- ASSERT_NO_ACTION(nextAction.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneNewer) {
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(Timestamp(2,0));
-
- OpTime election = OpTime(Timestamp(4,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
- ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownNoMajority) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButNoPriority) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmStarting) {
- setSelfMemberState(MemberState::RS_STARTUP);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmRecovering) {
- setSelfMemberState(MemberState::RS_RECOVERING);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIHaveStepdownWait) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // freeze node to set stepdown wait
- BSONObjBuilder response;
- getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmArbiter) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajority) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(399,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+class HeartbeatResponseHighVerbosityTest : public HeartbeatResponseTest {
+public:
+ virtual void setUp() {
+ HeartbeatResponseTest::setUp();
+ // set verbosity as high as the highest verbosity log message we'd like to check for
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
}
- TEST_F(HeartbeatResponseTest, ElectionStartElectionWhileCandidate) {
- // In this test, the TopologyCoordinator goes through the steps of a successful election,
- // during which it receives a heartbeat that would normally trigger it to become a candidate
- // and respond with a StartElection HeartbeatResponseAction. However, since it is already in
- // candidate state, it responds with a NoAction HeartbeatResponseAction. Then finishes by
- // being winning the election.
-
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. "host2" comes back, which would normally trigger election, but since the
- // TopologyCoordinator is already in candidate mode, does not.
- // 4. TopologyCoordinator concludes its freshness round successfully and wins the election.
-
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(130,0), 0);
- OID round = OID::gen();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // see the downed node as SECONDARY and decide to take no action, but are still a candidate
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
-
- // normally this would trigger StartElection, but we are already a candidate
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
-
- // now win election and ensure _electionId and _electionTime are set properly
- getTopoCoord().processWinElection(round, election.getTimestamp());
- ASSERT_EQUALS(round, getTopoCoord().getElectionId());
- ASSERT_EQUALS(election.getTimestamp(), getTopoCoord().getElectionTime());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ virtual void tearDown() {
+ HeartbeatResponseTest::tearDown();
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
}
+};
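+
+// The setUp()/tearDown() pair above amounts to a scoped severity change. A
+// minimal RAII sketch of the same idea (ScopedDebugSeverity is hypothetical;
+// only the two logger calls are taken from the fixture):
+namespace {
+class ScopedDebugSeverity {
+public:
+    explicit ScopedDebugSeverity(int debugLevel) {
+        logger::globalLogDomain()->setMinimumLoggedSeverity(
+            logger::LogSeverity::Debug(debugLevel));
+    }
+    ~ScopedDebugSeverity() {
+        logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+    }
+};
+}  // namespace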
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataNodeBelievesWeAreDown) {
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
+
+ ReplSetHeartbeatResponse believesWeAreDownResponse;
+ believesWeAreDownResponse.noteReplSet();
+ believesWeAreDownResponse.setSetName("rs0");
+ believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
+ believesWeAreDownResponse.setElectable(true);
+ believesWeAreDownResponse.noteStateDisagreement();
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host2"),
+ StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
+}
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataMemberNotInConfig) {
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host5"));
+
+ ReplSetHeartbeatResponse memberMissingResponse;
+ memberMissingResponse.noteReplSet();
+ memberMissingResponse.setSetName("rs0");
+ memberMissingResponse.setState(MemberState::RS_SECONDARY);
+ memberMissingResponse.setElectable(true);
+ memberMissingResponse.noteStateDisagreement();
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host5"),
+ StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config"));
+}
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) {
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
+
+ // construct a copy of the original config for log message checking later
+ // see HeartbeatResponseTest for the origin of the original config
+ ReplicaSetConfig originalConfig;
+ originalConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 5)));
+
+ ReplSetHeartbeatResponse sameConfigResponse;
+ sameConfigResponse.noteReplSet();
+ sameConfigResponse.setSetName("rs0");
+ sameConfigResponse.setState(MemberState::RS_SECONDARY);
+ sameConfigResponse.setElectable(true);
+ sameConfigResponse.noteStateDisagreement();
+ sameConfigResponse.setConfigVersion(2);
+ sameConfigResponse.setConfig(originalConfig);
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host2"),
+ StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "Config from heartbeat response was "
+ "same as ours."));
+}
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataOldConfig) {
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
+
+ ReplSetHeartbeatResponse believesWeAreDownResponse;
+ believesWeAreDownResponse.noteReplSet();
+ believesWeAreDownResponse.setSetName("rs0");
+ believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
+ believesWeAreDownResponse.setElectable(true);
+ believesWeAreDownResponse.noteStateDisagreement();
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host2"),
+ StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToReconfig) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // Reconfig action.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 7 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017")
+ << BSON("_id" << 3 << "host"
+ << "host4:27017")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 5))));
+ ASSERT_OK(newConfig.validate());
+
+ ReplSetHeartbeatResponse reconfigResponse;
+ reconfigResponse.noteReplSet();
+ reconfigResponse.setSetName("rs0");
+ reconfigResponse.setState(MemberState::RS_SECONDARY);
+ reconfigResponse.setElectable(true);
+ reconfigResponse.setConfigVersion(7);
+ reconfigResponse.setConfig(newConfig);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4500), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(reconfigResponse),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::Reconfig, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownRemotePrimary) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // StepDownRemotePrimary action.
+
+ // make self primary
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(Timestamp(5, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(true);
+ electedMoreRecentlyResponse.setElectionTime(Timestamp(3, 0));
+ electedMoreRecentlyResponse.setConfigVersion(5);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4500), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime()); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
+ ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownSelf) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // StepDownSelf action.
+
+ // acknowledge the other member so that we see a majority
+ HeartbeatResponseAction action =
+ receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(Timestamp(100, 0), 0));
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make us PRIMARY
+ makeSelfPrimary();
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(false);
+ electedMoreRecentlyResponse.setElectionTime(Timestamp(10, 0));
+ electedMoreRecentlyResponse.setConfigVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4500), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
+ ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToStartElection) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // StartElection action.
+
+ // acknowledge the other member so that we see a majority
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+ HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make sure we are electable
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ ReplSetHeartbeatResponse startElectionResponse;
+ startElectionResponse.noteReplSet();
+ startElectionResponse.setSetName("rs0");
+ startElectionResponse.setState(MemberState::RS_SECONDARY);
+ startElectionResponse.setElectable(true);
+ startElectionResponse.setConfigVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4500), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
+ election);
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatRetriesAtMostTwice) {
+ // Confirm that the topology coordinator attempts to retry a failed heartbeat two times
+ // after initial failure, assuming that the heartbeat timeout (set to 5 seconds in the
+ // fixture) has not expired.
+ //
+ // Failed heartbeats propose taking no action, other than scheduling the next heartbeat. We
+ // can detect a retry vs the next regularly scheduled heartbeat because retries are
+ // scheduled immediately, while subsequent heartbeats are scheduled after the hard-coded
+ // heartbeat interval of 2 seconds.
+
+ // Second retry fails at t + 4800ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4800), // 4.8 of the 5 seconds elapsed;
+ // could still retry.
+ Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ // Because this is the second retry, rather than retry again, we expect to wait for the
+ // heartbeat interval of 2 seconds to elapse.
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(6800), action.getNextHeartbeatStartDate());
+
+ // Ensure a third failed heartbeat caused the node to be marked down
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(cbData(),
+ firstRequestDate() + Milliseconds(4900),
+ 10,
+ OpTime(Timestamp(100, 0), 0),
+ &statusBuilder,
+ &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ BSONObj member1Status = memberArray[1].Obj();
+
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS(0, member1Status["health"].Double());
+}
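+
+// A minimal sketch of the scheduling rule the retry tests pin down, given the
+// hard-coded 2-second heartbeat interval and at-most-two retries per period
+// (sketchNextHeartbeatStart is hypothetical, not the TopologyCoordinator
+// implementation):
+namespace {
+Date_t sketchNextHeartbeatStart(Date_t now, Date_t periodDeadline, int failedAttempts) {
+    const Milliseconds kHeartbeatInterval(2000);  // regular heartbeat cadence
+    const int kMaxAttempts = 3;                   // the initial attempt plus two retries
+    if (failedAttempts < kMaxAttempts && now < periodDeadline) {
+        return now;  // time remains in the period: retry immediately
+    }
+    return now + kHeartbeatInterval;  // give up on the period; wait out the interval
+}
+}  // namespace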
+
+TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownRemotePrimary) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // StepDownRemotePrimary action.
+
+ // make self primary
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(Timestamp(5, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(true);
+ electedMoreRecentlyResponse.setElectionTime(Timestamp(3, 0));
+ electedMoreRecentlyResponse.setConfigVersion(5);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(5000), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime()); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
+ ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownSelf) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // StepDownSelf action.
+
+ // acknowledge the other member so that we see a majority
+ HeartbeatResponseAction action =
+ receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(Timestamp(100, 0), 0));
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make us PRIMARY
+ makeSelfPrimary();
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(false);
+ electedMoreRecentlyResponse.setElectionTime(Timestamp(10, 0));
+ electedMoreRecentlyResponse.setConfigVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(5000), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
+ ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, DecideToStartElection) {
+ // Confirm that action responses can come back from retries; in this case, expect a
+ // StartElection action.
+
+ // acknowledge the other member so that we see a majority
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+ HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make sure we are electable
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ ReplSetHeartbeatResponse startElectionResponse;
+ startElectionResponse.noteReplSet();
+ startElectionResponse.setSetName("rs0");
+ startElectionResponse.setState(MemberState::RS_SECONDARY);
+ startElectionResponse.setElectable(true);
+ startElectionResponse.setConfigVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(5000), // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
+ election);
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTest, HeartbeatTimeoutSuppressesFirstRetry) {
+ // Confirm that the topology coordinator does not schedule an immediate heartbeat retry if
+ // the heartbeat timeout period expired before the initial request completed.
+
+ HostAndPort target("host2", 27017);
+ Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+
+ // Initial heartbeat request prepared, at t + 0.
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(firstRequestDate, "rs0", target);
+ // 5 seconds to successfully complete the heartbeat before the timeout expires.
+ ASSERT_EQUALS(5000, request.second.count());
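+    // (The 5000ms deadline matches the heartbeatTimeoutSecs:5 setting these fixtures install.)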
+
+ // Initial heartbeat request fails at t + 5000ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate + Milliseconds(5000), // Entire heartbeat period elapsed;
+ // no retry allowed.
+        Milliseconds(4990), // Spent 4.99 of the 5 seconds in the network.
+ target,
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ // Because the heartbeat timed out, we'll retry in 2 seconds.
+ ASSERT_EQUALS(firstRequestDate + Milliseconds(7000), action.getNextHeartbeatStartDate());
+}
+
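+// Together with the test above, the following test pins down the scheduling rule these
+// fixtures rely on: a failure within the heartbeatTimeoutSecs window earns up to two
+// immediate retries, but once the window has elapsed the next attempt simply waits out
+// the regular 2-second heartbeat interval.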
+TEST_F(HeartbeatResponseTestOneRetry, HeartbeatTimeoutSuppressesSecondRetry) {
+    // Confirm that the topology coordinator does not schedule a second heartbeat retry if
+    // the heartbeat timeout period expired before the first retry completed.
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(5010), // Entire heartbeat period elapsed;
+ // no retry allowed.
+ Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ // Because the heartbeat timed out, we'll retry in 2 seconds.
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(7010), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatThreeNonconsecutiveFailures) {
+ // Confirm that the topology coordinator does not mark a node down on three
+ // nonconsecutive heartbeat failures.
+ ReplSetHeartbeatResponse response;
+ response.noteReplSet();
+ response.setSetName("rs0");
+ response.setState(MemberState::RS_SECONDARY);
+ response.setElectable(true);
+ response.setConfigVersion(5);
+
+ // successful response (third response due to the two failures in setUp())
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(4500),
+ Milliseconds(400),
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(response),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+    // Because the heartbeat succeeded, the next heartbeat is due in 2 seconds.
+ ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate());
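+    // (6500ms is the 4500ms response time plus the standard 2-second heartbeat interval.)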
+
+ // request next heartbeat
+ getTopoCoord().prepareHeartbeatRequest(
+ firstRequestDate() + Milliseconds(6500), "rs0", target());
+ // third failed response
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + Milliseconds(7100),
+ Milliseconds(400),
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""}),
+ OpTime(Timestamp(0, 0), 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+
+ // Ensure a third nonconsecutive heartbeat failure did not cause the node to be marked down
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(cbData(),
+ firstRequestDate() + Milliseconds(7000),
+ 600,
+ OpTime(Timestamp(100, 0), 0),
+ &statusBuilder,
+ &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ BSONObj member1Status = memberArray[1].Obj();
+
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS(1, member1Status["health"].Double());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataNewPrimary) {
+ OpTime election = OpTime(Timestamp(5, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneOlder) {
+ OpTime election = OpTime(Timestamp(5, 0), 0);
+ OpTime election2 = OpTime(Timestamp(4, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election2,
+ election,
+ lastOpTimeApplied);
+ // second primary does not change primary index
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneNewer) {
+ OpTime election = OpTime(Timestamp(4, 0), 0);
+ OpTime election2 = OpTime(Timestamp(5, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election2,
+ election,
+ lastOpTimeApplied);
+ // second primary does not change primary index
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneOlder) {
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(Timestamp(5, 0));
+
+ OpTime election = OpTime(Timestamp(4, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction());
+ ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityFreshNode) {
+    // In this test, the TopologyCoordinator sees a PRIMARY ("host2") and then sees a higher
+    // priority and similarly fresh node ("host3"). However, since the coordinator's node
+    // (host1) is not the higher priority node, it takes no action.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(13, 0), 0);
+ OpTime slightlyLessFreshLastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ slightlyLessFreshLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownSelfForHighPriorityFreshNode) {
+    // In this test, the TopologyCoordinator becomes PRIMARY and then sees a higher priority
+    // and equally fresh node ("host3"). As a result it responds with a StepDownSelf action.
+ //
+ // Despite having stepped down, we should remain electable, in order to dissuade lower
+ // priority nodes from standing for election.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ OpTime election = OpTime(Timestamp(1000, 0), 0);
+
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(election.getTimestamp());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election, election);
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
+ ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+
+ // Process a heartbeat response to confirm that this node, which is no longer primary,
+ // still tells other nodes that it is electable. This will stop lower priority nodes
+ // from standing for election.
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("rs0");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(6);
+ hbArgs.setSenderId(1);
+ hbArgs.setSenderHost(HostAndPort("host3", 27017));
+ ReplSetHeartbeatResponse hbResp;
+ ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", election, &hbResp));
+ ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toString();
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownSelfForHighPriorityStaleNode) {
+    // In this test, the TopologyCoordinator becomes PRIMARY and then sees a higher priority
+    // and stale node ("host3"). As a result it responds with NoAction.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ OpTime election = OpTime(Timestamp(1000, 0), 0);
+ OpTime staleTime = OpTime();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(election.getTimestamp());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, staleTime, election);
+ ASSERT_NO_ACTION(nextAction.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownPrimaryForHighPriorityStaleNode) {
+ // In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher
+ // priority and stale node ("host3"). As a result it responds with NoAction.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(Timestamp(1000, 0), 0);
+ OpTime stale = OpTime();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election, election);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, stale, election);
+ ASSERT_NO_ACTION(nextAction.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneNewer) {
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(Timestamp(2, 0));
+
+ OpTime election = OpTime(Timestamp(4, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
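+    // host2 will report an election time of (4, 0), newer than our own (2, 0), so this time
+    // we expect a StepDownSelf action rather than StepDownRemotePrimary.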
+
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
+ ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- TEST_F(HeartbeatResponseTest, ElectionVoteForAnotherNodeBeforeFreshnessReturns) {
- // In this test, the TopologyCoordinator goes through the steps of an election. However,
- // before its freshness round ends, it receives a fresh command followed by an elect command
- // from another node, both of which it responds positively to. The TopologyCoordinator's
- // freshness round then concludes successfully, but it fails to vote for itself, since it
- // recently voted for another node.
-
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
- // 4. "host3" sends an elect command, which the TopologyCoordinator responds to positively.
- // 5. The TopologyCoordinator's concludes its freshness round successfully.
- // 6. The TopologyCoordinator loses the election.
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+}
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(100,0), 0);
- OpTime fresherOpApplied = OpTime(Timestamp(200,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- Timestamp originalElectionTime = getTopoCoord().getElectionTime();
- OID originalElectionId = getTopoCoord().getElectionId();
- // prepare an incoming fresh command
- ReplicationCoordinator::ReplSetFreshArgs freshArgs;
- freshArgs.setName = "rs0";
- freshArgs.cfgver = 5;
- freshArgs.id = 2;
- freshArgs.who = HostAndPort("host3");
- freshArgs.opTime = fresherOpApplied.getTimestamp();
-
- BSONObjBuilder freshResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareFreshResponse(
- freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
- BSONObj response = freshResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue()));
- ASSERT_FALSE(response["fresher"].trueValue());
- ASSERT_FALSE(response["veto"].trueValue());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- // make sure incoming fresh commands do not change electionTime and electionId
- ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
- ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
-
- // an elect command comes in
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- OID round = OID::gen();
- electArgs.set = "rs0";
- electArgs.round = round;
- electArgs.cfgver = 5;
- electArgs.whoid = 2;
-
- BSONObjBuilder electResponseBuilder;
- result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(), &electResponseBuilder, &result);
- stopCapturingLogMessages();
- response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting yea for host3:27017 (2)"));
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- // make sure incoming elect commands do not change electionTime and electionId
- ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
- ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
-
- // receive a heartbeat indicating the other node was elected
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(2, getCurrentPrimaryIndex());
- // make sure seeing a new primary does not change electionTime and electionId
- ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
- ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
-
- // now lose election and ensure _electionTime and _electionId are 0'd out
- getTopoCoord().processLoseElection();
- ASSERT_EQUALS(OID(), getTopoCoord().getElectionId());
- ASSERT_EQUALS(Timestamp(), getTopoCoord().getElectionTime());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(2, getCurrentPrimaryIndex());
- }
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownNoMajority) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
- TEST_F(HeartbeatResponseTest, ElectionRespondToFreshBeforeOurFreshnessReturns) {
- // In this test, the TopologyCoordinator goes through the steps of an election. However,
- // before its freshness round ends, the TopologyCoordinator receives a fresh command from
- // another node, which it responds positively to. Its freshness then ends successfully and
- // it wins the election. The other node's elect command then comes in and is responded to
- // negatively, maintaining the TopologyCoordinator's PRIMARY state.
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
- // 4. The TopologyCoordinator concludes its freshness round successfully and wins
- // the election.
- // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButNoPriority) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmStarting) {
+ setSelfMemberState(MemberState::RS_STARTUP);
+
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmRecovering) {
+ setSelfMemberState(MemberState::RS_RECOVERING);
+
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(100,0), 0);
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(200,0), 0);
- OID round = OID::gen();
- OID remoteRound = OID::gen();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // prepare an incoming fresh command
- ReplicationCoordinator::ReplSetFreshArgs freshArgs;
- freshArgs.setName = "rs0";
- freshArgs.cfgver = 5;
- freshArgs.id = 2;
- freshArgs.who = HostAndPort("host3");
- freshArgs.opTime = fresherLastOpTimeApplied.getTimestamp();
-
- BSONObjBuilder freshResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareFreshResponse(
- freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
- BSONObj response = freshResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue()));
- ASSERT_FALSE(response["fresher"].trueValue());
- ASSERT_FALSE(response["veto"].trueValue());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
- // now win election and ensure _electionId and _electionTime are set properly
- getTopoCoord().processWinElection(round, election.getTimestamp());
- ASSERT_EQUALS(round, getTopoCoord().getElectionId());
- ASSERT_EQUALS(election.getTimestamp(), getTopoCoord().getElectionTime());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // an elect command comes in
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- electArgs.set = "rs0";
- electArgs.round = remoteRound;
- electArgs.cfgver = 5;
- electArgs.whoid = 2;
-
- BSONObjBuilder electResponseBuilder;
- result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(), &electResponseBuilder, &result);
- stopCapturingLogMessages();
- response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(remoteRound, response["round"].OID());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- }
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
- TEST_F(HeartbeatResponseTest, ElectionCompleteElectionThenReceiveFresh) {
- // In this test, the TopologyCoordinator goes through the steps of an election. After
- // being successfully elected, a fresher node sends a fresh command, which the
- // TopologyCoordinator responds positively to. The fresher node then sends an elect command,
- // which the Topology coordinator negatively to since the TopologyCoordinator just elected
- // itself.
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIHaveStepdownWait) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. The TopologyCoordinator concludes its freshness round successfully and wins
- // the election.
- // 4. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
- // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(100,0), 0);
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(200,0), 0);
- OID round = OID::gen();
- OID remoteRound = OID::gen();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
- // now win election
- getTopoCoord().processWinElection(round, election.getTimestamp());
- ASSERT_EQUALS(0, getTopoCoord().getCurrentPrimaryIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
-
- // prepare an incoming fresh command
- ReplicationCoordinator::ReplSetFreshArgs freshArgs;
- freshArgs.setName = "rs0";
- freshArgs.cfgver = 5;
- freshArgs.id = 2;
- freshArgs.who = HostAndPort("host3");
- freshArgs.opTime = fresherLastOpTimeApplied.getTimestamp();
-
- BSONObjBuilder freshResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareFreshResponse(
- freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
- BSONObj response = freshResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(),
- Timestamp(response["opTime"].timestampValue()));
- ASSERT_FALSE(response["fresher"].trueValue());
- ASSERT_TRUE(response["veto"].trueValue()) << response["errmsg"];
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // an elect command comes in
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- electArgs.set = "rs0";
- electArgs.round = remoteRound;
- electArgs.cfgver = 5;
- electArgs.whoid = 2;
-
- BSONObjBuilder electResponseBuilder;
- result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(), &electResponseBuilder, &result);
- stopCapturingLogMessages();
- response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(remoteRound, response["round"].OID());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityOfVotersUp) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "votes" << 0) <<
- BSON("_id" << 3 << "host" << "host4:27017" << "votes" << 0) <<
- BSON("_id" << 4 << "host" << "host5:27017" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "host6:27017" << "votes" << 0) <<
- BSON("_id" << 6 << "host" << "host7:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // freeze node to set stepdown wait
+ BSONObjBuilder response;
+ getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmArbiter) {
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajority) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(399, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, ElectionStartElectionWhileCandidate) {
+ // In this test, the TopologyCoordinator goes through the steps of a successful election,
+ // during which it receives a heartbeat that would normally trigger it to become a candidate
+ // and respond with a StartElection HeartbeatResponseAction. However, since it is already in
+    // candidate state, it responds with a NoAction HeartbeatResponseAction. It then finishes
+    // by winning the election.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. "host2" comes back, which would normally trigger election, but since the
+ // TopologyCoordinator is already in candidate mode, does not.
+    // 4. The TopologyCoordinator concludes its freshness round successfully and wins the
+    //    election.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(130, 0), 0);
+ OID round = OID::gen();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+    // see the downed node as SECONDARY and decide to take no action, but remain a candidate
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+
+ // normally this would trigger StartElection, but we are already a candidate
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
+
+ // now win election and ensure _electionId and _electionTime are set properly
+ getTopoCoord().processWinElection(round, election.getTimestamp());
+ ASSERT_EQUALS(round, getTopoCoord().getElectionId());
+ ASSERT_EQUALS(election.getTimestamp(), getTopoCoord().getElectionTime());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, ElectionVoteForAnotherNodeBeforeFreshnessReturns) {
+ // In this test, the TopologyCoordinator goes through the steps of an election. However,
+ // before its freshness round ends, it receives a fresh command followed by an elect command
+ // from another node, both of which it responds positively to. The TopologyCoordinator's
+ // freshness round then concludes successfully, but it fails to vote for itself, since it
+ // recently voted for another node.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
+ // 4. "host3" sends an elect command, which the TopologyCoordinator responds to positively.
+    // 5. The TopologyCoordinator concludes its freshness round successfully.
+ // 6. The TopologyCoordinator loses the election.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(100, 0), 0);
+ OpTime fresherOpApplied = OpTime(Timestamp(200, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ Timestamp originalElectionTime = getTopoCoord().getElectionTime();
+ OID originalElectionId = getTopoCoord().getElectionId();
+ // prepare an incoming fresh command
+ ReplicationCoordinator::ReplSetFreshArgs freshArgs;
+ freshArgs.setName = "rs0";
+ freshArgs.cfgver = 5;
+ freshArgs.id = 2;
+ freshArgs.who = HostAndPort("host3");
+ freshArgs.opTime = fresherOpApplied.getTimestamp();
+
+ BSONObjBuilder freshResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareFreshResponse(
+ freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
+ BSONObj response = freshResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue()));
+ ASSERT_FALSE(response["fresher"].trueValue());
+ ASSERT_FALSE(response["veto"].trueValue());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ // make sure incoming fresh commands do not change electionTime and electionId
+ ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
+ ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
+
+ // an elect command comes in
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ OID round = OID::gen();
+ electArgs.set = "rs0";
+ electArgs.round = round;
+ electArgs.cfgver = 5;
+ electArgs.whoid = 2;
+
+ BSONObjBuilder electResponseBuilder;
+ result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(), &electResponseBuilder, &result);
+ stopCapturingLogMessages();
+ response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("voting yea for host3:27017 (2)"));
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ // make sure incoming elect commands do not change electionTime and electionId
+ ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
+ ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
+
+ // receive a heartbeat indicating the other node was elected
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(2, getCurrentPrimaryIndex());
+ // make sure seeing a new primary does not change electionTime and electionId
+ ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
+ ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
+
+ // now lose election and ensure _electionTime and _electionId are 0'd out
+ getTopoCoord().processLoseElection();
+ ASSERT_EQUALS(OID(), getTopoCoord().getElectionId());
+ ASSERT_EQUALS(Timestamp(), getTopoCoord().getElectionTime());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(2, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, ElectionRespondToFreshBeforeOurFreshnessReturns) {
+ // In this test, the TopologyCoordinator goes through the steps of an election. However,
+ // before its freshness round ends, the TopologyCoordinator receives a fresh command from
+ // another node, which it responds positively to. Its freshness then ends successfully and
+ // it wins the election. The other node's elect command then comes in and is responded to
+ // negatively, maintaining the TopologyCoordinator's PRIMARY state.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
+ // 4. The TopologyCoordinator concludes its freshness round successfully and wins
+ // the election.
+ // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(100, 0), 0);
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(200, 0), 0);
+ OID round = OID::gen();
+ OID remoteRound = OID::gen();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // prepare an incoming fresh command
+ ReplicationCoordinator::ReplSetFreshArgs freshArgs;
+ freshArgs.setName = "rs0";
+ freshArgs.cfgver = 5;
+ freshArgs.id = 2;
+ freshArgs.who = HostAndPort("host3");
+ freshArgs.opTime = fresherLastOpTimeApplied.getTimestamp();
+
+ BSONObjBuilder freshResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareFreshResponse(
+ freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
+ BSONObj response = freshResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue()));
+ ASSERT_FALSE(response["fresher"].trueValue());
+ ASSERT_FALSE(response["veto"].trueValue());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
+ // now win election and ensure _electionId and _electionTime are set properly
+ getTopoCoord().processWinElection(round, election.getTimestamp());
+ ASSERT_EQUALS(round, getTopoCoord().getElectionId());
+ ASSERT_EQUALS(election.getTimestamp(), getTopoCoord().getElectionTime());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // an elect command comes in
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ electArgs.set = "rs0";
+ electArgs.round = remoteRound;
+ electArgs.cfgver = 5;
+ electArgs.whoid = 2;
+
+ BSONObjBuilder electResponseBuilder;
+ result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(), &electResponseBuilder, &result);
+ stopCapturingLogMessages();
+ response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(remoteRound, response["round"].OID());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, ElectionCompleteElectionThenReceiveFresh) {
+    // In this test, the TopologyCoordinator goes through the steps of an election. After
+    // the TopologyCoordinator is successfully elected, a fresher node sends a fresh command,
+    // which the TopologyCoordinator responds positively to. The fresher node then sends an
+    // elect command, which the TopologyCoordinator responds to negatively, since the
+    // TopologyCoordinator has just elected itself.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. The TopologyCoordinator concludes its freshness round successfully and wins
+ // the election.
+ // 4. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
+ // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += Seconds(30); // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(100, 0), 0);
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(200, 0), 0);
+ OID round = OID::gen();
+ OID remoteRound = OID::gen();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
+ // now win election
+ getTopoCoord().processWinElection(round, election.getTimestamp());
+ ASSERT_EQUALS(0, getTopoCoord().getCurrentPrimaryIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+
+ // prepare an incoming fresh command
+ ReplicationCoordinator::ReplSetFreshArgs freshArgs;
+ freshArgs.setName = "rs0";
+ freshArgs.cfgver = 5;
+ freshArgs.id = 2;
+ freshArgs.who = HostAndPort("host3");
+ freshArgs.opTime = fresherLastOpTimeApplied.getTimestamp();
+
+ BSONObjBuilder freshResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareFreshResponse(
+ freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
+ BSONObj response = freshResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue()));
+ ASSERT_FALSE(response["fresher"].trueValue());
+ ASSERT_TRUE(response["veto"].trueValue()) << response["errmsg"];
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // an elect command comes in
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ electArgs.set = "rs0";
+ electArgs.round = remoteRound;
+ electArgs.cfgver = 5;
+ electArgs.whoid = 2;
+
+ BSONObjBuilder electResponseBuilder;
+ result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(), &electResponseBuilder, &result);
+ stopCapturingLogMessages();
+ response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(remoteRound, response["round"].OID());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityOfVotersUp) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "votes" << 0)
+ << BSON("_id" << 3 << "host"
+ << "host4:27017"
+ << "votes" << 0) << BSON("_id" << 4 << "host"
+ << "host5:27017"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "host6:27017"
+ << "votes" << 0) << BSON("_id" << 6 << "host"
+ << "host7:27017"))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(Timestamp(400, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+    // make sure all non-voting nodes are down; that way we do not have a majority of nodes,
+    // but we do have a majority of votes: only host1 (self), host2, and host7 carry votes,
+    // so with host7 up and ourselves counted we still hold two of the three votes
+ nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host4"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host5"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveUpHeartbeat(HostAndPort("host7"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRelinquishPrimaryDueToNodeDisappearing) {
+ // become PRIMARY
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(Timestamp(2, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // become aware of other nodes
+ heartbeatFromMember(
+ HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ heartbeatFromMember(
+ HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+ heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+
+ // lose that awareness and be sure we are going to stepdown
+ HeartbeatResponseAction nextAction =
+ receiveDownHeartbeat(HostAndPort("host2"), "rs0", OpTime(Timestamp(100, 0), 0));
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(Timestamp(100, 0), 0));
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
+ ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRemoteDoesNotExist) {
+ OpTime election = OpTime(Timestamp(5, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host9"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
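+
+// Fixture for the prepareElectResponse tests below: a four-member set in which
+// h2 and h3 carry priority 10, so that elect requests naming a lower-priority
+// member can be vetoed on priority grounds.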
+
+class PrepareElectResponseTest : public TopoCoordTest {
+public:
+ PrepareElectResponseTest()
+ : round(OID::gen()), cbData(NULL, ReplicationExecutor::CallbackHandle(), Status::OK()) {}
+
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 10 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself")
+ << BSON("_id" << 1 << "host"
+ << "h1") << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 10)
+ << BSON("_id" << 3 << "host"
+ << "h3"
+ << "priority" << 10))),
0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(Timestamp(400,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(300,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- // make sure all non-voting nodes are down, that way we do not have a majority of nodes
- // but do have a majority of votes since one of two voting members is up and so are we
- nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host4"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host5"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveUpHeartbeat(HostAndPort("host7"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRelinquishPrimaryDueToNodeDisappearing) {
- // become PRIMARY
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(Timestamp(2,0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // become aware of other nodes
- heartbeatFromMember(HostAndPort("host2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(1,0), 0));
- heartbeatFromMember(HostAndPort("host2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(1,0), 0));
- heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime());
- heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime());
-
- // lose that awareness and be sure we are going to stepdown
- HeartbeatResponseAction nextAction = receiveDownHeartbeat(HostAndPort("host2"),
- "rs0",
- OpTime(Timestamp(100, 0), 0));
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction =
- receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(Timestamp(100, 0), 0));
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
- ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRemoteDoesNotExist) {
- OpTime election = OpTime(Timestamp(5,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(3,0), 0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host9"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- class PrepareElectResponseTest : public TopoCoordTest {
- public:
-
- PrepareElectResponseTest() :
- round(OID::gen()),
- cbData(NULL, ReplicationExecutor::CallbackHandle(), Status::OK()) {}
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 10 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself") <<
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 <<
- "host" << "h2" <<
- "priority" << 10) <<
- BSON("_id" << 3 <<
- "host" << "h3" <<
- "priority" << 10))),
- 0);
- }
-
- protected:
- Date_t now;
- OID round;
- ReplicationExecutor::CallbackArgs cbData;
- };
-
- TEST_F(PrepareElectResponseTest, ElectResponseIncorrectReplSetName) {
- // Test with incorrect replset name
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "fakeset";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(0, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1,
- countLogLinesContaining("received an elect request for 'fakeset' but our "
- "set name is 'rs0'"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.set = "rs0";
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseOurConfigStale) {
- // Test with us having a stale config version
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 20;
- args.whoid = 1;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(0, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1,
- countLogLinesContaining("not voting because our config version is stale"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.cfgver = 10;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseTheirConfigStale) {
- // Test with them having a stale config version
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 5;
- args.whoid = 1;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1,
- countLogLinesContaining("received stale config version # during election"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.cfgver = 10;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseNonExistentNode) {
- // Test with a non-existent node
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 99;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("couldn't find member with id 99"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.whoid = 1;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
}
- TEST_F(PrepareElectResponseTest, ElectResponseWeArePrimary) {
- // Test when we are already primary
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- getTopoCoord()._setCurrentPrimaryForTest(0);
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("I am already primary"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- getTopoCoord()._setCurrentPrimaryForTest(-1);
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseSomeoneElseIsPrimary) {
- // Test when someone else is already primary
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
- getTopoCoord()._setCurrentPrimaryForTest(2);
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("h2:27017 is already primary"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- getTopoCoord()._setCurrentPrimaryForTest(-1);
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseNotHighestPriority) {
- // Test trying to elect someone who isn't the highest priority node
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("h1:27017 has lower priority than h3:27017"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.whoid = 3;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseHighestPriorityOfLiveNodes) {
- // Test trying to elect someone who isn't the highest priority node, but all higher nodes
- // are down
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
-
- BSONObjBuilder responseBuilder;
- Status result = Status::OK();
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder,
- &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_EQUALS(1, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseValidVotes) {
- // Test a valid vote
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 2;
- now = Date_t::fromMillisSinceEpoch(100);
-
- BSONObjBuilder responseBuilder1;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args,
- now += Seconds(60),
- OpTime(),
- &responseBuilder1,
- &result);
- stopCapturingLogMessages();
- BSONObj response1 = responseBuilder1.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response1["vote"].Int());
- ASSERT_EQUALS(round, response1["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h2:27017 (2)"));
-
- // Test what would be a valid vote except that we already voted too recently
- args.whoid = 3;
-
- BSONObjBuilder responseBuilder2;
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now, OpTime(), &responseBuilder2, &result);
- stopCapturingLogMessages();
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(0, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting no for h3:27017; "
- "voted for h2:27017 0 secs ago"));
-
- // Test that after enough time passes the same vote can proceed
- now += Seconds(30) + Milliseconds(1); // just over 30 seconds later
-
- BSONObjBuilder responseBuilder3;
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder3, &result);
- stopCapturingLogMessages();
- BSONObj response3 = responseBuilder3.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response3["vote"].Int());
- ASSERT_EQUALS(round, response3["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h3:27017 (3)"));
- }
-
- TEST_F(TopoCoordTest, ElectResponseNotInConfig) {
- ReplicationCoordinator::ReplSetElectArgs args;
- BSONObjBuilder response;
- Status status = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareElectResponse(args, now(), OpTime(), &response, &status);
- ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
- ASSERT_EQUALS("Cannot participate in election because not initialized", status.reason());
- }
-
- class PrepareFreezeResponseTest : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017"))),
- 0);
- }
-
- BSONObj prepareFreezeResponse(int duration) {
- BSONObjBuilder response;
- startCapturingLogMessages();
- getTopoCoord().prepareFreezeResponse(now()++, duration, &response);
- stopCapturingLogMessages();
- return response.obj();
- }
- };
-
- TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozen) {
- BSONObj response = prepareFreezeResponse(0);
- ASSERT_EQUALS("unfreezing", response["info"].String());
- ASSERT_EQUALS(1, countLogLinesContaining("'unfreezing'"));
- // 1 instead of 0 because it assigns to "now" in this case
- ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForOneSecond) {
- BSONObj response = prepareFreezeResponse(1);
- ASSERT_EQUALS("you really want to freeze for only 1 second?",
- response["warning"].String());
- ASSERT_EQUALS(1, countLogLinesContaining("'freezing' for 1 seconds"));
- // 1001 because "now" was incremented once during initialization + 1000 ms wait
- ASSERT_EQUALS(1001LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForManySeconds) {
- BSONObj response = prepareFreezeResponse(20);
- ASSERT_TRUE(response.isEmpty());
- ASSERT_EQUALS(1, countLogLinesContaining("'freezing' for 20 seconds"));
- // 20001 because "now" was incremented once during initialization + 20000 ms wait
- ASSERT_EQUALS(20001LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozenWhilePrimary) {
- makeSelfPrimary();
- BSONObj response = prepareFreezeResponse(0);
- ASSERT_EQUALS("unfreezing", response["info"].String());
- // doesn't mention being primary in this case for some reason
- ASSERT_EQUALS(0, countLogLinesContaining(
- "received freeze command but we are primary"));
- // 1 instead of 0 because it assigns to "now" in this case
- ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForOneSecondWhilePrimary) {
- makeSelfPrimary();
- BSONObj response = prepareFreezeResponse(1);
- ASSERT_EQUALS("you really want to freeze for only 1 second?",
- response["warning"].String());
- ASSERT_EQUALS(1, countLogLinesContaining(
- "received freeze command but we are primary"));
- ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForManySecondsWhilePrimary) {
- makeSelfPrimary();
- BSONObj response = prepareFreezeResponse(20);
- ASSERT_TRUE(response.isEmpty());
- ASSERT_EQUALS(1, countLogLinesContaining(
- "received freeze command but we are primary"));
- ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(TopoCoordTest, UnfreezeWhileLoneNode) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "host1:27017"))),
+protected:
+ Date_t now;
+ OID round;
+ ReplicationExecutor::CallbackArgs cbData;
+};
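+
+// As the assertions below show, prepareElectResponse answers with "vote" 1 for
+// a yea, 0 for an ordinary nay, and -10000 for a veto, echoing the request's
+// "round" OID in every case. Each failure test then repairs the offending
+// argument and votes again, confirming that a nay, unlike a yea, does not start
+// the vote cooldown.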
+
+TEST_F(PrepareElectResponseTest, ElectResponseIncorrectReplSetName) {
+ // Test with incorrect replset name
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "fakeset";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(0, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "received an elect request for 'fakeset' but our "
+ "set name is 'rs0'"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.set = "rs0";
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseOurConfigStale) {
+ // Test with us having a stale config version
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 20;
+ args.whoid = 1;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(0, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("not voting because our config version is stale"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.cfgver = 10;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseTheirConfigStale) {
+ // Test with them having a stale config version
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 5;
+ args.whoid = 1;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("received stale config version # during election"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.cfgver = 10;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
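+
+// Unlike the two preceding cases, which drew a plain nay (vote 0), a stale
+// config on the requester's side is answered with a veto (-10000), as are the
+// remaining failure cases below.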
+
+TEST_F(PrepareElectResponseTest, ElectResponseNonExistentNode) {
+ // Test with a non-existent node
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 99;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("couldn't find member with id 99"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.whoid = 1;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseWeArePrimary) {
+ // Test when we are already primary
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ getTopoCoord()._setCurrentPrimaryForTest(0);
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("I am already primary"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ getTopoCoord()._setCurrentPrimaryForTest(-1);
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseSomeoneElseIsPrimary) {
+ // Test when someone else is already primary
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+ getTopoCoord()._setCurrentPrimaryForTest(2);
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("h2:27017 is already primary"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ getTopoCoord()._setCurrentPrimaryForTest(-1);
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseNotHighestPriority) {
+    // Test trying to elect someone who isn't the highest-priority node
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("h1:27017 has lower priority than h3:27017"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.whoid = 3;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseHighestPriorityOfLiveNodes) {
+    // Test trying to elect someone who isn't the highest-priority node when all
+    // higher-priority nodes are down
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status::OK();
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_EQUALS(1, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+}
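+
+// With both higher-priority members reported down, the request that was vetoed
+// in the previous test now draws a yea: the priority comparison only weighs
+// members currently seen as up.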
+
+TEST_F(PrepareElectResponseTest, ElectResponseValidVotes) {
+ // Test a valid vote
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 2;
+ now = Date_t::fromMillisSinceEpoch(100);
+
+ BSONObjBuilder responseBuilder1;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ args, now += Seconds(60), OpTime(), &responseBuilder1, &result);
+ stopCapturingLogMessages();
+ BSONObj response1 = responseBuilder1.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response1["vote"].Int());
+ ASSERT_EQUALS(round, response1["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h2:27017 (2)"));
+
+ // Test what would be a valid vote except that we already voted too recently
+ args.whoid = 3;
+
+ BSONObjBuilder responseBuilder2;
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now, OpTime(), &responseBuilder2, &result);
+ stopCapturingLogMessages();
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(0, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "voting no for h3:27017; "
+ "voted for h2:27017 0 secs ago"));
+
+ // Test that after enough time passes the same vote can proceed
+ now += Seconds(30) + Milliseconds(1); // just over 30 seconds later
+
+ BSONObjBuilder responseBuilder3;
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder3, &result);
+ stopCapturingLogMessages();
+ BSONObj response3 = responseBuilder3.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response3["vote"].Int());
+ ASSERT_EQUALS(round, response3["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h3:27017 (3)"));
+}
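+
+// The Seconds(30) + Milliseconds(1) step above is what lets the second yea
+// through: it moves just past the window during which an earlier yea vote
+// blocks further yeas.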
+
+TEST_F(TopoCoordTest, ElectResponseNotInConfig) {
+ ReplicationCoordinator::ReplSetElectArgs args;
+ BSONObjBuilder response;
+ Status status = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareElectResponse(args, now(), OpTime(), &response, &status);
+ ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
+ ASSERT_EQUALS("Cannot participate in election because not initialized", status.reason());
+}
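+
+// With no config installed at all, prepareElectResponse reports
+// ReplicaSetNotFound through the out-parameter Status instead of recording any
+// vote.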
+
+class PrepareFreezeResponseTest : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017"))),
0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- BSONObjBuilder response;
- getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
- ASSERT(response.obj().isEmpty());
- BSONObjBuilder response2;
- getTopoCoord().prepareFreezeResponse(now()++, 0, &response2);
- ASSERT_EQUALS("unfreezing", response2.obj()["info"].String());
- ASSERT(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
}
- class ShutdownInProgressTest : public TopoCoordTest {
- public:
-
- ShutdownInProgressTest() :
- ourCbData(NULL,
- ReplicationExecutor::CallbackHandle(),
- Status(ErrorCodes::CallbackCanceled, "")) {}
-
- virtual ReplicationExecutor::CallbackArgs cbData() { return ourCbData; }
-
- private:
- ReplicationExecutor::CallbackArgs ourCbData;
- };
-
- TEST_F(ShutdownInProgressTest, ShutdownInProgressWhenCallbackCanceledSyncFrom) {
- Status result = Status::OK();
- BSONObjBuilder response;
- getTopoCoord().prepareSyncFromResponse(cbData(),
- HostAndPort("host2:27017"),
- OpTime(),
- &response,
- &result);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
- ASSERT_TRUE(response.obj().isEmpty());
-
- }
-
- TEST_F(ShutdownInProgressTest, ShutDownInProgressWhenCallbackCanceledStatus) {
- Status result = Status::OK();
+ BSONObj prepareFreezeResponse(int duration) {
BSONObjBuilder response;
- getTopoCoord().prepareStatusResponse(cbData(),
- Date_t(),
- 0,
- OpTime(),
- &response,
- &result);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
- ASSERT_TRUE(response.obj().isEmpty());
- }
-
- class PrepareHeartbeatResponseTest : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- }
-
- void prepareHeartbeatResponse(const ReplSetHeartbeatArgs& args,
- OpTime lastOpApplied,
- ReplSetHeartbeatResponse* response,
- Status* result) {
- *result = getTopoCoord().prepareHeartbeatResponse(now()++,
- args,
- "rs0",
- lastOpApplied,
- response);
- }
-
- };
-
- class PrepareHeartbeatResponseV1Test : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3")) <<
- "protocolVersion" << 1),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- }
-
- void prepareHeartbeatResponseV1(const ReplSetHeartbeatArgsV1& args,
- OpTime lastOpApplied,
- ReplSetHeartbeatResponse* response,
- Status* result) {
- *result = getTopoCoord().prepareHeartbeatResponseV1(now()++,
- args,
- "rs0",
- lastOpApplied,
- response);
- }
-
- };
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseBadSetName) {
- // set up args with incorrect replset name
- ReplSetHeartbeatArgsV1 args;
- args.setSetName("rs1");
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- startCapturingLogMessages();
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- stopCapturingLogMessages();
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result);
- ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \"" <<
- result.reason() << '"';
- ASSERT_EQUALS(1,
- countLogLinesContaining("replSet set names do not match, ours: rs0; remote "
- "node's: rs1"));
- // only protocolVersion should be set in this failure case
- ASSERT_EQUALS("", response.getReplicaSetName());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseWhenOutOfSet) {
- // reconfig self out of set
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3")) <<
- "protocolVersion" << 1),
- -1);
- ReplSetHeartbeatArgsV1 args;
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, result);
- ASSERT(result.reason().find("replica set configuration is invalid or does not include us"))
- << "Actual string was \"" << result.reason() << '"';
- // only protocolVersion should be set in this failure case
- ASSERT_EQUALS("", response.getReplicaSetName());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseFromSelf) {
- // set up args with our id as the senderId
- ReplSetHeartbeatArgsV1 args;
- args.setSetName("rs0");
- args.setSenderId(10);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT(result.reason().find("from member with the same member ID as our self")) <<
- "Actual string was \"" << result.reason() << '"';
- // only protocolVersion should be set in this failure case
- ASSERT_EQUALS("", response.getReplicaSetName());
- }
-
- TEST_F(TopoCoordTest, PrepareHeartbeatResponseV1NoConfigYet) {
- // set up args and acknowledge sender
- ReplSetHeartbeatArgsV1 args;
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- // prepare response and check the results
- Status result = getTopoCoord().prepareHeartbeatResponseV1(now()++,
- args,
- "rs0",
- OpTime(),
- &response);
- ASSERT_OK(result);
- // this change to true because we can now see a majority, unlike in the previous cases
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(-2, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseSenderIDMissing) {
- // set up args without a senderID
- ReplSetHeartbeatArgsV1 args;
- args.setSetName("rs0");
- args.setConfigVersion(1);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseSenderIDNotInConfig) {
- // set up args with a senderID which is not present in our config
- ReplSetHeartbeatArgsV1 args;
- args.setSetName("rs0");
- args.setConfigVersion(1);
- args.setSenderId(2);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseConfigVersionLow) {
- // set up args with a config version lower than ours
- ReplSetHeartbeatArgsV1 args;
- args.setConfigVersion(0);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_TRUE(response.hasConfig());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseConfigVersionHigh) {
- // set up args with a config version higher than ours
- ReplSetHeartbeatArgsV1 args;
- args.setConfigVersion(10);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.hasConfig());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseAsPrimary) {
- makeSelfPrimary(Timestamp(10,0));
-
- ReplSetHeartbeatArgsV1 args;
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponseV1(args, OpTime(Timestamp(11,0), 0), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.hasConfig());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(11,0), 0), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseWithSyncSource) {
- // get a sync source
- heartbeatFromMember(HostAndPort("h3"), "rs0",
- MemberState::RS_SECONDARY, OpTime());
- heartbeatFromMember(HostAndPort("h3"), "rs0",
- MemberState::RS_SECONDARY, OpTime());
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(1,0), 0));
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(1,0), 0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
-
- // set up args
- ReplSetHeartbeatArgsV1 args;
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponseV1(args, OpTime(Timestamp(100,0), 0), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.hasConfig());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(100,0), 0), response.getOpTime());
- ASSERT_EQUALS(0, response.getTerm());
- ASSERT_EQUALS(1, response.getConfigVersion());
- ASSERT_EQUALS(HostAndPort("h2"), response.getSyncingTo());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadProtocolVersion) {
- // set up args with bad protocol version
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(3);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("replset: incompatible replset protocol version: 3", result.reason());
- ASSERT_EQUALS("", response.getHbMsg());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseFromSelf) {
- // set up args with incorrect replset name
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs0");
- args.setSenderId(10);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT(result.reason().find("from member with the same member ID as our self")) <<
- "Actual string was \"" << result.reason() << '"';
- ASSERT_EQUALS("", response.getHbMsg());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadSetName) {
- // set up args with incorrect replset name
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs1");
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- startCapturingLogMessages();
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- stopCapturingLogMessages();
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result);
- ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \"" <<
- result.reason() << '"';
- ASSERT_EQUALS(1,
- countLogLinesContaining("replSet set names do not match, ours: rs0; remote "
- "node's: rs1"));
- ASSERT_TRUE(response.isMismatched());
- ASSERT_EQUALS("", response.getHbMsg());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDMissing) {
- // set up args without a senderID
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs0");
- args.setConfigVersion(1);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDNotInConfig) {
- // set up args with a senderID which is not present in our config
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs0");
- args.setConfigVersion(1);
- args.setSenderId(2);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionLow) {
- // set up args with a config version lower than ours
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(0);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_TRUE(response.hasConfig());
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionHigh) {
- // set up args with a config version higher than ours
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(10);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.hasConfig());
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderDown) {
- // set up args with sender down from our perspective
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- ASSERT_TRUE(response.isStateDisagreement());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderUp) {
- // set up args and acknowledge sender
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime());
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(Timestamp(100,0), 0), &response, &result);
- ASSERT_OK(result);
- // this change to true because we can now see a majority, unlike in the previous cases
- ASSERT_TRUE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(100,0), 0), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(TopoCoordTest, PrepareHeartbeatResponseNoConfigYet) {
- // set up args and acknowledge sender
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- // prepare response and check the results
- Status result = getTopoCoord().prepareHeartbeatResponse(now()++,
- args,
- "rs0",
- OpTime(),
- &response);
- ASSERT_OK(result);
- // this change to true because we can now see a majority, unlike in the previous cases
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(-2, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseAsPrimary) {
- makeSelfPrimary(Timestamp(10,0));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime());
-
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(Timestamp(11,0), 0), &response, &result);
- ASSERT_OK(result);
- // electable because we are already primary
- ASSERT_TRUE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(11,0), 0), response.getOpTime());
- ASSERT_EQUALS(Timestamp(10,0), response.getElectionTime());
- ASSERT_EQUALS(0, response.getTime().count());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseWithSyncSource) {
- // get a sync source
- heartbeatFromMember(HostAndPort("h3"), "rs0",
- MemberState::RS_SECONDARY, OpTime());
- heartbeatFromMember(HostAndPort("h3"), "rs0",
- MemberState::RS_SECONDARY, OpTime());
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(1,0), 0));
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(1,0), 0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime());
-
- // set up args
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(Timestamp(100,0), 0), &response, &result);
- ASSERT_OK(result);
- ASSERT_TRUE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(100,0), 0), response.getOpTime());
- ASSERT_EQUALS(0, response.getTime().count());
- // changed to a syncing message because our sync source changed recently
- ASSERT_EQUALS("syncing from: h2:27017", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getConfigVersion());
- ASSERT_EQUALS(HostAndPort("h2"), response.getSyncingTo());
- }
-
- TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneNode) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself"))),
- 0);
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- // if we are the only node, we should become a candidate when we transition to SECONDARY
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, CandidateWhenLoneSecondaryNodeReconfig) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- ReplicaSetConfig cfg;
- cfg.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself" << "priority" << 0))));
- getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
-
- // we should become a candidate when we reconfig to become electable
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- }
-
- TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneUnelectableNode) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- ReplicaSetConfig cfg;
- cfg.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself" << "priority" << 0))));
-
- getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- // despite being the only node, we are unelectable, so we should not become a candidate
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigToBeAddedToTheSet) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- // config to be absent from the set
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- -1);
- // should become removed since we are not in the set
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
-
- // reconfig to add to set
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- // having been added to the config, we should no longer be REMOVED and should enter STARTUP2
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSet) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- // reconfig to remove self
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- -1);
- // should become removed since we are no longer in the set
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSetAsPrimary) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017"))),
- 0);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
-        // win election and become primary
- getTopoCoord().processWinElection(OID::gen(), Timestamp());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // reconfig to remove self
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- -1);
- // should become removed since we are no longer in the set even though we were primary
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigCanNoLongerBePrimary) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017"))),
- 0);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
-        // win election and become primary
- getTopoCoord().processWinElection(OID::gen(), Timestamp());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // now lose primary due to loss of electability
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigContinueToBePrimary) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017"))),
- 0);
-
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
-        // win election and become primary
- getTopoCoord().processWinElection(OID::gen(), Timestamp());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // Now reconfig in ways that leave us electable and ensure we are still the primary.
- // Add hosts
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0,
- Date_t::fromMillisSinceEpoch(-1),
- OpTime(Timestamp(10,0), 0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // Change priorities and tags
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" << "priority" << 10) <<
- BSON("_id" << 1 <<
- "host" << "host2:27017" <<
- "priority" << 5 <<
- "tags" << BSON("dc" << "NA" << "rack" << "rack1")))),
- 0,
- Date_t::fromMillisSinceEpoch(-1),
- OpTime(Timestamp(10,0), 0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigKeepSecondary) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host1:27017") <<
- BSON("_id" << 2 << "host" << "host2:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- setSelfMemberState(MemberState::RS_SECONDARY);
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
-
- // reconfig and stay secondary
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
-    TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndResponse) {
- OpTime election = OpTime(Timestamp(14,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(13,0), 0);
-
- // all three members up and secondaries
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // now request from host3 and receive after host2 has been removed via reconfig
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
-
- ReplSetHeartbeatResponse hb;
- hb.initialize(BSON("ok" << 1 <<
- "v" << 1 <<
- "state" << MemberState::RS_PRIMARY), 0);
- hb.setOpTime(lastOpTimeApplied);
- hb.setElectionTime(election.getTimestamp());
- StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(now()++,
- Milliseconds(0),
- HostAndPort("host3"),
- hbResponse,
- lastOpTimeApplied);
-
- // now primary should be host3, index 1, and we should perform NoAction in response
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(action.getAction());
- }
-
-    TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndResponse) {
- OpTime election = OpTime(Timestamp(14,0), 0);
- OpTime lastOpTimeApplied = OpTime(Timestamp(13,0), 0);
-
- // all three members up and secondaries
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
-        // now request from host3 and receive the response after host3 has been removed via reconfig
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017"))),
- 0);
-
- ReplSetHeartbeatResponse hb;
- hb.initialize(BSON("ok" << 1 <<
- "v" << 1 <<
- "state" << MemberState::RS_PRIMARY), 0);
- hb.setOpTime(lastOpTimeApplied);
- hb.setElectionTime(election.getTimestamp());
- StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(now()++,
- Milliseconds(0),
- HostAndPort("host3"),
- hbResponse,
- lastOpTimeApplied);
-
- // primary should not be set and we should perform NoAction in response
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(action.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberNotInConfig) {
- // In this test, the TopologyCoordinator should tell us to change sync sources away from
- // "host4" since "host4" is absent from the config
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host4"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberHasYetToHeartbeat) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" since we do not yet have a heartbeat (and as a result do not yet have an optime)
- // for "host2"
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherHappierMemberExists) {
- // In this test, the TopologyCoordinator should tell us to change sync sources away from
- // "host2" and to "host3" since "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(4,0), 0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005,0), 0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- startCapturingLogMessages();
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsBlackListed) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" is blacklisted
-        // Then, confirm that unblacklisting only takes effect once the blacklist time has passed.
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(400,0), 0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005,0), 0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- getTopoCoord().blacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100));
-
- // set up complete, time for actual check
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
-
-        // unblacklist with too early a time (node should remain blacklisted)
- getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(90));
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
-
- // unblacklist and it should succeed
- getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100));
startCapturingLogMessages();
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
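+    // capture log output around the freeze call so tests can assert on emitted log lines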
+ getTopoCoord().prepareFreezeResponse(now()++, duration, &response);
stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+ return response.obj();
}
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsDown) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" is down
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(400,0), 0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005,0), 0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+};
+
+TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozen) {
+ BSONObj response = prepareFreezeResponse(0);
+ ASSERT_EQUALS("unfreezing", response["info"].String());
+ ASSERT_EQUALS(1, countLogLinesContaining("'unfreezing'"));
+ // 1 instead of 0 because it assigns to "now" in this case
+ ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForOneSecond) {
+ BSONObj response = prepareFreezeResponse(1);
+ ASSERT_EQUALS("you really want to freeze for only 1 second?", response["warning"].String());
+ ASSERT_EQUALS(1, countLogLinesContaining("'freezing' for 1 seconds"));
+ // 1001 because "now" was incremented once during initialization + 1000 ms wait
+ ASSERT_EQUALS(1001LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForManySeconds) {
+ BSONObj response = prepareFreezeResponse(20);
+ ASSERT_TRUE(response.isEmpty());
+ ASSERT_EQUALS(1, countLogLinesContaining("'freezing' for 20 seconds"));
+ // 20001 because "now" was incremented once during initialization + 20000 ms wait
+ ASSERT_EQUALS(20001LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozenWhilePrimary) {
+ makeSelfPrimary();
+ BSONObj response = prepareFreezeResponse(0);
+ ASSERT_EQUALS("unfreezing", response["info"].String());
+    // unlike the freeze-while-primary cases below, unfreezing logs no "we are primary" message
+ ASSERT_EQUALS(0, countLogLinesContaining("received freeze command but we are primary"));
+ // 1 instead of 0 because it assigns to "now" in this case
+ ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForOneSecondWhilePrimary) {
+ makeSelfPrimary();
+ BSONObj response = prepareFreezeResponse(1);
+ ASSERT_EQUALS("you really want to freeze for only 1 second?", response["warning"].String());
+ ASSERT_EQUALS(1, countLogLinesContaining("received freeze command but we are primary"));
+ ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForManySecondsWhilePrimary) {
+ makeSelfPrimary();
+ BSONObj response = prepareFreezeResponse(20);
+ ASSERT_TRUE(response.isEmpty());
+ ASSERT_EQUALS(1, countLogLinesContaining("received freeze command but we are primary"));
+ ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(TopoCoordTest, UnfreezeWhileLoneNode) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
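+    // freeze for 20 seconds, then unfreeze; a lone electable node should become a candidate again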
+ BSONObjBuilder response;
+ getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
+ ASSERT(response.obj().isEmpty());
+ BSONObjBuilder response2;
+ getTopoCoord().prepareFreezeResponse(now()++, 0, &response2);
+ ASSERT_EQUALS("unfreezing", response2.obj()["info"].String());
+ ASSERT(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
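+// Fixture whose cbData() reports CallbackCanceled, simulating a shutdown in progress.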
+class ShutdownInProgressTest : public TopoCoordTest {
+public:
+ ShutdownInProgressTest()
+ : ourCbData(NULL,
+ ReplicationExecutor::CallbackHandle(),
+ Status(ErrorCodes::CallbackCanceled, "")) {}
+
+ virtual ReplicationExecutor::CallbackArgs cbData() {
+ return ourCbData;
}
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsNotReadable) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" is in a non-readable mode (RS_ROLLBACK)
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(4,0), 0);
-        // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005,0), 0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_ROLLBACK,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexes) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" does not build indexes
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(4,0), 0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005,0), 0);
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself") <<
- BSON("_id" << 1 << "host" << "host2") <<
- BSON("_id" << 2 << "host" << "host3" <<
- "buildIndexes" << false << "priority" << 0))),
- 0);
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexesNorDoWe) {
- // In this test, the TopologyCoordinator should tell us to change sync sources away from
- // "host2" and to "host3" despite "host3" not building indexes because we do not build
- // indexes either and "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
- OpTime election = OpTime();
- OpTime lastOpTimeApplied = OpTime(Timestamp(4,0), 0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005,0), 0);
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 7 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself" <<
- "buildIndexes" << false << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "host2") <<
- BSON("_id" << 2 << "host" << "host3" <<
- "buildIndexes" << false << "priority" << 0))),
- 0);
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- startCapturingLogMessages();
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
- }
-
- TEST_F(TopoCoordTest, CheckShouldStandForElectionWithPrimary) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
+private:
+ ReplicationExecutor::CallbackArgs ourCbData;
+};
+
+TEST_F(ShutdownInProgressTest, ShutdownInProgressWhenCallbackCanceledSyncFrom) {
+ Status result = Status::OK();
+ BSONObjBuilder response;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("host2:27017"), OpTime(), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
+ ASSERT_TRUE(response.obj().isEmpty());
+}
+
+TEST_F(ShutdownInProgressTest, ShutdownInProgressWhenCallbackCanceledStatus) {
+ Status result = Status::OK();
+ BSONObjBuilder response;
+ getTopoCoord().prepareStatusResponse(cbData(), Date_t(), 0, OpTime(), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
+ ASSERT_TRUE(response.obj().isEmpty());
+}
+
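+// Fixture that installs a three-node config and forwards to prepareHeartbeatResponse.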
+class PrepareHeartbeatResponseTest : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
0);
setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_PRIMARY, OpTime(Timestamp(1,0), 0));
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime()));
}
- TEST_F(TopoCoordTest, CheckShouldStandForElectionNotCloseEnoughToLastOptime) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(10000,0), 0));
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++,
- OpTime(Timestamp(100,0), 0)));
+ void prepareHeartbeatResponse(const ReplSetHeartbeatArgs& args,
+ OpTime lastOpApplied,
+ ReplSetHeartbeatResponse* response,
+ Status* result) {
+ *result =
+ getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", lastOpApplied, response);
}
-
- TEST_F(TopoCoordTest, VoteForMyselfFailsWhileNotCandidate) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
+};
+
+class PrepareHeartbeatResponseV1Test : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))
+ << "protocolVersion" << 1),
0);
setSelfMemberState(MemberState::RS_SECONDARY);
- ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
}
- TEST_F(TopoCoordTest, GetMemberStateArbiter) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself" << "arbiterOnly" << true) <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- ASSERT_EQUALS(MemberState::RS_ARBITER, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, UnelectableIfAbsentFromConfig) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- startCapturingLogMessages();
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++,
- OpTime(Timestamp(10,0), 0)));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("not a member of a valid replica set config"));
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
- }
-
- TEST_F(TopoCoordTest, UnelectableIfVotedRecently) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- heartbeatFromMember(HostAndPort("h2"), "rs0",
- MemberState::RS_SECONDARY, OpTime(Timestamp(100,0), 0));
-
- // vote for another node
- OID remoteRound = OID::gen();
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- electArgs.set = "rs0";
- electArgs.round = remoteRound;
- electArgs.cfgver = 1;
- electArgs.whoid = 20;
-
- // need to be 30 secs beyond the start of time to pass last vote lease
- now() += Seconds(30);
- BSONObjBuilder electResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(Timestamp(100,0), 0), &electResponseBuilder, &result);
- BSONObj response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response["vote"].Int());
- ASSERT_EQUALS(remoteRound, response["round"].OID());
-
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- startCapturingLogMessages();
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++,
- OpTime(Timestamp(10,0), 0)));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("I recently voted for "));
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
- }
-
- TEST_F(TopoCoordTest, ProcessRequestVotesTwoRequestsForSameTerm) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- ReplSetRequestVotesArgs args;
- args.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response;
- OpTime lastAppliedOpTime;
-
- getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
- ASSERT_EQUALS("", response.getReason());
- ASSERT_TRUE(response.getVoteGranted());
-
- ReplSetRequestVotesArgs args2;
- args2.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "term" << 1LL
- << "candidateId" << 20LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response2;
-
- // different candidate same term, should be a problem
- getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
- ASSERT_EQUALS("already voted for another candidate this term", response2.getReason());
- ASSERT_FALSE(response2.getVoteGranted());
-
- }
-
- TEST_F(TopoCoordTest, ProcessRequestVotesDryRunsDoNotDisallowFutureRequestVotes) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // dry run
- ReplSetRequestVotesArgs args;
- args.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << true
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response;
- OpTime lastAppliedOpTime;
-
- getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
- ASSERT_EQUALS("", response.getReason());
- ASSERT_TRUE(response.getVoteGranted());
-
- // second dry run fine
- ReplSetRequestVotesArgs args2;
- args2.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << true
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response2;
-
- getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
- ASSERT_EQUALS("", response2.getReason());
- ASSERT_TRUE(response2.getVoteGranted());
-
- // real request fine
- ReplSetRequestVotesArgs args3;
- args3.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << false
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response3;
-
- getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime);
- ASSERT_EQUALS("", response3.getReason());
- ASSERT_TRUE(response3.getVoteGranted());
-
-        // another real request in the same term fails, since we already cast a real vote
- ReplSetRequestVotesArgs args4;
- args4.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << false
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response4;
-
- getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime);
- ASSERT_EQUALS("already voted for another candidate this term", response4.getReason());
- ASSERT_FALSE(response4.getVoteGranted());
-
- }
-
- TEST_F(TopoCoordTest, ProcessRequestVotesBadCommands) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // mismatched setName
- ReplSetRequestVotesArgs args;
- args.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "wrongName"
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response;
- OpTime lastAppliedOpTime;
-
- getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
- ASSERT_EQUALS("candidate's set name differs from mine", response.getReason());
- ASSERT_FALSE(response.getVoteGranted());
-
- // mismatched configVersion
- ReplSetRequestVotesArgs args2;
- args2.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "term" << 1LL
- << "candidateId" << 20LL
- << "configVersion" << 0LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response2;
-
- getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
- ASSERT_EQUALS("candidate's config version differs from mine", response2.getReason());
- ASSERT_FALSE(response2.getVoteGranted());
-
- // set term higher by receiving a replSetDeclareElectionWinnerCommand
- ReplSetDeclareElectionWinnerArgs winnerArgs;
- winnerArgs.initialize(BSON("replSetDeclareElectionWinner" << 1
- << "setName" << "rs0"
- << "term" << 2
- << "winnerId" << 30));
- long long responseTerm;
- ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm()));
- ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm));
- ASSERT_EQUALS(2, responseTerm);
-
- // stale term
- ReplSetRequestVotesArgs args3;
- args3.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "term" << 1LL
- << "candidateId" << 20LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response3;
-
- getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime);
- ASSERT_EQUALS("candidate's term is lower than mine", response3.getReason());
- ASSERT_EQUALS(2, response3.getTerm());
- ASSERT_FALSE(response3.getVoteGranted());
-
- // stale OpTime
- ReplSetRequestVotesArgs args4;
- args4.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "term" << 3LL
- << "candidateId" << 20LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response4;
- OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0};
-
- getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime2);
- ASSERT_EQUALS("candidate's data is staler than mine", response4.getReason());
- ASSERT_FALSE(response4.getVoteGranted());
- }
-
- TEST_F(TopoCoordTest, ProcessRequestVotesBadCommandsDryRun) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- // set term to 1
- ASSERT(getTopoCoord().updateTerm(1));
- // and make sure we voted in term 1
- ReplSetRequestVotesArgs argsForRealVote;
- argsForRealVote.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "term" << 1LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse responseForRealVote;
- OpTime lastAppliedOpTime;
-
- getTopoCoord().processReplSetRequestVotes(argsForRealVote,
- &responseForRealVote,
- lastAppliedOpTime);
- ASSERT_EQUALS("", responseForRealVote.getReason());
- ASSERT_TRUE(responseForRealVote.getVoteGranted());
-
-
- // mismatched setName
- ReplSetRequestVotesArgs args;
- args.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "wrongName"
- << "dryRun" << true
- << "term" << 2LL
- << "candidateId" << 10LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response;
-
- getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
- ASSERT_EQUALS("candidate's set name differs from mine", response.getReason());
- ASSERT_EQUALS(1, response.getTerm());
- ASSERT_FALSE(response.getVoteGranted());
-
- // mismatched configVersion
- ReplSetRequestVotesArgs args2;
- args2.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << true
- << "term" << 2LL
- << "candidateId" << 20LL
- << "configVersion" << 0LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response2;
-
- getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
- ASSERT_EQUALS("candidate's config version differs from mine", response2.getReason());
- ASSERT_EQUALS(1, response2.getTerm());
- ASSERT_FALSE(response2.getVoteGranted());
-
- // stale term
- ReplSetRequestVotesArgs args3;
- args3.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << true
- << "term" << 0LL
- << "candidateId" << 20LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response3;
-
- getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime);
- ASSERT_EQUALS("candidate's term is lower than mine", response3.getReason());
- ASSERT_EQUALS(1, response3.getTerm());
- ASSERT_FALSE(response3.getVoteGranted());
-
- // repeat term
- ReplSetRequestVotesArgs args4;
- args4.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << true
- << "term" << 1LL
- << "candidateId" << 20LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response4;
-
- getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime);
- ASSERT_EQUALS("", response4.getReason());
- ASSERT_EQUALS(1, response4.getTerm());
- ASSERT_TRUE(response4.getVoteGranted());
-
- // stale OpTime
- ReplSetRequestVotesArgs args5;
- args5.initialize(BSON("replSetRequestVotes" << 1
- << "setName" << "rs0"
- << "dryRun" << true
- << "term" << 3LL
- << "candidateId" << 20LL
- << "configVersion" << 1LL
- << "lastCommittedOp" << BSON ("ts" << Timestamp(10, 0)
- << "term" << 0LL)));
- ReplSetRequestVotesResponse response5;
- OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0};
-
- getTopoCoord().processReplSetRequestVotes(args5, &response5, lastAppliedOpTime2);
- ASSERT_EQUALS("candidate's data is staler than mine", response5.getReason());
- ASSERT_EQUALS(1, response5.getTerm());
- ASSERT_FALSE(response5.getVoteGranted());
- }
-
- TEST_F(TopoCoordTest, ProcessDeclareElectionWinner) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // successful
- ReplSetDeclareElectionWinnerArgs winnerArgs;
- winnerArgs.initialize(BSON("replSetDeclareElectionWinner" << 1
- << "setName" << "rs0"
- << "term" << 2
- << "winnerId" << 30));
- long long responseTerm = -1;
- ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm()));
- ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm));
- ASSERT_EQUALS(2, responseTerm);
-
- // repeat, should be problem free
- ReplSetDeclareElectionWinnerArgs winnerArgs2;
- winnerArgs2.initialize(BSON("replSetDeclareElectionWinner" << 1
- << "setName" << "rs0"
- << "term" << 2
- << "winnerId" << 30));
- long long responseTerm2 = -1;
- ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs2, &responseTerm2));
- ASSERT_EQUALS(2, responseTerm2);
-
- // same term, different primary, should fail
- ReplSetDeclareElectionWinnerArgs winnerArgs3;
- winnerArgs3.initialize(BSON("replSetDeclareElectionWinner" << 1
- << "setName" << "rs0"
- << "term" << 2
- << "winnerId" << 20));
- long long responseTerm3 = -1;
- ASSERT_EQUALS("term already has a primary",
- getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs3,
- &responseTerm3).reason());
- ASSERT_EQUALS(2, responseTerm3);
-
- // stale term, should fail
- ReplSetDeclareElectionWinnerArgs winnerArgs4;
- winnerArgs4.initialize(BSON("replSetDeclareElectionWinner" << 1
- << "setName" << "rs0"
- << "term" << 0
- << "winnerId" << 20));
- long long responseTerm4 = -1;
- ASSERT_EQUALS("term has already passed",
- getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs4,
- &responseTerm4).reason());
- ASSERT_EQUALS(2, responseTerm4);
-
- // wrong setName
- ReplSetDeclareElectionWinnerArgs winnerArgs5;
- winnerArgs5.initialize(BSON("replSetDeclareElectionWinner" << 1
- << "setName" << "wrongName"
- << "term" << 3
- << "winnerId" << 20));
- long long responseTerm5 = -1;
- ASSERT_EQUALS("replSet name does not match",
- getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs5,
- &responseTerm5).reason());
- ASSERT_EQUALS(2, responseTerm5);
+ void prepareHeartbeatResponseV1(const ReplSetHeartbeatArgsV1& args,
+ OpTime lastOpApplied,
+ ReplSetHeartbeatResponse* response,
+ Status* result) {
+ *result = getTopoCoord().prepareHeartbeatResponseV1(
+ now()++, args, "rs0", lastOpApplied, response);
}
+};
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseBadSetName) {
+ // set up args with incorrect replset name
+ ReplSetHeartbeatArgsV1 args;
+ args.setSetName("rs1");
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ startCapturingLogMessages();
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result);
+ ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \""
+ << result.reason() << '"';
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "replSet set names do not match, ours: rs0; remote "
+ "node's: rs1"));
+ // only protocolVersion should be set in this failure case
+ ASSERT_EQUALS("", response.getReplicaSetName());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseWhenOutOfSet) {
+ // reconfig self out of set
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members" << BSON_ARRAY(BSON("_id" << 20 << "host"
+ << "h2")
+ << BSON("_id" << 30 << "host"
+ << "h3"))
+ << "protocolVersion" << 1),
+ -1);
+ ReplSetHeartbeatArgsV1 args;
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, result);
+ ASSERT(result.reason().find("replica set configuration is invalid or does not include us"))
+ << "Actual string was \"" << result.reason() << '"';
+ // only protocolVersion should be set in this failure case
+ ASSERT_EQUALS("", response.getReplicaSetName());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseFromSelf) {
+ // set up args with our id as the senderId
+ ReplSetHeartbeatArgsV1 args;
+ args.setSetName("rs0");
+ args.setSenderId(10);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT(result.reason().find("from member with the same member ID as our self"))
+ << "Actual string was \"" << result.reason() << '"';
+ // only protocolVersion should be set in this failure case
+ ASSERT_EQUALS("", response.getReplicaSetName());
+}
+
+TEST_F(TopoCoordTest, PrepareHeartbeatResponseV1NoConfigYet) {
+    // set up args from a sender, though no config is installed yet
+ ReplSetHeartbeatArgsV1 args;
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ // prepare response and check the results
+ Status result =
+ getTopoCoord().prepareHeartbeatResponseV1(now()++, args, "rs0", OpTime(), &response);
+ ASSERT_OK(result);
+    // even though we have no config, we respond OK with our set name, RS_STARTUP state, and
+    // config version -2
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(-2, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseSenderIDMissing) {
+ // set up args without a senderID
+ ReplSetHeartbeatArgsV1 args;
+ args.setSetName("rs0");
+ args.setConfigVersion(1);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseSenderIDNotInConfig) {
+ // set up args with a senderID which is not present in our config
+ ReplSetHeartbeatArgsV1 args;
+ args.setSetName("rs0");
+ args.setConfigVersion(1);
+ args.setSenderId(2);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseConfigVersionLow) {
+ // set up args with a config version lower than ours
+ ReplSetHeartbeatArgsV1 args;
+ args.setConfigVersion(0);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
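+    // a lower config version than ours means the sender needs our config, so we attach it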
+ ASSERT_TRUE(response.hasConfig());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseConfigVersionHigh) {
+ // set up args with a config version higher than ours
+ ReplSetHeartbeatArgsV1 args;
+ args.setConfigVersion(10);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponseV1(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
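+    // a higher config version than ours means our config is stale, so we do not attach it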
+ ASSERT_FALSE(response.hasConfig());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseAsPrimary) {
+ makeSelfPrimary(Timestamp(10, 0));
+
+ ReplSetHeartbeatArgsV1 args;
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponseV1(args, OpTime(Timestamp(11, 0), 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.hasConfig());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseV1Test, PrepareHeartbeatResponseWithSyncSource) {
+ // get a sync source
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
+
+ // set up args
+ ReplSetHeartbeatArgsV1 args;
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponseV1(args, OpTime(Timestamp(100, 0), 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.hasConfig());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTerm());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+ ASSERT_EQUALS(HostAndPort("h2"), response.getSyncingTo());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadProtocolVersion) {
+ // set up args with bad protocol version
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(3);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS("replset: incompatible replset protocol version: 3", result.reason());
+ ASSERT_EQUALS("", response.getHbMsg());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseFromSelf) {
+    // set up args with our own member ID as the senderId
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(10);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT(result.reason().find("from member with the same member ID as our self"))
+ << "Actual string was \"" << result.reason() << '"';
+ ASSERT_EQUALS("", response.getHbMsg());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadSetName) {
+ // set up args with incorrect replset name
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs1");
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ startCapturingLogMessages();
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result);
+ ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \""
+ << result.reason() << '"';
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "replSet set names do not match, ours: rs0; remote "
+ "node's: rs1"));
+ ASSERT_TRUE(response.isMismatched());
+ ASSERT_EQUALS("", response.getHbMsg());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDMissing) {
+ // set up args without a senderID
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs0");
+ args.setConfigVersion(1);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDNotInConfig) {
+ // set up args with a senderID which is not present in our config
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs0");
+ args.setConfigVersion(1);
+ args.setSenderId(2);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionLow) {
+ // set up args with a config version lower than ours
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(0);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
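+    // the sender's config version (0) is older than ours (1), so we attach our config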
+ ASSERT_TRUE(response.hasConfig());
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionHigh) {
+ // set up args with a config version higher than ours
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(10);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
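+    // the sender's config version (10) is newer than ours (1), so we do not send ours back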
+ ASSERT_FALSE(response.hasConfig());
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderDown) {
+ // set up args with sender down from our perspective
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
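+    // we think the sender is down, so we note a state disagreement to prompt a recheck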
+ ASSERT_TRUE(response.isStateDisagreement());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderUp) {
+ // set up args and acknowledge sender
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime());
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(Timestamp(100, 0), 0), &response, &result);
+ ASSERT_OK(result);
+    // isElectable becomes true because we can now see a majority, unlike in the previous cases
+ ASSERT_TRUE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(TopoCoordTest, PrepareHeartbeatResponseNoConfigYet) {
+    // set up args
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ // prepare response and check the results
+ Status result =
+ getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", OpTime(), &response);
+ ASSERT_OK(result);
+    // not electable because we have no config and thus cannot see a majority
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
+ ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(-2, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseAsPrimary) {
+ makeSelfPrimary(Timestamp(10, 0));
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime());
+
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(Timestamp(11, 0), 0), &response, &result);
+ ASSERT_OK(result);
+ // electable because we are already primary
+ ASSERT_TRUE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(Timestamp(10, 0), response.getElectionTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseWithSyncSource) {
+ // get a sync source
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime());
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(1, 0), 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime());
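+    // h2 is chosen as the sync source since it is the only member ahead of our optime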
+
+ // set up args
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(Timestamp(100, 0), 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_TRUE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(0, response.getTime().count());
+    // the hbmsg changed to a syncing message because our sync source changed recently
+ ASSERT_EQUALS("syncing from: h2:27017", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getConfigVersion());
+ ASSERT_EQUALS(HostAndPort("h2"), response.getSyncingTo());
+}
+
+TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneNode) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"))),
+ 0);
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // if we are the only node, we should become a candidate when we transition to SECONDARY
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, CandidateWhenLoneSecondaryNodeReconfig) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ ReplicaSetConfig cfg;
+ cfg.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"
+ << "priority" << 0))));
+ getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // we should become a candidate when we reconfig to become electable
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
+TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneUnelectableNode) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ ReplicaSetConfig cfg;
+ cfg.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"
+ << "priority" << 0))));
+
+ getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // despite being the only node, we are unelectable, so we should not become a candidate
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigToBeAddedToTheSet) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+    // configure self to be absent from the set
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ -1);
+ // should become removed since we are not in the set
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
+
+ // reconfig to add to set
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ // having been added to the config, we should no longer be REMOVED and should enter STARTUP2
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSet) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // reconfig to remove self
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ -1);
+ // should become removed since we are no longer in the set
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSetAsPrimary) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+    // win election and become primary
+ getTopoCoord().processWinElection(OID::gen(), Timestamp());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // reconfig to remove self
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ -1);
+ // should become removed since we are no longer in the set even though we were primary
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigCanNoLongerBePrimary) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+    // win election and become primary
+ getTopoCoord().processWinElection(OID::gen(), Timestamp());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // now lose primary due to loss of electability
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigContinueToBePrimary) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+    // win election and become primary
+ getTopoCoord().processWinElection(OID::gen(), Timestamp());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // Now reconfig in ways that leave us electable and ensure we are still the primary.
+ // Add hosts
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0,
+ Date_t::fromMillisSinceEpoch(-1),
+ OpTime(Timestamp(10, 0), 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // Change priorities and tags
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "priority" << 10)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017"
+ << "priority" << 5 << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rack1")))),
+ 0,
+ Date_t::fromMillisSinceEpoch(-1),
+ OpTime(Timestamp(10, 0), 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigKeepSecondary) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // reconfig and stay secondary
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndResponse) {
+ OpTime election = OpTime(Timestamp(14, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(13, 0), 0);
+
+ // all three members up and secondaries
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // now request from host3 and receive after host2 has been removed via reconfig
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+
+ ReplSetHeartbeatResponse hb;
+ hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0);
+ hb.setOpTime(lastOpTimeApplied);
+ hb.setElectionTime(election.getTimestamp());
+ StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied);
+
+ // now primary should be host3, index 1, and we should perform NoAction in response
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(action.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndResponse) {
+ OpTime election = OpTime(Timestamp(14, 0), 0);
+ OpTime lastOpTimeApplied = OpTime(Timestamp(13, 0), 0);
+
+ // all three members up and secondaries
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // now request from host3 and receive after host2 has been removed via reconfig
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017"))),
+ 0);
+
+ ReplSetHeartbeatResponse hb;
+ hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0);
+ hb.setOpTime(lastOpTimeApplied);
+ hb.setElectionTime(election.getTimestamp());
+ StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied);
+
+ // primary should not be set and we should perform NoAction in response
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(action.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberNotInConfig) {
+ // In this test, the TopologyCoordinator should tell us to change sync sources away from
+ // "host4" since "host4" is absent from the config
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host4"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberHasYetToHeartbeat) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" since we do not yet have a heartbeat (and as a result do not yet have an optime)
+ // for "host2"
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherHappierMemberExists) {
+ // In this test, the TopologyCoordinator should tell us to change sync sources away from
+ // "host2" and to "host3" since "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ startCapturingLogMessages();
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsBlackListed) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" is blacklisted
+    // Then, confirm that unblacklisting only takes effect once the blacklist time has passed.
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ getTopoCoord().blacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100));
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+
+    // unblacklist with too early a time (node should remain blacklisted)
+ getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(90));
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+
+ // unblacklist and it should succeed
+ getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100));
+ startCapturingLogMessages();
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsDown) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" is down
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsNotReadable) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" is in a non-readable mode (RS_ROLLBACK)
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+    // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_ROLLBACK,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexes) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" does not build indexes
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0);
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself")
+ << BSON("_id" << 1 << "host"
+ << "host2")
+ << BSON("_id" << 2 << "host"
+ << "host3"
+ << "buildIndexes" << false << "priority" << 0))),
+ 0);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexesNorDoWe) {
+ // In this test, the TopologyCoordinator should tell us to change sync sources away from
+ // "host2" and to "host3" despite "host3" not building indexes because we do not build
+ // indexes either and "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0);
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 7 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself"
+ << "buildIndexes" << false << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "host2")
+ << BSON("_id" << 2 << "host"
+ << "host3"
+ << "buildIndexes" << false << "priority" << 0))),
+ 0);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ startCapturingLogMessages();
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+}
+
+TEST_F(TopoCoordTest, CheckShouldStandForElectionWithPrimary) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(Timestamp(1, 0), 0));
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime()));
+}
+
+TEST_F(TopoCoordTest, CheckShouldStandForElectionNotCloseEnoughToLastOptime) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(10000, 0), 0));
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(100, 0), 0)));
+}
+
+TEST_F(TopoCoordTest, VoteForMyselfFailsWhileNotCandidate) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
+}
+
+TEST_F(TopoCoordTest, GetMemberStateArbiter) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ ASSERT_EQUALS(MemberState::RS_ARBITER, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, UnelectableIfAbsentFromConfig) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ startCapturingLogMessages();
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(10, 0), 0)));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("not a member of a valid replica set config"));
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+}
+
+TEST_F(TopoCoordTest, UnelectableIfVotedRecently) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(100, 0), 0));
+
+ // vote for another node
+ OID remoteRound = OID::gen();
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ electArgs.set = "rs0";
+ electArgs.round = remoteRound;
+ electArgs.cfgver = 1;
+ electArgs.whoid = 20;
+
+    // we need to be 30 secs beyond the start of time for the last vote lease to have expired
+ now() += Seconds(30);
+ BSONObjBuilder electResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(Timestamp(100, 0), 0), &electResponseBuilder, &result);
+ BSONObj response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response["vote"].Int());
+ ASSERT_EQUALS(remoteRound, response["round"].OID());
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ startCapturingLogMessages();
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(10, 0), 0)));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("I recently voted for "));
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+}
+
+TEST_F(TopoCoordTest, ProcessRequestVotesTwoRequestsForSameTerm) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ ReplSetRequestVotesArgs args;
+ args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+ << "rs0"
+ << "term" << 1LL << "candidateId" << 10LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response;
+ OpTime lastAppliedOpTime;
+
+ getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
+ ASSERT_EQUALS("", response.getReason());
+ ASSERT_TRUE(response.getVoteGranted());
+
+ ReplSetRequestVotesArgs args2;
+ args2.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "term" << 1LL << "candidateId" << 20LL << "configVersion" << 1LL
+ << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response2;
+
+    // a different candidate in the same term should be denied
+ getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
+ ASSERT_EQUALS("already voted for another candidate this term", response2.getReason());
+ ASSERT_FALSE(response2.getVoteGranted());
+}
+
+TEST_F(TopoCoordTest, ProcessRequestVotesDryRunsDoNotDisallowFutureRequestVotes) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // dry run
+ ReplSetRequestVotesArgs args;
+ args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+ << "rs0"
+ << "dryRun" << true << "term" << 1LL << "candidateId"
+ << 10LL << "configVersion" << 1LL
+ << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response;
+ OpTime lastAppliedOpTime;
+
+ getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
+ ASSERT_EQUALS("", response.getReason());
+ ASSERT_TRUE(response.getVoteGranted());
+
+ // second dry run fine
+ ReplSetRequestVotesArgs args2;
+ args2.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << true << "term" << 1LL << "candidateId" << 10LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response2;
+
+ getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
+ ASSERT_EQUALS("", response2.getReason());
+ ASSERT_TRUE(response2.getVoteGranted());
+
+ // real request fine
+ ReplSetRequestVotesArgs args3;
+ args3.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << false << "term" << 1LL << "candidateId" << 10LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response3;
+
+ getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime);
+ ASSERT_EQUALS("", response3.getReason());
+ ASSERT_TRUE(response3.getVoteGranted());
+
+    // a second real request in the same term is denied
+ ReplSetRequestVotesArgs args4;
+ args4.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << false << "term" << 1LL << "candidateId" << 10LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response4;
+
+ getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime);
+ ASSERT_EQUALS("already voted for another candidate this term", response4.getReason());
+ ASSERT_FALSE(response4.getVoteGranted());
+}
+
+TEST_F(TopoCoordTest, ProcessRequestVotesBadCommands) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // mismatched setName
+ ReplSetRequestVotesArgs args;
+ args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+ << "wrongName"
+ << "term" << 1LL << "candidateId" << 10LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response;
+ OpTime lastAppliedOpTime;
+
+ getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
+ ASSERT_EQUALS("candidate's set name differs from mine", response.getReason());
+ ASSERT_FALSE(response.getVoteGranted());
+
+ // mismatched configVersion
+ ReplSetRequestVotesArgs args2;
+ args2.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "term" << 1LL << "candidateId" << 20LL << "configVersion" << 0LL
+ << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response2;
+
+ getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
+ ASSERT_EQUALS("candidate's config version differs from mine", response2.getReason());
+ ASSERT_FALSE(response2.getVoteGranted());
+
+ // set term higher by receiving a replSetDeclareElectionWinnerCommand
+ ReplSetDeclareElectionWinnerArgs winnerArgs;
+ winnerArgs.initialize(BSON("replSetDeclareElectionWinner" << 1 << "setName"
+ << "rs0"
+ << "term" << 2 << "winnerId" << 30));
+ long long responseTerm;
+ ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm()));
+ ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm));
+ ASSERT_EQUALS(2, responseTerm);
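+    // our term is now 2, so the term-1 vote request below is stale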
+
+ // stale term
+ ReplSetRequestVotesArgs args3;
+ args3.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "term" << 1LL << "candidateId" << 20LL << "configVersion" << 1LL
+ << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response3;
+
+ getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime);
+ ASSERT_EQUALS("candidate's term is lower than mine", response3.getReason());
+ ASSERT_EQUALS(2, response3.getTerm());
+ ASSERT_FALSE(response3.getVoteGranted());
+
+ // stale OpTime
+ ReplSetRequestVotesArgs args4;
+ args4.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "term" << 3LL << "candidateId" << 20LL << "configVersion" << 1LL
+ << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response4;
+ OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0};
+
+ getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime2);
+ ASSERT_EQUALS("candidate's data is staler than mine", response4.getReason());
+ ASSERT_FALSE(response4.getVoteGranted());
+}
+
+TEST_F(TopoCoordTest, ProcessRequestVotesBadCommandsDryRun) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ // set term to 1
+ ASSERT(getTopoCoord().updateTerm(1));
+ // and make sure we voted in term 1
+ ReplSetRequestVotesArgs argsForRealVote;
+ argsForRealVote.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "term" << 1LL << "candidateId" << 10LL << "configVersion"
+ << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse responseForRealVote;
+ OpTime lastAppliedOpTime;
+
+ getTopoCoord().processReplSetRequestVotes(
+ argsForRealVote, &responseForRealVote, lastAppliedOpTime);
+ ASSERT_EQUALS("", responseForRealVote.getReason());
+ ASSERT_TRUE(responseForRealVote.getVoteGranted());
+
+
+ // mismatched setName
+ ReplSetRequestVotesArgs args;
+ args.initialize(BSON("replSetRequestVotes" << 1 << "setName"
+ << "wrongName"
+ << "dryRun" << true << "term" << 2LL << "candidateId"
+ << 10LL << "configVersion" << 1LL
+ << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response;
+
+ getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime);
+ ASSERT_EQUALS("candidate's set name differs from mine", response.getReason());
+ ASSERT_EQUALS(1, response.getTerm());
+ ASSERT_FALSE(response.getVoteGranted());
+
+ // mismatched configVersion
+ ReplSetRequestVotesArgs args2;
+ args2.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << true << "term" << 2LL << "candidateId" << 20LL
+ << "configVersion" << 0LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response2;
+
+ getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime);
+ ASSERT_EQUALS("candidate's config version differs from mine", response2.getReason());
+ ASSERT_EQUALS(1, response2.getTerm());
+ ASSERT_FALSE(response2.getVoteGranted());
+
+ // stale term
+ ReplSetRequestVotesArgs args3;
+ args3.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << true << "term" << 0LL << "candidateId" << 20LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response3;
+
+ getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime);
+ ASSERT_EQUALS("candidate's term is lower than mine", response3.getReason());
+ ASSERT_EQUALS(1, response3.getTerm());
+ ASSERT_FALSE(response3.getVoteGranted());
+
+ // repeat term
+ ReplSetRequestVotesArgs args4;
+ args4.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << true << "term" << 1LL << "candidateId" << 20LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response4;
+
+ getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime);
+ ASSERT_EQUALS("", response4.getReason());
+ ASSERT_EQUALS(1, response4.getTerm());
+ ASSERT_TRUE(response4.getVoteGranted());
+
+ // stale OpTime
+ ReplSetRequestVotesArgs args5;
+ args5.initialize(BSON("replSetRequestVotes"
+ << 1 << "setName"
+ << "rs0"
+ << "dryRun" << true << "term" << 3LL << "candidateId" << 20LL
+ << "configVersion" << 1LL << "lastCommittedOp"
+ << BSON("ts" << Timestamp(10, 0) << "term" << 0LL)));
+ ReplSetRequestVotesResponse response5;
+ OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0};
+
+ getTopoCoord().processReplSetRequestVotes(args5, &response5, lastAppliedOpTime2);
+ ASSERT_EQUALS("candidate's data is staler than mine", response5.getReason());
+ ASSERT_EQUALS(1, response5.getTerm());
+ ASSERT_FALSE(response5.getVoteGranted());
+}
+
+TEST_F(TopoCoordTest, ProcessDeclareElectionWinner) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // successful
+ ReplSetDeclareElectionWinnerArgs winnerArgs;
+ winnerArgs.initialize(BSON("replSetDeclareElectionWinner" << 1 << "setName"
+ << "rs0"
+ << "term" << 2 << "winnerId" << 30));
+ long long responseTerm = -1;
+ ASSERT(getTopoCoord().updateTerm(winnerArgs.getTerm()));
+ ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs, &responseTerm));
+ ASSERT_EQUALS(2, responseTerm);
+
+ // repeat, should be problem free
+ ReplSetDeclareElectionWinnerArgs winnerArgs2;
+ winnerArgs2.initialize(BSON("replSetDeclareElectionWinner" << 1 << "setName"
+ << "rs0"
+ << "term" << 2 << "winnerId" << 30));
+ long long responseTerm2 = -1;
+ ASSERT_OK(getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs2, &responseTerm2));
+ ASSERT_EQUALS(2, responseTerm2);
+
+ // same term, different primary, should fail
+ ReplSetDeclareElectionWinnerArgs winnerArgs3;
+ winnerArgs3.initialize(BSON("replSetDeclareElectionWinner" << 1 << "setName"
+ << "rs0"
+ << "term" << 2 << "winnerId" << 20));
+ long long responseTerm3 = -1;
+ ASSERT_EQUALS(
+ "term already has a primary",
+ getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs3, &responseTerm3).reason());
+ ASSERT_EQUALS(2, responseTerm3);
+
+ // stale term, should fail
+ ReplSetDeclareElectionWinnerArgs winnerArgs4;
+ winnerArgs4.initialize(BSON("replSetDeclareElectionWinner" << 1 << "setName"
+ << "rs0"
+ << "term" << 0 << "winnerId" << 20));
+ long long responseTerm4 = -1;
+ ASSERT_EQUALS(
+ "term has already passed",
+ getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs4, &responseTerm4).reason());
+ ASSERT_EQUALS(2, responseTerm4);
+
+ // wrong setName
+ ReplSetDeclareElectionWinnerArgs winnerArgs5;
+ winnerArgs5.initialize(BSON("replSetDeclareElectionWinner" << 1 << "setName"
+ << "wrongName"
+ << "term" << 3 << "winnerId" << 20));
+ long long responseTerm5 = -1;
+ ASSERT_EQUALS(
+ "replSet name does not match",
+ getTopoCoord().processReplSetDeclareElectionWinner(winnerArgs5, &responseTerm5).reason());
+ ASSERT_EQUALS(2, responseTerm5);
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/update_position_args.cpp b/src/mongo/db/repl/update_position_args.cpp
index bce62dff3dc..4265efc0586 100644
--- a/src/mongo/db/repl/update_position_args.cpp
+++ b/src/mongo/db/repl/update_position_args.cpp
@@ -39,109 +39,106 @@ namespace mongo {
namespace repl {
- UpdatePositionArgs::UpdateInfo::UpdateInfo(
- const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId)
- : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {}
+UpdatePositionArgs::UpdateInfo::UpdateInfo(const OID& anRid,
+ const OpTime& aTs,
+ long long aCfgver,
+ long long aMemberId)
+ : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {}
namespace {
- const std::string kCommandFieldName = "replSetUpdatePosition";
- const std::string kUpdateArrayFieldName = "optimes";
-
- const std::string kLegalUpdatePositionFieldNames[] = {
- kCommandFieldName,
- kUpdateArrayFieldName,
- };
-
- const std::string kMemberRIDFieldName = "_id";
- const std::string kMemberConfigFieldName = "config";
- const std::string kOpTimeFieldName = "optime";
- const std::string kMemberIdFieldName = "memberId";
- const std::string kConfigVersionFieldName = "cfgver";
-
- const std::string kLegalUpdateInfoFieldNames[] = {
- kMemberConfigFieldName,
- kMemberRIDFieldName,
- kOpTimeFieldName,
- kMemberIdFieldName,
- kConfigVersionFieldName,
- };
-
-} // namespace
+const std::string kCommandFieldName = "replSetUpdatePosition";
+const std::string kUpdateArrayFieldName = "optimes";
+
+const std::string kLegalUpdatePositionFieldNames[] = {
+ kCommandFieldName, kUpdateArrayFieldName,
+};
+
+const std::string kMemberRIDFieldName = "_id";
+const std::string kMemberConfigFieldName = "config";
+const std::string kOpTimeFieldName = "optime";
+const std::string kMemberIdFieldName = "memberId";
+const std::string kConfigVersionFieldName = "cfgver";
+
+const std::string kLegalUpdateInfoFieldNames[] = {
+ kMemberConfigFieldName,
+ kMemberRIDFieldName,
+ kOpTimeFieldName,
+ kMemberIdFieldName,
+ kConfigVersionFieldName,
+};
+
+} // namespace
+
+Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("UpdatePositionArgs", argsObj, kLegalUpdatePositionFieldNames);
+
+ if (!status.isOK())
+ return status;
+
+ // grab the array of changes
+ BSONElement updateArray;
+ status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
+ if (!status.isOK())
+ return status;
+
+ // now parse each array entry into an update
+ BSONObjIterator i(updateArray.Obj());
+ while (i.more()) {
+ BSONObj entry = i.next().Obj();
+ status = bsonCheckOnlyHasFields("UpdateInfoArgs", entry, kLegalUpdateInfoFieldNames);
+ if (!status.isOK())
+ return status;
- Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("UpdatePositionArgs",
- argsObj,
- kLegalUpdatePositionFieldNames);
+ Timestamp ts;
+ status = bsonExtractTimestampField(entry, kOpTimeFieldName, &ts);
+ if (!status.isOK())
+ return status;
+        // TODO(spencer): The following three fields are optional in 3.0, but should be made
+        // required or ignored in a future release
+ long long cfgver;
+ status = bsonExtractIntegerFieldWithDefault(entry, kConfigVersionFieldName, -1, &cfgver);
if (!status.isOK())
return status;
- // grab the array of changes
- BSONElement updateArray;
- status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
+ OID rid;
+ status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid);
if (!status.isOK())
return status;
- // now parse each array entry into an update
- BSONObjIterator i(updateArray.Obj());
- while(i.more()) {
- BSONObj entry = i.next().Obj();
- status = bsonCheckOnlyHasFields("UpdateInfoArgs",
- entry,
- kLegalUpdateInfoFieldNames);
- if (!status.isOK())
- return status;
-
- Timestamp ts;
- status = bsonExtractTimestampField(entry, kOpTimeFieldName, &ts);
- if (!status.isOK())
- return status;
-
- // TODO(spencer): The following three fields are optional in 3.0, but should be made
- // required or ignored in 3.0
- long long cfgver;
- status = bsonExtractIntegerFieldWithDefault(entry, kConfigVersionFieldName, -1, &cfgver);
- if (!status.isOK())
- return status;
-
- OID rid;
- status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid);
- if (!status.isOK())
- return status;
-
- long long memberID;
- status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID);
- if (!status.isOK())
- return status;
-
- // TODO(siyuan) parse and fill term whem adding it to update position command.
- _updates.push_back(UpdateInfo(rid, OpTime(ts, 0), cfgver, memberID));
- }
+ long long memberID;
+ status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID);
+ if (!status.isOK())
+ return status;
- return Status::OK();
+    // TODO(siyuan) parse and fill term when adding it to update position command.
+ _updates.push_back(UpdateInfo(rid, OpTime(ts, 0), cfgver, memberID));
}
- BSONObj UpdatePositionArgs::toBSON() const {
- BSONObjBuilder builder;
- // add command name
- builder.append(kCommandFieldName, 1);
-
- // build array of updates
- if (!_updates.empty()) {
- BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName));
- for (UpdatePositionArgs::UpdateIterator update = updatesBegin();
- update != updatesEnd();
- ++update) {
- updateArray.append(BSON(kMemberRIDFieldName << update->rid <<
- kOpTimeFieldName << update->ts.getTimestamp() <<
- kConfigVersionFieldName << update->cfgver <<
- kMemberIdFieldName << update->memberId));
- }
- updateArray.doneFast();
+ return Status::OK();
+}
+
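+// toBSON() produces a command document of the form (one entry per update):
+// { replSetUpdatePosition: 1,
+//   optimes: [ { _id: <rid>, optime: <ts>, cfgver: <version>, memberId: <id> }, ... ] }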
+BSONObj UpdatePositionArgs::toBSON() const {
+ BSONObjBuilder builder;
+ // add command name
+ builder.append(kCommandFieldName, 1);
+
+ // build array of updates
+ if (!_updates.empty()) {
+ BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName));
+ for (UpdatePositionArgs::UpdateIterator update = updatesBegin(); update != updatesEnd();
+ ++update) {
+ updateArray.append(BSON(kMemberRIDFieldName << update->rid << kOpTimeFieldName
+ << update->ts.getTimestamp()
+ << kConfigVersionFieldName << update->cfgver
+ << kMemberIdFieldName << update->memberId));
}
- return builder.obj();
+ updateArray.doneFast();
}
+ return builder.obj();
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/update_position_args.h b/src/mongo/db/repl/update_position_args.h
index a3450163d56..ecaf9ec5d4e 100644
--- a/src/mongo/db/repl/update_position_args.h
+++ b/src/mongo/db/repl/update_position_args.h
@@ -35,49 +35,54 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- /**
- * Arguments to the handshake command.
- */
- class UpdatePositionArgs {
- public:
- struct UpdateInfo {
- UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId);
+/**
+ * Arguments to the replSetUpdatePosition command.
+ */
+class UpdatePositionArgs {
+public:
+ struct UpdateInfo {
+ UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId);
- OID rid;
- OpTime ts;
- long long cfgver;
- long long memberId;
- };
+ OID rid;
+ OpTime ts;
+ long long cfgver;
+ long long memberId;
+ };
- typedef std::vector<UpdateInfo>::const_iterator UpdateIterator;
+ typedef std::vector<UpdateInfo>::const_iterator UpdateIterator;
- /**
- * Initializes this UpdatePositionArgs from the contents of "argsObj".
- */
- Status initialize(const BSONObj& argsObj);
+ /**
+ * Initializes this UpdatePositionArgs from the contents of "argsObj".
+ */
+ Status initialize(const BSONObj& argsObj);
- /**
- * Gets a begin iterator over the UpdateInfos stored in this UpdatePositionArgs.
- */
- UpdateIterator updatesBegin() const { return _updates.begin(); }
+ /**
+ * Gets a begin iterator over the UpdateInfos stored in this UpdatePositionArgs.
+ */
+ UpdateIterator updatesBegin() const {
+ return _updates.begin();
+ }
- /**
- * Gets an end iterator over the UpdateInfos stored in this UpdatePositionArgs.
- */
- UpdateIterator updatesEnd() const { return _updates.end(); }
+ /**
+ * Gets an end iterator over the UpdateInfos stored in this UpdatePositionArgs.
+ */
+ UpdateIterator updatesEnd() const {
+ return _updates.end();
+ }
- /**
- * Returns a BSONified version of the object.
- * _updates is only included if it is not empty.
- */
- BSONObj toBSON() const;
- private:
- std::vector<UpdateInfo> _updates;
- };
+ /**
+ * Returns a BSONified version of the object.
+ * _updates is only included if it is not empty.
+ */
+ BSONObj toBSON() const;
+
+private:
+ std::vector<UpdateInfo> _updates;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/vote_requester.cpp b/src/mongo/db/repl/vote_requester.cpp
index 81691edf5d7..c23bd7d6916 100644
--- a/src/mongo/db/repl/vote_requester.cpp
+++ b/src/mongo/db/repl/vote_requester.cpp
@@ -42,128 +42,115 @@
namespace mongo {
namespace repl {
- VoteRequester::Algorithm::Algorithm(const ReplicaSetConfig& rsConfig,
- long long candidateId,
- long long term,
- bool dryRun,
- OpTime lastOplogEntry) :
- _rsConfig(rsConfig),
- _candidateId(candidateId),
- _term(term),
- _dryRun(dryRun),
- _lastOplogEntry(lastOplogEntry) {
-
- // populate targets with all voting members that aren't this node
- for (auto member = _rsConfig.membersBegin(); member != _rsConfig.membersEnd(); member++) {
- if (member->isVoter() && member->getId() != candidateId) {
- _targets.push_back(member->getHostAndPort());
- }
+VoteRequester::Algorithm::Algorithm(const ReplicaSetConfig& rsConfig,
+ long long candidateId,
+ long long term,
+ bool dryRun,
+ OpTime lastOplogEntry)
+ : _rsConfig(rsConfig),
+ _candidateId(candidateId),
+ _term(term),
+ _dryRun(dryRun),
+ _lastOplogEntry(lastOplogEntry) {
+ // populate targets with all voting members that aren't this node
+ for (auto member = _rsConfig.membersBegin(); member != _rsConfig.membersEnd(); member++) {
+ if (member->isVoter() && member->getId() != candidateId) {
+ _targets.push_back(member->getHostAndPort());
}
}
-
- VoteRequester::Algorithm::~Algorithm() {}
-
- std::vector<RemoteCommandRequest>
- VoteRequester::Algorithm::getRequests() const {
- BSONObjBuilder requestVotesCmdBuilder;
- requestVotesCmdBuilder.append("replSetRequestVotes", 1);
- requestVotesCmdBuilder.append("setName", _rsConfig.getReplSetName());
- requestVotesCmdBuilder.append("dryRun", _dryRun);
- requestVotesCmdBuilder.append("term", _term);
- requestVotesCmdBuilder.append("candidateId", _candidateId);
- requestVotesCmdBuilder.append("configVersion", _rsConfig.getConfigVersion());
-
- BSONObjBuilder lastCommittedOp(requestVotesCmdBuilder.subobjStart("lastCommittedOp"));
- lastCommittedOp.append("ts", _lastOplogEntry.getTimestamp());
- lastCommittedOp.append("term", _lastOplogEntry.getTerm());
- lastCommittedOp.done();
-
- const BSONObj requestVotesCmd = requestVotesCmdBuilder.obj();
-
- std::vector<RemoteCommandRequest> requests;
- for (const auto& target : _targets) {
- requests.push_back(RemoteCommandRequest(
- target,
- "admin",
- requestVotesCmd,
- Milliseconds(30*1000))); // trying to match current Socket timeout
- }
-
- return requests;
+}
+
+VoteRequester::Algorithm::~Algorithm() {}
+
+std::vector<RemoteCommandRequest> VoteRequester::Algorithm::getRequests() const {
+ BSONObjBuilder requestVotesCmdBuilder;
+ requestVotesCmdBuilder.append("replSetRequestVotes", 1);
+ requestVotesCmdBuilder.append("setName", _rsConfig.getReplSetName());
+ requestVotesCmdBuilder.append("dryRun", _dryRun);
+ requestVotesCmdBuilder.append("term", _term);
+ requestVotesCmdBuilder.append("candidateId", _candidateId);
+ requestVotesCmdBuilder.append("configVersion", _rsConfig.getConfigVersion());
+
+ BSONObjBuilder lastCommittedOp(requestVotesCmdBuilder.subobjStart("lastCommittedOp"));
+ lastCommittedOp.append("ts", _lastOplogEntry.getTimestamp());
+ lastCommittedOp.append("term", _lastOplogEntry.getTerm());
+ lastCommittedOp.done();
+
+ const BSONObj requestVotesCmd = requestVotesCmdBuilder.obj();
+
+ std::vector<RemoteCommandRequest> requests;
+ for (const auto& target : _targets) {
+ requests.push_back(RemoteCommandRequest(
+ target,
+ "admin",
+ requestVotesCmd,
+ Milliseconds(30 * 1000))); // trying to match current Socket timeout
}
- void VoteRequester::Algorithm::processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response) {
- _responsesProcessed++;
- if (!response.isOK()) { // failed response
- log() << "VoteRequester: Got failed response from " << request.target
- << ": " << response.getStatus();
- }
- else {
- ReplSetRequestVotesResponse voteResponse;
- voteResponse.initialize(response.getValue().data);
- if (voteResponse.getVoteGranted()) {
- _votes++;
- }
- else {
- log() << "VoteRequester: Got no vote from " << request.target
- << " because: " << voteResponse.getReason();
- }
-
- if (voteResponse.getTerm() > _term) {
- _staleTerm = true;
- }
+ return requests;
+}
+
+void VoteRequester::Algorithm::processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ _responsesProcessed++;
+ if (!response.isOK()) { // failed response
+ log() << "VoteRequester: Got failed response from " << request.target << ": "
+ << response.getStatus();
+ } else {
+ ReplSetRequestVotesResponse voteResponse;
+ voteResponse.initialize(response.getValue().data);
+ if (voteResponse.getVoteGranted()) {
+ _votes++;
+ } else {
+ log() << "VoteRequester: Got no vote from " << request.target
+ << " because: " << voteResponse.getReason();
}
- }
- bool VoteRequester::Algorithm::hasReceivedSufficientResponses() const {
- return _staleTerm ||
- _votes == _rsConfig.getMajorityVoteCount() ||
- _responsesProcessed == static_cast<int>(_targets.size());
- }
-
- VoteRequester::VoteRequestResult VoteRequester::Algorithm::getResult() const {
- if (_staleTerm) {
- return StaleTerm;
- }
- else if (_votes >= _rsConfig.getMajorityVoteCount()) {
- return SuccessfullyElected;
+ if (voteResponse.getTerm() > _term) {
+ _staleTerm = true;
}
- else {
- return InsufficientVotes;
- }
- }
-
- VoteRequester::VoteRequester() : _isCanceled(false) {}
- VoteRequester::~VoteRequester() {}
-
- StatusWith<ReplicationExecutor::EventHandle> VoteRequester::start(
- ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- long long candidateId,
- long long term,
- bool dryRun,
- OpTime lastOplogEntry,
- const stdx::function<void ()>& onCompletion) {
-
- _algorithm.reset(new Algorithm(rsConfig,
- candidateId,
- term,
- dryRun,
- lastOplogEntry));
- _runner.reset(new ScatterGatherRunner(_algorithm.get()));
- return _runner->start(executor, onCompletion);
}
-
- void VoteRequester::cancel(ReplicationExecutor* executor) {
- _isCanceled = true;
- _runner->cancel(executor);
+}
+
+bool VoteRequester::Algorithm::hasReceivedSufficientResponses() const {
+ return _staleTerm || _votes == _rsConfig.getMajorityVoteCount() ||
+ _responsesProcessed == static_cast<int>(_targets.size());
+}
+
+VoteRequester::VoteRequestResult VoteRequester::Algorithm::getResult() const {
+ if (_staleTerm) {
+ return StaleTerm;
+ } else if (_votes >= _rsConfig.getMajorityVoteCount()) {
+ return SuccessfullyElected;
+ } else {
+ return InsufficientVotes;
}
-
- VoteRequester::VoteRequestResult VoteRequester::getResult() const {
- return _algorithm->getResult();
- }
-
-} // namespace repl
-} // namespace mongo
+}
+
+VoteRequester::VoteRequester() : _isCanceled(false) {}
+VoteRequester::~VoteRequester() {}
+
+StatusWith<ReplicationExecutor::EventHandle> VoteRequester::start(
+ ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ long long candidateId,
+ long long term,
+ bool dryRun,
+ OpTime lastOplogEntry,
+ const stdx::function<void()>& onCompletion) {
+ _algorithm.reset(new Algorithm(rsConfig, candidateId, term, dryRun, lastOplogEntry));
+ _runner.reset(new ScatterGatherRunner(_algorithm.get()));
+ return _runner->start(executor, onCompletion);
+}
+
+void VoteRequester::cancel(ReplicationExecutor* executor) {
+ _isCanceled = true;
+ _runner->cancel(executor);
+}
+
+VoteRequester::VoteRequestResult VoteRequester::getResult() const {
+ return _algorithm->getResult();
+}
+
+} // namespace repl
+} // namespace mongo
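
Aside (illustrative, not from the patch): for orientation, the replSetRequestVotes command assembled by getRequests() above has the shape reconstructed below. The field names are copied from the appends in the code; the concrete values are invented.

#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/timestamp.h"

// Rebuilds, with made-up values, the command document getRequests() produces.
mongo::BSONObj exampleRequestVotesCmd() {
    mongo::BSONObjBuilder b;
    b.append("replSetRequestVotes", 1);
    b.append("setName", "rs0");      // _rsConfig.getReplSetName()
    b.append("dryRun", false);       // _dryRun
    b.append("term", 2LL);           // _term
    b.append("candidateId", 0LL);    // _candidateId
    b.append("configVersion", 2LL);  // _rsConfig.getConfigVersion()
    mongo::BSONObjBuilder lastCommittedOp(b.subobjStart("lastCommittedOp"));
    lastCommittedOp.append("ts", mongo::Timestamp(999, 0));  // _lastOplogEntry.getTimestamp()
    lastCommittedOp.append("term", 1LL);                     // _lastOplogEntry.getTerm()
    lastCommittedOp.done();
    return b.obj();
}
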
diff --git a/src/mongo/db/repl/vote_requester.h b/src/mongo/db/repl/vote_requester.h
index a5102192ebd..8000d12ddb8 100644
--- a/src/mongo/db/repl/vote_requester.h
+++ b/src/mongo/db/repl/vote_requester.h
@@ -41,92 +41,91 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- class ScatterGatherRunner;
- class ReplSetDeclareRequestVotesArgs;
+class ScatterGatherRunner;
+class ReplSetDeclareRequestVotesArgs;
- class VoteRequester {
- MONGO_DISALLOW_COPYING(VoteRequester);
- public:
+class VoteRequester {
+ MONGO_DISALLOW_COPYING(VoteRequester);
- enum VoteRequestResult {
- SuccessfullyElected,
- StaleTerm,
- InsufficientVotes,
- };
-
- class Algorithm : public ScatterGatherAlgorithm {
- public:
- Algorithm(const ReplicaSetConfig& rsConfig,
- long long candidateId,
- long long term,
- bool dryRun,
- OpTime lastOplogEntry);
- virtual ~Algorithm();
- virtual std::vector<RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const RemoteCommandRequest& request,
- const ResponseStatus& response);
- virtual bool hasReceivedSufficientResponses() const;
-
- /**
- * Returns a VoteRequestResult indicating the result of the election.
- *
- * It is invalid to call this before hasReceivedSufficientResponses returns true.
- */
- VoteRequestResult getResult() const;
-
- private:
- const ReplicaSetConfig _rsConfig;
- const long long _candidateId;
- const long long _term;
- bool _dryRun = false; // this bool indicates this is a mock election when true
- const OpTime _lastOplogEntry;
- std::vector<HostAndPort> _targets;
- bool _staleTerm = false;
- long long _responsesProcessed = 0;
- long long _votes = 1;
- };
-
- VoteRequester();
- virtual ~VoteRequester();
+public:
+ enum VoteRequestResult {
+ SuccessfullyElected,
+ StaleTerm,
+ InsufficientVotes,
+ };
- /**
- * Begins the process of sending replSetRequestVotes commands to all non-DOWN nodes
- * in currentConfig, in attempt to receive sufficient votes to win the election.
- *
- * evh can be used to schedule a callback when the process is complete.
- * This function must be run in the executor, as it must be synchronous with the command
- * callbacks that it schedules.
- * If this function returns Status::OK(), evh is then guaranteed to be signaled.
- **/
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- long long candidateId,
- long long term,
- bool dryRun,
- OpTime lastOplogEntry,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
+ class Algorithm : public ScatterGatherAlgorithm {
+ public:
+ Algorithm(const ReplicaSetConfig& rsConfig,
+ long long candidateId,
+ long long term,
+ bool dryRun,
+ OpTime lastOplogEntry);
+ virtual ~Algorithm();
+ virtual std::vector<RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const RemoteCommandRequest& request,
+ const ResponseStatus& response);
+ virtual bool hasReceivedSufficientResponses() const;
/**
- * Informs the VoteRequester to cancel further processing. The "executor"
- * argument must point to the same executor passed to "start()".
+ * Returns a VoteRequestResult indicating the result of the election.
*
- * Like start(), this method must run in the executor context.
+ * It is invalid to call this before hasReceivedSufficientResponses returns true.
*/
- void cancel(ReplicationExecutor* executor);
-
VoteRequestResult getResult() const;
private:
- std::unique_ptr<Algorithm> _algorithm;
- std::unique_ptr<ScatterGatherRunner> _runner;
- bool _isCanceled = false;
+ const ReplicaSetConfig _rsConfig;
+ const long long _candidateId;
+ const long long _term;
+        bool _dryRun = false;  // when true, this is a mock (dry-run) election
+ const OpTime _lastOplogEntry;
+ std::vector<HostAndPort> _targets;
+ bool _staleTerm = false;
+ long long _responsesProcessed = 0;
+ long long _votes = 1;
};
+ VoteRequester();
+ virtual ~VoteRequester();
+
+ /**
+ * Begins the process of sending replSetRequestVotes commands to all non-DOWN nodes
+     * in currentConfig, in an attempt to receive sufficient votes to win the election.
+ *
+ * evh can be used to schedule a callback when the process is complete.
+ * This function must be run in the executor, as it must be synchronous with the command
+ * callbacks that it schedules.
+ * If this function returns Status::OK(), evh is then guaranteed to be signaled.
+ **/
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ long long candidateId,
+ long long term,
+ bool dryRun,
+ OpTime lastOplogEntry,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the VoteRequester to cancel further processing. The "executor"
+ * argument must point to the same executor passed to "start()".
+ *
+ * Like start(), this method must run in the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+ VoteRequestResult getResult() const;
+
+private:
+ std::unique_ptr<Algorithm> _algorithm;
+ std::unique_ptr<ScatterGatherRunner> _runner;
+ bool _isCanceled = false;
+};
+
} // namespace repl
} // namespace mongo
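
Aside (illustrative, not from the patch): the tallying rule declared above, reduced to standalone arithmetic. _votes starts at 1 because the candidate votes for itself, and hasReceivedSufficientResponses() stops the scatter-gather once a majority is reached, a stale term is seen, or every target has answered. The majority formula voters / 2 + 1 is an assumption for illustration; the real number comes from ReplicaSetConfig::getMajorityVoteCount().

#include <cstddef>
#include <iostream>

int main() {
    const int votingMembers = 3;                    // e.g. hosts 0-2 in the tests below
    const int majority = votingMembers / 2 + 1;     // 2 (assumed formula)
    const std::size_t targets = votingMembers - 1;  // peers contacted; excludes the candidate

    int votes = 1;  // the candidate's own vote
    int responsesProcessed = 0;

    // A single "yes" from a peer reaches the majority of 2, which is why one
    // good response ends the election in the tests that follow.
    ++responsesProcessed;
    ++votes;

    const bool done =
        votes == majority || responsesProcessed == static_cast<int>(targets);
    std::cout << std::boolalpha << done << std::endl;  // prints: true
}
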
diff --git a/src/mongo/db/repl/vote_requester_test.cpp b/src/mongo/db/repl/vote_requester_test.cpp
index d5b9d9ce3d5..fb5fb8d757f 100644
--- a/src/mongo/db/repl/vote_requester_test.cpp
+++ b/src/mongo/db/repl/vote_requester_test.cpp
@@ -44,345 +44,348 @@ namespace mongo {
namespace repl {
namespace {
- using executor::NetworkInterfaceMock;
- using unittest::assertGet;
-
- using RemoteCommandRequest = RemoteCommandRequest;
-
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
- }
-
-
- class VoteRequesterTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1") <<
- BSON("_id" << 2 << "host" << "host2") <<
- BSON("_id" << 3 << "host" << "host3" << "votes" << 0) <<
- BSON("_id" << 4 << "host" << "host4" << "votes" << 0)))));
- ASSERT_OK(config.validate());
- long long candidateId = 0;
- long long term = 2;
- OpTime lastOplogEntry = OpTime(Timestamp(999,0), 1);
-
- _requester.reset(new VoteRequester::Algorithm(config,
- candidateId,
- term,
- false, // not a dryRun
- lastOplogEntry));
- }
-
- virtual void tearDown() {
- _requester.reset(NULL);
- }
-
- protected:
- int64_t countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
-
- bool hasReceivedSufficientResponses() {
- return _requester->hasReceivedSufficientResponses();
- }
-
- void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
- _requester->processResponse(request, response);
- }
-
- VoteRequester::VoteRequestResult getResult() {
- return _requester->getResult();
- }
-
- RemoteCommandRequest requestFrom(std::string hostname) {
- return RemoteCommandRequest(HostAndPort(hostname),
- "", // fields do not matter in VoteRequester
- BSONObj(),
- Milliseconds(0));
- }
-
- ResponseStatus badResponseStatus() {
- return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
- }
-
- ResponseStatus votedYes() {
- ReplSetRequestVotesResponse response;
- response.setOk(true);
- response.setVoteGranted(true);
- response.setTerm(1);
- return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(),
- Milliseconds(10)));
- }
-
- ResponseStatus votedNoBecauseConfigVersionDoesNotMatch() {
- ReplSetRequestVotesResponse response;
- response.setOk(true);
- response.setVoteGranted(false);
- response.setTerm(1);
- response.setReason("candidate's config version differs from mine");
- return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(),
- Milliseconds(10)));
- }
-
- ResponseStatus votedNoBecauseSetNameDiffers() {
- ReplSetRequestVotesResponse response;
- response.setOk(true);
- response.setVoteGranted(false);
- response.setTerm(1);
- response.setReason("candidate's set name differs from mine");
- return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(),
- Milliseconds(10)));
- }
-
- ResponseStatus votedNoBecauseLastOpTimeIsGreater() {
- ReplSetRequestVotesResponse response;
- response.setOk(true);
- response.setVoteGranted(false);
- response.setTerm(1);
- response.setReason("candidate's data is staler than mine");
- return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(),
- Milliseconds(10)));
- }
-
- ResponseStatus votedNoBecauseTermIsGreater() {
- ReplSetRequestVotesResponse response;
- response.setOk(true);
- response.setVoteGranted(false);
- response.setTerm(3);
- response.setReason("candidate's term is lower than mine");
- return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(),
- Milliseconds(10)));
- }
-
- ResponseStatus votedNoBecauseAlreadyVoted() {
- ReplSetRequestVotesResponse response;
- response.setOk(true);
- response.setVoteGranted(false);
- response.setTerm(2);
- response.setReason("already voted for another candidate this term");
- return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(),
- Milliseconds(10)));
- }
-
- std::unique_ptr<VoteRequester::Algorithm> _requester;
- };
-
- class VoteRequesterDryRunTest : public VoteRequesterTest {
- public:
- virtual void setUp() {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1") <<
- BSON("_id" << 2 << "host" << "host2") <<
- BSON("_id" << 3 << "host" << "host3" << "votes" << 0) <<
- BSON("_id" << 4 << "host" << "host4" << "votes" << 0)))));
- ASSERT_OK(config.validate());
- long long candidateId = 0;
- long long term = 2;
- OpTime lastOplogEntry = OpTime(Timestamp(999,0), 1);
-
- _requester.reset(new VoteRequester::Algorithm(config,
- candidateId,
- term,
- true, // dryRun
- lastOplogEntry));
- }
-
- };
-
- TEST_F(VoteRequesterTest, ImmediateGoodResponseWinElection) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- }
-
- TEST_F(VoteRequesterTest, BadConfigVersionWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseConfigVersionDoesNotMatch());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+using executor::NetworkInterfaceMock;
+using unittest::assertGet;
+
+using RemoteCommandRequest = RemoteCommandRequest;
+
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+
+class VoteRequesterTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1")
+ << BSON("_id" << 2 << "host"
+ << "host2")
+ << BSON("_id" << 3 << "host"
+ << "host3"
+ << "votes" << 0)
+ << BSON("_id" << 4 << "host"
+ << "host4"
+ << "votes" << 0)))));
+ ASSERT_OK(config.validate());
+ long long candidateId = 0;
+ long long term = 2;
+ OpTime lastOplogEntry = OpTime(Timestamp(999, 0), 1);
+
+ _requester.reset(new VoteRequester::Algorithm(config,
+ candidateId,
+ term,
+ false, // not a dryRun
+ lastOplogEntry));
}
- TEST_F(VoteRequesterTest, SetNameDiffersWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ virtual void tearDown() {
+ _requester.reset(NULL);
}
- TEST_F(VoteRequesterTest, LastOpTimeIsGreaterWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseLastOpTimeIsGreater());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+protected:
+ int64_t countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
}
- TEST_F(VoteRequesterTest, FailedToContactWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ bool hasReceivedSufficientResponses() {
+ return _requester->hasReceivedSufficientResponses();
}
- TEST_F(VoteRequesterTest, AlreadyVotedWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseAlreadyVoted());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
+ _requester->processResponse(request, response);
}
- TEST_F(VoteRequesterTest, StaleTermLoseElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseTermIsGreater());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::StaleTerm, getResult());
- stopCapturingLogMessages();
+ VoteRequester::VoteRequestResult getResult() {
+ return _requester->getResult();
}
- TEST_F(VoteRequesterTest, NotEnoughVotesLoseElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host2"));
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::InsufficientVotes, getResult());
- stopCapturingLogMessages();
+ RemoteCommandRequest requestFrom(std::string hostname) {
+ return RemoteCommandRequest(HostAndPort(hostname),
+ "", // fields do not matter in VoteRequester
+ BSONObj(),
+ Milliseconds(0));
}
- TEST_F(VoteRequesterDryRunTest, ImmediateGoodResponseWinElection) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ ResponseStatus badResponseStatus() {
+ return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
}
- TEST_F(VoteRequesterDryRunTest, BadConfigVersionWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseConfigVersionDoesNotMatch());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ ResponseStatus votedYes() {
+ ReplSetRequestVotesResponse response;
+ response.setOk(true);
+ response.setVoteGranted(true);
+ response.setTerm(1);
+ return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(), Milliseconds(10)));
}
- TEST_F(VoteRequesterDryRunTest, SetNameDiffersWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ ResponseStatus votedNoBecauseConfigVersionDoesNotMatch() {
+ ReplSetRequestVotesResponse response;
+ response.setOk(true);
+ response.setVoteGranted(false);
+ response.setTerm(1);
+ response.setReason("candidate's config version differs from mine");
+ return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(), Milliseconds(10)));
}
- TEST_F(VoteRequesterDryRunTest, LastOpTimeIsGreaterWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseLastOpTimeIsGreater());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ ResponseStatus votedNoBecauseSetNameDiffers() {
+ ReplSetRequestVotesResponse response;
+ response.setOk(true);
+ response.setVoteGranted(false);
+ response.setTerm(1);
+ response.setReason("candidate's set name differs from mine");
+ return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(), Milliseconds(10)));
}
- TEST_F(VoteRequesterDryRunTest, FailedToContactWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ ResponseStatus votedNoBecauseLastOpTimeIsGreater() {
+ ReplSetRequestVotesResponse response;
+ response.setOk(true);
+ response.setVoteGranted(false);
+ response.setTerm(1);
+ response.setReason("candidate's data is staler than mine");
+ return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(), Milliseconds(10)));
}
- TEST_F(VoteRequesterDryRunTest, AlreadyVotedWinElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseAlreadyVoted());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), votedYes());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
- stopCapturingLogMessages();
+ ResponseStatus votedNoBecauseTermIsGreater() {
+ ReplSetRequestVotesResponse response;
+ response.setOk(true);
+ response.setVoteGranted(false);
+ response.setTerm(3);
+ response.setReason("candidate's term is lower than mine");
+ return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(), Milliseconds(10)));
}
- TEST_F(VoteRequesterDryRunTest, StaleTermLoseElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseTermIsGreater());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::StaleTerm, getResult());
- stopCapturingLogMessages();
+ ResponseStatus votedNoBecauseAlreadyVoted() {
+ ReplSetRequestVotesResponse response;
+ response.setOk(true);
+ response.setVoteGranted(false);
+ response.setTerm(2);
+ response.setReason("already voted for another candidate this term");
+ return ResponseStatus(NetworkInterfaceMock::Response(response.toBSON(), Milliseconds(10)));
}
- TEST_F(VoteRequesterDryRunTest, NotEnoughVotesLoseElection) {
- startCapturingLogMessages();
- ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
- ASSERT_FALSE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host2"));
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(VoteRequester::InsufficientVotes, getResult());
- stopCapturingLogMessages();
+ std::unique_ptr<VoteRequester::Algorithm> _requester;
+};
+
+class VoteRequesterDryRunTest : public VoteRequesterTest {
+public:
+ virtual void setUp() {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1")
+ << BSON("_id" << 2 << "host"
+ << "host2")
+ << BSON("_id" << 3 << "host"
+ << "host3"
+ << "votes" << 0)
+ << BSON("_id" << 4 << "host"
+ << "host4"
+ << "votes" << 0)))));
+ ASSERT_OK(config.validate());
+ long long candidateId = 0;
+ long long term = 2;
+ OpTime lastOplogEntry = OpTime(Timestamp(999, 0), 1);
+
+ _requester.reset(new VoteRequester::Algorithm(config,
+ candidateId,
+ term,
+ true, // dryRun
+ lastOplogEntry));
}
+};
+
+TEST_F(VoteRequesterTest, ImmediateGoodResponseWinElection) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+}
+
+TEST_F(VoteRequesterTest, BadConfigVersionWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseConfigVersionDoesNotMatch());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterTest, SetNameDiffersWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterTest, LastOpTimeIsGreaterWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseLastOpTimeIsGreater());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterTest, FailedToContactWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterTest, AlreadyVotedWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseAlreadyVoted());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterTest, StaleTermLoseElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseTermIsGreater());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::StaleTerm, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterTest, NotEnoughVotesLoseElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host2"));
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::InsufficientVotes, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, ImmediateGoodResponseWinElection) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+}
+
+TEST_F(VoteRequesterDryRunTest, BadConfigVersionWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseConfigVersionDoesNotMatch());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, SetNameDiffersWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, LastOpTimeIsGreaterWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseLastOpTimeIsGreater());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, FailedToContactWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, AlreadyVotedWinElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseAlreadyVoted());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), votedYes());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::SuccessfullyElected, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, StaleTermLoseElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseTermIsGreater());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::StaleTerm, getResult());
+ stopCapturingLogMessages();
+}
+
+TEST_F(VoteRequesterDryRunTest, NotEnoughVotesLoseElection) {
+ startCapturingLogMessages();
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), votedNoBecauseSetNameDiffers());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got no vote from host1"));
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_EQUALS(1, countLogLinesContaining("Got failed response from host2"));
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(VoteRequester::InsufficientVotes, getResult());
+ stopCapturingLogMessages();
+}
} // namespace
} // namespace repl