author     Mark Benvenuto <mark.benvenuto@mongodb.com>  2015-07-28 18:16:39 -0400
committer  Mark Benvenuto <mark.benvenuto@mongodb.com>  2015-07-28 18:27:27 -0400
commit     b66e993f1c742518d9b5e93b0d8a5f8255a4127c
tree       55e6fed05333d2d37f34586726a342ed7f7dbc29
parent     314a22e93f283ab80e650618cbd3ed8babb8510f
download   mongo-b66e993f1c742518d9b5e93b0d8a5f8255a4127c.tar.gz
SERVER-18579: Clang-Format - reformat code, no comment reflow
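
This commit applies clang-format across src/mongo/db/repl; per the message above, comment text was deliberately left unwrapped. For orientation, here is a minimal .clang-format sketch consistent with the style visible in the hunks below (100-column limit, four-space indent, flush namespace contents and access specifiers, C++11 ">>" template brackets). The option values are inferred from the diff, not quoted from the configuration file MongoDB actually checked in.

  ---
  Language: Cpp
  BasedOnStyle: Google        # attached braces, pointer bound to the type ("Type* name")
  ColumnLimit: 100            # long call sites in the hunks below wrap at 100 columns
  IndentWidth: 4              # four-space indentation
  AccessModifierOffset: -4    # public:/private: land at column 0 inside classes
  NamespaceIndentation: None  # declarations sit flush inside namespace mongo/repl
  Standard: Cpp11             # collapses "> >" into ">>" in nested template arguments
  ...
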
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r--  src/mongo/db/repl/bgsync.cpp  806
-rw-r--r--  src/mongo/db/repl/bgsync.h  218
-rw-r--r--  src/mongo/db/repl/check_quorum_for_config_change.cpp  410
-rw-r--r--  src/mongo/db/repl/check_quorum_for_config_change.h  201
-rw-r--r--  src/mongo/db/repl/check_quorum_for_config_change_test.cpp  1436
-rw-r--r--  src/mongo/db/repl/elect_cmd_runner.cpp  204
-rw-r--r--  src/mongo/db/repl/elect_cmd_runner.h  151
-rw-r--r--  src/mongo/db/repl/elect_cmd_runner_test.cpp  665
-rw-r--r--  src/mongo/db/repl/freshness_checker.cpp  331
-rw-r--r--  src/mongo/db/repl/freshness_checker.h  225
-rw-r--r--  src/mongo/db/repl/freshness_checker_test.cpp  1820
-rw-r--r--  src/mongo/db/repl/handshake_args.cpp  118
-rw-r--r--  src/mongo/db/repl/handshake_args.h  116
-rw-r--r--  src/mongo/db/repl/heartbeat_response_action.cpp  63
-rw-r--r--  src/mongo/db/repl/heartbeat_response_action.h  158
-rw-r--r--  src/mongo/db/repl/initial_sync.cpp  33
-rw-r--r--  src/mongo/db/repl/initial_sync.h  28
-rw-r--r--  src/mongo/db/repl/is_master_response.cpp  714
-rw-r--r--  src/mongo/db/repl/is_master_response.h  293
-rw-r--r--  src/mongo/db/repl/isself.cpp  423
-rw-r--r--  src/mongo/db/repl/isself.h  48
-rw-r--r--  src/mongo/db/repl/isself_test.cpp  58
-rw-r--r--  src/mongo/db/repl/master_slave.cpp  2181
-rw-r--r--  src/mongo/db/repl/master_slave.h  283
-rw-r--r--  src/mongo/db/repl/member_config.cpp  481
-rw-r--r--  src/mongo/db/repl/member_config.h  291
-rw-r--r--  src/mongo/db/repl/member_config_test.cpp  729
-rw-r--r--  src/mongo/db/repl/member_heartbeat_data.cpp  124
-rw-r--r--  src/mongo/db/repl/member_heartbeat_data.h  172
-rw-r--r--  src/mongo/db/repl/member_state.h  140
-rw-r--r--  src/mongo/db/repl/minvalid.cpp  96
-rw-r--r--  src/mongo/db/repl/minvalid.h  58
-rw-r--r--  src/mongo/db/repl/multicmd.cpp  19
-rw-r--r--  src/mongo/db/repl/multicmd.h  75
-rw-r--r--  src/mongo/db/repl/network_interface_impl.cpp  997
-rw-r--r--  src/mongo/db/repl/network_interface_impl.h  247
-rw-r--r--  src/mongo/db/repl/network_interface_mock.cpp  675
-rw-r--r--  src/mongo/db/repl/network_interface_mock.h  561
-rw-r--r--  src/mongo/db/repl/operation_context_repl_mock.cpp  5
-rw-r--r--  src/mongo/db/repl/operation_context_repl_mock.h  30
-rw-r--r--  src/mongo/db/repl/oplog.cpp  1211
-rw-r--r--  src/mongo/db/repl/oplog.h  180
-rw-r--r--  src/mongo/db/repl/oplogreader.cpp  280
-rw-r--r--  src/mongo/db/repl/oplogreader.h  233
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_args.cpp  245
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_args.h  210
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_response.cpp  563
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_response.h  382
-rw-r--r--  src/mongo/db/repl/repl_set_heartbeat_response_test.cpp  1443
-rw-r--r--  src/mongo/db/repl/repl_set_html_summary.cpp  315
-rw-r--r--  src/mongo/db/repl/repl_set_html_summary.h  121
-rw-r--r--  src/mongo/db/repl/repl_set_seed_list.cpp  91
-rw-r--r--  src/mongo/db/repl/repl_set_seed_list.h  48
-rw-r--r--  src/mongo/db/repl/repl_settings.cpp  13
-rw-r--r--  src/mongo/db/repl/repl_settings.h  171
-rw-r--r--  src/mongo/db/repl/replica_set_config.cpp  866
-rw-r--r--  src/mongo/db/repl/replica_set_config.h  424
-rw-r--r--  src/mongo/db/repl/replica_set_config_checks.cpp  409
-rw-r--r--  src/mongo/db/repl/replica_set_config_checks.h  100
-rw-r--r--  src/mongo/db/repl/replica_set_config_checks_test.cpp  1327
-rw-r--r--  src/mongo/db/repl/replica_set_config_test.cpp  2046
-rw-r--r--  src/mongo/db/repl/replica_set_tag.cpp  374
-rw-r--r--  src/mongo/db/repl/replica_set_tag.h  481
-rw-r--r--  src/mongo/db/repl/replica_set_tag_test.cpp  238
-rw-r--r--  src/mongo/db/repl/replication_coordinator.cpp  12
-rw-r--r--  src/mongo/db/repl/replication_coordinator.h  1012
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.cpp  8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state.h  275
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp  395
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.h  94
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.cpp  184
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_mock.h  135
-rw-r--r--  src/mongo/db/repl/replication_coordinator_global.cpp  20
-rw-r--r--  src/mongo/db/repl/replication_coordinator_global.h  8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp  4078
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h  1491
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect.cpp  397
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp  683
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp  731
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp  392
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp  882
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_test.cpp  4248
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.cpp  511
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.h  178
-rw-r--r--  src/mongo/db/repl/replication_coordinator_test_fixture.cpp  383
-rw-r--r--  src/mongo/db/repl/replication_coordinator_test_fixture.h  300
-rw-r--r--  src/mongo/db/repl/replication_executor.cpp  927
-rw-r--r--  src/mongo/db/repl/replication_executor.h  1134
-rw-r--r--  src/mongo/db/repl/replication_executor_test.cpp  865
-rw-r--r--  src/mongo/db/repl/replication_info.cpp  325
-rw-r--r--  src/mongo/db/repl/replset_commands.cpp  1251
-rw-r--r--  src/mongo/db/repl/replset_commands.h  38
-rw-r--r--  src/mongo/db/repl/replset_web_handler.cpp  85
-rw-r--r--  src/mongo/db/repl/resync.cpp  166
-rw-r--r--  src/mongo/db/repl/rs_base.h  96
-rw-r--r--  src/mongo/db/repl/rs_exception.h  24
-rw-r--r--  src/mongo/db/repl/rs_initialsync.cpp  877
-rw-r--r--  src/mongo/db/repl/rs_initialsync.h  10
-rw-r--r--  src/mongo/db/repl/rs_rollback.cpp  1309
-rw-r--r--  src/mongo/db/repl/rs_rollback.h  59
-rw-r--r--  src/mongo/db/repl/rs_sync.cpp  142
-rw-r--r--  src/mongo/db/repl/rs_sync.h  8
-rw-r--r--  src/mongo/db/repl/rslog.cpp  14
-rw-r--r--  src/mongo/db/repl/rslog.h  14
-rw-r--r--  src/mongo/db/repl/scatter_gather_algorithm.cpp  2
-rw-r--r--  src/mongo/db/repl/scatter_gather_algorithm.h  66
-rw-r--r--  src/mongo/db/repl/scatter_gather_runner.cpp  188
-rw-r--r--  src/mongo/db/repl/scatter_gather_runner.h  154
-rw-r--r--  src/mongo/db/repl/scatter_gather_test.cpp  609
-rw-r--r--  src/mongo/db/repl/scoped_conn.cpp  65
-rw-r--r--  src/mongo/db/repl/scoped_conn.h  204
-rw-r--r--  src/mongo/db/repl/server.h  58
-rw-r--r--  src/mongo/db/repl/sync.cpp  180
-rw-r--r--  src/mongo/db/repl/sync.h  39
-rw-r--r--  src/mongo/db/repl/sync_source_feedback.cpp  435
-rw-r--r--  src/mongo/db/repl/sync_source_feedback.h  150
-rw-r--r--  src/mongo/db/repl/sync_tail.cpp  994
-rw-r--r--  src/mongo/db/repl/sync_tail.h  182
-rw-r--r--  src/mongo/db/repl/topology_coordinator.cpp  24
-rw-r--r--  src/mongo/db/repl/topology_coordinator.h  693
-rw-r--r--  src/mongo/db/repl/topology_coordinator_impl.cpp  3406
-rw-r--r--  src/mongo/db/repl/topology_coordinator_impl.h  692
-rw-r--r--  src/mongo/db/repl/topology_coordinator_impl_test.cpp  8211
-rw-r--r--  src/mongo/db/repl/update_position_args.cpp  169
-rw-r--r--  src/mongo/db/repl/update_position_args.h  77
125 files changed, 33931 insertions, 33882 deletions
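
Because this is a formatting-only change (33,931 insertions against 33,882 deletions, no behavioral edits), it is the kind of commit usually excluded when attributing lines with git blame. A hedged sketch of that workflow, assuming Git 2.23 or newer; the hash is the one from the commit header above:

  # Skip this commit in a single blame run.
  git blame --ignore-rev b66e993f1c742518d9b5e93b0d8a5f8255a4127c src/mongo/db/repl/bgsync.cpp

  # Or record it once so every blame in the repository skips it.
  echo b66e993f1c742518d9b5e93b0d8a5f8255a4127c >> .git-blame-ignore-revs
  git config blame.ignoreRevsFile .git-blame-ignore-revs
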
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 49ba4ac5668..f1a2b36ef33 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -52,503 +52,493 @@
namespace mongo {
- using std::string;
+using std::string;
namespace repl {
namespace {
- const char hashFieldName[] = "h";
- int SleepToAllowBatchingMillis = 2;
- const int BatchIsSmallish = 40000; // bytes
-} // namespace
-
- MONGO_FP_DECLARE(rsBgSyncProduce);
- MONGO_FP_DECLARE(stepDownWhileDrainingFailPoint);
-
- BackgroundSync* BackgroundSync::s_instance = 0;
- boost::mutex BackgroundSync::s_mutex;
-
- //The number and time spent reading batches off the network
- static TimerStats getmoreReplStats;
- static ServerStatusMetricField<TimerStats> displayBatchesRecieved(
- "repl.network.getmores",
- &getmoreReplStats );
- //The oplog entries read via the oplog reader
- static Counter64 opsReadStats;
- static ServerStatusMetricField<Counter64> displayOpsRead( "repl.network.ops",
- &opsReadStats );
- //The bytes read via the oplog reader
- static Counter64 networkByteStats;
- static ServerStatusMetricField<Counter64> displayBytesRead( "repl.network.bytes",
- &networkByteStats );
-
- //The count of items in the buffer
- static Counter64 bufferCountGauge;
- static ServerStatusMetricField<Counter64> displayBufferCount( "repl.buffer.count",
- &bufferCountGauge );
- //The size (bytes) of items in the buffer
- static Counter64 bufferSizeGauge;
- static ServerStatusMetricField<Counter64> displayBufferSize( "repl.buffer.sizeBytes",
- &bufferSizeGauge );
- //The max size (bytes) of the buffer
- static int bufferMaxSizeGauge = 256*1024*1024;
- static ServerStatusMetricField<int> displayBufferMaxSize( "repl.buffer.maxSizeBytes",
- &bufferMaxSizeGauge );
-
-
- BackgroundSyncInterface::~BackgroundSyncInterface() {}
-
- size_t getSize(const BSONObj& o) {
- // SERVER-9808 Avoid Fortify complaint about implicit signed->unsigned conversion
- return static_cast<size_t>(o.objsize());
+const char hashFieldName[] = "h";
+int SleepToAllowBatchingMillis = 2;
+const int BatchIsSmallish = 40000; // bytes
+} // namespace
+
+MONGO_FP_DECLARE(rsBgSyncProduce);
+MONGO_FP_DECLARE(stepDownWhileDrainingFailPoint);
+
+BackgroundSync* BackgroundSync::s_instance = 0;
+boost::mutex BackgroundSync::s_mutex;
+
+// The number and time spent reading batches off the network
+static TimerStats getmoreReplStats;
+static ServerStatusMetricField<TimerStats> displayBatchesRecieved("repl.network.getmores",
+ &getmoreReplStats);
+// The oplog entries read via the oplog reader
+static Counter64 opsReadStats;
+static ServerStatusMetricField<Counter64> displayOpsRead("repl.network.ops", &opsReadStats);
+// The bytes read via the oplog reader
+static Counter64 networkByteStats;
+static ServerStatusMetricField<Counter64> displayBytesRead("repl.network.bytes", &networkByteStats);
+
+// The count of items in the buffer
+static Counter64 bufferCountGauge;
+static ServerStatusMetricField<Counter64> displayBufferCount("repl.buffer.count",
+ &bufferCountGauge);
+// The size (bytes) of items in the buffer
+static Counter64 bufferSizeGauge;
+static ServerStatusMetricField<Counter64> displayBufferSize("repl.buffer.sizeBytes",
+ &bufferSizeGauge);
+// The max size (bytes) of the buffer
+static int bufferMaxSizeGauge = 256 * 1024 * 1024;
+static ServerStatusMetricField<int> displayBufferMaxSize("repl.buffer.maxSizeBytes",
+ &bufferMaxSizeGauge);
+
+
+BackgroundSyncInterface::~BackgroundSyncInterface() {}
+
+size_t getSize(const BSONObj& o) {
+ // SERVER-9808 Avoid Fortify complaint about implicit signed->unsigned conversion
+ return static_cast<size_t>(o.objsize());
+}
+
+BackgroundSync::BackgroundSync()
+ : _buffer(bufferMaxSizeGauge, &getSize),
+ _lastOpTimeFetched(std::numeric_limits<int>::max(), 0),
+ _lastAppliedHash(0),
+ _lastFetchedHash(0),
+ _pause(true),
+ _appliedBuffer(true),
+ _replCoord(getGlobalReplicationCoordinator()),
+ _initialSyncRequestedFlag(false),
+ _indexPrefetchConfig(PREFETCH_ALL) {}
+
+BackgroundSync* BackgroundSync::get() {
+ boost::unique_lock<boost::mutex> lock(s_mutex);
+ if (s_instance == NULL && !inShutdown()) {
+ s_instance = new BackgroundSync();
}
+ return s_instance;
+}
- BackgroundSync::BackgroundSync() : _buffer(bufferMaxSizeGauge, &getSize),
- _lastOpTimeFetched(std::numeric_limits<int>::max(),
- 0),
- _lastAppliedHash(0),
- _lastFetchedHash(0),
- _pause(true),
- _appliedBuffer(true),
- _replCoord(getGlobalReplicationCoordinator()),
- _initialSyncRequestedFlag(false),
- _indexPrefetchConfig(PREFETCH_ALL) {
- }
+void BackgroundSync::shutdown() {
+ boost::lock_guard<boost::mutex> lock(_mutex);
- BackgroundSync* BackgroundSync::get() {
- boost::unique_lock<boost::mutex> lock(s_mutex);
- if (s_instance == NULL && !inShutdown()) {
- s_instance = new BackgroundSync();
- }
- return s_instance;
- }
+ // Clear the buffer in case the producerThread is waiting in push() due to a full queue.
+ invariant(inShutdown());
+ _buffer.clear();
+ _pause = true;
- void BackgroundSync::shutdown() {
- boost::lock_guard<boost::mutex> lock(_mutex);
+ // Wake up producerThread so it notices that we're in shutdown
+ _appliedBufferCondition.notify_all();
+ _pausedCondition.notify_all();
+}
- // Clear the buffer in case the producerThread is waiting in push() due to a full queue.
- invariant(inShutdown());
- _buffer.clear();
- _pause = true;
+void BackgroundSync::notify(OperationContext* txn) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
- // Wake up producerThread so it notices that we're in shutdown
+ // If all ops in the buffer have been applied, unblock waitForRepl (if it's waiting)
+ if (_buffer.empty()) {
+ _appliedBuffer = true;
_appliedBufferCondition.notify_all();
- _pausedCondition.notify_all();
}
+}
- void BackgroundSync::notify(OperationContext* txn) {
- boost::lock_guard<boost::mutex> lock(_mutex);
+void BackgroundSync::producerThread() {
+ Client::initThread("rsBackgroundSync");
+ cc().getAuthorizationSession()->grantInternalAuthorization();
- // If all ops in the buffer have been applied, unblock waitForRepl (if it's waiting)
- if (_buffer.empty()) {
- _appliedBuffer = true;
- _appliedBufferCondition.notify_all();
+ while (!inShutdown()) {
+ try {
+ _producerThread();
+ } catch (const DBException& e) {
+ std::string msg(str::stream() << "sync producer problem: " << e.toString());
+ error() << msg;
+ _replCoord->setMyHeartbeatMessage(msg);
+ } catch (const std::exception& e2) {
+ severe() << "sync producer exception: " << e2.what();
+ fassertFailed(28546);
}
}
- void BackgroundSync::producerThread() {
- Client::initThread("rsBackgroundSync");
- cc().getAuthorizationSession()->grantInternalAuthorization();
+ cc().shutdown();
+}
- while (!inShutdown()) {
- try {
- _producerThread();
- }
- catch (const DBException& e) {
- std::string msg(str::stream() << "sync producer problem: " << e.toString());
- error() << msg;
- _replCoord->setMyHeartbeatMessage(msg);
- }
- catch (const std::exception& e2) {
- severe() << "sync producer exception: " << e2.what();
- fassertFailed(28546);
- }
+void BackgroundSync::_producerThread() {
+ const MemberState state = _replCoord->getMemberState();
+ // we want to pause when the state changes to primary
+ if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
+ if (!_pause) {
+ stop();
}
+ sleepsecs(1);
+ return;
+ }
- cc().shutdown();
+ // TODO(spencer): Use a condition variable to await loading a config.
+ if (state.startup()) {
+ // Wait for a config to be loaded
+ sleepsecs(1);
+ return;
}
- void BackgroundSync::_producerThread() {
- const MemberState state = _replCoord->getMemberState();
- // we want to pause when the state changes to primary
- if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
- if (!_pause) {
- stop();
- }
- sleepsecs(1);
- return;
- }
+ OperationContextImpl txn;
- // TODO(spencer): Use a condition variable to await loading a config.
- if (state.startup()) {
- // Wait for a config to be loaded
- sleepsecs(1);
- return;
- }
+ // We need to wait until initial sync has started.
+ if (_replCoord->getMyLastOptime().isNull()) {
+ sleepsecs(1);
+ return;
+ }
+ // we want to unpause when we're no longer primary
+ // start() also loads _lastOpTimeFetched, which we know is set from the "if"
+ else if (_pause) {
+ start(&txn);
+ }
- OperationContextImpl txn;
+ produce(&txn);
+}
- // We need to wait until initial sync has started.
- if (_replCoord->getMyLastOptime().isNull()) {
+void BackgroundSync::produce(OperationContext* txn) {
+ // this oplog reader does not do a handshake because we don't want the server it's syncing
+ // from to track how far it has synced
+ {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ if (_lastOpTimeFetched.isNull()) {
+ // then we're initial syncing and we're still waiting for this to be set
+ lock.unlock();
sleepsecs(1);
+ // if there is no one to sync from
return;
}
- // we want to unpause when we're no longer primary
- // start() also loads _lastOpTimeFetched, which we know is set from the "if"
- else if (_pause) {
- start(&txn);
- }
-
- produce(&txn);
- }
-
- void BackgroundSync::produce(OperationContext* txn) {
- // this oplog reader does not do a handshake because we don't want the server it's syncing
- // from to track how far it has synced
- {
- boost::unique_lock<boost::mutex> lock(_mutex);
- if (_lastOpTimeFetched.isNull()) {
- // then we're initial syncing and we're still waiting for this to be set
- lock.unlock();
- sleepsecs(1);
- // if there is no one to sync from
- return;
- }
-
- if (_replCoord->isWaitingForApplierToDrain() ||
- _replCoord->getMemberState().primary() ||
- inShutdownStrict()) {
- return;
- }
- }
-
- while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
- sleepmillis(0);
- }
-
- // find a target to sync from the last optime fetched
- OpTime lastOpTimeFetched;
- {
- boost::unique_lock<boost::mutex> lock(_mutex);
- lastOpTimeFetched = _lastOpTimeFetched;
- _syncSourceHost = HostAndPort();
+ if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary() ||
+ inShutdownStrict()) {
+ return;
}
- _syncSourceReader.resetConnection();
- _syncSourceReader.connectToSyncSource(txn, lastOpTimeFetched, _replCoord);
+ }
- {
- boost::unique_lock<boost::mutex> lock(_mutex);
- // no server found
- if (_syncSourceReader.getHost().empty()) {
- lock.unlock();
- sleepsecs(1);
- // if there is no one to sync from
- return;
- }
- lastOpTimeFetched = _lastOpTimeFetched;
- _syncSourceHost = _syncSourceReader.getHost();
- _replCoord->signalUpstreamUpdater();
- }
+ while (MONGO_FAIL_POINT(rsBgSyncProduce)) {
+ sleepmillis(0);
+ }
- _syncSourceReader.tailingQueryGTE(rsoplog, lastOpTimeFetched);
- // if target cut connections between connecting and querying (for
- // example, because it stepped down) we might not have a cursor
- if (!_syncSourceReader.haveCursor()) {
- return;
- }
+ // find a target to sync from the last optime fetched
+ OpTime lastOpTimeFetched;
+ {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ lastOpTimeFetched = _lastOpTimeFetched;
+ _syncSourceHost = HostAndPort();
+ }
+ _syncSourceReader.resetConnection();
+ _syncSourceReader.connectToSyncSource(txn, lastOpTimeFetched, _replCoord);
- if (_rollbackIfNeeded(txn, _syncSourceReader)) {
- stop();
+ {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ // no server found
+ if (_syncSourceReader.getHost().empty()) {
+ lock.unlock();
+ sleepsecs(1);
+ // if there is no one to sync from
return;
}
+ lastOpTimeFetched = _lastOpTimeFetched;
+ _syncSourceHost = _syncSourceReader.getHost();
+ _replCoord->signalUpstreamUpdater();
+ }
- while (!inShutdown()) {
- if (!_syncSourceReader.moreInCurrentBatch()) {
- // Check some things periodically
- // (whenever we run out of items in the
- // current cursor batch)
-
- int bs = _syncSourceReader.currentBatchMessageSize();
- if( bs > 0 && bs < BatchIsSmallish ) {
- // on a very low latency network, if we don't wait a little, we'll be
- // getting ops to write almost one at a time. this will both be expensive
- // for the upstream server as well as potentially defeating our parallel
- // application of batches on the secondary.
- //
- // the inference here is basically if the batch is really small, we are
- // "caught up".
- //
- sleepmillis(SleepToAllowBatchingMillis);
- }
-
- // If we are transitioning to primary state, we need to leave
- // this loop in order to go into bgsync-pause mode.
- if (_replCoord->isWaitingForApplierToDrain() ||
- _replCoord->getMemberState().primary()) {
- return;
- }
-
- // re-evaluate quality of sync target
- if (shouldChangeSyncSource()) {
- return;
- }
+ _syncSourceReader.tailingQueryGTE(rsoplog, lastOpTimeFetched);
- {
- //record time for each getmore
- TimerHolder batchTimer(&getmoreReplStats);
-
- // This calls receiveMore() on the oplogreader cursor.
- // It can wait up to five seconds for more data.
- _syncSourceReader.more();
- }
- networkByteStats.increment(_syncSourceReader.currentBatchMessageSize());
-
- if (!_syncSourceReader.moreInCurrentBatch()) {
- // If there is still no data from upstream, check a few more things
- // and then loop back for another pass at getting more data
- {
- boost::unique_lock<boost::mutex> lock(_mutex);
- if (_pause) {
- return;
- }
- }
+ // if target cut connections between connecting and querying (for
+ // example, because it stepped down) we might not have a cursor
+ if (!_syncSourceReader.haveCursor()) {
+ return;
+ }
- _syncSourceReader.tailCheck();
- if( !_syncSourceReader.haveCursor() ) {
- LOG(1) << "replSet end syncTail pass";
- return;
- }
+ if (_rollbackIfNeeded(txn, _syncSourceReader)) {
+ stop();
+ return;
+ }
- continue;
- }
+ while (!inShutdown()) {
+ if (!_syncSourceReader.moreInCurrentBatch()) {
+ // Check some things periodically
+ // (whenever we run out of items in the
+ // current cursor batch)
+
+ int bs = _syncSourceReader.currentBatchMessageSize();
+ if (bs > 0 && bs < BatchIsSmallish) {
+ // on a very low latency network, if we don't wait a little, we'll be
+ // getting ops to write almost one at a time. this will both be expensive
+ // for the upstream server as well as potentially defeating our parallel
+ // application of batches on the secondary.
+ //
+ // the inference here is basically if the batch is really small, we are
+ // "caught up".
+ //
+ sleepmillis(SleepToAllowBatchingMillis);
}
// If we are transitioning to primary state, we need to leave
// this loop in order to go into bgsync-pause mode.
if (_replCoord->isWaitingForApplierToDrain() ||
_replCoord->getMemberState().primary()) {
- LOG(1) << "waiting for draining or we are primary, not adding more ops to buffer";
return;
}
- // At this point, we are guaranteed to have at least one thing to read out
- // of the oplogreader cursor.
- BSONObj o = _syncSourceReader.nextSafe().getOwned();
- opsReadStats.increment();
-
-
- if (MONGO_FAIL_POINT(stepDownWhileDrainingFailPoint)) {
- sleepsecs(20);
- }
+ // re-evaluate quality of sync target
+ if (shouldChangeSyncSource()) {
+ return;
+ }
{
- boost::unique_lock<boost::mutex> lock(_mutex);
- _appliedBuffer = false;
- }
+ // record time for each getmore
+ TimerHolder batchTimer(&getmoreReplStats);
- OCCASIONALLY {
- LOG(2) << "bgsync buffer has " << _buffer.size() << " bytes";
+ // This calls receiveMore() on the oplogreader cursor.
+ // It can wait up to five seconds for more data.
+ _syncSourceReader.more();
}
+ networkByteStats.increment(_syncSourceReader.currentBatchMessageSize());
- bufferCountGauge.increment();
- bufferSizeGauge.increment(getSize(o));
- _buffer.push(o);
+ if (!_syncSourceReader.moreInCurrentBatch()) {
+ // If there is still no data from upstream, check a few more things
+ // and then loop back for another pass at getting more data
+ {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ if (_pause) {
+ return;
+ }
+ }
- {
- boost::unique_lock<boost::mutex> lock(_mutex);
- _lastFetchedHash = o["h"].numberLong();
- _lastOpTimeFetched = o["ts"]._opTime();
- LOG(3) << "replSet lastOpTimeFetched: " << _lastOpTimeFetched.toStringPretty();
+ _syncSourceReader.tailCheck();
+ if (!_syncSourceReader.haveCursor()) {
+ LOG(1) << "replSet end syncTail pass";
+ return;
+ }
+
+ continue;
}
}
- }
- bool BackgroundSync::shouldChangeSyncSource() {
- // is it even still around?
- if (getSyncTarget().empty() || _syncSourceReader.getHost().empty()) {
- return true;
+ // If we are transitioning to primary state, we need to leave
+ // this loop in order to go into bgsync-pause mode.
+ if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) {
+ LOG(1) << "waiting for draining or we are primary, not adding more ops to buffer";
+ return;
}
- // check other members: is any member's optime more than MaxSyncSourceLag seconds
- // ahead of the current sync source?
- return _replCoord->shouldChangeSyncSource(_syncSourceReader.getHost());
- }
-
-
- bool BackgroundSync::peek(BSONObj* op) {
- return _buffer.peek(*op);
- }
-
- void BackgroundSync::waitForMore() {
- BSONObj op;
- // Block for one second before timing out.
- // Ignore the value of the op we peeked at.
- _buffer.blockingPeek(op, 1);
- }
+ // At this point, we are guaranteed to have at least one thing to read out
+ // of the oplogreader cursor.
+ BSONObj o = _syncSourceReader.nextSafe().getOwned();
+ opsReadStats.increment();
- void BackgroundSync::consume() {
- // this is just to get the op off the queue, it's been peeked at
- // and queued for application already
- BSONObj op = _buffer.blockingPop();
- bufferCountGauge.decrement(1);
- bufferSizeGauge.decrement(getSize(op));
- }
- bool BackgroundSync::_rollbackIfNeeded(OperationContext* txn, OplogReader& r) {
- string hn = r.conn()->getServerAddress();
+ if (MONGO_FAIL_POINT(stepDownWhileDrainingFailPoint)) {
+ sleepsecs(20);
+ }
- if (!r.more()) {
- try {
- BSONObj theirLastOp = r.getLastOp(rsoplog);
- if (theirLastOp.isEmpty()) {
- log() << "replSet error empty query result from " << hn << " oplog";
- sleepsecs(2);
- return true;
- }
- OpTime theirTS = theirLastOp["ts"]._opTime();
- if (theirTS < _lastOpTimeFetched) {
- log() << "replSet we are ahead of the sync source, will try to roll back";
- syncRollback(txn, _replCoord->getMyLastOptime(), &r, _replCoord);
- return true;
- }
- /* we're not ahead? maybe our new query got fresher data. best to come back and try again */
- log() << "replSet syncTail condition 1";
- sleepsecs(1);
- }
- catch(DBException& e) {
- log() << "replSet error querying " << hn << ' ' << e.toString();
- sleepsecs(2);
- }
- return true;
+ {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ _appliedBuffer = false;
}
- BSONObj o = r.nextSafe();
- OpTime ts = o["ts"]._opTime();
- long long hash = o["h"].numberLong();
- if( ts != _lastOpTimeFetched || hash != _lastFetchedHash ) {
- log() << "replSet our last op time fetched: " << _lastOpTimeFetched.toStringPretty();
- log() << "replset source's GTE: " << ts.toStringPretty();
- syncRollback(txn, _replCoord->getMyLastOptime(), &r, _replCoord);
- return true;
+ OCCASIONALLY {
+ LOG(2) << "bgsync buffer has " << _buffer.size() << " bytes";
}
- return false;
- }
+ bufferCountGauge.increment();
+ bufferSizeGauge.increment(getSize(o));
+ _buffer.push(o);
- HostAndPort BackgroundSync::getSyncTarget() {
- boost::unique_lock<boost::mutex> lock(_mutex);
- return _syncSourceHost;
+ {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ _lastFetchedHash = o["h"].numberLong();
+ _lastOpTimeFetched = o["ts"]._opTime();
+ LOG(3) << "replSet lastOpTimeFetched: " << _lastOpTimeFetched.toStringPretty();
+ }
}
+}
- void BackgroundSync::clearSyncTarget() {
- boost::unique_lock<boost::mutex> lock(_mutex);
- _syncSourceHost = HostAndPort();
+bool BackgroundSync::shouldChangeSyncSource() {
+ // is it even still around?
+ if (getSyncTarget().empty() || _syncSourceReader.getHost().empty()) {
+ return true;
}
- void BackgroundSync::stop() {
- boost::lock_guard<boost::mutex> lock(_mutex);
+ // check other members: is any member's optime more than MaxSyncSourceLag seconds
+ // ahead of the current sync source?
+ return _replCoord->shouldChangeSyncSource(_syncSourceReader.getHost());
+}
- _pause = true;
- _syncSourceHost = HostAndPort();
- _lastOpTimeFetched = OpTime(0,0);
- _lastFetchedHash = 0;
- _appliedBufferCondition.notify_all();
- _pausedCondition.notify_all();
- }
- void BackgroundSync::start(OperationContext* txn) {
- massert(16235, "going to start syncing, but buffer is not empty", _buffer.empty());
+bool BackgroundSync::peek(BSONObj* op) {
+ return _buffer.peek(*op);
+}
- long long updatedLastAppliedHash = _readLastAppliedHash(txn);
- boost::lock_guard<boost::mutex> lk(_mutex);
- _pause = false;
+void BackgroundSync::waitForMore() {
+ BSONObj op;
+ // Block for one second before timing out.
+ // Ignore the value of the op we peeked at.
+ _buffer.blockingPeek(op, 1);
+}
- // reset _last fields with current oplog data
- _lastAppliedHash = updatedLastAppliedHash;
- _lastOpTimeFetched = _replCoord->getMyLastOptime();
- _lastFetchedHash = _lastAppliedHash;
+void BackgroundSync::consume() {
+ // this is just to get the op off the queue, it's been peeked at
+ // and queued for application already
+ BSONObj op = _buffer.blockingPop();
+ bufferCountGauge.decrement(1);
+ bufferSizeGauge.decrement(getSize(op));
+}
- LOG(1) << "replset bgsync fetch queue set to: " << _lastOpTimeFetched <<
- " " << _lastFetchedHash;
- }
+bool BackgroundSync::_rollbackIfNeeded(OperationContext* txn, OplogReader& r) {
+ string hn = r.conn()->getServerAddress();
- void BackgroundSync::waitUntilPaused() {
- boost::unique_lock<boost::mutex> lock(_mutex);
- while (!_pause) {
- _pausedCondition.wait(lock);
+ if (!r.more()) {
+ try {
+ BSONObj theirLastOp = r.getLastOp(rsoplog);
+ if (theirLastOp.isEmpty()) {
+ log() << "replSet error empty query result from " << hn << " oplog";
+ sleepsecs(2);
+ return true;
+ }
+ OpTime theirTS = theirLastOp["ts"]._opTime();
+ if (theirTS < _lastOpTimeFetched) {
+ log() << "replSet we are ahead of the sync source, will try to roll back";
+ syncRollback(txn, _replCoord->getMyLastOptime(), &r, _replCoord);
+ return true;
+ }
+ /* we're not ahead? maybe our new query got fresher data. best to come back and try again */
+ log() << "replSet syncTail condition 1";
+ sleepsecs(1);
+ } catch (DBException& e) {
+ log() << "replSet error querying " << hn << ' ' << e.toString();
+ sleepsecs(2);
}
+ return true;
}
- long long BackgroundSync::getLastAppliedHash() const {
- boost::lock_guard<boost::mutex> lck(_mutex);
- return _lastAppliedHash;
+ BSONObj o = r.nextSafe();
+ OpTime ts = o["ts"]._opTime();
+ long long hash = o["h"].numberLong();
+ if (ts != _lastOpTimeFetched || hash != _lastFetchedHash) {
+ log() << "replSet our last op time fetched: " << _lastOpTimeFetched.toStringPretty();
+ log() << "replset source's GTE: " << ts.toStringPretty();
+ syncRollback(txn, _replCoord->getMyLastOptime(), &r, _replCoord);
+ return true;
}
- void BackgroundSync::clearBuffer() {
- _buffer.clear();
+ return false;
+}
+
+HostAndPort BackgroundSync::getSyncTarget() {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ return _syncSourceHost;
+}
+
+void BackgroundSync::clearSyncTarget() {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ _syncSourceHost = HostAndPort();
+}
+
+void BackgroundSync::stop() {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+
+ _pause = true;
+ _syncSourceHost = HostAndPort();
+ _lastOpTimeFetched = OpTime(0, 0);
+ _lastFetchedHash = 0;
+ _appliedBufferCondition.notify_all();
+ _pausedCondition.notify_all();
+}
+
+void BackgroundSync::start(OperationContext* txn) {
+ massert(16235, "going to start syncing, but buffer is not empty", _buffer.empty());
+
+ long long updatedLastAppliedHash = _readLastAppliedHash(txn);
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _pause = false;
+
+ // reset _last fields with current oplog data
+ _lastAppliedHash = updatedLastAppliedHash;
+ _lastOpTimeFetched = _replCoord->getMyLastOptime();
+ _lastFetchedHash = _lastAppliedHash;
+
+ LOG(1) << "replset bgsync fetch queue set to: " << _lastOpTimeFetched << " "
+ << _lastFetchedHash;
+}
+
+void BackgroundSync::waitUntilPaused() {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ while (!_pause) {
+ _pausedCondition.wait(lock);
}
-
- void BackgroundSync::setLastAppliedHash(long long newHash) {
- boost::lock_guard<boost::mutex> lck(_mutex);
- _lastAppliedHash = newHash;
+}
+
+long long BackgroundSync::getLastAppliedHash() const {
+ boost::lock_guard<boost::mutex> lck(_mutex);
+ return _lastAppliedHash;
+}
+
+void BackgroundSync::clearBuffer() {
+ _buffer.clear();
+}
+
+void BackgroundSync::setLastAppliedHash(long long newHash) {
+ boost::lock_guard<boost::mutex> lck(_mutex);
+ _lastAppliedHash = newHash;
+}
+
+void BackgroundSync::loadLastAppliedHash(OperationContext* txn) {
+ long long result = _readLastAppliedHash(txn);
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _lastAppliedHash = result;
+}
+
+long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
+ BSONObj oplogEntry;
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_X);
+ bool success = Helpers::getLast(txn, rsoplog, oplogEntry);
+ if (!success) {
+ // This can happen when we are to do an initial sync. lastHash will be set
+ // after the initial sync is complete.
+ return 0;
+ }
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "readLastAppliedHash", rsoplog);
+ } catch (const DBException& ex) {
+ severe() << "Problem reading " << rsoplog << ": " << ex.toStatus();
+ fassertFailed(18904);
}
-
- void BackgroundSync::loadLastAppliedHash(OperationContext* txn) {
- long long result = _readLastAppliedHash(txn);
- boost::lock_guard<boost::mutex> lk(_mutex);
- _lastAppliedHash = result;
+ BSONElement hashElement = oplogEntry[hashFieldName];
+ if (hashElement.eoo()) {
+ severe() << "Most recent entry in " << rsoplog << " missing \"" << hashFieldName
+ << "\" field";
+ fassertFailed(18902);
}
-
- long long BackgroundSync::_readLastAppliedHash(OperationContext* txn) {
- BSONObj oplogEntry;
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lk(txn->lockState(), "local", MODE_X);
- bool success = Helpers::getLast(txn, rsoplog, oplogEntry);
- if (!success) {
- // This can happen when we are to do an initial sync. lastHash will be set
- // after the initial sync is complete.
- return 0;
- }
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "readLastAppliedHash", rsoplog);
- }
- catch (const DBException& ex) {
- severe() << "Problem reading " << rsoplog << ": " << ex.toStatus();
- fassertFailed(18904);
- }
- BSONElement hashElement = oplogEntry[hashFieldName];
- if (hashElement.eoo()) {
- severe() << "Most recent entry in " << rsoplog << " missing \"" << hashFieldName <<
- "\" field";
- fassertFailed(18902);
- }
- if (hashElement.type() != NumberLong) {
- severe() << "Expected type of \"" << hashFieldName << "\" in most recent " <<
- rsoplog << " entry to have type NumberLong, but found " <<
- typeName(hashElement.type());
- fassertFailed(18903);
- }
- return hashElement.safeNumberLong();
+ if (hashElement.type() != NumberLong) {
+ severe() << "Expected type of \"" << hashFieldName << "\" in most recent " << rsoplog
+ << " entry to have type NumberLong, but found " << typeName(hashElement.type());
+ fassertFailed(18903);
}
+ return hashElement.safeNumberLong();
+}
- bool BackgroundSync::getInitialSyncRequestedFlag() {
- boost::lock_guard<boost::mutex> lock(_initialSyncMutex);
- return _initialSyncRequestedFlag;
- }
+bool BackgroundSync::getInitialSyncRequestedFlag() {
+ boost::lock_guard<boost::mutex> lock(_initialSyncMutex);
+ return _initialSyncRequestedFlag;
+}
- void BackgroundSync::setInitialSyncRequestedFlag(bool value) {
- boost::lock_guard<boost::mutex> lock(_initialSyncMutex);
- _initialSyncRequestedFlag = value;
- }
+void BackgroundSync::setInitialSyncRequestedFlag(bool value) {
+ boost::lock_guard<boost::mutex> lock(_initialSyncMutex);
+ _initialSyncRequestedFlag = value;
+}
- void BackgroundSync::pushTestOpToBuffer(const BSONObj& op) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- _buffer.push(op);
- }
+void BackgroundSync::pushTestOpToBuffer(const BSONObj& op) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ _buffer.push(op);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
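
Every hunk in bgsync.cpp above follows the same mechanical pattern, shown here as a toy C++ sketch (syncOnce is a hypothetical declaration, not taken from this file):

  // Before SERVER-18579: one indent level per enclosing namespace.
  namespace mongo {
      namespace repl {
          void syncOnce();
      } // namespace repl
  } // namespace mongo

  // After: namespace contents start at column 0, and the closing braces carry
  // "// namespace ..." trailers preceded by two spaces.
  namespace mongo {
  namespace repl {
  void syncOnce();
  }  // namespace repl
  }  // namespace mongo
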
diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h
index 2952879f246..c4d7212413d 100644
--- a/src/mongo/db/repl/bgsync.h
+++ b/src/mongo/db/repl/bgsync.h
@@ -37,154 +37,152 @@
namespace mongo {
namespace repl {
- class Member;
- class ReplicationCoordinator;
+class Member;
+class ReplicationCoordinator;
- // This interface exists to facilitate easier testing;
- // the test infrastructure implements these functions with stubs.
- class BackgroundSyncInterface {
- public:
- virtual ~BackgroundSyncInterface();
+// This interface exists to facilitate easier testing;
+// the test infrastructure implements these functions with stubs.
+class BackgroundSyncInterface {
+public:
+ virtual ~BackgroundSyncInterface();
- // Gets the head of the buffer, but does not remove it.
- // Returns true if an element was present at the head;
- // false if the queue was empty.
- virtual bool peek(BSONObj* op) = 0;
+ // Gets the head of the buffer, but does not remove it.
+ // Returns true if an element was present at the head;
+ // false if the queue was empty.
+ virtual bool peek(BSONObj* op) = 0;
- // Deletes objects in the queue;
- // called by sync thread after it has applied an op
- virtual void consume() = 0;
+ // Deletes objects in the queue;
+ // called by sync thread after it has applied an op
+ virtual void consume() = 0;
- // wait up to 1 second for more ops to appear
- virtual void waitForMore() = 0;
- };
+ // wait up to 1 second for more ops to appear
+ virtual void waitForMore() = 0;
+};
- /**
- * Lock order:
- * 1. rslock
- * 2. rwlock
- * 3. BackgroundSync::_mutex
- */
- class BackgroundSync : public BackgroundSyncInterface {
- public:
- // Allow index prefetching to be turned on/off
- enum IndexPrefetchConfig {
- PREFETCH_NONE=0, PREFETCH_ID_ONLY=1, PREFETCH_ALL=2
- };
+/**
+ * Lock order:
+ * 1. rslock
+ * 2. rwlock
+ * 3. BackgroundSync::_mutex
+ */
+class BackgroundSync : public BackgroundSyncInterface {
+public:
+ // Allow index prefetching to be turned on/off
+ enum IndexPrefetchConfig { PREFETCH_NONE = 0, PREFETCH_ID_ONLY = 1, PREFETCH_ALL = 2 };
- static BackgroundSync* get();
+ static BackgroundSync* get();
- // stop syncing (when this node becomes a primary, e.g.)
- void stop();
+ // stop syncing (when this node becomes a primary, e.g.)
+ void stop();
- void shutdown();
- void notify(OperationContext* txn);
+ void shutdown();
+ void notify(OperationContext* txn);
- // Blocks until _pause becomes true from a call to stop() or shutdown()
- void waitUntilPaused();
+ // Blocks until _pause becomes true from a call to stop() or shutdown()
+ void waitUntilPaused();
- virtual ~BackgroundSync() {}
+ virtual ~BackgroundSync() {}
- // starts the producer thread
- void producerThread();
- // starts the sync target notifying thread
- void notifierThread();
+ // starts the producer thread
+ void producerThread();
+ // starts the sync target notifying thread
+ void notifierThread();
- HostAndPort getSyncTarget();
+ HostAndPort getSyncTarget();
- // Interface implementation
+ // Interface implementation
- virtual bool peek(BSONObj* op);
- virtual void consume();
- virtual void clearSyncTarget();
- virtual void waitForMore();
+ virtual bool peek(BSONObj* op);
+ virtual void consume();
+ virtual void clearSyncTarget();
+ virtual void waitForMore();
- // For monitoring
- BSONObj getCounters();
+ // For monitoring
+ BSONObj getCounters();
- long long getLastAppliedHash() const;
- void setLastAppliedHash(long long oldH);
- void loadLastAppliedHash(OperationContext* txn);
+ long long getLastAppliedHash() const;
+ void setLastAppliedHash(long long oldH);
+ void loadLastAppliedHash(OperationContext* txn);
- // Clears any fetched and buffered oplog entries.
- void clearBuffer();
+ // Clears any fetched and buffered oplog entries.
+ void clearBuffer();
- bool getInitialSyncRequestedFlag();
- void setInitialSyncRequestedFlag(bool value);
+ bool getInitialSyncRequestedFlag();
+ void setInitialSyncRequestedFlag(bool value);
- void setIndexPrefetchConfig(const IndexPrefetchConfig cfg) {
- _indexPrefetchConfig = cfg;
- }
+ void setIndexPrefetchConfig(const IndexPrefetchConfig cfg) {
+ _indexPrefetchConfig = cfg;
+ }
- IndexPrefetchConfig getIndexPrefetchConfig() {
- return _indexPrefetchConfig;
- }
+ IndexPrefetchConfig getIndexPrefetchConfig() {
+ return _indexPrefetchConfig;
+ }
- // Testing related stuff
- void pushTestOpToBuffer(const BSONObj& op);
- private:
- static BackgroundSync *s_instance;
- // protects creation of s_instance
- static boost::mutex s_mutex;
+ // Testing related stuff
+ void pushTestOpToBuffer(const BSONObj& op);
- // Production thread
- BlockingQueue<BSONObj> _buffer;
- OplogReader _syncSourceReader;
+private:
+ static BackgroundSync* s_instance;
+ // protects creation of s_instance
+ static boost::mutex s_mutex;
- // _mutex protects all of the class variables except _syncSourceReader and _buffer
- mutable boost::mutex _mutex;
+ // Production thread
+ BlockingQueue<BSONObj> _buffer;
+ OplogReader _syncSourceReader;
- OpTime _lastOpTimeFetched;
+ // _mutex protects all of the class variables except _syncSourceReader and _buffer
+ mutable boost::mutex _mutex;
- // lastAppliedHash is used to generate a new hash for the following op, when primary.
- long long _lastAppliedHash;
- // lastFetchedHash is used to match ops to determine if we need to rollback, when
- // a secondary.
- long long _lastFetchedHash;
+ OpTime _lastOpTimeFetched;
- // if produce thread should be running
- bool _pause;
- boost::condition _pausedCondition;
- bool _appliedBuffer;
- boost::condition _appliedBufferCondition;
+ // lastAppliedHash is used to generate a new hash for the following op, when primary.
+ long long _lastAppliedHash;
+ // lastFetchedHash is used to match ops to determine if we need to rollback, when
+ // a secondary.
+ long long _lastFetchedHash;
- HostAndPort _syncSourceHost;
+ // if produce thread should be running
+ bool _pause;
+ boost::condition _pausedCondition;
+ bool _appliedBuffer;
+ boost::condition _appliedBufferCondition;
- BackgroundSync();
- BackgroundSync(const BackgroundSync& s);
- BackgroundSync operator=(const BackgroundSync& s);
+ HostAndPort _syncSourceHost;
- // Production thread
- void _producerThread();
- // Adds elements to the list, up to maxSize.
- void produce(OperationContext* txn);
- // Checks the criteria for rolling back and executes a rollback if warranted.
- bool _rollbackIfNeeded(OperationContext* txn, OplogReader& r);
+ BackgroundSync();
+ BackgroundSync(const BackgroundSync& s);
+ BackgroundSync operator=(const BackgroundSync& s);
- // Evaluate if the current sync target is still good
- bool shouldChangeSyncSource();
+ // Production thread
+ void _producerThread();
+ // Adds elements to the list, up to maxSize.
+ void produce(OperationContext* txn);
+ // Checks the criteria for rolling back and executes a rollback if warranted.
+ bool _rollbackIfNeeded(OperationContext* txn, OplogReader& r);
- // restart syncing
- void start(OperationContext* txn);
+ // Evaluate if the current sync target is still good
+ bool shouldChangeSyncSource();
- long long _readLastAppliedHash(OperationContext* txn);
+ // restart syncing
+ void start(OperationContext* txn);
- // A pointer to the replication coordinator running the show.
- ReplicationCoordinator* _replCoord;
+ long long _readLastAppliedHash(OperationContext* txn);
- // bool for indicating resync need on this node and the mutex that protects it
- // The resync command sets this flag; the Applier thread observes and clears it.
- bool _initialSyncRequestedFlag;
- boost::mutex _initialSyncMutex;
+ // A pointer to the replication coordinator running the show.
+ ReplicationCoordinator* _replCoord;
- // This setting affects the Applier prefetcher behavior.
- IndexPrefetchConfig _indexPrefetchConfig;
+ // bool for indicating resync need on this node and the mutex that protects it
+ // The resync command sets this flag; the Applier thread observes and clears it.
+ bool _initialSyncRequestedFlag;
+ boost::mutex _initialSyncMutex;
- };
+ // This setting affects the Applier prefetcher behavior.
+ IndexPrefetchConfig _indexPrefetchConfig;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
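
The header diff shows the same rule applied inside class bodies: access specifiers are outdented to the class brace while members keep one four-space level, consistent with AccessModifierOffset: -4 (inferred from these hunks, not quoted from MongoDB's configuration). Condensed from the declarations above:

  class BackgroundSync : public BackgroundSyncInterface {
  public:
      static BackgroundSync* get();

  private:
      static BackgroundSync* s_instance;
  };
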
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.cpp b/src/mongo/db/repl/check_quorum_for_config_change.cpp
index 7064b9a473d..6a9bbdf510a 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change.cpp
@@ -45,256 +45,248 @@
namespace mongo {
namespace repl {
- QuorumChecker::QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex)
- : _rsConfig(rsConfig),
- _myIndex(myIndex),
- _numResponses(1), // We "responded" to ourself already.
- _numElectable(0),
- _vetoStatus(Status::OK()),
- _finalStatus(ErrorCodes::CallbackCanceled, "Quorum check canceled") {
-
- invariant(myIndex < _rsConfig->getNumMembers());
- const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
-
- if (myConfig.isVoter()) {
- _voters.push_back(myConfig.getHostAndPort());
- }
- if (myConfig.isElectable()) {
- _numElectable = 1;
- }
-
- if (hasReceivedSufficientResponses()) {
- _onQuorumCheckComplete();
- }
+QuorumChecker::QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex)
+ : _rsConfig(rsConfig),
+ _myIndex(myIndex),
+ _numResponses(1), // We "responded" to ourself already.
+ _numElectable(0),
+ _vetoStatus(Status::OK()),
+ _finalStatus(ErrorCodes::CallbackCanceled, "Quorum check canceled") {
+ invariant(myIndex < _rsConfig->getNumMembers());
+ const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
+
+ if (myConfig.isVoter()) {
+ _voters.push_back(myConfig.getHostAndPort());
+ }
+ if (myConfig.isElectable()) {
+ _numElectable = 1;
}
- QuorumChecker::~QuorumChecker() {}
+ if (hasReceivedSufficientResponses()) {
+ _onQuorumCheckComplete();
+ }
+}
- std::vector<ReplicationExecutor::RemoteCommandRequest> QuorumChecker::getRequests() const {
- const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
- const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
+QuorumChecker::~QuorumChecker() {}
- std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
- if (hasReceivedSufficientResponses()) {
- return requests;
- }
+std::vector<ReplicationExecutor::RemoteCommandRequest> QuorumChecker::getRequests() const {
+ const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
+ const MemberConfig& myConfig = _rsConfig->getMemberAt(_myIndex);
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName(_rsConfig->getReplSetName());
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(_rsConfig->getConfigVersion());
- hbArgs.setCheckEmpty(isInitialConfig);
- hbArgs.setSenderHost(myConfig.getHostAndPort());
- hbArgs.setSenderId(myConfig.getId());
- const BSONObj hbRequest = hbArgs.toBSON();
+ std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
+ if (hasReceivedSufficientResponses()) {
+ return requests;
+ }
- // Send a bunch of heartbeat requests.
- // Schedule an operation when a "sufficient" number of them have completed, and use that
- // to compute the quorum check results.
- // Wait for the "completion" callback to finish, and then it's OK to return the results.
- for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
- if (_myIndex == i) {
- // No need to check self for liveness or unreadiness.
- continue;
- }
- requests.push_back(ReplicationExecutor::RemoteCommandRequest(
- _rsConfig->getMemberAt(i).getHostAndPort(),
- "admin",
- hbRequest,
- _rsConfig->getHeartbeatTimeoutPeriodMillis()));
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName(_rsConfig->getReplSetName());
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(_rsConfig->getConfigVersion());
+ hbArgs.setCheckEmpty(isInitialConfig);
+ hbArgs.setSenderHost(myConfig.getHostAndPort());
+ hbArgs.setSenderId(myConfig.getId());
+ const BSONObj hbRequest = hbArgs.toBSON();
+
+ // Send a bunch of heartbeat requests.
+ // Schedule an operation when a "sufficient" number of them have completed, and use that
+ // to compute the quorum check results.
+ // Wait for the "completion" callback to finish, and then it's OK to return the results.
+ for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
+ if (_myIndex == i) {
+ // No need to check self for liveness or unreadiness.
+ continue;
}
-
- return requests;
+ requests.push_back(ReplicationExecutor::RemoteCommandRequest(
+ _rsConfig->getMemberAt(i).getHostAndPort(),
+ "admin",
+ hbRequest,
+ _rsConfig->getHeartbeatTimeoutPeriodMillis()));
}
- void QuorumChecker::processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) {
+ return requests;
+}
- _tabulateHeartbeatResponse(request, response);
- if (hasReceivedSufficientResponses()) {
- _onQuorumCheckComplete();
- }
+void QuorumChecker::processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ _tabulateHeartbeatResponse(request, response);
+ if (hasReceivedSufficientResponses()) {
+ _onQuorumCheckComplete();
}
+}
- void QuorumChecker::_onQuorumCheckComplete() {
- if (!_vetoStatus.isOK()) {
- _finalStatus = _vetoStatus;
- return;
+void QuorumChecker::_onQuorumCheckComplete() {
+ if (!_vetoStatus.isOK()) {
+ _finalStatus = _vetoStatus;
+ return;
+ }
+ if (_rsConfig->getConfigVersion() == 1 && !_badResponses.empty()) {
+ str::stream message;
+ message << "replSetInitiate quorum check failed because not all proposed set members "
+ "responded affirmatively: ";
+ for (std::vector<std::pair<HostAndPort, Status>>::const_iterator it = _badResponses.begin();
+ it != _badResponses.end();
+ ++it) {
+ if (it != _badResponses.begin()) {
+ message << ", ";
+ }
+ message << it->first.toString() << " failed with " << it->second.reason();
}
- if (_rsConfig->getConfigVersion() == 1 && !_badResponses.empty()) {
- str::stream message;
- message << "replSetInitiate quorum check failed because not all proposed set members "
- "responded affirmatively: ";
- for (std::vector<std::pair<HostAndPort, Status> >::const_iterator it =
- _badResponses.begin();
- it != _badResponses.end();
- ++it) {
+ _finalStatus = Status(ErrorCodes::NodeNotFound, message);
+ return;
+ }
+ if (_numElectable == 0) {
+ _finalStatus = Status(ErrorCodes::NodeNotFound,
+ "Quorum check failed because no "
+ "electable nodes responded; at least one required for config");
+ return;
+ }
+ if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
+ str::stream message;
+ message << "Quorum check failed because not enough voting nodes responded; required "
+ << _rsConfig->getMajorityVoteCount() << " but ";
+
+ if (_voters.size() == 0) {
+ message << "none responded";
+ } else {
+ message << "only the following " << _voters.size()
+ << " voting nodes responded: " << _voters.front().toString();
+ for (size_t i = 1; i < _voters.size(); ++i) {
+ message << ", " << _voters[i].toString();
+ }
+ }
+ if (!_badResponses.empty()) {
+ message << "; the following nodes did not respond affirmatively: ";
+ for (std::vector<std::pair<HostAndPort, Status>>::const_iterator it =
+ _badResponses.begin();
+ it != _badResponses.end();
+ ++it) {
if (it != _badResponses.begin()) {
message << ", ";
}
message << it->first.toString() << " failed with " << it->second.reason();
}
- _finalStatus = Status(ErrorCodes::NodeNotFound, message);
- return;
}
- if (_numElectable == 0) {
- _finalStatus = Status(
- ErrorCodes::NodeNotFound, "Quorum check failed because no "
- "electable nodes responded; at least one required for config");
- return;
- }
- if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
- str::stream message;
- message << "Quorum check failed because not enough voting nodes responded; required " <<
- _rsConfig->getMajorityVoteCount() << " but ";
-
- if (_voters.size() == 0) {
- message << "none responded";
- }
- else {
- message << "only the following " << _voters.size() <<
- " voting nodes responded: " << _voters.front().toString();
- for (size_t i = 1; i < _voters.size(); ++i) {
- message << ", " << _voters[i].toString();
- }
- }
- if (!_badResponses.empty()) {
- message << "; the following nodes did not respond affirmatively: ";
- for (std::vector<std::pair<HostAndPort, Status> >::const_iterator it =
- _badResponses.begin();
- it != _badResponses.end();
- ++it) {
- if (it != _badResponses.begin()) {
- message << ", ";
- }
- message << it->first.toString() << " failed with " << it->second.reason();
- }
- }
- _finalStatus = Status(ErrorCodes::NodeNotFound, message);
- return;
- }
- _finalStatus = Status::OK();
+ _finalStatus = Status(ErrorCodes::NodeNotFound, message);
+ return;
+ }
+ _finalStatus = Status::OK();
+}
+
+void QuorumChecker::_tabulateHeartbeatResponse(
+ const ReplicationExecutor::RemoteCommandRequest& request, const ResponseStatus& response) {
+ ++_numResponses;
+ if (!response.isOK()) {
+ warning() << "Failed to complete heartbeat request to " << request.target << "; "
+ << response.getStatus();
+ _badResponses.push_back(std::make_pair(request.target, response.getStatus()));
+ return;
}
- void QuorumChecker::_tabulateHeartbeatResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) {
+ BSONObj resBSON = response.getValue().data;
+ ReplSetHeartbeatResponse hbResp;
+ Status hbStatus = hbResp.initialize(resBSON);
- ++_numResponses;
- if (!response.isOK()) {
- warning() << "Failed to complete heartbeat request to " << request.target <<
- "; " << response.getStatus();
- _badResponses.push_back(std::make_pair(request.target, response.getStatus()));
- return;
- }
+ if (hbStatus.code() == ErrorCodes::InconsistentReplicaSetNames) {
+ std::string message = str::stream() << "Our set name did not match that of "
+ << request.target.toString();
+ _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
+ warning() << message;
+ return;
+ }
- BSONObj resBSON = response.getValue().data;
- ReplSetHeartbeatResponse hbResp;
- Status hbStatus = hbResp.initialize(resBSON);
+ if (!hbStatus.isOK() && hbStatus != ErrorCodes::InvalidReplicaSetConfig) {
+ warning() << "Got error (" << hbStatus << ") response on heartbeat request to "
+ << request.target << "; " << hbResp;
+ _badResponses.push_back(std::make_pair(request.target, hbStatus));
+ return;
+ }
- if (hbStatus.code() == ErrorCodes::InconsistentReplicaSetNames) {
- std::string message = str::stream() << "Our set name did not match that of " <<
- request.target.toString();
+ if (!hbResp.getReplicaSetName().empty()) {
+ if (hbResp.getVersion() >= _rsConfig->getConfigVersion()) {
+ std::string message = str::stream()
+ << "Our config version of " << _rsConfig->getConfigVersion()
+ << " is no larger than the version on " << request.target.toString()
+ << ", which is " << hbResp.getVersion();
_vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
warning() << message;
return;
}
+ }
- if (!hbStatus.isOK() && hbStatus != ErrorCodes::InvalidReplicaSetConfig) {
- warning() << "Got error (" << hbStatus
- << ") response on heartbeat request to " << request.target
- << "; " << hbResp;
- _badResponses.push_back(std::make_pair(request.target, hbStatus));
- return;
- }
-
- if (!hbResp.getReplicaSetName().empty()) {
- if (hbResp.getVersion() >= _rsConfig->getConfigVersion()) {
- std::string message = str::stream() << "Our config version of " <<
- _rsConfig->getConfigVersion() <<
- " is no larger than the version on " << request.target.toString() <<
- ", which is " << hbResp.getVersion();
- _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
- warning() << message;
- return;
- }
- }
-
- const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
- if (isInitialConfig && hbResp.hasData()) {
- std::string message = str::stream() << "'" << request.target.toString()
- << "' has data already, cannot initiate set.";
- _vetoStatus = Status(ErrorCodes::CannotInitializeNodeWithData, message);
- warning() << message;
- return;
- }
-
- for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
- const MemberConfig& memberConfig = _rsConfig->getMemberAt(i);
- if (memberConfig.getHostAndPort() != request.target) {
- continue;
- }
- if (memberConfig.isElectable()) {
- ++_numElectable;
- }
- if (memberConfig.isVoter()) {
- _voters.push_back(request.target);
- }
- return;
- }
- invariant(false);
+ const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
+ if (isInitialConfig && hbResp.hasData()) {
+ std::string message = str::stream() << "'" << request.target.toString()
+ << "' has data already, cannot initiate set.";
+ _vetoStatus = Status(ErrorCodes::CannotInitializeNodeWithData, message);
+ warning() << message;
+ return;
}
- bool QuorumChecker::hasReceivedSufficientResponses() const {
- if (!_vetoStatus.isOK() || _numResponses == _rsConfig->getNumMembers()) {
- // Vetoed or everybody has responded. All done.
- return true;
- }
- if (_rsConfig->getConfigVersion() == 1) {
- // Have not received responses from every member, and the proposed config
- // version is 1 (initial configuration). Keep waiting.
- return false;
+ for (int i = 0; i < _rsConfig->getNumMembers(); ++i) {
+ const MemberConfig& memberConfig = _rsConfig->getMemberAt(i);
+ if (memberConfig.getHostAndPort() != request.target) {
+ continue;
}
- if (_numElectable == 0) {
- // Have not heard from at least one electable node. Keep waiting.
- return false;
+ if (memberConfig.isElectable()) {
+ ++_numElectable;
}
- if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
- // Have not heard from a majority of voters. Keep waiting.
- return false;
+ if (memberConfig.isVoter()) {
+ _voters.push_back(request.target);
}
+ return;
+ }
+ invariant(false);
+}
- // Have heard from a majority of voters and one electable node. All done.
+bool QuorumChecker::hasReceivedSufficientResponses() const {
+ if (!_vetoStatus.isOK() || _numResponses == _rsConfig->getNumMembers()) {
+ // Vetoed or everybody has responded. All done.
return true;
}
-
- Status checkQuorumGeneral(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex) {
- QuorumChecker checker(&rsConfig, myIndex);
- ScatterGatherRunner runner(&checker);
- Status status = runner.run(executor);
- if (!status.isOK()) {
- return status;
- }
-
- return checker.getFinalStatus();
+ if (_rsConfig->getConfigVersion() == 1) {
+ // Have not received responses from every member, and the proposed config
+ // version is 1 (initial configuration). Keep waiting.
+ return false;
}
-
- Status checkQuorumForInitiate(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex) {
- invariant(rsConfig.getConfigVersion() == 1);
- return checkQuorumGeneral(executor, rsConfig, myIndex);
+ if (_numElectable == 0) {
+ // Have not heard from at least one electable node. Keep waiting.
+ return false;
+ }
+ if (int(_voters.size()) < _rsConfig->getMajorityVoteCount()) {
+ // Have not heard from a majority of voters. Keep waiting.
+ return false;
}
- Status checkQuorumForReconfig(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex) {
- invariant(rsConfig.getConfigVersion() > 1);
- return checkQuorumGeneral(executor, rsConfig, myIndex);
+ // Have heard from a majority of voters and one electable node. All done.
+ return true;
+}
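For concreteness, the termination rule above reduces to a small predicate over a
handful of counters. A minimal sketch (the struct and its fields are illustrative
stand-ins for the ReplicaSetConfig accessors used above, not part of this patch):

    // Sketch of hasReceivedSufficientResponses(), decoupled from ReplicaSetConfig.
    struct QuorumTally {
        bool vetoed;            // _vetoStatus is non-OK
        int numMembers;         // rsConfig.getNumMembers()
        int numResponses;       // responses and timeouts processed so far
        bool isInitialConfig;   // proposed config version == 1
        int numElectable;       // electable nodes that responded affirmatively
        int numVoters;          // voting nodes that responded affirmatively
        int majorityVoteCount;  // rsConfig.getMajorityVoteCount(), e.g. 2 of 3 voters

        bool done() const {
            if (vetoed || numResponses == numMembers)
                return true;   // vetoed, or everybody has responded
            if (isInitialConfig)
                return false;  // initiate waits for a response from every member
            if (numElectable == 0)
                return false;  // still waiting to hear from an electable node
            if (numVoters < majorityVoteCount)
                return false;  // still waiting for a majority of voters
            return true;       // majority of voters plus one electable node
        }
    };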
+
+Status checkQuorumGeneral(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex) {
+ QuorumChecker checker(&rsConfig, myIndex);
+ ScatterGatherRunner runner(&checker);
+ Status status = runner.run(executor);
+ if (!status.isOK()) {
+ return status;
}
+ return checker.getFinalStatus();
+}
+
+Status checkQuorumForInitiate(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex) {
+ invariant(rsConfig.getConfigVersion() == 1);
+ return checkQuorumGeneral(executor, rsConfig, myIndex);
+}
+
+Status checkQuorumForReconfig(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex) {
+ invariant(rsConfig.getConfigVersion() > 1);
+ return checkQuorumGeneral(executor, rsConfig, myIndex);
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.h b/src/mongo/db/repl/check_quorum_for_config_change.h
index 396ac4dea39..96e8a9aad46 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.h
+++ b/src/mongo/db/repl/check_quorum_for_config_change.h
@@ -35,117 +35,118 @@
namespace mongo {
namespace repl {
- class ReplicaSetConfig;
+class ReplicaSetConfig;
+/**
+ * Quorum checking state machine.
+ *
+ * Usage: Construct a QuorumChecker, pass in a pointer to the configuration for which you're
+ * checking quorum, and the integer index of the member config representing the "executing"
+ * node. Use ScatterGatherRunner or otherwise execute a scatter-gather procedure as described
+ * in the class comment for the ScatterGatherAlgorithm class. After
+ * hasReceivedSufficientResponses() returns true, you may call getFinalStatus() to get the
+ * result of the quorum check.
+ */
+class QuorumChecker : public ScatterGatherAlgorithm {
+ MONGO_DISALLOW_COPYING(QuorumChecker);
+
+public:
/**
- * Quorum checking state machine.
+ * Constructs a QuorumChecker that is used to confirm that sufficient nodes are up to accept
+ * "rsConfig". "myIndex" is the index of the local node, which is assumed to be up.
*
- * Usage: Construct a QuorumChecker, pass in a pointer to the configuration for which you're
- * checking quorum, and the integer index of the member config representing the "executing"
- * node. Use ScatterGatherRunner or otherwise execute a scatter-gather procedure as described
- * in the class comment for the ScatterGatherAlgorithm class. After
- * hasReceivedSufficientResponses() returns true, you may call getFinalStatus() to get the
- * result of the quorum check.
+ * "rsConfig" must stay in scope until QuorumChecker's destructor completes.
*/
- class QuorumChecker : public ScatterGatherAlgorithm {
- MONGO_DISALLOW_COPYING(QuorumChecker);
- public:
- /**
- * Constructs a QuorumChecker that is used to confirm that sufficient nodes are up to accept
- * "rsConfig". "myIndex" is the index of the local node, which is assumed to be up.
- *
- * "rsConfig" must stay in scope until QuorumChecker's destructor completes.
- */
- QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex);
- virtual ~QuorumChecker();
-
- virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response);
-
- virtual bool hasReceivedSufficientResponses() const;
-
- Status getFinalStatus() const { return _finalStatus; }
-
- private:
- /**
- * Callback that executes after _haveReceivedSufficientReplies() becomes true.
- *
- * Computes the quorum result based on responses received so far, stores it into
- * _finalStatus, and enables QuorumChecker::run() to return.
- */
- void _onQuorumCheckComplete();
-
- /**
- * Updates the QuorumChecker state based on the data from a single heartbeat response.
- */
- void _tabulateHeartbeatResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response);
-
- // Pointer to the replica set configuration for which we're checking quorum.
- const ReplicaSetConfig* const _rsConfig;
-
- // Index of the local node's member configuration in _rsConfig.
- const int _myIndex;
-
- // List of voting nodes that have responded affirmatively.
- std::vector<HostAndPort> _voters;
-
- // List of nodes with bad responses and the bad response status they returned.
- std::vector<std::pair<HostAndPort, Status> > _badResponses;
-
- // Total number of responses and timeouts processed.
- int _numResponses;
-
- // Number of electable nodes that have responded affirmatively.
- int _numElectable;
-
- // Set to a non-OK status if a response from a remote node indicates
- // that the quorum check should definitely fail, such as because of
- // a replica set name mismatch.
- Status _vetoStatus;
-
- // Final status of the quorum check, returned by run().
- Status _finalStatus;
- };
+ QuorumChecker(const ReplicaSetConfig* rsConfig, int myIndex);
+ virtual ~QuorumChecker();
+
+ virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response);
+
+ virtual bool hasReceivedSufficientResponses() const;
+
+ Status getFinalStatus() const {
+ return _finalStatus;
+ }
+private:
/**
- * Performs a quorum call to determine if a sufficient number of nodes are up
- * to initiate a replica set with configuration "rsConfig".
+     * Callback that executes after hasReceivedSufficientResponses() becomes true.
*
- * "myIndex" is the index of this node's member configuration in "rsConfig".
- * "executor" is the event loop in which to schedule network/aysnchronous processing.
- *
- * For purposes of initiate, a quorum is only met if all of the following conditions
- * are met:
- * - All nodes respond.
- * - No nodes other than the node running the quorum check have data.
- * - No nodes are already joined to a replica set.
- * - No node reports a replica set name other than the one in "rsConfig".
+ * Computes the quorum result based on responses received so far, stores it into
+     * _finalStatus, and makes it available to callers via getFinalStatus().
*/
- Status checkQuorumForInitiate(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex);
+ void _onQuorumCheckComplete();
/**
- * Performs a quorum call to determine if a sufficient number of nodes are up
- * to replace the current replica set configuration with "rsConfig".
- *
- * "myIndex" is the index of this node's member configuration in "rsConfig".
- * "executor" is the event loop in which to schedule network/aysnchronous processing.
- *
- * For purposes of reconfig, a quorum is only met if all of the following conditions
- * are met:
- * - A majority of voting nodes respond.
- * - At least one electable node responds.
- * - No responding node reports a replica set name other than the one in "rsConfig".
- * - All responding nodes report a config version less than the one in "rsConfig".
+ * Updates the QuorumChecker state based on the data from a single heartbeat response.
*/
- Status checkQuorumForReconfig(ReplicationExecutor* executor,
- const ReplicaSetConfig& rsConfig,
- const int myIndex);
+ void _tabulateHeartbeatResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response);
+
+ // Pointer to the replica set configuration for which we're checking quorum.
+ const ReplicaSetConfig* const _rsConfig;
+
+ // Index of the local node's member configuration in _rsConfig.
+ const int _myIndex;
+
+ // List of voting nodes that have responded affirmatively.
+ std::vector<HostAndPort> _voters;
+
+ // List of nodes with bad responses and the bad response status they returned.
+ std::vector<std::pair<HostAndPort, Status>> _badResponses;
+
+ // Total number of responses and timeouts processed.
+ int _numResponses;
+
+ // Number of electable nodes that have responded affirmatively.
+ int _numElectable;
+
+ // Set to a non-OK status if a response from a remote node indicates
+ // that the quorum check should definitely fail, such as because of
+ // a replica set name mismatch.
+ Status _vetoStatus;
+
+ // Final status of the quorum check, returned by run().
+ Status _finalStatus;
+};
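Tying the class comment together: a caller constructs the checker, lets a
ScatterGatherRunner drive it, and then reads the verdict. A minimal sketch that
mirrors checkQuorumGeneral() in the .cpp above (the function name here is
hypothetical; error handling is reduced to the essentials):

    Status runQuorumCheck(ReplicationExecutor* executor,
                          const ReplicaSetConfig& rsConfig,  // must outlive 'checker'
                          int myIndex) {
        QuorumChecker checker(&rsConfig, myIndex);
        ScatterGatherRunner runner(&checker);  // sends checker.getRequests(), feeds
                                               // each reply to processResponse()
        Status status = runner.run(executor);  // returns once
                                               // hasReceivedSufficientResponses()
        if (!status.isOK()) {
            return status;  // e.g. ShutdownInProgress from the executor
        }
        return checker.getFinalStatus();  // the quorum check's verdict
    }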
+
+/**
+ * Performs a quorum call to determine if a sufficient number of nodes are up
+ * to initiate a replica set with configuration "rsConfig".
+ *
+ * "myIndex" is the index of this node's member configuration in "rsConfig".
+ * "executor" is the event loop in which to schedule network/aysnchronous processing.
+ *
+ * For purposes of initiate, a quorum is only met if all of the following conditions
+ * are met:
+ * - All nodes respond.
+ * - No nodes other than the node running the quorum check have data.
+ * - No nodes are already joined to a replica set.
+ * - No node reports a replica set name other than the one in "rsConfig".
+ */
+Status checkQuorumForInitiate(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex);
+
+/**
+ * Performs a quorum call to determine if a sufficient number of nodes are up
+ * to replace the current replica set configuration with "rsConfig".
+ *
+ * "myIndex" is the index of this node's member configuration in "rsConfig".
+ * "executor" is the event loop in which to schedule network/aysnchronous processing.
+ *
+ * For purposes of reconfig, a quorum is only met if all of the following conditions
+ * are met:
+ * - A majority of voting nodes respond.
+ * - At least one electable node responds.
+ * - No responding node reports a replica set name other than the one in "rsConfig".
+ * - All responding nodes report a config version less than the one in "rsConfig".
+ */
+Status checkQuorumForReconfig(ReplicationExecutor* executor,
+ const ReplicaSetConfig& rsConfig,
+ const int myIndex);
} // namespace repl
} // namespace mongo
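The two entry points differ only in their precondition and in how strict a quorum
they demand. A hypothetical dispatch helper makes the split explicit (not part of
this patch; the version test mirrors the invariants asserted in the .cpp above):

    Status checkQuorum(ReplicationExecutor* executor,
                       const ReplicaSetConfig& rsConfig,
                       int myIndex) {
        if (rsConfig.getConfigVersion() == 1) {
            // replSetInitiate: every member must respond, none may have data or
            // already belong to a set, and all must agree on the set name.
            return checkQuorumForInitiate(executor, rsConfig, myIndex);
        }
        // replSetReconfig: a majority of voters and at least one electable node
        // must respond, with no set-name mismatch and no newer config version.
        return checkQuorumForReconfig(executor, rsConfig, myIndex);
    }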
diff --git a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
index 49d87e9cf4f..064f133e3d2 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
@@ -46,754 +46,768 @@
#include "mongo/unittest/unittest.h"
#include "mongo/util/net/hostandport.h"
-#define ASSERT_REASON_CONTAINS(STATUS, PATTERN) do { \
- const mongo::Status s_ = (STATUS); \
- ASSERT_FALSE(s_.reason().find(PATTERN) == std::string::npos) << \
- #STATUS ".reason() == " << s_.reason(); \
+#define ASSERT_REASON_CONTAINS(STATUS, PATTERN) \
+ do { \
+ const mongo::Status s_ = (STATUS); \
+ ASSERT_FALSE(s_.reason().find(PATTERN) == std::string::npos) \
+ << #STATUS ".reason() == " << s_.reason(); \
} while (false)
-#define ASSERT_NOT_REASON_CONTAINS(STATUS, PATTERN) do { \
- const mongo::Status s_ = (STATUS); \
- ASSERT_TRUE(s_.reason().find(PATTERN) == std::string::npos) << \
- #STATUS ".reason() == " << s_.reason(); \
+#define ASSERT_NOT_REASON_CONTAINS(STATUS, PATTERN) \
+ do { \
+ const mongo::Status s_ = (STATUS); \
+ ASSERT_TRUE(s_.reason().find(PATTERN) == std::string::npos) \
+ << #STATUS ".reason() == " << s_.reason(); \
} while (false)
namespace mongo {
namespace repl {
namespace {
- typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
-
- class CheckQuorumTest : public mongo::unittest::Test {
- protected:
- CheckQuorumTest();
-
- void startQuorumCheck(const ReplicaSetConfig& config, int myIndex);
- Status waitForQuorumCheck();
- bool isQuorumCheckDone();
-
- NetworkInterfaceMock* _net;
- boost::scoped_ptr<ReplicationExecutor> _executor;
-
- private:
- void setUp();
- void tearDown();
-
- void _runQuorumCheck(const ReplicaSetConfig& config, int myIndex);
- virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) = 0;
-
- boost::scoped_ptr<boost::thread> _executorThread;
- boost::scoped_ptr<boost::thread> _quorumCheckThread;
- Status _quorumCheckStatus;
- boost::mutex _mutex;
- bool _isQuorumCheckDone;
- };
-
- CheckQuorumTest::CheckQuorumTest() :
- _quorumCheckStatus(ErrorCodes::InternalError, "Not executed") {
+typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
+
+class CheckQuorumTest : public mongo::unittest::Test {
+protected:
+ CheckQuorumTest();
+
+ void startQuorumCheck(const ReplicaSetConfig& config, int myIndex);
+ Status waitForQuorumCheck();
+ bool isQuorumCheckDone();
+
+ NetworkInterfaceMock* _net;
+ boost::scoped_ptr<ReplicationExecutor> _executor;
+
+private:
+ void setUp();
+ void tearDown();
+
+ void _runQuorumCheck(const ReplicaSetConfig& config, int myIndex);
+ virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) = 0;
+
+ boost::scoped_ptr<boost::thread> _executorThread;
+ boost::scoped_ptr<boost::thread> _quorumCheckThread;
+ Status _quorumCheckStatus;
+ boost::mutex _mutex;
+ bool _isQuorumCheckDone;
+};
+
+CheckQuorumTest::CheckQuorumTest()
+ : _quorumCheckStatus(ErrorCodes::InternalError, "Not executed") {}
+
+void CheckQuorumTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, 1 /* prng */));
+ _executorThread.reset(
+ new boost::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+}
+
+void CheckQuorumTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+void CheckQuorumTest::startQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
+ ASSERT_FALSE(_quorumCheckThread);
+ _isQuorumCheckDone = false;
+ _quorumCheckThread.reset(
+ new boost::thread(stdx::bind(&CheckQuorumTest::_runQuorumCheck, this, config, myIndex)));
+}
+
+Status CheckQuorumTest::waitForQuorumCheck() {
+ ASSERT_TRUE(_quorumCheckThread);
+ _quorumCheckThread->join();
+ return _quorumCheckStatus;
+}
+
+bool CheckQuorumTest::isQuorumCheckDone() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return _isQuorumCheckDone;
+}
+
+void CheckQuorumTest::_runQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
+ _quorumCheckStatus = _runQuorumCheckImpl(config, myIndex);
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _isQuorumCheckDone = true;
+}
+
+class CheckQuorumForInitiate : public CheckQuorumTest {
+private:
+ virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
+ return checkQuorumForInitiate(_executor.get(), config, myIndex);
}
+};
- void CheckQuorumTest::setUp() {
- _net = new NetworkInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, 1 /* prng */ ));
- _executorThread.reset(new boost::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
+class CheckQuorumForReconfig : public CheckQuorumTest {
+protected:
+ virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
+ return checkQuorumForReconfig(_executor.get(), config, myIndex);
}
-
- void CheckQuorumTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+};
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+
+TEST_F(CheckQuorumForInitiate, ValidSingleNodeSet) {
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1"))));
+ startQuorumCheck(config, 0);
+ ASSERT_OK(waitForQuorumCheck());
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckCanceledByShutdown) {
+ _executor->shutdown();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1"))));
+ startQuorumCheck(config, 0);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, waitForQuorumCheck());
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSeveralDownNodes) {
+ // In this test, "we" are host "h3:1". All other nodes time out on
+ // their heartbeat request, and so the quorum check for initiate
+ // will fail because some members were unavailable.
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ startQuorumCheck(config, 2);
+ _net->enterNetwork();
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = config.getNumMembers() - 1;
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ _net->scheduleResponse(_net->getNextReadyRequest(),
+ startDate + 10,
+ ResponseStatus(ErrorCodes::NoSuchKey, "No reply"));
}
-
- void CheckQuorumTest::startQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
- ASSERT_FALSE(_quorumCheckThread);
- _isQuorumCheckDone = false;
- _quorumCheckThread.reset(new boost::thread(stdx::bind(&CheckQuorumTest::_runQuorumCheck,
- this,
- config,
- myIndex)));
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(
+ status, "replSetInitiate quorum check failed because not all proposed set members");
+ ASSERT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+const BSONObj makeHeartbeatRequest(const ReplicaSetConfig& rsConfig, int myConfigIndex) {
+ const MemberConfig& myConfig = rsConfig.getMemberAt(myConfigIndex);
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName(rsConfig.getReplSetName());
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(rsConfig.getConfigVersion());
+ hbArgs.setCheckEmpty(rsConfig.getConfigVersion() == 1);
+ hbArgs.setSenderHost(myConfig.getHostAndPort());
+ hbArgs.setSenderId(myConfig.getId());
+ return hbArgs.toBSON();
+}
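The tests in this file all drive the simulated network the same way; condensed,
the choreography looks like this (a sketch assembled from the calls already used
in this file, with every remote node answering ok:1):

    startQuorumCheck(rsConfig, myConfigIndex);  // runs the check on its own thread
    _net->enterNetwork();                       // take control of the mock network
    const Date_t startDate = _net->now();
    for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
        const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
        // Inspect noi->getRequest() as needed, then either answer...
        _net->scheduleResponse(noi,
                               startDate + 10,
                               ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
                                   BSON("ok" << 1), Milliseconds(8))));
        // ...or use _net->blackHole(noi) to model a node that never replies.
    }
    _net->runUntil(startDate + 10);  // advance time; deliver scheduled responses
    _net->exitNetwork();
    Status status = waitForQuorumCheck();  // join the checker thread, get the Status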
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckSuccessForFiveNodes) {
+ // In this test, "we" are host "h3:1". All nodes respond successfully to their heartbeat
+ // requests, and the quorum check succeeds.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
}
-
- Status CheckQuorumTest::waitForQuorumCheck() {
- ASSERT_TRUE(_quorumCheckThread);
- _quorumCheckThread->join();
- return _quorumCheckStatus;
- }
-
- bool CheckQuorumTest::isQuorumCheckDone() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- return _isQuorumCheckDone;
- }
-
- void CheckQuorumTest::_runQuorumCheck(const ReplicaSetConfig& config, int myIndex) {
- _quorumCheckStatus = _runQuorumCheckImpl(config, myIndex);
- boost::lock_guard<boost::mutex> lk(_mutex);
- _isQuorumCheckDone = true;
- }
-
- class CheckQuorumForInitiate : public CheckQuorumTest {
- private:
- virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
- return checkQuorumForInitiate(_executor.get(), config, myIndex);
- }
- };
-
- class CheckQuorumForReconfig : public CheckQuorumTest {
- protected:
- virtual Status _runQuorumCheckImpl(const ReplicaSetConfig& config, int myIndex) {
- return checkQuorumForReconfig(_executor.get(), config, myIndex);
- }
- };
-
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
- }
-
- TEST_F(CheckQuorumForInitiate, ValidSingleNodeSet) {
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1"))));
- startQuorumCheck(config, 0);
- ASSERT_OK(waitForQuorumCheck());
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckCanceledByShutdown) {
- _executor->shutdown();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1"))));
- startQuorumCheck(config, 0);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, waitForQuorumCheck());
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSeveralDownNodes) {
- // In this test, "we" are host "h3:1". All other nodes time out on
- // their heartbeat request, and so the quorum check for initiate
- // will fail because some members were unavailable.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- startQuorumCheck(config, 2);
- _net->enterNetwork();
- const Date_t startDate = _net->now();
- const int numCommandsExpected = config.getNumMembers() - 1;
- for (int i = 0; i < numCommandsExpected; ++i) {
- _net->scheduleResponse(_net->getNextReadyRequest(),
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_OK(waitForQuorumCheck());
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToOneDownNode) {
+ // In this test, "we" are host "h3:1". All nodes except "h2:1" respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // all nodes must be available for initiate. This is so even though "h2"
+ // is neither voting nor electable.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1"
+ << "priority" << 0 << "votes" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3:1") << BSON("_id" << 4 << "host"
+ << "h4:1")
+ << BSON("_id" << 5 << "host"
+ << "h5:1") << BSON("_id" << 6 << "host"
+ << "h6:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h2", 1)) {
+ _net->scheduleResponse(
+ noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
+ } else {
+ _net->scheduleResponse(noi,
startDate + 10,
- ResponseStatus(ErrorCodes::NoSuchKey, "No reply"));
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(
- status, "replSetInitiate quorum check failed because not all proposed set members");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
- }
-
- const BSONObj makeHeartbeatRequest(const ReplicaSetConfig& rsConfig, int myConfigIndex) {
- const MemberConfig& myConfig = rsConfig.getMemberAt(myConfigIndex);
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName(rsConfig.getReplSetName());
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(rsConfig.getConfigVersion());
- hbArgs.setCheckEmpty(rsConfig.getConfigVersion() == 1);
- hbArgs.setSenderHost(myConfig.getHostAndPort());
- hbArgs.setSenderId(myConfig.getId());
- return hbArgs.toBSON();
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckSuccessForFiveNodes) {
- // In this test, "we" are host "h3:1". All nodes respond successfully to their heartbeat
- // requests, and the quorum check succeeds.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(
+ status, "replSetInitiate quorum check failed because not all proposed set members");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h6:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetNameMismatch) {
+ // In this test, "we" are host "h3:1". All nodes respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // "h4" declares that the requested replica set name was not what it expected.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h4", 1)) {
_net->scheduleResponse(noi,
startDate + 10,
ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_OK(waitForQuorumCheck());
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToOneDownNode) {
- // In this test, "we" are host "h3:1". All nodes except "h2:1" respond
- // successfully to their heartbeat requests, but quorum check fails because
- // all nodes must be available for initiate. This is so even though "h2"
- // is neither voting nor electable.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1" <<
- "priority" << 0 << "votes" << 0) <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1") <<
- BSON("_id" << 6 << "host" << "h6:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h2", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(
- status, "replSetInitiate quorum check failed because not all proposed set members");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h6:1");
- }
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetNameMismatch) {
- // In this test, "we" are host "h3:1". All nodes respond
- // successfully to their heartbeat requests, but quorum check fails because
- // "h4" declares that the requested replica set name was not what it expected.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h4", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 0 << "mismatch" << true),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
+ BSON("ok" << 0 << "mismatch" << true), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our set name did not match");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_REASON_CONTAINS(status, "h4:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNode) {
- // In this test, "we" are host "h3:1". All nodes respond
- // successfully to their heartbeat requests, but quorum check fails because
- // "h5" declares that it is already initialized.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 0 <<
- "set" << "rs0" <<
- "v" << 1),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1), Milliseconds(8))));
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our set name did not match");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNode) {
+ // In this test, "we" are host "h3:1". All nodes respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // "h5" declares that it is already initialized.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(BSON("ok" << 0 << "set"
+ << "rs0"
+ << "v" << 1),
+ Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our config version of");
- ASSERT_REASON_CONTAINS(
- status, "is no larger than the version");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNodeOnlyOneRespondent) {
- // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
- // and quorum check fails because "h5" declares that it is already initialized.
- //
- // Compare to QuorumCheckFailedDueToInitializedNode, above.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 0 <<
- "set" << "rs0" <<
- "v" << 1),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our config version of");
+ ASSERT_REASON_CONTAINS(status, "is no larger than the version");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNodeOnlyOneRespondent) {
+ // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
+ // and quorum check fails because "h5" declares that it is already initialized.
+ //
+ // Compare to QuorumCheckFailedDueToInitializedNode, above.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(BSON("ok" << 0 << "set"
+ << "rs0"
+ << "v" << 1),
+ Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our config version of");
- ASSERT_REASON_CONTAINS(
- status, "is no larger than the version");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToNodeWithData) {
- // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
- // and quorum check fails because "h5" declares that it has data already.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1") <<
- BSON("_id" << 5 << "host" << "h5:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- ReplSetHeartbeatResponse hbResp;
- hbResp.setVersion(0);
- hbResp.noteHasData();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- hbResp.toBSON(),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our config version of");
+ ASSERT_REASON_CONTAINS(status, "is no larger than the version");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToNodeWithData) {
+ // In this test, "we" are host "h3:1". Only node "h5" responds before the test completes,
+ // and quorum check fails because "h5" declares that it has data already.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setVersion(0);
+ hbResp.noteHasData();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ hbResp.toBSON(), Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::CannotInitializeNodeWithData, status);
- ASSERT_REASON_CONTAINS(
- status, "has data already");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_REASON_CONTAINS(status, "h5:1");
}
- TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToHigherConfigVersion) {
- // In this test, "we" are host "h3:1". The request to "h2" does not arrive before the end
- // of the test, and the request to "h1" comes back indicating a higher config version.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h1", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 0 <<
- "set" << "rs0" <<
- "v" << 5),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::CannotInitializeNodeWithData, status);
+ ASSERT_REASON_CONTAINS(status, "has data already");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_REASON_CONTAINS(status, "h5:1");
+}
+TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToHigherConfigVersion) {
+ // In this test, "we" are host "h3:1". The request to "h2" does not arrive before the end
+ // of the test, and the request to "h1" comes back indicating a higher config version.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h1", 1)) {
+ _net->scheduleResponse(
+ noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(BSON("ok" << 0 << "set"
+ << "rs0"
+ << "v" << 5),
+ Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(
- status, "Our config version of");
- ASSERT_REASON_CONTAINS(
- status, "is no larger than the version");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToIncompatibleSetName) {
- // In this test, "we" are host "h3:1". The request to "h1" times out,
- // and the request to "h2" comes back indicating an incompatible set name.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- const int myConfigIndex = 2;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h2", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 0 << "mismatch" << true),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our config version of");
+ ASSERT_REASON_CONTAINS(status, "is no larger than the version");
+ ASSERT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckVetoedDueToIncompatibleSetName) {
+ // In this test, "we" are host "h3:1". The request to "h1" times out,
+ // and the request to "h2" comes back indicating an incompatible set name.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h2", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 0 << "mismatch" << true), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(
+ noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- ASSERT_REASON_CONTAINS(status, "Our set name did not match");
- ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
-
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToInsufficientVoters) {
- // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are voters,
- // and of the voters, only "h1" responds. As a result, quorum check fails.
- // "h5" also responds, but because it cannot vote, is irrelevant for the reconfig
- // quorum check.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "votes" << 0))));
- const int myConfigIndex = 3;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status, "Our set name did not match");
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToInsufficientVoters) {
+ // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are voters,
+ // and of the voters, only "h1" responds. As a result, quorum check fails.
+ // "h5" also responds, but because it cannot vote, is irrelevant for the reconfig
+ // quorum check.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "votes" << 0))));
+ const int myConfigIndex = 3;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(
+ noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(status, "not enough voting nodes responded; required 2 but only");
- ASSERT_REASON_CONTAINS(status, "h1:1");
- ASSERT_REASON_CONTAINS(status, "h2:1 failed with");
- ASSERT_REASON_CONTAINS(status, "h3:1 failed with");
- ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
- ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToNoElectableNodeResponding) {
- // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are electable,
- // and none of them respond.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "priority" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "priority" << 0))));
- const int myConfigIndex = 3;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h5", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(8))));
- }
- else {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(status, "not enough voting nodes responded; required 2 but only");
+ ASSERT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_REASON_CONTAINS(status, "h2:1 failed with");
+ ASSERT_REASON_CONTAINS(status, "h3:1 failed with");
+ ASSERT_NOT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckFailsDueToNoElectableNodeResponding) {
+ // In this test, "we" are host "h4". Only "h1", "h2" and "h3" are electable,
+ // and none of them respond.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "priority" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "priority" << 0))));
+ const int myConfigIndex = 3;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h5", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(
+ noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- Status status = waitForQuorumCheck();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_REASON_CONTAINS(status, "no electable nodes responded");
}
-
- TEST_F(CheckQuorumForReconfig, QuorumCheckSucceedsWithAsSoonAsPossible) {
- // In this test, "we" are host "h4". Only "h1", "h2" and "h3" can vote.
- // This test should succeed as soon as h1 and h2 respond, so we block
- // h3 and h5 from responding or timing out until the test completes.
-
- const ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "votes" << 0))));
- const int myConfigIndex = 3;
- const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
-
- startQuorumCheck(rsConfig, myConfigIndex);
- const Date_t startDate = _net->now();
- const int numCommandsExpected = rsConfig.getNumMembers() - 1;
- unordered_set<HostAndPort> seenHosts;
- _net->enterNetwork();
- for (int i = 0; i < numCommandsExpected; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS("admin", request.dbname);
- ASSERT_EQUALS(hbRequest, request.cmdObj);
- ASSERT(seenHosts.insert(request.target).second) <<
- "Already saw " << request.target.toString();
- if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h2", 1)) {
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- Milliseconds(8))));
- }
- else {
- _net->blackHole(noi);
- }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_REASON_CONTAINS(status, "no electable nodes responded");
+}
+
+TEST_F(CheckQuorumForReconfig, QuorumCheckSucceedsWithAsSoonAsPossible) {
+ // In this test, "we" are host "h4". Only "h1", "h2" and "h3" can vote.
+ // This test should succeed as soon as h1 and h2 respond, so we block
+ // h3 and h5 from responding or timing out until the test completes.
+
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "votes" << 0))));
+ const int myConfigIndex = 3;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == HostAndPort("h1", 1) || request.target == HostAndPort("h2", 1)) {
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), Milliseconds(8))));
+ } else {
+ _net->blackHole(noi);
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_OK(waitForQuorumCheck());
}
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_OK(waitForQuorumCheck());
+}
} // namespace
} // namespace repl
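The reconfig quorum-check tests above all exercise one decision rule through the NetworkInterfaceMock: schedule (or black-hole) each remote heartbeat response, advance the mock clock with runUntil(), and assert on the resulting Status. The standalone sketch below condenses that rule for reference; it is an illustration only, not the production check_quorum_for_config_change logic, and QuorumTally/evaluateQuorum are invented names.

#include <cassert>
#include <string>

struct QuorumTally {
    int votersResponded;      // voting members that replied ok
    int totalVoters;          // voting members in the proposed config
    bool vetoed;              // a responder reported a config-version or set-name mismatch
    bool electableResponded;  // at least one electable node replied ok
};

std::string evaluateQuorum(const QuorumTally& t) {
    if (t.vetoed)
        return "NewReplicaSetConfigurationIncompatible";
    if (t.votersResponded < t.totalVoters / 2 + 1)  // simple majority of voters
        return "NodeNotFound: not enough voting nodes responded";
    if (!t.electableResponded)
        return "NodeNotFound: no electable nodes responded";
    return "OK";
}

int main() {
    // Mirrors QuorumCheckFailsDueToInsufficientVoters: 3 voters, only "h1" answers.
    assert(evaluateQuorum({1, 3, false, true}) ==
           "NodeNotFound: not enough voting nodes responded");
    // Mirrors QuorumCheckSucceedsWithAsSoonAsPossible: "h1" and "h2" answer.
    assert(evaluateQuorum({2, 3, false, true}) == "OK");
    return 0;
}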
diff --git a/src/mongo/db/repl/elect_cmd_runner.cpp b/src/mongo/db/repl/elect_cmd_runner.cpp
index 2eb8599c7a4..d423a74dc64 100644
--- a/src/mongo/db/repl/elect_cmd_runner.cpp
+++ b/src/mongo/db/repl/elect_cmd_runner.cpp
@@ -42,119 +42,109 @@
namespace mongo {
namespace repl {
- ElectCmdRunner::Algorithm::Algorithm(
- const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- OID round)
- : _actualResponses(0),
- _sufficientResponsesReceived(false),
- _rsConfig(rsConfig),
- _selfIndex(selfIndex),
- _targets(targets),
- _round(round) {
-
- // Vote for ourselves, first.
- _receivedVotes = _rsConfig.getMemberAt(_selfIndex).getNumVotes();
+ElectCmdRunner::Algorithm::Algorithm(const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ OID round)
+ : _actualResponses(0),
+ _sufficientResponsesReceived(false),
+ _rsConfig(rsConfig),
+ _selfIndex(selfIndex),
+ _targets(targets),
+ _round(round) {
+ // Vote for ourselves, first.
+ _receivedVotes = _rsConfig.getMemberAt(_selfIndex).getNumVotes();
+}
+
+ElectCmdRunner::Algorithm::~Algorithm() {}
+
+std::vector<ReplicationExecutor::RemoteCommandRequest> ElectCmdRunner::Algorithm::getRequests()
+ const {
+ const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
+ std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
+ BSONObjBuilder electCmdBuilder;
+ electCmdBuilder.append("replSetElect", 1);
+ electCmdBuilder.append("set", _rsConfig.getReplSetName());
+ electCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
+ electCmdBuilder.append("whoid", selfConfig.getId());
+ electCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
+ electCmdBuilder.append("round", _round);
+ const BSONObj replSetElectCmd = electCmdBuilder.obj();
+
+ // Schedule a RemoteCommandRequest for each non-DOWN node
+ for (std::vector<HostAndPort>::const_iterator it = _targets.begin(); it != _targets.end();
+ ++it) {
+ invariant(*it != selfConfig.getHostAndPort());
+ requests.push_back(ReplicationExecutor::RemoteCommandRequest(
+ *it,
+ "admin",
+ replSetElectCmd,
+ Milliseconds(30 * 1000))); // trying to match current Socket timeout
}
- ElectCmdRunner::Algorithm::~Algorithm() {}
-
- std::vector<ReplicationExecutor::RemoteCommandRequest>
- ElectCmdRunner::Algorithm::getRequests() const {
-
- const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
- std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
- BSONObjBuilder electCmdBuilder;
- electCmdBuilder.append("replSetElect", 1);
- electCmdBuilder.append("set", _rsConfig.getReplSetName());
- electCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
- electCmdBuilder.append("whoid", selfConfig.getId());
- electCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
- electCmdBuilder.append("round", _round);
- const BSONObj replSetElectCmd = electCmdBuilder.obj();
-
- // Schedule a RemoteCommandRequest for each non-DOWN node
- for (std::vector<HostAndPort>::const_iterator it = _targets.begin();
- it != _targets.end();
- ++it) {
-
- invariant(*it != selfConfig.getHostAndPort());
- requests.push_back(ReplicationExecutor::RemoteCommandRequest(
- *it,
- "admin",
- replSetElectCmd,
- Milliseconds(30*1000))); // trying to match current Socket timeout
- }
-
- return requests;
- }
+ return requests;
+}
- bool ElectCmdRunner::Algorithm::hasReceivedSufficientResponses() const {
- if (_sufficientResponsesReceived) {
- return true;
- }
- if (_receivedVotes >= _rsConfig.getMajorityVoteCount()) {
- return true;
- }
- if (_receivedVotes < 0) {
- return true;
- }
- if (_actualResponses == _targets.size()) {
- return true;
- }
- return false;
+bool ElectCmdRunner::Algorithm::hasReceivedSufficientResponses() const {
+ if (_sufficientResponsesReceived) {
+ return true;
}
-
- void ElectCmdRunner::Algorithm::processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) {
-
- ++_actualResponses;
-
- if (response.isOK()) {
- BSONObj res = response.getValue().data;
- log() << "received " << res["vote"] << " votes from " << request.target;
- LOG(1) << "full elect res: " << res.toString();
- BSONElement vote(res["vote"]);
- if (vote.type() != mongo::NumberInt) {
- error() << "wrong type for vote argument in replSetElect command: " <<
- typeName(vote.type());
- _sufficientResponsesReceived = true;
- return;
- }
-
- _receivedVotes += vote._numberInt();
- }
- else {
- warning() << "elect command to " << request.target << " failed: " <<
- response.getStatus();
- }
+ if (_receivedVotes >= _rsConfig.getMajorityVoteCount()) {
+ return true;
}
-
- ElectCmdRunner::ElectCmdRunner() : _isCanceled(false) {}
- ElectCmdRunner::~ElectCmdRunner() {}
-
- StatusWith<ReplicationExecutor::EventHandle> ElectCmdRunner::start(
- ReplicationExecutor* executor,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion) {
-
- _algorithm.reset(new Algorithm(currentConfig, selfIndex, targets, OID::gen()));
- _runner.reset(new ScatterGatherRunner(_algorithm.get()));
- return _runner->start(executor, onCompletion);
+ if (_receivedVotes < 0) {
+ return true;
}
-
- void ElectCmdRunner::cancel(ReplicationExecutor* executor) {
- _isCanceled = true;
- _runner->cancel(executor);
+ if (_actualResponses == _targets.size()) {
+ return true;
}
+ return false;
+}
+
+void ElectCmdRunner::Algorithm::processResponse(
+ const ReplicationExecutor::RemoteCommandRequest& request, const ResponseStatus& response) {
+ ++_actualResponses;
+
+ if (response.isOK()) {
+ BSONObj res = response.getValue().data;
+ log() << "received " << res["vote"] << " votes from " << request.target;
+ LOG(1) << "full elect res: " << res.toString();
+ BSONElement vote(res["vote"]);
+ if (vote.type() != mongo::NumberInt) {
+ error() << "wrong type for vote argument in replSetElect command: "
+ << typeName(vote.type());
+ _sufficientResponsesReceived = true;
+ return;
+ }
- int ElectCmdRunner::getReceivedVotes() const {
- return _algorithm->getReceivedVotes();
+ _receivedVotes += vote._numberInt();
+ } else {
+ warning() << "elect command to " << request.target << " failed: " << response.getStatus();
}
-
-} // namespace repl
-} // namespace mongo
+}
+
+ElectCmdRunner::ElectCmdRunner() : _isCanceled(false) {}
+ElectCmdRunner::~ElectCmdRunner() {}
+
+StatusWith<ReplicationExecutor::EventHandle> ElectCmdRunner::start(
+ ReplicationExecutor* executor,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion) {
+ _algorithm.reset(new Algorithm(currentConfig, selfIndex, targets, OID::gen()));
+ _runner.reset(new ScatterGatherRunner(_algorithm.get()));
+ return _runner->start(executor, onCompletion);
+}
+
+void ElectCmdRunner::cancel(ReplicationExecutor* executor) {
+ _isCanceled = true;
+ _runner->cancel(executor);
+}
+
+int ElectCmdRunner::getReceivedVotes() const {
+ return _algorithm->getReceivedVotes();
+}
+
+} // namespace repl
+} // namespace mongo
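The early-termination rule that hasReceivedSufficientResponses() implements above is worth restating on its own: the tally is seeded with our own vote weight, the round ends as soon as the tally reaches the majority count, a single "nay" (delivered as vote:-10000) drives the tally negative and ends the round immediately, and otherwise the round runs until every target has answered. A minimal standalone sketch, with invented names and none of the scatter-gather plumbing:

#include <cstddef>
#include <iostream>

struct ElectTally {
    int receivedVotes;      // seeded with our own vote weight
    std::size_t responses;  // responses processed so far
    std::size_t targets;    // number of replSetElect requests sent
    int majority;           // rsConfig.getMajorityVoteCount()

    bool hasReceivedSufficientResponses() const {
        return receivedVotes >= majority  // won the round
            || receivedVotes < 0          // vetoed: a nay vote arrives as -10000
            || responses == targets;      // everyone has answered
    }
};

int main() {
    ElectTally t{1, 0, 2, 2};  // we voted for ourselves in a three-node set
    std::cout << t.hasReceivedSufficientResponses() << '\n';  // 0: keep waiting
    t.receivedVotes += -10000;  // a "nay" response comes in
    ++t.responses;
    std::cout << t.hasReceivedSufficientResponses() << '\n';  // 1: stop early
    return 0;
}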
diff --git a/src/mongo/db/repl/elect_cmd_runner.h b/src/mongo/db/repl/elect_cmd_runner.h
index 3007f5dc2b5..711445a8025 100644
--- a/src/mongo/db/repl/elect_cmd_runner.h
+++ b/src/mongo/db/repl/elect_cmd_runner.h
@@ -39,88 +39,91 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- class ReplicaSetConfig;
- class ScatterGatherRunner;
+class ReplicaSetConfig;
+class ScatterGatherRunner;
- class ElectCmdRunner {
- MONGO_DISALLOW_COPYING(ElectCmdRunner);
+class ElectCmdRunner {
+ MONGO_DISALLOW_COPYING(ElectCmdRunner);
+
+public:
+ class Algorithm : public ScatterGatherAlgorithm {
public:
- class Algorithm : public ScatterGatherAlgorithm {
- public:
- Algorithm(const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- OID round);
-
- virtual ~Algorithm();
- virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response);
- virtual bool hasReceivedSufficientResponses() const;
-
- int getReceivedVotes() const { return _receivedVotes; }
-
- private:
- // Tally of the number of received votes for this election.
- int _receivedVotes;
-
- // Number of responses received so far.
- size_t _actualResponses;
-
- bool _sufficientResponsesReceived;
-
- const ReplicaSetConfig _rsConfig;
- const int _selfIndex;
- const std::vector<HostAndPort> _targets;
- const OID _round;
- };
-
- ElectCmdRunner();
- ~ElectCmdRunner();
-
- /**
- * Begins the process of sending replSetElect commands to all non-DOWN nodes
- * in currentConfig.
- *
- * Returned handle can be used to schedule a callback when the process is complete.
- */
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
-
- /**
- * Informs the ElectCmdRunner to cancel further processing. The "executor"
- * argument must point to the same executor passed to "start()".
- *
- * Like start(), this method must run in the executor context.
- */
- void cancel(ReplicationExecutor* executor);
-
- /**
- * Returns the number of received votes. Only valid to call after
- * the event handle returned from start() has been signaled, which guarantees that
- * the vote count will no longer be touched by callbacks.
- */
- int getReceivedVotes() const;
-
- /**
- * Returns true if cancel() was called on this instance.
- */
- bool isCanceled() const { return _isCanceled; }
+ Algorithm(const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ OID round);
+
+ virtual ~Algorithm();
+ virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response);
+ virtual bool hasReceivedSufficientResponses() const;
+
+ int getReceivedVotes() const {
+ return _receivedVotes;
+ }
private:
- boost::scoped_ptr<Algorithm> _algorithm;
- boost::scoped_ptr<ScatterGatherRunner> _runner;
- bool _isCanceled;
+ // Tally of the number of received votes for this election.
+ int _receivedVotes;
+
+ // Number of responses received so far.
+ size_t _actualResponses;
+
+ bool _sufficientResponsesReceived;
+
+ const ReplicaSetConfig _rsConfig;
+ const int _selfIndex;
+ const std::vector<HostAndPort> _targets;
+ const OID _round;
};
+ ElectCmdRunner();
+ ~ElectCmdRunner();
+
+ /**
+ * Begins the process of sending replSetElect commands to all non-DOWN nodes
+ * in currentConfig.
+ *
+ * Returned handle can be used to schedule a callback when the process is complete.
+ */
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the ElectCmdRunner to cancel further processing. The "executor"
+ * argument must point to the same executor passed to "start()".
+ *
+ * Like start(), this method must run in the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+ /**
+ * Returns the number of received votes. Only valid to call after
+ * the event handle returned from start() has been signaled, which guarantees that
+ * the vote count will no longer be touched by callbacks.
+ */
+ int getReceivedVotes() const;
+
+ /**
+ * Returns true if cancel() was called on this instance.
+ */
+ bool isCanceled() const {
+ return _isCanceled;
+ }
+
+private:
+ boost::scoped_ptr<Algorithm> _algorithm;
+ boost::scoped_ptr<ScatterGatherRunner> _runner;
+ bool _isCanceled;
+};
}
}
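The class comments above pin down a calling contract that is easy to misread: start() and cancel() must run in the executor's context (the unit tests schedule them through ReplicationExecutor::scheduleWork()), and getReceivedVotes() is only safe after the returned event has been signaled. A hedged usage sketch under those assumptions follows; the includes and the runElection helper are illustrative rather than part of the source tree, and the scheduleWork() plumbing and error handling are elided.

#include <vector>

#include "mongo/db/repl/elect_cmd_runner.h"
#include "mongo/db/repl/replica_set_config.h"
#include "mongo/db/repl/replication_executor.h"

namespace mongo {
namespace repl {

int runElection(ReplicationExecutor* executor,
                const ReplicaSetConfig& config,
                int selfIndex,
                const std::vector<HostAndPort>& targets) {
    ElectCmdRunner runner;
    // In real code this call is made from inside an executor callback.
    StatusWith<ReplicationExecutor::EventHandle> evh =
        runner.start(executor, config, selfIndex, targets);
    if (!evh.getStatus().isOK())
        return -1;  // e.g., the executor is shutting down
    executor->waitForEvent(evh.getValue());
    // The event has been signaled, so the tally can no longer change.
    return runner.isCanceled() ? -1 : runner.getReceivedVotes();
}

}  // namespace repl
}  // namespace mongo

A caller would compare the returned tally against ReplicaSetConfig::getMajorityVoteCount() to decide whether the election carried.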
diff --git a/src/mongo/db/repl/elect_cmd_runner_test.cpp b/src/mongo/db/repl/elect_cmd_runner_test.cpp
index 983e39a3b1b..ae8118d4c92 100644
--- a/src/mongo/db/repl/elect_cmd_runner_test.cpp
+++ b/src/mongo/db/repl/elect_cmd_runner_test.cpp
@@ -48,375 +48,366 @@ namespace mongo {
namespace repl {
namespace {
- typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
-
- class ElectCmdRunnerTest : public mongo::unittest::Test {
- public:
- void startTest(ElectCmdRunner* electCmdRunner,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
-
- void waitForTest();
-
- void electCmdRunnerRunner(const ReplicationExecutor::CallbackData& data,
- ElectCmdRunner* electCmdRunner,
- StatusWith<ReplicationExecutor::EventHandle>* evh,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
-
- NetworkInterfaceMock* _net;
- boost::scoped_ptr<ReplicationExecutor> _executor;
- boost::scoped_ptr<boost::thread> _executorThread;
-
- private:
- void setUp();
- void tearDown();
-
- ReplicationExecutor::EventHandle _allDoneEvent;
- };
-
- void ElectCmdRunnerTest::setUp() {
- _net = new NetworkInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, 1 /* prng seed */));
- _executorThread.reset(new boost::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
- }
-
- void ElectCmdRunnerTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
+
+class ElectCmdRunnerTest : public mongo::unittest::Test {
+public:
+ void startTest(ElectCmdRunner* electCmdRunner,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+
+ void waitForTest();
+
+ void electCmdRunnerRunner(const ReplicationExecutor::CallbackData& data,
+ ElectCmdRunner* electCmdRunner,
+ StatusWith<ReplicationExecutor::EventHandle>* evh,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+
+ NetworkInterfaceMock* _net;
+ boost::scoped_ptr<ReplicationExecutor> _executor;
+ boost::scoped_ptr<boost::thread> _executorThread;
+
+private:
+ void setUp();
+ void tearDown();
+
+ ReplicationExecutor::EventHandle _allDoneEvent;
+};
+
+void ElectCmdRunnerTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, 1 /* prng seed */));
+ _executorThread.reset(
+ new boost::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+}
+
+void ElectCmdRunnerTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+
+const BSONObj makeElectRequest(const ReplicaSetConfig& rsConfig, int selfIndex) {
+ const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
+ return BSON("replSetElect" << 1 << "set" << rsConfig.getReplSetName() << "who"
+ << myConfig.getHostAndPort().toString() << "whoid"
+ << myConfig.getId() << "cfgver" << rsConfig.getConfigVersion()
+ << "round" << 380865962699346850ll);
+}
+
+BSONObj stripRound(const BSONObj& orig) {
+ BSONObjBuilder builder;
+ for (BSONObjIterator iter(orig); iter.more(); iter.next()) {
+ BSONElement e = *iter;
+ if (e.fieldNameStringData() == "round") {
+ continue;
+ }
+ builder.append(e);
}
+ return builder.obj();
+}
+
+// This helper exists because ElectCmdRunner::start() must be scheduled in the
+// ReplicationExecutor so that it runs under the executor's concurrency guarantees.
+void ElectCmdRunnerTest::electCmdRunnerRunner(const ReplicationExecutor::CallbackData& data,
+ ElectCmdRunner* electCmdRunner,
+ StatusWith<ReplicationExecutor::EventHandle>* evh,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ invariant(data.status.isOK());
+ *evh = electCmdRunner->start(data.executor, currentConfig, selfIndex, hosts);
+}
+
+void ElectCmdRunnerTest::startTest(ElectCmdRunner* electCmdRunner,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
+ StatusWith<ReplicationExecutor::CallbackHandle> cbh =
+ _executor->scheduleWork(stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
+ this,
+ stdx::placeholders::_1,
+ electCmdRunner,
+ &evh,
+ currentConfig,
+ selfIndex,
+ hosts));
+ ASSERT_OK(cbh.getStatus());
+ _executor->wait(cbh.getValue());
+ ASSERT_OK(evh.getStatus());
+ _allDoneEvent = evh.getValue();
+}
+
+void ElectCmdRunnerTest::waitForTest() {
+ _executor->waitForEvent(_allDoneEvent);
+}
+
+TEST_F(ElectCmdRunnerTest, OneNode) {
+ // Only one node in the config.
+ const ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ ElectCmdRunner electCmdRunner;
+ startTest(&electCmdRunner, config, 0, hosts);
+ waitForTest();
+ ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+}
+
+TEST_F(ElectCmdRunnerTest, TwoNodes) {
+ // Two nodes, we are node h1.
+ const ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj electRequest = makeElectRequest(config, 0);
+
+ ElectCmdRunner electCmdRunner;
+ startTest(&electCmdRunner, config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(stripRound(electRequest), stripRound(noi->getRequest().cmdObj));
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1 << "vote" << 1 << "round" << 380865962699346850ll), Milliseconds(8))));
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitForTest();
+ ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 2);
+}
+
+TEST_F(ElectCmdRunnerTest, ShuttingDown) {
+ // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ ElectCmdRunner electCmdRunner;
+ StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
+ StatusWith<ReplicationExecutor::CallbackHandle> cbh =
+ _executor->scheduleWork(stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
+ this,
+ stdx::placeholders::_1,
+ &electCmdRunner,
+ &evh,
+ config,
+ 0,
+ hosts));
+ ASSERT_OK(cbh.getStatus());
+ _executor->wait(cbh.getValue());
+ ASSERT_OK(evh.getStatus());
+ _executor->shutdown();
+ _executor->waitForEvent(evh.getValue());
+ ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+}
+
+class ElectScatterGatherTest : public mongo::unittest::Test {
+public:
+ virtual void start(const BSONObj& configObj) {
+ int selfConfigIndex = 0;
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
- }
+ config.initialize(configObj);
- const BSONObj makeElectRequest(const ReplicaSetConfig& rsConfig,
- int selfIndex) {
- const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
- return BSON("replSetElect" << 1 <<
- "set" << rsConfig.getReplSetName() <<
- "who" << myConfig.getHostAndPort().toString() <<
- "whoid" << myConfig.getId() <<
- "cfgver" << rsConfig.getConfigVersion() <<
- "round" << 380865962699346850ll);
- }
-
- BSONObj stripRound(const BSONObj& orig) {
- BSONObjBuilder builder;
- for (BSONObjIterator iter(orig); iter.more(); iter.next()) {
- BSONElement e = *iter;
- if (e.fieldNameStringData() == "round") {
- continue;
- }
- builder.append(e);
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
+ mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- return builder.obj();
- }
- // This helper exists because ElectCmdRunner::start() must be scheduled in the
- // ReplicationExecutor so that it runs under the executor's concurrency guarantees.
- void ElectCmdRunnerTest::electCmdRunnerRunner(
- const ReplicationExecutor::CallbackData& data,
- ElectCmdRunner* electCmdRunner,
- StatusWith<ReplicationExecutor::EventHandle>* evh,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
-
- invariant(data.status.isOK());
- *evh = electCmdRunner->start(
- data.executor,
- currentConfig,
- selfIndex,
- hosts);
+ _checker.reset(new ElectCmdRunner::Algorithm(config, selfConfigIndex, hosts, OID()));
}
- void ElectCmdRunnerTest::startTest(ElectCmdRunner* electCmdRunner,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
-
- StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
- StatusWith<ReplicationExecutor::CallbackHandle> cbh =
- _executor->scheduleWork(
- stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
- this,
- stdx::placeholders::_1,
- electCmdRunner,
- &evh,
- currentConfig,
- selfIndex,
- hosts));
- ASSERT_OK(cbh.getStatus());
- _executor->wait(cbh.getValue());
- ASSERT_OK(evh.getStatus());
- _allDoneEvent = evh.getValue();
+ virtual void tearDown() {
+ _checker.reset(NULL);
}
- void ElectCmdRunnerTest::waitForTest() {
- _executor->waitForEvent(_allDoneEvent);
+protected:
+ bool hasReceivedSufficientResponses() {
+ return _checker->hasReceivedSufficientResponses();
}
- TEST_F(ElectCmdRunnerTest, OneNode) {
- // Only one node in the config.
- const ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- ElectCmdRunner electCmdRunner;
- startTest(&electCmdRunner, config, 0, hosts);
- waitForTest();
- ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+ int getReceivedVotes() {
+ return _checker->getReceivedVotes();
}
- TEST_F(ElectCmdRunnerTest, TwoNodes) {
- // Two nodes, we are node h1.
- const ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj electRequest = makeElectRequest(config, 0);
-
- ElectCmdRunner electCmdRunner;
- startTest(&electCmdRunner, config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(stripRound(electRequest), stripRound(noi->getRequest().cmdObj));
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1 <<
- "vote" << 1 <<
- "round" << 380865962699346850ll),
- Milliseconds(8))));
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitForTest();
- ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 2);
+ void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
+ _checker->processResponse(request, response);
}
- TEST_F(ElectCmdRunnerTest, ShuttingDown) {
- // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- ElectCmdRunner electCmdRunner;
- StatusWith<ReplicationExecutor::EventHandle> evh(ErrorCodes::InternalError, "Not set");
- StatusWith<ReplicationExecutor::CallbackHandle> cbh =
- _executor->scheduleWork(
- stdx::bind(&ElectCmdRunnerTest::electCmdRunnerRunner,
- this,
- stdx::placeholders::_1,
- &electCmdRunner,
- &evh,
- config,
- 0,
- hosts));
- ASSERT_OK(cbh.getStatus());
- _executor->wait(cbh.getValue());
- ASSERT_OK(evh.getStatus());
- _executor->shutdown();
- _executor->waitForEvent(evh.getValue());
- ASSERT_EQUALS(electCmdRunner.getReceivedVotes(), 1);
+ RemoteCommandRequest requestFrom(std::string hostname) {
+ return RemoteCommandRequest(HostAndPort(hostname),
+ "", // the non-hostname fields do not matter for Elect
+ BSONObj(),
+ Milliseconds(0));
}
- class ElectScatterGatherTest : public mongo::unittest::Test {
- public:
- virtual void start(const BSONObj& configObj) {
- int selfConfigIndex = 0;
-
- ReplicaSetConfig config;
- config.initialize(configObj);
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
-
- _checker.reset(new ElectCmdRunner::Algorithm(config,
- selfConfigIndex,
- hosts,
- OID()));
- }
-
- virtual void tearDown() {
- _checker.reset(NULL);
- }
-
- protected:
- bool hasReceivedSufficientResponses() {
- return _checker->hasReceivedSufficientResponses();
- }
-
- int getReceivedVotes() {
- return _checker->getReceivedVotes();
- }
-
- void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
- _checker->processResponse(request, response);
- }
-
- RemoteCommandRequest requestFrom(std::string hostname) {
- return RemoteCommandRequest(HostAndPort(hostname),
- "", // the non-hostname fields do not matter for Elect
- BSONObj(),
- Milliseconds(0));
- }
-
- ResponseStatus badResponseStatus() {
- return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
- }
-
- ResponseStatus wrongTypeForVoteField() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << std::string("yea")),
- Milliseconds(10)));
- }
-
- ResponseStatus voteYea() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 1),
- Milliseconds(10)));
- }
-
- ResponseStatus voteNay() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << -10000),
- Milliseconds(10)));
- }
-
- ResponseStatus abstainFromVoting() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 0),
- Milliseconds(10)));
- }
-
- BSONObj threeNodesTwoArbitersConfig() {
- return BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1" << "arbiterOnly" << true) <<
- BSON("_id" << 2 << "host" << "host2" << "arbiterOnly" << true)));
- }
-
- BSONObj basicThreeNodeConfig() {
- return BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1") <<
- BSON("_id" << 2 << "host" << "host2")));
- }
-
- private:
- scoped_ptr<ElectCmdRunner::Algorithm> _checker;
- };
-
- TEST_F(ElectScatterGatherTest, NodeRespondsWithBadVoteType) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), wrongTypeForVoteField());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, getReceivedVotes()); // 1 because we have 1 vote and voted for ourself
+ ResponseStatus badResponseStatus() {
+ return ResponseStatus(ErrorCodes::NodeNotFound, "not on my watch");
}
- TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatus) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host3"), abstainFromVoting());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, getReceivedVotes()); // 1 because we have 1 vote and voted for ourselves
+ ResponseStatus wrongTypeForVoteField() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("vote" << std::string("yea")), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYea) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteYea());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(2, getReceivedVotes());
+ ResponseStatus voteYea() {
+ return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 1), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYea) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteNay());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(-9999, getReceivedVotes());
+ ResponseStatus voteNay() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("vote" << -10000), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, BothNodesAbstainFromVoting) {
- start(basicThreeNodeConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), abstainFromVoting());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host3"), abstainFromVoting());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, getReceivedVotes());
+ ResponseStatus abstainFromVoting() {
+ return ResponseStatus(NetworkInterfaceMock::Response(BSON("vote" << 0), Milliseconds(10)));
}
- TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatusArbiters) {
- start(threeNodesTwoArbitersConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), badResponseStatus());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host3"), abstainFromVoting());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(1, getReceivedVotes()); // 1 because we have 1 vote and voted for ourselves
+ BSONObj threeNodesTwoArbitersConfig() {
+ return BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "host2"
+ << "arbiterOnly" << true)));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYeaArbiters) {
- start(threeNodesTwoArbitersConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteYea());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(2, getReceivedVotes());
+ BSONObj basicThreeNodeConfig() {
+ return BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1") << BSON("_id" << 2 << "host"
+ << "host2")));
}
- TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYeaArbiters) {
- start(threeNodesTwoArbitersConfig());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), voteNay());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(-9999, getReceivedVotes());
- }
+private:
+ scoped_ptr<ElectCmdRunner::Algorithm> _checker;
+};
+
+TEST_F(ElectScatterGatherTest, NodeRespondsWithBadVoteType) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), wrongTypeForVoteField());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, getReceivedVotes());  // 1 because we have 1 vote and voted for ourselves
+}
+
+TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatus) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host3"), abstainFromVoting());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, getReceivedVotes());  // 1 because we have 1 vote and voted for ourselves
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYea) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteYea());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(2, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYea) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteNay());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(-9999, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, BothNodesAbstainFromVoting) {
+ start(basicThreeNodeConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), abstainFromVoting());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host3"), abstainFromVoting());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, NodeRespondsWithBadStatusArbiters) {
+ start(threeNodesTwoArbitersConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), badResponseStatus());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host3"), abstainFromVoting());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(1, getReceivedVotes());  // 1 because we have 1 vote and voted for ourselves
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithYeaArbiters) {
+ start(threeNodesTwoArbitersConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteYea());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(2, getReceivedVotes());
+}
+
+TEST_F(ElectScatterGatherTest, FirstNodeRespondsWithNaySecondWithYeaArbiters) {
+ start(threeNodesTwoArbitersConfig());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), voteNay());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(-9999, getReceivedVotes());
+}
} // namespace
} // namespace repl
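One detail of the tests above deserves a note: the outgoing replSetElect command carries a freshly generated "round" OID, so TwoNodes can only compare request documents after stripRound() removes that nondeterministic field from both sides. The same trick in standalone form, with std::map standing in for BSONObj purely for illustration:

#include <cassert>
#include <map>
#include <string>

using Doc = std::map<std::string, std::string>;

// Drop a nondeterministic field so two command documents compare equal.
Doc stripField(Doc doc, const std::string& name) {
    doc.erase(name);
    return doc;
}

int main() {
    Doc expected{{"replSetElect", "1"}, {"set", "rs0"}, {"round", "380865962699346850"}};
    Doc actual{{"replSetElect", "1"}, {"set", "rs0"}, {"round", "123"}};
    assert(stripField(expected, "round") == stripField(actual, "round"));
    return 0;
}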
diff --git a/src/mongo/db/repl/freshness_checker.cpp b/src/mongo/db/repl/freshness_checker.cpp
index 62e514c6793..54a020280e9 100644
--- a/src/mongo/db/repl/freshness_checker.cpp
+++ b/src/mongo/db/repl/freshness_checker.cpp
@@ -46,192 +46,179 @@
namespace mongo {
namespace repl {
- FreshnessChecker::Algorithm::Algorithm(
- OpTime lastOpTimeApplied,
- const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets) :
- _responsesProcessed(0),
- _failedVoterResponses(0),
- _lastOpTimeApplied(lastOpTimeApplied),
- _rsConfig(rsConfig),
- _selfIndex(selfIndex),
- _targets(targets),
- _votingTargets(0),
- _losableVoters(0),
- _myVote(0),
- _abortReason(None) {
-
- // Count voting targets (since the targets could be a subset of members).
- for (std::vector<HostAndPort>::const_iterator it = _targets.begin();
- it != _targets.end();
- ++it) {
- const MemberConfig* member = _rsConfig.findMemberByHostAndPort(*it);
- if (member && member->isVoter())
- ++_votingTargets;
- }
-
- _myVote = _rsConfig.getMemberAt(_selfIndex).isVoter() ? 1 : 0;
- _losableVoters = std::max(0,
- ((_votingTargets + _myVote) - _rsConfig.getMajorityVoteCount()));
-
- }
-
- FreshnessChecker::Algorithm::~Algorithm() {}
-
- std::vector<ReplicationExecutor::RemoteCommandRequest>
- FreshnessChecker::Algorithm::getRequests() const {
- const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
-
- // Gather all nodes that are not DOWN and get their full names (HostAndPort values);
- // schedule a replSetFresh command for each node.
- BSONObjBuilder freshCmdBuilder;
- freshCmdBuilder.append("replSetFresh", 1);
- freshCmdBuilder.append("set", _rsConfig.getReplSetName());
- freshCmdBuilder.append("opTime", Date_t(_lastOpTimeApplied.asDate()));
- freshCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
- freshCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
- freshCmdBuilder.append("id", selfConfig.getId());
- const BSONObj replSetFreshCmd = freshCmdBuilder.obj();
-
- std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
- for (std::vector<HostAndPort>::const_iterator it = _targets.begin();
- it != _targets.end();
- ++it) {
- invariant(*it != selfConfig.getHostAndPort());
- requests.push_back(ReplicationExecutor::RemoteCommandRequest(
- *it,
- "admin",
- replSetFreshCmd,
- Milliseconds(30*1000))); // trying to match current Socket timeout
- }
-
- return requests;
+FreshnessChecker::Algorithm::Algorithm(OpTime lastOpTimeApplied,
+ const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets)
+ : _responsesProcessed(0),
+ _failedVoterResponses(0),
+ _lastOpTimeApplied(lastOpTimeApplied),
+ _rsConfig(rsConfig),
+ _selfIndex(selfIndex),
+ _targets(targets),
+ _votingTargets(0),
+ _losableVoters(0),
+ _myVote(0),
+ _abortReason(None) {
+ // Count voting targets (since the targets could be a subset of members).
+ for (std::vector<HostAndPort>::const_iterator it = _targets.begin(); it != _targets.end();
+ ++it) {
+ const MemberConfig* member = _rsConfig.findMemberByHostAndPort(*it);
+ if (member && member->isVoter())
+ ++_votingTargets;
}
- bool FreshnessChecker::Algorithm::hadTooManyFailedVoterResponses() const {
- const bool tooManyLostVoters = (_failedVoterResponses > _losableVoters);
-
- LOG(3) << "hadTooManyFailedVoterResponses(" << tooManyLostVoters << ") = "
- << _failedVoterResponses << " failed responses <"
- << " (" << _votingTargets << " total voters - "
- << _rsConfig.getMajorityVoteCount() << " majority voters - me ("
- << _myVote << ")) -- losableVotes: " << _losableVoters;
- return tooManyLostVoters;
+ _myVote = _rsConfig.getMemberAt(_selfIndex).isVoter() ? 1 : 0;
+ _losableVoters = std::max(0, ((_votingTargets + _myVote) - _rsConfig.getMajorityVoteCount()));
+}
+
+FreshnessChecker::Algorithm::~Algorithm() {}
+
+std::vector<ReplicationExecutor::RemoteCommandRequest> FreshnessChecker::Algorithm::getRequests()
+ const {
+ const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
+
+ // Gather all nodes that are not DOWN and get their full names (HostAndPort values);
+ // schedule a replSetFresh command for each node.
+ BSONObjBuilder freshCmdBuilder;
+ freshCmdBuilder.append("replSetFresh", 1);
+ freshCmdBuilder.append("set", _rsConfig.getReplSetName());
+ freshCmdBuilder.append("opTime", Date_t(_lastOpTimeApplied.asDate()));
+ freshCmdBuilder.append("who", selfConfig.getHostAndPort().toString());
+ freshCmdBuilder.appendIntOrLL("cfgver", _rsConfig.getConfigVersion());
+ freshCmdBuilder.append("id", selfConfig.getId());
+ const BSONObj replSetFreshCmd = freshCmdBuilder.obj();
+
+ std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
+ for (std::vector<HostAndPort>::const_iterator it = _targets.begin(); it != _targets.end();
+ ++it) {
+ invariant(*it != selfConfig.getHostAndPort());
+ requests.push_back(ReplicationExecutor::RemoteCommandRequest(
+ *it,
+ "admin",
+ replSetFreshCmd,
+ Milliseconds(30 * 1000))); // trying to match current Socket timeout
}
- bool FreshnessChecker::Algorithm::_isVotingMember(const HostAndPort hap) const {
- const MemberConfig* member = _rsConfig.findMemberByHostAndPort(hap);
- invariant(member);
- return member->isVoter();
- }
-
- void FreshnessChecker::Algorithm::processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) {
- ++_responsesProcessed;
- bool votingMember = _isVotingMember(request.target);
-
- Status status = Status::OK();
-
- if (!response.isOK() ||
- !((status = getStatusFromCommandResult(response.getValue().data)).isOK())) {
- if (votingMember) {
- ++_failedVoterResponses;
- if (hadTooManyFailedVoterResponses()) {
- _abortReason = QuorumUnreachable;
- }
- }
- if (!response.isOK()) { // network/executor error
- LOG(2) << "FreshnessChecker: Got failed response from " << request.target;
- }
- else { // command error, like unauth
- LOG(2) << "FreshnessChecker: Got error response from " << request.target
- << " :" << status;
+ return requests;
+}
+
+bool FreshnessChecker::Algorithm::hadTooManyFailedVoterResponses() const {
+ const bool tooManyLostVoters = (_failedVoterResponses > _losableVoters);
+
+ LOG(3) << "hadTooManyFailedVoterResponses(" << tooManyLostVoters
+ << ") = " << _failedVoterResponses << " failed responses <"
+ << " (" << _votingTargets << " total voters - " << _rsConfig.getMajorityVoteCount()
+ << " majority voters - me (" << _myVote << ")) -- losableVotes: " << _losableVoters;
+ return tooManyLostVoters;
+}
+
+bool FreshnessChecker::Algorithm::_isVotingMember(const HostAndPort hap) const {
+ const MemberConfig* member = _rsConfig.findMemberByHostAndPort(hap);
+ invariant(member);
+ return member->isVoter();
+}
+
+void FreshnessChecker::Algorithm::processResponse(
+ const ReplicationExecutor::RemoteCommandRequest& request, const ResponseStatus& response) {
+ ++_responsesProcessed;
+ bool votingMember = _isVotingMember(request.target);
+
+ Status status = Status::OK();
+
+ if (!response.isOK() ||
+ !((status = getStatusFromCommandResult(response.getValue().data)).isOK())) {
+ if (votingMember) {
+ ++_failedVoterResponses;
+ if (hadTooManyFailedVoterResponses()) {
+ _abortReason = QuorumUnreachable;
}
- return;
}
-
- const BSONObj res = response.getValue().data;
-
- LOG(2) << "FreshnessChecker: Got response from " << request.target
- << " of " << res;
-
- if (res["fresher"].trueValue()) {
- log() << "not electing self, we are not freshest";
- _abortReason = FresherNodeFound;
- return;
- }
-
- if (res["opTime"].type() != mongo::Date) {
- error() << "wrong type for opTime argument in replSetFresh response: " <<
- typeName(res["opTime"].type());
- _abortReason = FresherNodeFound;
- return;
- }
- OpTime remoteTime(res["opTime"].date());
- if (remoteTime == _lastOpTimeApplied) {
- _abortReason = FreshnessTie;
- }
- if (remoteTime > _lastOpTimeApplied) {
- // A remote opTime newer than ours means something is really wrong (a rogue command?)
- _abortReason = FresherNodeFound;
- return;
- }
-
- if (res["veto"].trueValue()) {
- BSONElement msg = res["errmsg"];
- if (msg.type() == String) {
- log() << "not electing self, " << request.target.toString() <<
- " would veto with '" << msg.String() << "'";
- }
- else {
- log() << "not electing self, " << request.target.toString() <<
- " would veto";
- }
- _abortReason = FresherNodeFound;
- return;
+ if (!response.isOK()) { // network/executor error
+ LOG(2) << "FreshnessChecker: Got failed response from " << request.target;
+ } else { // command error, like unauth
+ LOG(2) << "FreshnessChecker: Got error response from " << request.target << " :"
+ << status;
}
+ return;
}
- bool FreshnessChecker::Algorithm::hasReceivedSufficientResponses() const {
- return (_abortReason != None && _abortReason != FreshnessTie) ||
- (_responsesProcessed == static_cast<int>(_targets.size()));
- }
+ const BSONObj res = response.getValue().data;
- FreshnessChecker::ElectionAbortReason FreshnessChecker::Algorithm::shouldAbortElection() const {
- return _abortReason;
- }
+ LOG(2) << "FreshnessChecker: Got response from " << request.target << " of " << res;
- FreshnessChecker::ElectionAbortReason FreshnessChecker::shouldAbortElection() const {
- return _algorithm->shouldAbortElection();
+ if (res["fresher"].trueValue()) {
+ log() << "not electing self, we are not freshest";
+ _abortReason = FresherNodeFound;
+ return;
}
- long long FreshnessChecker::getOriginalConfigVersion() const {
- return _originalConfigVersion;
+ if (res["opTime"].type() != mongo::Date) {
+ error() << "wrong type for opTime argument in replSetFresh response: "
+ << typeName(res["opTime"].type());
+ _abortReason = FresherNodeFound;
+ return;
}
-
- FreshnessChecker::FreshnessChecker() : _isCanceled(false) {}
- FreshnessChecker::~FreshnessChecker() {}
-
- StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
- ReplicationExecutor* executor,
- const OpTime& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion) {
-
- _originalConfigVersion = currentConfig.getConfigVersion();
- _algorithm.reset(new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets));
- _runner.reset(new ScatterGatherRunner(_algorithm.get()));
- return _runner->start(executor, onCompletion);
+ OpTime remoteTime(res["opTime"].date());
+ if (remoteTime == _lastOpTimeApplied) {
+ _abortReason = FreshnessTie;
}
-
- void FreshnessChecker::cancel(ReplicationExecutor* executor) {
- _isCanceled = true;
- _runner->cancel(executor);
+ if (remoteTime > _lastOpTimeApplied) {
+ // something really wrong (rogue command?)
+ _abortReason = FresherNodeFound;
+ return;
}
-} // namespace repl
-} // namespace mongo
+ if (res["veto"].trueValue()) {
+ BSONElement msg = res["errmsg"];
+ if (msg.type() == String) {
+ log() << "not electing self, " << request.target.toString() << " would veto with '"
+ << msg.String() << "'";
+ } else {
+ log() << "not electing self, " << request.target.toString() << " would veto";
+ }
+ _abortReason = FresherNodeFound;
+ return;
+ }
+}
+
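+ // Termination rule for the scatter-gather run: stop early only on a decisive
+ // abort reason (anything other than None or FreshnessTie); a tie by itself
+ // still waits for every targeted node to respond.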
+bool FreshnessChecker::Algorithm::hasReceivedSufficientResponses() const {
+ return (_abortReason != None && _abortReason != FreshnessTie) ||
+ (_responsesProcessed == static_cast<int>(_targets.size()));
+}
+
+FreshnessChecker::ElectionAbortReason FreshnessChecker::Algorithm::shouldAbortElection() const {
+ return _abortReason;
+}
+
+FreshnessChecker::ElectionAbortReason FreshnessChecker::shouldAbortElection() const {
+ return _algorithm->shouldAbortElection();
+}
+
+long long FreshnessChecker::getOriginalConfigVersion() const {
+ return _originalConfigVersion;
+}
+
+FreshnessChecker::FreshnessChecker() : _isCanceled(false) {}
+FreshnessChecker::~FreshnessChecker() {}
+
+StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
+ ReplicationExecutor* executor,
+ const OpTime& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion) {
+ _originalConfigVersion = currentConfig.getConfigVersion();
+ _algorithm.reset(new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets));
+ _runner.reset(new ScatterGatherRunner(_algorithm.get()));
+ return _runner->start(executor, onCompletion);
+}
+
+void FreshnessChecker::cancel(ReplicationExecutor* executor) {
+ _isCanceled = true;
+ _runner->cancel(executor);
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/freshness_checker.h b/src/mongo/db/repl/freshness_checker.h
index 71f82cb86d3..fdb95765959 100644
--- a/src/mongo/db/repl/freshness_checker.h
+++ b/src/mongo/db/repl/freshness_checker.h
@@ -39,127 +39,128 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- class ReplicaSetConfig;
- class ScatterGatherRunner;
+class ReplicaSetConfig;
+class ScatterGatherRunner;
- class FreshnessChecker {
- MONGO_DISALLOW_COPYING(FreshnessChecker);
+class FreshnessChecker {
+ MONGO_DISALLOW_COPYING(FreshnessChecker);
+
+public:
+ enum ElectionAbortReason {
+ None = 0,
+ FresherNodeFound, // Freshness check found fresher node
+ FreshnessTie, // Freshness check resulted in one or more nodes with our lastAppliedOpTime
+ QuorumUnavailable, // Not enough up voters
+ QuorumUnreachable // Too many failed voter responses
+ };
+
+ class Algorithm : public ScatterGatherAlgorithm {
public:
- enum ElectionAbortReason {
- None = 0,
- FresherNodeFound, // Freshness check found fresher node
- FreshnessTie, // Freshness check resulted in one or more nodes with our lastAppliedOpTime
- QuorumUnavailable, // Not enough up voters
- QuorumUnreachable // Too many failed voter responses
- };
-
- class Algorithm : public ScatterGatherAlgorithm {
- public:
- Algorithm(OpTime lastOpTimeApplied,
- const ReplicaSetConfig& rsConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets);
- virtual ~Algorithm();
- virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const;
- virtual void processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response);
- virtual bool hasReceivedSufficientResponses() const;
- ElectionAbortReason shouldAbortElection() const;
-
- private:
- // Returns true if the number of failed votes is over _losableVotes()
- bool hadTooManyFailedVoterResponses() const;
-
- // Returns true if the member, by host and port, has a vote.
- bool _isVotingMember(const HostAndPort host) const;
-
- // Number of responses received so far.
- int _responsesProcessed;
-
- // Number of failed voter responses so far.
- int _failedVoterResponses;
-
- // Last OpTime applied by the caller; used in the Fresh command
- const OpTime _lastOpTimeApplied;
-
- // Config to use for this check
- const ReplicaSetConfig _rsConfig;
-
- // Our index position in _rsConfig
- const int _selfIndex;
-
- // The UP members we are checking
- const std::vector<HostAndPort> _targets;
-
- // Number of voting targets
- int _votingTargets;
-
- // Number of voting nodes which can error
- int _losableVoters;
-
- // 1 if I have a vote, otherwise 0
- int _myVote;
-
- // Reason to abort, start with None
- ElectionAbortReason _abortReason;
-
- };
-
- FreshnessChecker();
- virtual ~FreshnessChecker();
-
- /**
- * Begins the process of sending replSetFresh commands to all non-DOWN nodes
- * in currentConfig, with the intention of determining whether the current node
- * is freshest.
- * evh can be used to schedule a callback when the process is complete.
- * This function must be run in the executor, as it must be synchronous with the command
- * callbacks that it schedules.
- * If this function returns Status::OK(), evh is then guaranteed to be signaled.
- **/
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const OpTime& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& targets,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
-
- /**
- * Informs the freshness checker to cancel further processing. The "executor"
- * argument must point to the same executor passed to "start()".
- *
- * Like start(), this method must run in the executor context.
- */
- void cancel(ReplicationExecutor* executor);
-
- /**
- * Returns true if cancel() was called on this instance.
- */
- bool isCanceled() const { return _isCanceled; }
-
- /**
- * 'None' if the election should continue, otherwise the reason to abort
- */
+ Algorithm(OpTime lastOpTimeApplied,
+ const ReplicaSetConfig& rsConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets);
+ virtual ~Algorithm();
+ virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const;
+ virtual void processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response);
+ virtual bool hasReceivedSufficientResponses() const;
ElectionAbortReason shouldAbortElection() const;
- /**
- * Returns the config version supplied in the config when start() was called.
- * Useful for determining if the the config version has changed.
- */
- long long getOriginalConfigVersion() const;
-
private:
- boost::scoped_ptr<Algorithm> _algorithm;
- boost::scoped_ptr<ScatterGatherRunner> _runner;
- long long _originalConfigVersion;
- bool _isCanceled;
+ // Returns true if the number of failed voter responses exceeds _losableVoters.
+ bool hadTooManyFailedVoterResponses() const;
+
+ // Returns true if the member, by host and port, has a vote.
+ bool _isVotingMember(const HostAndPort host) const;
+
+ // Number of responses received so far.
+ int _responsesProcessed;
+
+ // Number of failed voter responses so far.
+ int _failedVoterResponses;
+
+ // Last OpTime applied by the caller; used in the Fresh command
+ const OpTime _lastOpTimeApplied;
+
+ // Config to use for this check
+ const ReplicaSetConfig _rsConfig;
+
+ // Our index position in _rsConfig
+ const int _selfIndex;
+
+ // The UP members we are checking
+ const std::vector<HostAndPort> _targets;
+
+ // Number of voting targets
+ int _votingTargets;
+
+ // Number of voting nodes which can error
+ int _losableVoters;
+
+ // 1 if I have a vote, otherwise 0
+ int _myVote;
+
+ // Reason to abort, start with None
+ ElectionAbortReason _abortReason;
};
+ FreshnessChecker();
+ virtual ~FreshnessChecker();
+
+ /**
+ * Begins the process of sending replSetFresh commands to all non-DOWN nodes
+ * in currentConfig, with the intention of determining whether the current node
+ * is freshest.
+ * evh can be used to schedule a callback when the process is complete.
+ * This function must be run in the executor, as it must be synchronous with the command
+ * callbacks that it schedules.
+ * If this function returns Status::OK(), evh is then guaranteed to be signaled.
+ **/
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const OpTime& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& targets,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
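+
+ /**
+  * A minimal usage sketch, assuming illustrative caller names ('checker',
+  * 'executor', 'lastOpTimeApplied', 'config', 'selfIndex', 'targets'):
+  * start() itself must run from within the executor, e.g. scheduled via
+  * scheduleWork() as the test fixture does, and the returned event handle
+  * can then be waited on:
+  *
+  *   StatusWith<ReplicationExecutor::EventHandle> evh = checker.start(
+  *       executor, lastOpTimeApplied, config, selfIndex, targets);
+  *   if (evh.isOK()) {
+  *       executor->waitForEvent(evh.getValue());
+  *   }
+  */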
+
+ /**
+ * Informs the freshness checker to cancel further processing. The "executor"
+ * argument must point to the same executor passed to "start()".
+ *
+ * Like start(), this method must run in the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+ /**
+ * Returns true if cancel() was called on this instance.
+ */
+ bool isCanceled() const {
+ return _isCanceled;
+ }
+
+ /**
+ * 'None' if the election should continue, otherwise the reason to abort
+ */
+ ElectionAbortReason shouldAbortElection() const;
+
+ /**
+ * Returns the config version supplied in the config when start() was called.
+ * Useful for determining if the config version has changed.
+ */
+ long long getOriginalConfigVersion() const;
+
+private:
+ boost::scoped_ptr<Algorithm> _algorithm;
+ boost::scoped_ptr<ScatterGatherRunner> _runner;
+ long long _originalConfigVersion;
+ bool _isCanceled;
+};
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/freshness_checker_test.cpp b/src/mongo/db/repl/freshness_checker_test.cpp
index 362b4746606..36d8f00ca1f 100644
--- a/src/mongo/db/repl/freshness_checker_test.cpp
+++ b/src/mongo/db/repl/freshness_checker_test.cpp
@@ -49,1029 +49,987 @@ namespace mongo {
namespace repl {
namespace {
- using unittest::assertGet;
-
- typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
-
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
+using unittest::assertGet;
+
+typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
+
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+class FreshnessCheckerTest : public mongo::unittest::Test {
+protected:
+ void startTest(const OpTime& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+ void waitOnChecker();
+ FreshnessChecker::ElectionAbortReason shouldAbortElection() const;
+
+ int64_t countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
}
- class FreshnessCheckerTest : public mongo::unittest::Test {
- protected:
- void startTest(const OpTime& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
- void waitOnChecker();
- FreshnessChecker::ElectionAbortReason shouldAbortElection() const;
-
- int64_t countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
-
- NetworkInterfaceMock* _net;
- boost::scoped_ptr<ReplicationExecutor> _executor;
- boost::scoped_ptr<boost::thread> _executorThread;
-
- private:
- void freshnessCheckerRunner(const ReplicationExecutor::CallbackData& data,
- const OpTime& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts);
- void setUp();
- void tearDown();
-
- boost::scoped_ptr<FreshnessChecker> _checker;
- ReplicationExecutor::EventHandle _checkerDoneEvent;
- };
-
- void FreshnessCheckerTest::setUp() {
- _net = new NetworkInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, 1 /* prng seed */));
- _executorThread.reset(new boost::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
- _checker.reset(new FreshnessChecker);
+ NetworkInterfaceMock* _net;
+ boost::scoped_ptr<ReplicationExecutor> _executor;
+ boost::scoped_ptr<boost::thread> _executorThread;
+
+private:
+ void freshnessCheckerRunner(const ReplicationExecutor::CallbackData& data,
+ const OpTime& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts);
+ void setUp();
+ void tearDown();
+
+ boost::scoped_ptr<FreshnessChecker> _checker;
+ ReplicationExecutor::EventHandle _checkerDoneEvent;
+};
+
+void FreshnessCheckerTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, 1 /* prng seed */));
+ _executorThread.reset(
+ new boost::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+ _checker.reset(new FreshnessChecker);
+}
+
+void FreshnessCheckerTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+void FreshnessCheckerTest::waitOnChecker() {
+ _executor->waitForEvent(_checkerDoneEvent);
+}
+
+FreshnessChecker::ElectionAbortReason FreshnessCheckerTest::shouldAbortElection() const {
+ return _checker->shouldAbortElection();
+}
+
+ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
+
+const BSONObj makeFreshRequest(const ReplicaSetConfig& rsConfig,
+ OpTime lastOpTimeApplied,
+ int selfIndex) {
+ const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
+ return BSON("replSetFresh" << 1 << "set" << rsConfig.getReplSetName() << "opTime"
+ << Date_t(lastOpTimeApplied.asDate()) << "who"
+ << myConfig.getHostAndPort().toString() << "cfgver"
+ << rsConfig.getConfigVersion() << "id" << myConfig.getId());
+}
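+
+// Illustrative shape of the command object built above -- for a set named "rs0"
+// at config version 1, self member {_id: 1, host: "h0"} and a zero opTime, the
+// request would be:
+//
+//   { replSetFresh: 1, set: "rs0", opTime: Date(0), who: "h0:27017",
+//     cfgver: 1, id: 1 }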
+
+// This helper is necessary because FreshnessChecker::start() must be scheduled in the
+// ReplicationExecutor so that it runs synchronously with the command callbacks it schedules.
+void FreshnessCheckerTest::freshnessCheckerRunner(const ReplicationExecutor::CallbackData& data,
+ const OpTime& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ invariant(data.status.isOK());
+ StatusWith<ReplicationExecutor::EventHandle> evh =
+ _checker->start(data.executor, lastOpTimeApplied, currentConfig, selfIndex, hosts);
+ _checkerDoneEvent = assertGet(evh);
+}
+
+void FreshnessCheckerTest::startTest(const OpTime& lastOpTimeApplied,
+ const ReplicaSetConfig& currentConfig,
+ int selfIndex,
+ const std::vector<HostAndPort>& hosts) {
+ _executor->wait(
+ assertGet(_executor->scheduleWork(stdx::bind(&FreshnessCheckerTest::freshnessCheckerRunner,
+ this,
+ stdx::placeholders::_1,
+ lastOpTimeApplied,
+ currentConfig,
+ selfIndex,
+ hosts))));
+}
+
+TEST_F(FreshnessCheckerTest, TwoNodes) {
+ // Two nodes, we are node h0. We are freshest, but we tie with h1.
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(0, 0), 0);
+
+ startTest(OpTime(0, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "opTime" << Date_t(OpTime(0, 0).asDate())),
+ Milliseconds(8))));
}
-
- void FreshnessCheckerTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
+}
+
+TEST_F(FreshnessCheckerTest, ShuttingDown) {
+ // Two nodes, we are node h0. Shutdown happens while we're scheduling remote commands.
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ startTest(OpTime(0, 0), config, 0, hosts);
+ _executor->shutdown();
+ waitOnChecker();
+
+ // This seems less than ideal, but if we are shutting down, the next phase of election
+ // cannot proceed anyway.
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::None);
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshest) {
+ // other responds as fresher than us
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "fresher" << true << "opTime"
+ << Date_t(OpTime(0, 0).asDate())),
+ Milliseconds(8))));
}
-
- void FreshnessCheckerTest::waitOnChecker() {
- _executor->waitForEvent(_checkerDoneEvent);
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTime) {
+ // other responds with a later optime than ours
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(0, 0), 0);
+
+ startTest(OpTime(0, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(
+ noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "opTime" << Date_t(OpTime(10, 0).asDate())),
+ Milliseconds(8))));
}
-
- FreshnessChecker::ElectionAbortReason FreshnessCheckerTest::shouldAbortElection() const {
- return _checker->shouldAbortElection();
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
+
+TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponse) {
+ // other responds with "opTime" field of non-Date value, causing not freshest
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "opTime" << 3),
+ Milliseconds(8))));
}
-
- ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "wrong type for opTime argument in replSetFresh "
+ "response: NumberInt32"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectVetoed) {
+ // other responds with veto
+ startCapturingLogMessages();
+ ReplicaSetConfig config = assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1"))));
+
+ std::vector<HostAndPort> hosts;
+ hosts.push_back(config.getMemberAt(1).getHostAndPort());
+
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1 << "id" << 2 << "set"
+ << "rs0"
+ << "who"
+ << "h1"
+ << "cfgver" << 1 << "veto" << true << "errmsg"
+ << "I'd rather you didn't"
+ << "opTime" << Date_t(OpTime(0, 0).asDate())),
+ Milliseconds(8))));
}
-
- const BSONObj makeFreshRequest(const ReplicaSetConfig& rsConfig,
- OpTime lastOpTimeApplied,
- int selfIndex) {
- const MemberConfig& myConfig = rsConfig.getMemberAt(selfIndex);
- return BSON("replSetFresh" << 1 <<
- "set" << rsConfig.getReplSetName() <<
- "opTime" << Date_t(lastOpTimeApplied.asDate()) <<
- "who" << myConfig.getHostAndPort().toString() <<
- "cfgver" << rsConfig.getConfigVersion() <<
- "id" << myConfig.getId());
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+
+ stopCapturingLogMessages();
+
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "not electing self, h1:27017 would veto with "
+ "'I'd rather you didn't'"));
+}
+
+int findIdForMember(const ReplicaSetConfig& rsConfig, const HostAndPort& host) {
+ const MemberConfig* member = rsConfig.findMemberByHostAndPort(host);
+ ASSERT_TRUE(member != NULL) << "No host named " << host.toString() << " in config";
+ return member->getId();
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestManyNodes) {
+ // one other responds as fresher than us
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- // This is necessary because the run method must be scheduled in the Replication Executor
- // for correct concurrency operation.
- void FreshnessCheckerTest::freshnessCheckerRunner(
- const ReplicationExecutor::CallbackData& data,
- const OpTime& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
-
- invariant(data.status.isOK());
- StatusWith<ReplicationExecutor::EventHandle> evh = _checker->start(data.executor,
- lastOpTimeApplied,
- currentConfig,
- selfIndex,
- hosts);
- _checkerDoneEvent = assertGet(evh);
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t(OpTime(0, 0).asDate());
+ if (target.host() == "h1") {
+ responseBuilder << "fresher" << true;
+ }
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
}
-
- void FreshnessCheckerTest::startTest(const OpTime& lastOpTimeApplied,
- const ReplicaSetConfig& currentConfig,
- int selfIndex,
- const std::vector<HostAndPort>& hosts) {
- _executor->wait(
- assertGet(
- _executor->scheduleWork(
- stdx::bind(&FreshnessCheckerTest::freshnessCheckerRunner,
- this,
- stdx::placeholders::_1,
- lastOpTimeApplied,
- currentConfig,
- selfIndex,
- hosts))));
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTimeManyNodes) {
+ // one other responds with a later optime than ours
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ if (HostAndPort("h0") == mem->getHostAndPort()) {
+ continue;
+ }
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, TwoNodes) {
- // Two nodes, we are node h1. We are freshest, but we tie with h2.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(0,0), 0);
-
- startTest(OpTime(0, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(0,0).asDate())),
- Milliseconds(8))));
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ if (target.host() == "h4") {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t(OpTime(20, 0).asDate());
+ _net->scheduleResponse(noi,
+ startDate + 20,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
+ } else {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t(OpTime(10, 0).asDate());
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
}
-
- TEST_F(FreshnessCheckerTest, ShuttingDown) {
- // Two nodes, we are node h1. Shutdown happens while we're scheduling remote commands.
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- startTest(
- OpTime(0, 0),
- config,
- 0,
- hosts);
- _executor->shutdown();
- waitOnChecker();
-
- // This seems less than ideal, but if we are shutting down, the next phase of election
- // cannot proceed anyway.
- ASSERT_EQUALS(shouldAbortElection(),FreshnessChecker::None);
-
+ _net->runUntil(startDate + 10);
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ ASSERT_EQUALS(0, countLogLinesContaining("not electing self, we are not freshest"));
+ _net->runUntil(startDate + 20);
+ ASSERT_EQUALS(startDate + 20, _net->now());
+ _net->exitNetwork();
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
+
+TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponseManyNodes) {
+ // one other responds with "opTime" field of non-Date value, causing not freshest
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshest) {
- // other responds as fresher than us
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "fresher" << true <<
- "opTime" << Date_t(OpTime(0,0).asDate())),
- Milliseconds(8))));
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1;
+ if (target.host() == "h1") {
+ responseBuilder << "opTime" << 3;
+ } else {
+ responseBuilder << "opTime" << Date_t(OpTime(0, 0).asDate());
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
+ }
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "wrong type for opTime argument in replSetFresh "
+ "response: NumberInt32"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectVetoedManyNodes) {
+ // one other responds with veto
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTime) {
- // other responds with a later optime than ours
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(0,0), 0);
-
- startTest(OpTime(0, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(10,0).asDate())),
- Milliseconds(8))));
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t(OpTime(0, 0).asDate());
+ if (target.host() == "h1") {
+ responseBuilder << "veto" << true << "errmsg"
+ << "I'd rather you didn't";
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
}
-
- TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponse) {
- // other responds with "opTime" field of non-Date value, causing not freshest
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "opTime" << 3),
- Milliseconds(8))));
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "not electing self, h1:27017 would veto with "
+ "'I'd rather you didn't'"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectVetoedAndTiedFreshnessManyNodes) {
+ // one other responds with veto and another responds with tie
+ startCapturingLogMessages();
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ if (HostAndPort("h0") == mem->getHostAndPort()) {
+ continue;
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
-
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("wrong type for opTime argument in replSetFresh "
- "response: NumberInt32"));
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectVetoed) {
- // other responds with veto
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1"))));
-
- std::vector<HostAndPort> hosts;
- hosts.push_back(config.getMemberAt(1).getHostAndPort());
-
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT_EQUALS(HostAndPort("h1"), noi->getRequest().target);
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1 <<
- "id" << 2 <<
- "set" << "rs0" <<
- "who" << "h1" <<
- "cfgver" << 1 <<
- "veto" << true <<
- "errmsg" << "I'd rather you didn't" <<
- "opTime" << Date_t(OpTime(0,0).asDate())),
- Milliseconds(8))));
+ const BSONObj freshRequest = makeFreshRequest(config, OpTime(10, 0), 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ BSONObjBuilder responseBuilder;
+ if (target.host() == "h4") {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "veto" << true
+ << "errmsg"
+ << "I'd rather you didn't"
+ << "opTime" << Date_t(OpTime(10, 0).asDate());
+ _net->scheduleResponse(noi,
+ startDate + 20,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
+ } else {
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t(OpTime(10, 0).asDate());
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
}
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
-
- stopCapturingLogMessages();
-
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, h1:27017 would veto with "
- "'I'd rather you didn't'"));
}
-
- int findIdForMember(const ReplicaSetConfig& rsConfig, const HostAndPort& host) {
- const MemberConfig* member = rsConfig.findMemberByHostAndPort(host);
- ASSERT_TRUE(member != NULL) << "No host named " << host.toString() << " in config";
- return member->getId();
+ _net->runUntil(startDate + 10);
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ ASSERT_EQUALS(0,
+ countLogLinesContaining(
+ "not electing self, h4:27017 would veto with '"
+ "errmsg: \"I'd rather you didn't\"'"));
+ _net->runUntil(startDate + 20);
+ ASSERT_EQUALS(startDate + 20, _net->now());
+ _net->exitNetwork();
+ waitOnChecker();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "not electing self, h4:27017 would veto with "
+ "'I'd rather you didn't'"));
+}
+
+TEST_F(FreshnessCheckerTest, ElectManyNodesNotAllRespond) {
+ ReplicaSetConfig config =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h0")
+ << BSON("_id" << 2 << "host"
+ << "h1") << BSON("_id" << 3 << "host"
+ << "h2")
+ << BSON("_id" << 4 << "host"
+ << "h3") << BSON("_id" << 5 << "host"
+ << "h4"))));
+
+ std::vector<HostAndPort> hosts;
+ for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin(); mem != config.membersEnd();
+ ++mem) {
+ hosts.push_back(mem->getHostAndPort());
}
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestManyNodes) {
- // one other responds as fresher than us
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
-
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(0,0).asDate());
- if (target.host() == "h1") {
- responseBuilder << "fresher" << true;
- }
+ const OpTime lastOpTimeApplied(10, 0);
+ const BSONObj freshRequest = makeFreshRequest(config, lastOpTimeApplied, 0);
+
+ startTest(OpTime(10, 0), config, 0, hosts);
+ const Date_t startDate = _net->now();
+ unordered_set<HostAndPort> seen;
+ _net->enterNetwork();
+ for (size_t i = 0; i < hosts.size(); ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const HostAndPort target = noi->getRequest().target;
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
+ ASSERT(seen.insert(target).second) << "Already saw " << target;
+ if (target.host() == "h2" || target.host() == "h3") {
_net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, we are not freshest"));
- }
-
- TEST_F(FreshnessCheckerTest, ElectNotElectingSelfWeAreNotFreshestOpTimeManyNodes) {
- // one other responds with a later optime than ours
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- if (HostAndPort("h0") == mem->getHostAndPort()) {
- continue;
- }
- hosts.push_back(mem->getHostAndPort());
- }
-
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
-
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
+ noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
+ } else {
BSONObjBuilder responseBuilder;
- if (target.host() == "h4") {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(20,0).asDate());
- _net->scheduleResponse(
- noi,
- startDate + 20,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- else {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(10,0).asDate());
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
+ responseBuilder << "ok" << 1 << "id" << findIdForMember(config, target) << "set"
+ << "rs0"
+ << "who" << target.toString() << "cfgver" << 1 << "opTime"
+ << Date_t(OpTime(0, 0).asDate());
+ _net->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ responseBuilder.obj(), Milliseconds(8))));
}
- _net->runUntil(startDate + 10);
- ASSERT_EQUALS(startDate + 10, _net->now());
- ASSERT_EQUALS(0, countLogLinesContaining("not electing self, we are not freshest"));
- _net->runUntil(startDate + 20);
- ASSERT_EQUALS(startDate + 20, _net->now());
- _net->exitNetwork();
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
}
+ _net->runUntil(startDate + 10);
+ _net->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, _net->now());
+ waitOnChecker();
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::None);
+}
+
+class FreshnessScatterGatherTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ int selfConfigIndex = 0;
+ OpTime lastOpTimeApplied(100, 0);
- TEST_F(FreshnessCheckerTest, ElectWrongTypeInFreshnessResponseManyNodes) {
- // one other responds with "opTime" field of non-Date value, causing not freshest
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0")
+ << BSON("_id" << 1 << "host"
+ << "host1") << BSON("_id" << 2 << "host"
+ << "host2"))));
std::vector<HostAndPort> hosts;
for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
+ mem != config.membersEnd();
+ ++mem) {
hosts.push_back(mem->getHostAndPort());
}
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1;
- if (target.host() == "h1") {
- responseBuilder << "opTime" << 3;
- }
- else {
- responseBuilder << "opTime" << Date_t(OpTime(0,0).asDate());
- }
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("wrong type for opTime argument in replSetFresh "
- "response: NumberInt32"));
+ _checker.reset(
+ new FreshnessChecker::Algorithm(lastOpTimeApplied, config, selfConfigIndex, hosts));
}
- TEST_F(FreshnessCheckerTest, ElectVetoedManyNodes) {
- // one other responds with veto
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
+ virtual void tearDown() {
+ _checker.reset(NULL);
+ }
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
+protected:
+ bool hasReceivedSufficientResponses() {
+ return _checker->hasReceivedSufficientResponses();
+ }
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(0,0).asDate());
- if (target.host() == "h1") {
- responseBuilder << "veto" << true << "errmsg" << "I'd rather you didn't";
- }
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, h1:27017 would veto with "
- "'I'd rather you didn't'"));
+ void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
+ _checker->processResponse(request, response);
}
- TEST_F(FreshnessCheckerTest, ElectVetoedAndTiedFreshnessManyNodes) {
- // one other responds with veto and another responds with tie
- startCapturingLogMessages();
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- if (HostAndPort("h0") == mem->getHostAndPort()) {
- continue;
- }
- hosts.push_back(mem->getHostAndPort());
- }
+ FreshnessChecker::ElectionAbortReason shouldAbortElection() const {
+ return _checker->shouldAbortElection();
+ }
- const BSONObj freshRequest = makeFreshRequest(config, OpTime(10,0), 0);
+ ResponseStatus lessFresh() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.appendDate("opTime", OpTime(10, 0).asDate());
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
+ ResponseStatus moreFreshViaOpTime() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.appendDate("opTime", OpTime(110, 0).asDate());
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- BSONObjBuilder responseBuilder;
- if (target.host() == "h4") {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "veto" << true <<
- "errmsg" << "I'd rather you didn't" <<
- "opTime" << Date_t(OpTime(10,0).asDate());
- _net->scheduleResponse(
- noi,
- startDate + 20,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- else {
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(10,0).asDate());
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + 10);
- ASSERT_EQUALS(startDate + 10, _net->now());
- ASSERT_EQUALS(0, countLogLinesContaining("not electing self, h4:27017 would veto with '"
- "errmsg: \"I'd rather you didn't\"'"));
- _net->runUntil(startDate + 20);
- ASSERT_EQUALS(startDate + 20, _net->now());
- _net->exitNetwork();
- waitOnChecker();
- stopCapturingLogMessages();
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- ASSERT_EQUALS(1, countLogLinesContaining("not electing self, h4:27017 would veto with "
- "'I'd rather you didn't'"));
+ ResponseStatus wrongTypeForOpTime() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.append("opTime", std::string("several minutes ago"));
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
}
- TEST_F(FreshnessCheckerTest, ElectManyNodesNotAllRespond) {
- ReplicaSetConfig config = assertMakeRSConfig(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h0") <<
- BSON("_id" << 2 << "host" << "h1") <<
- BSON("_id" << 3 << "host" << "h2") <<
- BSON("_id" << 4 << "host" << "h3") <<
- BSON("_id" << 5 << "host" << "h4"))));
+ ResponseStatus unauthorized() {
+ BSONObjBuilder bb;
+ bb.append("ok", 0.0);
+ bb.append("code", ErrorCodes::Unauthorized);
+ bb.append("errmsg", "Unauthorized");
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
+ ResponseStatus tiedForFreshness() {
+ BSONObjBuilder bb;
+ bb.append("ok", 1.0);
+ bb.appendDate("opTime", OpTime(100, 0).asDate());
+ return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
+ }
- const OpTime lastOpTimeApplied(10,0);
- const BSONObj freshRequest = makeFreshRequest(config, lastOpTimeApplied, 0);
-
- startTest(OpTime(10, 0), config, 0, hosts);
- const Date_t startDate = _net->now();
- unordered_set<HostAndPort> seen;
- _net->enterNetwork();
- for (size_t i = 0; i < hosts.size(); ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
- const HostAndPort target = noi->getRequest().target;
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(freshRequest, noi->getRequest().cmdObj);
- ASSERT(seen.insert(target).second) << "Already saw " << target;
- if (target.host() == "h2" || target.host() == "h3") {
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
- }
- else {
- BSONObjBuilder responseBuilder;
- responseBuilder <<
- "ok" << 1 <<
- "id" << findIdForMember(config, target) <<
- "set" << "rs0" <<
- "who" << target.toString() <<
- "cfgver" << 1 <<
- "opTime" << Date_t(OpTime(0,0).asDate());
- _net->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- responseBuilder.obj(),
- Milliseconds(8))));
- }
- }
- _net->runUntil(startDate + 10);
- _net->exitNetwork();
- ASSERT_EQUALS(startDate + 10, _net->now());
- waitOnChecker();
- ASSERT_EQUALS(shouldAbortElection(),FreshnessChecker::None);
+ ResponseStatus moreFresh() {
+ return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1.0 << "fresher" << true),
+ Milliseconds(10)));
}
- class FreshnessScatterGatherTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- int selfConfigIndex = 0;
- OpTime lastOpTimeApplied(100, 0);
-
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host0") <<
- BSON("_id" << 1 << "host" << "host1") <<
- BSON("_id" << 2 << "host" << "host2"))));
-
- std::vector<HostAndPort> hosts;
- for (ReplicaSetConfig::MemberIterator mem = ++config.membersBegin();
- mem != config.membersEnd();
- ++mem) {
- hosts.push_back(mem->getHostAndPort());
- }
-
- _checker.reset(new FreshnessChecker::Algorithm(lastOpTimeApplied,
- config,
- selfConfigIndex,
- hosts));
+ ResponseStatus veto() {
+ return ResponseStatus(
+ NetworkInterfaceMock::Response(BSON("ok" << 1.0 << "veto" << true << "errmsg"
+ << "vetoed!"),
+ Milliseconds(10)));
+ }
- }
+ RemoteCommandRequest requestFrom(std::string hostname) {
+ return RemoteCommandRequest(HostAndPort(hostname),
+ "", // the non-hostname fields do not matter in Freshness
+ BSONObj(),
+ Milliseconds(0));
+ }
- virtual void tearDown() {
- _checker.reset(NULL);
- }
+private:
+ scoped_ptr<FreshnessChecker::Algorithm> _checker;
+};
- protected:
- bool hasReceivedSufficientResponses() {
- return _checker->hasReceivedSufficientResponses();
- }
+TEST_F(FreshnessScatterGatherTest, BothNodesLessFresh) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- void processResponse(const RemoteCommandRequest& request, const ResponseStatus& response) {
- _checker->processResponse(request, response);
- }
+ processResponse(requestFrom("host1"), lessFresh());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- FreshnessChecker::ElectionAbortReason shouldAbortElection() const {
- return _checker->shouldAbortElection();
- }
+ processResponse(requestFrom("host2"), lessFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::None);
+}
- ResponseStatus lessFresh() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.appendDate("opTime", OpTime(10, 0).asDate());
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeFresher) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus moreFreshViaOpTime() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.appendDate("opTime", OpTime(110, 0).asDate());
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), moreFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- ResponseStatus wrongTypeForOpTime() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.append("opTime", std::string("several minutes ago"));
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeFresherViaOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus unauthorized() {
- BSONObjBuilder bb;
- bb.append("ok", 0.0);
- bb.append("code", ErrorCodes::Unauthorized);
- bb.append("errmsg", "Unauthorized");
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), moreFreshViaOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- ResponseStatus tiedForFreshness() {
- BSONObjBuilder bb;
- bb.append("ok", 1.0);
- bb.appendDate("opTime", OpTime(100, 0).asDate());
- return ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeVetoes) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- ResponseStatus moreFresh() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1.0 <<
- "fresher" << true),
- Milliseconds(10)));
- }
+ processResponse(requestFrom("host1"), veto());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- ResponseStatus veto() {
- return ResponseStatus(NetworkInterfaceMock::Response(BSON("ok" << 1.0 <<
- "veto" << true <<
- "errmsg" << "vetoed!"),
- Milliseconds(10)));
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- RemoteCommandRequest requestFrom(std::string hostname) {
- return RemoteCommandRequest(HostAndPort(hostname),
- "", // the non-hostname fields do not matter in Freshness
- BSONObj(),
- Milliseconds(0));
- }
- private:
- scoped_ptr<FreshnessChecker::Algorithm> _checker;
- };
-
- TEST_F(FreshnessScatterGatherTest, BothNodesLessFresh) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), lessFresh());
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host2"), lessFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(),FreshnessChecker::None);
- }
+ processResponse(requestFrom("host1"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeFresher) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), moreFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedForFreshness) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeFresherViaOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), moreFreshViaOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeVetoes) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), veto());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), lessFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresher) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedForFreshness) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), lessFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FreshnessTie);
- }
+ processResponse(requestFrom("host2"), moreFresh());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresher) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresherViaOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), moreFresh());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondFresherViaOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), moreFreshViaOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- processResponse(requestFrom("host2"), moreFreshViaOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondVetoes) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondVetoes) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), veto());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host2"), veto());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
-
- processResponse(requestFrom("host1"), tiedForFreshness());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, FirstNodeTiedAndSecondWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), tiedForFreshness());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, FirstNodeLessFreshAndSecondWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- processResponse(requestFrom("host1"), lessFresh());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, FirstNodeLessFreshAndSecondWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+ processResponse(requestFrom("host1"), lessFresh());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, SecondNodeTiedAndFirstWrongTypeForOpTime) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- processResponse(requestFrom("host2"), wrongTypeForOpTime());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
- }
+TEST_F(FreshnessScatterGatherTest, SecondNodeTiedAndFirstWrongTypeForOpTime) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueNetworkErrors) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), wrongTypeForOpTime());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::FresherNodeFound);
+}
- processResponse(requestFrom("host1"),
- ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueNetworkErrors) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"),
- ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
- }
+ processResponse(requestFrom("host1"), ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueToUnauthorized) {
- ASSERT_FALSE(hasReceivedSufficientResponses());
+ processResponse(requestFrom("host2"), ResponseStatus(Status(ErrorCodes::NetworkTimeout, "")));
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
+}
- processResponse(requestFrom("host1"), unauthorized());
- ASSERT_FALSE(hasReceivedSufficientResponses());
+TEST_F(FreshnessScatterGatherTest, NotEnoughVotersDueToUnauthorized) {
+ ASSERT_FALSE(hasReceivedSufficientResponses());
- processResponse(requestFrom("host2"), unauthorized());
- ASSERT_TRUE(hasReceivedSufficientResponses());
- ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
- }
+ processResponse(requestFrom("host1"), unauthorized());
+ ASSERT_FALSE(hasReceivedSufficientResponses());
+
+ processResponse(requestFrom("host2"), unauthorized());
+ ASSERT_TRUE(hasReceivedSufficientResponses());
+ ASSERT_EQUALS(shouldAbortElection(), FreshnessChecker::QuorumUnreachable);
+}
} // namespace
} // namespace repl
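
The FreshnessScatterGatherTest fixture above shows the pattern these tests rely on: drive FreshnessChecker::Algorithm directly, with no mock network, by handing it synthetic responses and polling its verdict. A minimal sketch of that drive loop, assuming the same test headers and mock types the fixture uses (host names and optime values are illustrative):

    // Build the algorithm the same way FreshnessScatterGatherTest::setUp does.
    ReplicaSetConfig config;
    config.initialize(BSON("_id" << "rs0" << "version" << 1 << "members"
                                 << BSON_ARRAY(BSON("_id" << 0 << "host" << "host0")
                                               << BSON("_id" << 1 << "host" << "host1")
                                               << BSON("_id" << 2 << "host" << "host2"))));
    std::vector<HostAndPort> targets;
    targets.push_back(HostAndPort("host1"));
    targets.push_back(HostAndPort("host2"));
    FreshnessChecker::Algorithm checker(OpTime(100, 0), config, 0 /* self index */, targets);

    // Hand the checker one synthetic reply; it decides when it has heard enough.
    BSONObjBuilder bb;
    bb.append("ok", 1.0);
    bb.appendDate("opTime", OpTime(10, 0).asDate());  // less fresh than us
    checker.processResponse(
        RemoteCommandRequest(HostAndPort("host1"), "", BSONObj(), Milliseconds(0)),
        ResponseStatus(NetworkInterfaceMock::Response(bb.obj(), Milliseconds(10))));
    if (checker.hasReceivedSufficientResponses()) {
        // FreshnessChecker::None means nothing blocks the election.
        FreshnessChecker::ElectionAbortReason reason = checker.shouldAbortElection();
    }
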
diff --git a/src/mongo/db/repl/handshake_args.cpp b/src/mongo/db/repl/handshake_args.cpp
index db815ee3aa2..2ceae3df86e 100644
--- a/src/mongo/db/repl/handshake_args.cpp
+++ b/src/mongo/db/repl/handshake_args.cpp
@@ -40,75 +40,65 @@ namespace repl {
namespace {
- const std::string kRIDFieldName = "handshake";
- // TODO(danneberg) remove after 3.0 since this field is only allowed for backwards compatibility
- const std::string kOldMemberConfigFieldName = "config";
- const std::string kMemberIdFieldName = "member";
-
- const std::string kLegalHandshakeFieldNames[] = {
- kRIDFieldName,
- kOldMemberConfigFieldName,
- kMemberIdFieldName
- };
-
-} // namespace
-
- HandshakeArgs::HandshakeArgs() :
- _hasRid(false),
- _hasMemberId(false),
- _rid(OID()),
- _memberId(-1) {}
-
- Status HandshakeArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("HandshakeArgs",
- argsObj,
- kLegalHandshakeFieldNames);
- if (!status.isOK())
+const std::string kRIDFieldName = "handshake";
+// TODO(danneberg) remove after 3.0 since this field is only allowed for backwards compatibility
+const std::string kOldMemberConfigFieldName = "config";
+const std::string kMemberIdFieldName = "member";
+
+const std::string kLegalHandshakeFieldNames[] = {
+ kRIDFieldName, kOldMemberConfigFieldName, kMemberIdFieldName};
+
+} // namespace
+
+HandshakeArgs::HandshakeArgs() : _hasRid(false), _hasMemberId(false), _rid(OID()), _memberId(-1) {}
+
+Status HandshakeArgs::initialize(const BSONObj& argsObj) {
+ Status status = bsonCheckOnlyHasFields("HandshakeArgs", argsObj, kLegalHandshakeFieldNames);
+ if (!status.isOK())
+ return status;
+
+ BSONElement oid;
+ status = bsonExtractTypedField(argsObj, kRIDFieldName, jstOID, &oid);
+ if (!status.isOK())
+ return status;
+ _rid = oid.OID();
+ _hasRid = true;
+
+ status = bsonExtractIntegerField(argsObj, kMemberIdFieldName, &_memberId);
+ if (!status.isOK()) {
+ // this field is not necessary for master/slave, so do not return a NoSuchKey error
+ if (status != ErrorCodes::NoSuchKey) {
return status;
-
- BSONElement oid;
- status = bsonExtractTypedField(argsObj, kRIDFieldName, jstOID, &oid);
- if (!status.isOK())
- return status;
- _rid = oid.OID();
- _hasRid = true;
-
- status = bsonExtractIntegerField(argsObj, kMemberIdFieldName, &_memberId);
- if (!status.isOK()) {
- // field not necessary for master slave, do not return NoSuchKey Error
- if (status != ErrorCodes::NoSuchKey) {
- return status;
- }
- _memberId = -1;
- }
- else {
- _hasMemberId = true;
}
-
- return Status::OK();
- }
-
- bool HandshakeArgs::isInitialized() const {
- return _hasRid;
- }
-
- void HandshakeArgs::setRid(const OID& newVal) {
- _rid = newVal;
- _hasRid = true;
- }
-
- void HandshakeArgs::setMemberId(long long newVal) {
- _memberId = newVal;
+ _memberId = -1;
+ } else {
_hasMemberId = true;
}
- BSONObj HandshakeArgs::toBSON() const {
- invariant(isInitialized());
- BSONObjBuilder builder;
- builder.append(kRIDFieldName, _rid);
- builder.append(kMemberIdFieldName, _memberId);
- return builder.obj();
- }
+ return Status::OK();
+}
+
+bool HandshakeArgs::isInitialized() const {
+ return _hasRid;
+}
+
+void HandshakeArgs::setRid(const OID& newVal) {
+ _rid = newVal;
+ _hasRid = true;
+}
+
+void HandshakeArgs::setMemberId(long long newVal) {
+ _memberId = newVal;
+ _hasMemberId = true;
+}
+
+BSONObj HandshakeArgs::toBSON() const {
+ invariant(isInitialized());
+ BSONObjBuilder builder;
+ builder.append(kRIDFieldName, _rid);
+ builder.append(kMemberIdFieldName, _memberId);
+ return builder.obj();
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/handshake_args.h b/src/mongo/db/repl/handshake_args.h
index b0d442aaaf6..b83bef87842 100644
--- a/src/mongo/db/repl/handshake_args.h
+++ b/src/mongo/db/repl/handshake_args.h
@@ -32,64 +32,72 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
+/**
+ * Arguments to the handshake command.
+ */
+class HandshakeArgs {
+public:
+ HandshakeArgs();
+
+ /**
+ * Initializes this HandshakeArgs from the contents of args.
+ */
+ Status initialize(const BSONObj& argsObj);
+
+ /**
+ * Returns true if all required fields have been initialized.
+ */
+ bool isInitialized() const;
+
+ /**
+ * Gets the _id of the sender in their ReplSetConfig.
+ */
+ long long getMemberId() const {
+ return _memberId;
+ }
+
/**
- * Arguments to the handshake command.
+ * Gets the unique identifier of the sender, which is used to track replication progress.
*/
- class HandshakeArgs {
- public:
- HandshakeArgs();
-
- /**
- * Initializes this HandshakeArgs from the contents of args.
- */
- Status initialize(const BSONObj& argsObj);
-
- /**
- * Returns true if all required fields have been initialized.
- */
- bool isInitialized() const;
-
- /**
- * Gets the _id of the sender in their ReplSetConfig.
- */
- long long getMemberId() const { return _memberId; }
-
- /**
- * Gets the unique identifier of the sender, which is used to track replication progress.
- */
- OID getRid() const { return _rid; }
-
- /**
- * The below methods check whether or not value in the method name has been set.
- */
- bool hasRid() { return _hasRid; };
- bool hasMemberId() { return _hasMemberId; };
-
- /**
- * The below methods set the value in the method name to 'newVal'.
- */
- void setRid(const OID& newVal);
- void setMemberId(long long newVal);
-
- /**
- * Returns a BSONified version of the object.
- * Should only be called if the mandatory fields have been set.
- * Optional fields are only included if they have been set.
- */
- BSONObj toBSON() const;
-
- private:
- bool _hasRid;
- bool _hasMemberId;
-
- // look at the body of the isInitialized() function to see which fields are mandatory
- OID _rid;
- long long _memberId;
+ OID getRid() const {
+ return _rid;
+ }
+
+ /**
+ * The below methods check whether or not the value in the method name has been set.
+ */
+ bool hasRid() {
+ return _hasRid;
};
+ bool hasMemberId() {
+ return _hasMemberId;
+ };
+
+ /**
+ * The below methods set the value in the method name to 'newVal'.
+ */
+ void setRid(const OID& newVal);
+ void setMemberId(long long newVal);
+
+ /**
+ * Returns a BSONified version of the object.
+ * Should only be called if the mandatory fields have been set.
+ * Optional fields are only included if they have been set.
+ */
+ BSONObj toBSON() const;
+
+private:
+ bool _hasRid;
+ bool _hasMemberId;
+
+ // look at the body of the isInitialized() function to see which fields are mandatory
+ OID _rid;
+ long long _memberId;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
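
Taken with the implementation above, HandshakeArgs is a plain BSON round-trip: initialize() demands the "handshake" OID, treats "member" as optional, and toBSON() re-emits whatever was set. A minimal sketch of that round-trip, assuming the usual mongo BSON headers (the field values are illustrative):

    HandshakeArgs args;
    Status status = args.initialize(BSON("handshake" << OID::gen() << "member" << 3));
    invariant(status.isOK());
    invariant(args.isInitialized());  // true once the rid has been parsed
    invariant(args.hasMemberId());    // "member" was present, so this is set

    // Re-emits the mandatory rid plus the member id:
    // { handshake: ObjectId(...), member: 3 }
    BSONObj doc = args.toBSON();
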
diff --git a/src/mongo/db/repl/heartbeat_response_action.cpp b/src/mongo/db/repl/heartbeat_response_action.cpp
index 4f26bc2953e..8ea8a1c4819 100644
--- a/src/mongo/db/repl/heartbeat_response_action.cpp
+++ b/src/mongo/db/repl/heartbeat_response_action.cpp
@@ -33,45 +33,42 @@
namespace mongo {
namespace repl {
- HeartbeatResponseAction HeartbeatResponseAction::makeNoAction() {
- return HeartbeatResponseAction();
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeNoAction() {
+ return HeartbeatResponseAction();
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeReconfigAction() {
- HeartbeatResponseAction result;
- result._action = Reconfig;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeReconfigAction() {
+ HeartbeatResponseAction result;
+ result._action = Reconfig;
+ return result;
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeElectAction() {
- HeartbeatResponseAction result;
- result._action = StartElection;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeElectAction() {
+ HeartbeatResponseAction result;
+ result._action = StartElection;
+ return result;
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeStepDownSelfAction(int primaryIndex) {
- HeartbeatResponseAction result;
- result._action = StepDownSelf;
- result._primaryIndex = primaryIndex;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeStepDownSelfAction(int primaryIndex) {
+ HeartbeatResponseAction result;
+ result._action = StepDownSelf;
+ result._primaryIndex = primaryIndex;
+ return result;
+}
- HeartbeatResponseAction HeartbeatResponseAction::makeStepDownRemoteAction(int primaryIndex) {
- HeartbeatResponseAction result;
- result._action = StepDownRemotePrimary;
- result._primaryIndex = primaryIndex;
- return result;
- }
+HeartbeatResponseAction HeartbeatResponseAction::makeStepDownRemoteAction(int primaryIndex) {
+ HeartbeatResponseAction result;
+ result._action = StepDownRemotePrimary;
+ result._primaryIndex = primaryIndex;
+ return result;
+}
- HeartbeatResponseAction::HeartbeatResponseAction() :
- _action(NoAction),
- _primaryIndex(-1),
- _nextHeartbeatStartDate(0) {
- }
+HeartbeatResponseAction::HeartbeatResponseAction()
+ : _action(NoAction), _primaryIndex(-1), _nextHeartbeatStartDate(0) {}
- void HeartbeatResponseAction::setNextHeartbeatStartDate(Date_t when) {
- _nextHeartbeatStartDate = when;
- }
+void HeartbeatResponseAction::setNextHeartbeatStartDate(Date_t when) {
+ _nextHeartbeatStartDate = when;
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/heartbeat_response_action.h b/src/mongo/db/repl/heartbeat_response_action.h
index 55c2d459920..f45b3668a91 100644
--- a/src/mongo/db/repl/heartbeat_response_action.h
+++ b/src/mongo/db/repl/heartbeat_response_action.h
@@ -33,88 +33,88 @@
namespace mongo {
namespace repl {
+/**
+ * Description of actions taken in response to a heartbeat.
+ *
+ * This includes when to schedule the next heartbeat to a target, and any other actions to
+ * take, such as scheduling an election or stepping down as primary.
+ */
+class HeartbeatResponseAction {
+public:
+ /**
+ * Actions taken based on heartbeat responses
+ */
+ enum Action { NoAction, Reconfig, StartElection, StepDownSelf, StepDownRemotePrimary };
+
+ /**
+ * Makes a new action representing doing nothing.
+ */
+ static HeartbeatResponseAction makeNoAction();
+
+ /**
+ * Makes a new action representing the instruction to reconfigure the current node.
+ */
+ static HeartbeatResponseAction makeReconfigAction();
+
+ /**
+ * Makes a new action telling the current node to attempt to elect itself primary.
+ */
+ static HeartbeatResponseAction makeElectAction();
+
+ /**
+ * Makes a new action telling the current node to step down as primary.
+ *
+ * It is an error to call this with primaryIndex != the index of the current node.
+ */
+ static HeartbeatResponseAction makeStepDownSelfAction(int primaryIndex);
+
/**
- * Description of actions taken in response to a heartbeat.
+ * Makes a new action telling the current node to ask the specified remote node to step
+ * down as primary.
*
- * This includes when to schedule the next heartbeat to a target, and any other actions to
- * take, such as scheduling an election or stepping down as primary.
+ * It is an error to call this with primaryIndex == the index of the current node.
+ */
+ static HeartbeatResponseAction makeStepDownRemoteAction(int primaryIndex);
+
+ /**
+ * Construct a default action, of type NoAction, with a next heartbeat start date in
+ * the past.
+ */
+ HeartbeatResponseAction();
+
+ /**
+ * Sets the date at which the next heartbeat should be scheduled.
+ */
+ void setNextHeartbeatStartDate(Date_t when);
+
+ /**
+ * Gets the action type of this action.
+ */
+ Action getAction() const {
+ return _action;
+ }
+
+ /**
+ * Gets the time at which the next heartbeat should be scheduled. If the
+ * time is not in the future, the next heartbeat should be scheduled immediately.
+ */
+ Date_t getNextHeartbeatStartDate() const {
+ return _nextHeartbeatStartDate;
+ }
+
+ /**
+ * If getAction() returns StepDownSelf or StepDownRemotePrimary, this is the index
+ * in the current replica set config of the node that ought to step down.
*/
- class HeartbeatResponseAction {
- public:
- /**
- * Actions taken based on heartbeat responses
- */
- enum Action {
- NoAction,
- Reconfig,
- StartElection,
- StepDownSelf,
- StepDownRemotePrimary
- };
-
- /**
- * Makes a new action representing doing nothing.
- */
- static HeartbeatResponseAction makeNoAction();
-
- /**
- * Makes a new action representing the instruction to reconfigure the current node.
- */
- static HeartbeatResponseAction makeReconfigAction();
-
- /**
- * Makes a new action telling the current node to attempt to elect itself primary.
- */
- static HeartbeatResponseAction makeElectAction();
-
- /**
- * Makes a new action telling the current node to step down as primary.
- *
- * It is an error to call this with primaryIndex != the index of the current node.
- */
- static HeartbeatResponseAction makeStepDownSelfAction(int primaryIndex);
-
- /**
- * Makes a new action telling the current node to ask the specified remote node to step
- * down as primary.
- *
- * It is an error to call this with primaryIndex == the index of the current node.
- */
- static HeartbeatResponseAction makeStepDownRemoteAction(int primaryIndex);
-
- /**
- * Construct an action with unspecified action and a next heartbeat start date in the
- * past.
- */
- HeartbeatResponseAction();
-
- /**
- * Sets the date at which the next heartbeat should be scheduled.
- */
- void setNextHeartbeatStartDate(Date_t when);
-
- /**
- * Gets the action type of this action.
- */
- Action getAction() const { return _action; }
-
- /**
- * Gets the time at which the next heartbeat should be scheduled. If the
- * time is not in the future, the next heartbeat should be scheduled immediately.
- */
- Date_t getNextHeartbeatStartDate() const { return _nextHeartbeatStartDate; }
-
- /**
- * If getAction() returns StepDownSelf or StepDownPrimary, this is the index
- * in the current replica set config of the node that ought to step down.
- */
- int getPrimaryConfigIndex() const { return _primaryIndex; }
-
- private:
- Action _action;
- int _primaryIndex;
- Date_t _nextHeartbeatStartDate;
- };
+ int getPrimaryConfigIndex() const {
+ return _primaryIndex;
+ }
+
+private:
+ Action _action;
+ int _primaryIndex;
+ Date_t _nextHeartbeatStartDate;
+};
} // namespace repl
} // namespace mongo
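
The factory functions above are the intended way to build a populated action; the default constructor deliberately yields NoAction with a next-heartbeat date in the past so the next heartbeat is scheduled immediately. A minimal sketch of constructing and consuming one (the index and date values are illustrative):

    // Tell ourselves (config index 0) to step down, then schedule the follow-up.
    HeartbeatResponseAction action = HeartbeatResponseAction::makeStepDownSelfAction(0);
    action.setNextHeartbeatStartDate(Date_t(1000));

    switch (action.getAction()) {
        case HeartbeatResponseAction::StepDownSelf:
            // For StepDownSelf the index must name the current node.
            invariant(action.getPrimaryConfigIndex() == 0);
            break;
        default:
            break;  // NoAction, Reconfig, StartElection, StepDownRemotePrimary
    }
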
diff --git a/src/mongo/db/repl/initial_sync.cpp b/src/mongo/db/repl/initial_sync.cpp
index 61729f92139..91978f9a34d 100644
--- a/src/mongo/db/repl/initial_sync.cpp
+++ b/src/mongo/db/repl/initial_sync.cpp
@@ -41,22 +41,21 @@
namespace mongo {
namespace repl {
- InitialSync::InitialSync(BackgroundSyncInterface *q) :
- SyncTail(q, multiInitialSyncApply) {}
-
- InitialSync::~InitialSync() {}
-
- /* initial oplog application, during initial sync, after cloning.
- */
- void InitialSync::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
- if (replSetForceInitialSyncFailure > 0) {
- log() << "replSet test code invoked, forced InitialSync failure: "
- << replSetForceInitialSyncFailure;
- replSetForceInitialSyncFailure--;
- throw DBException("forced error",0);
- }
- _applyOplogUntil(txn, endOpTime);
+InitialSync::InitialSync(BackgroundSyncInterface* q) : SyncTail(q, multiInitialSyncApply) {}
+
+InitialSync::~InitialSync() {}
+
+/* initial oplog application, during initial sync, after cloning.
+*/
+void InitialSync::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
+ if (replSetForceInitialSyncFailure > 0) {
+ log() << "replSet test code invoked, forced InitialSync failure: "
+ << replSetForceInitialSyncFailure;
+ replSetForceInitialSyncFailure--;
+ throw DBException("forced error", 0);
}
+ _applyOplogUntil(txn, endOpTime);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/initial_sync.h b/src/mongo/db/repl/initial_sync.h
index 0840a9a261a..3672ceb82e9 100644
--- a/src/mongo/db/repl/initial_sync.h
+++ b/src/mongo/db/repl/initial_sync.h
@@ -33,21 +33,21 @@
namespace mongo {
namespace repl {
- class BackgroundSyncInterface;
+class BackgroundSyncInterface;
+
+/**
+ * Initial clone and sync
+ */
+class InitialSync : public SyncTail {
+public:
+ virtual ~InitialSync();
+ InitialSync(BackgroundSyncInterface* q);
/**
- * Initial clone and sync
+ * applies up to endOpTime, fetching missing documents as needed.
*/
- class InitialSync : public SyncTail {
- public:
- virtual ~InitialSync();
- InitialSync(BackgroundSyncInterface *q);
-
- /**
- * applies up to endOpTime, fetching missing documents as needed.
- */
- void oplogApplication(OperationContext* txn, const OpTime& endOpTime);
- };
+ void oplogApplication(OperationContext* txn, const OpTime& endOpTime);
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
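
Since InitialSync only layers one entry point over SyncTail, its whole lifecycle is construct-then-apply. A call-shape sketch, assuming the caller already holds a BackgroundSyncInterface implementation and an OperationContext ('bgsync', 'txn', and 'endOpTime' below stand in for whatever the surrounding replication code provides):

    // 'bgsync' implements BackgroundSyncInterface; 'txn' and 'endOpTime'
    // come from the caller's replication machinery.
    InitialSync init(bgsync);
    // Applies oplog entries up to endOpTime, fetching missing documents as
    // needed; throws DBException when the forced-failure test hook is armed.
    init.oplogApplication(txn, endOpTime);
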
diff --git a/src/mongo/db/repl/is_master_response.cpp b/src/mongo/db/repl/is_master_response.cpp
index a789fd7b6dd..392ba515e42 100644
--- a/src/mongo/db/repl/is_master_response.cpp
+++ b/src/mongo/db/repl/is_master_response.cpp
@@ -42,415 +42,417 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kIsMasterFieldName = "ismaster";
- const std::string kSecondaryFieldName = "secondary";
- const std::string kSetNameFieldName = "setName";
- const std::string kSetVersionFieldName = "setVersion";
- const std::string kHostsFieldName = "hosts";
- const std::string kPassivesFieldName = "passives";
- const std::string kArbitersFieldName = "arbiters";
- const std::string kPrimaryFieldName = "primary";
- const std::string kArbiterOnlyFieldName = "arbiterOnly";
- const std::string kPassiveFieldName = "passive";
- const std::string kHiddenFieldName = "hidden";
- const std::string kBuildIndexesFieldName = "buildIndexes";
- const std::string kSlaveDelayFieldName = "slaveDelay";
- const std::string kTagsFieldName = "tags";
- const std::string kMeFieldName = "me";
- const std::string kElectionIdFieldName = "electionId";
-
- // field name constants that don't directly correspond to member variables
- const std::string kInfoFieldName = "info";
- const std::string kIsReplicaSetFieldName = "isreplicaset";
- const std::string kErrmsgFieldName = "errmsg";
- const std::string kCodeFieldName = "code";
+const std::string kIsMasterFieldName = "ismaster";
+const std::string kSecondaryFieldName = "secondary";
+const std::string kSetNameFieldName = "setName";
+const std::string kSetVersionFieldName = "setVersion";
+const std::string kHostsFieldName = "hosts";
+const std::string kPassivesFieldName = "passives";
+const std::string kArbitersFieldName = "arbiters";
+const std::string kPrimaryFieldName = "primary";
+const std::string kArbiterOnlyFieldName = "arbiterOnly";
+const std::string kPassiveFieldName = "passive";
+const std::string kHiddenFieldName = "hidden";
+const std::string kBuildIndexesFieldName = "buildIndexes";
+const std::string kSlaveDelayFieldName = "slaveDelay";
+const std::string kTagsFieldName = "tags";
+const std::string kMeFieldName = "me";
+const std::string kElectionIdFieldName = "electionId";
+
+// field name constants that don't directly correspond to member variables
+const std::string kInfoFieldName = "info";
+const std::string kIsReplicaSetFieldName = "isreplicaset";
+const std::string kErrmsgFieldName = "errmsg";
+const std::string kCodeFieldName = "code";
} // namespace
- IsMasterResponse::IsMasterResponse() :
- _isMaster(false),
- _isMasterSet(false),
- _secondary(false),
- _isSecondarySet(false),
- _setNameSet(false),
- _setVersion(0),
- _setVersionSet(false),
- _hostsSet(false),
- _passivesSet(false),
- _arbitersSet(false),
- _primarySet(false),
- _arbiterOnly(false),
- _arbiterOnlySet(false),
- _passive(false),
- _passiveSet(false),
- _hidden(false),
- _hiddenSet(false),
- _buildIndexes(true),
- _buildIndexesSet(false),
- _slaveDelay(0),
- _slaveDelaySet(false),
- _tagsSet(false),
- _meSet(false),
- _electionId(OID()),
- _configSet(true),
- _shutdownInProgress(false)
- {}
-
- void IsMasterResponse::addToBSON(BSONObjBuilder* builder) const {
- if (_shutdownInProgress) {
- builder->append(kCodeFieldName, ErrorCodes::ShutdownInProgress);
- builder->append(kErrmsgFieldName, "replication shutdown in progress");
- return;
- }
+IsMasterResponse::IsMasterResponse()
+ : _isMaster(false),
+ _isMasterSet(false),
+ _secondary(false),
+ _isSecondarySet(false),
+ _setNameSet(false),
+ _setVersion(0),
+ _setVersionSet(false),
+ _hostsSet(false),
+ _passivesSet(false),
+ _arbitersSet(false),
+ _primarySet(false),
+ _arbiterOnly(false),
+ _arbiterOnlySet(false),
+ _passive(false),
+ _passiveSet(false),
+ _hidden(false),
+ _hiddenSet(false),
+ _buildIndexes(true),
+ _buildIndexesSet(false),
+ _slaveDelay(0),
+ _slaveDelaySet(false),
+ _tagsSet(false),
+ _meSet(false),
+ _electionId(OID()),
+ _configSet(true),
+ _shutdownInProgress(false) {}
+
+void IsMasterResponse::addToBSON(BSONObjBuilder* builder) const {
+ if (_shutdownInProgress) {
+ builder->append(kCodeFieldName, ErrorCodes::ShutdownInProgress);
+ builder->append(kErrmsgFieldName, "replication shutdown in progress");
+ return;
+ }
- if (!_configSet) {
- builder->append(kIsMasterFieldName, false);
- builder->append(kSecondaryFieldName, false);
- builder->append(kInfoFieldName, "Does not have a valid replica set config");
- builder->append(kIsReplicaSetFieldName , true);
- return;
- }
+ if (!_configSet) {
+ builder->append(kIsMasterFieldName, false);
+ builder->append(kSecondaryFieldName, false);
+ builder->append(kInfoFieldName, "Does not have a valid replica set config");
+ builder->append(kIsReplicaSetFieldName, true);
+ return;
+ }
- invariant(_setNameSet);
- builder->append(kSetNameFieldName, _setName);
- invariant(_setVersionSet);
- builder->append(kSetVersionFieldName, static_cast<int>(_setVersion));
- invariant(_isMasterSet);
- builder->append(kIsMasterFieldName, _isMaster);
- invariant(_isSecondarySet);
- builder->append(kSecondaryFieldName, _secondary);
-
- if (_hostsSet) {
- std::vector<std::string> hosts;
- for (size_t i = 0; i < _hosts.size(); ++i) {
- hosts.push_back(_hosts[i].toString());
- }
- builder->append(kHostsFieldName, hosts);
+ invariant(_setNameSet);
+ builder->append(kSetNameFieldName, _setName);
+ invariant(_setVersionSet);
+ builder->append(kSetVersionFieldName, static_cast<int>(_setVersion));
+ invariant(_isMasterSet);
+ builder->append(kIsMasterFieldName, _isMaster);
+ invariant(_isSecondarySet);
+ builder->append(kSecondaryFieldName, _secondary);
+
+ if (_hostsSet) {
+ std::vector<std::string> hosts;
+ for (size_t i = 0; i < _hosts.size(); ++i) {
+ hosts.push_back(_hosts[i].toString());
}
- if (_passivesSet) {
- std::vector<std::string> passives;
- for (size_t i = 0; i < _passives.size(); ++i) {
- passives.push_back(_passives[i].toString());
- }
- builder->append(kPassivesFieldName, passives);
+ builder->append(kHostsFieldName, hosts);
+ }
+ if (_passivesSet) {
+ std::vector<std::string> passives;
+ for (size_t i = 0; i < _passives.size(); ++i) {
+ passives.push_back(_passives[i].toString());
}
- if (_arbitersSet) {
- std::vector<std::string> arbiters;
- for (size_t i = 0; i < _arbiters.size(); ++i) {
- arbiters.push_back(_arbiters[i].toString());
- }
- builder->append(kArbitersFieldName, arbiters);
+ builder->append(kPassivesFieldName, passives);
+ }
+ if (_arbitersSet) {
+ std::vector<std::string> arbiters;
+ for (size_t i = 0; i < _arbiters.size(); ++i) {
+ arbiters.push_back(_arbiters[i].toString());
}
- if (_primarySet)
- builder->append(kPrimaryFieldName, _primary.toString());
- if (_arbiterOnlySet)
- builder->append(kArbiterOnlyFieldName, _arbiterOnly);
- if (_passiveSet)
- builder->append(kPassiveFieldName, _passive);
- if (_hiddenSet)
- builder->append(kHiddenFieldName, _hidden);
- if (_buildIndexesSet)
- builder->append(kBuildIndexesFieldName, _buildIndexes);
- if (_slaveDelaySet)
- builder->append(kSlaveDelayFieldName, _slaveDelay.total_seconds());
- if (_tagsSet) {
- BSONObjBuilder tags(builder->subobjStart(kTagsFieldName));
- for (unordered_map<std::string, std::string>::const_iterator it = _tags.begin();
- it != _tags.end(); ++it) {
- tags.append(it->first, it->second);
- }
+ builder->append(kArbitersFieldName, arbiters);
+ }
+ if (_primarySet)
+ builder->append(kPrimaryFieldName, _primary.toString());
+ if (_arbiterOnlySet)
+ builder->append(kArbiterOnlyFieldName, _arbiterOnly);
+ if (_passiveSet)
+ builder->append(kPassiveFieldName, _passive);
+ if (_hiddenSet)
+ builder->append(kHiddenFieldName, _hidden);
+ if (_buildIndexesSet)
+ builder->append(kBuildIndexesFieldName, _buildIndexes);
+ if (_slaveDelaySet)
+ builder->append(kSlaveDelayFieldName, _slaveDelay.total_seconds());
+ if (_tagsSet) {
+ BSONObjBuilder tags(builder->subobjStart(kTagsFieldName));
+ for (unordered_map<std::string, std::string>::const_iterator it = _tags.begin();
+ it != _tags.end();
+ ++it) {
+ tags.append(it->first, it->second);
+ }
+ }
+ invariant(_meSet);
+ builder->append(kMeFieldName, _me.toString());
+ if (_electionId.isSet())
+ builder->append(kElectionIdFieldName, _electionId);
+}
+
+BSONObj IsMasterResponse::toBSON() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.obj();
+}
+
+Status IsMasterResponse::initialize(const BSONObj& doc) {
+ Status status = bsonExtractBooleanField(doc, kIsMasterFieldName, &_isMaster);
+ if (!status.isOK()) {
+ return status;
+ }
+ _isMasterSet = true;
+ status = bsonExtractBooleanField(doc, kSecondaryFieldName, &_secondary);
+ if (!status.isOK()) {
+ return status;
+ }
+ _isSecondarySet = true;
+ if (doc.hasField(kInfoFieldName)) {
+ if (_isMaster || _secondary || !doc.hasField(kIsReplicaSetFieldName) ||
+ !doc[kIsReplicaSetFieldName].booleanSafe()) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "Expected presence of \"" << kInfoFieldName
+ << "\" field to indicate no valid config loaded, but other "
+ "fields weren't as we expected");
+ }
+ _configSet = false;
+ return Status::OK();
+ } else {
+ if (doc.hasField(kIsReplicaSetFieldName)) {
+ return Status(ErrorCodes::FailedToParse,
+ str::stream() << "Found \"" << kIsReplicaSetFieldName
+ << "\" field which should indicate that no valid config "
+ "is loaded, but we didn't also have an \""
+ << kInfoFieldName << "\" field as we expected");
}
- invariant(_meSet);
- builder->append(kMeFieldName, _me.toString());
- if (_electionId.isSet())
- builder->append(kElectionIdFieldName, _electionId);
}
- BSONObj IsMasterResponse::toBSON() const {
- BSONObjBuilder builder;
- addToBSON(&builder);
- return builder.obj();
+ status = bsonExtractStringField(doc, kSetNameFieldName, &_setName);
+ if (!status.isOK()) {
+ return status;
+ }
+ _setNameSet = true;
+ status = bsonExtractIntegerField(doc, kSetVersionFieldName, &_setVersion);
+ if (!status.isOK()) {
+ return status;
}
+ _setVersionSet = true;
- Status IsMasterResponse::initialize(const BSONObj& doc) {
- Status status = bsonExtractBooleanField(doc, kIsMasterFieldName, &_isMaster);
- if (!status.isOK()) {
- return status;
- }
- _isMasterSet = true;
- status = bsonExtractBooleanField(doc, kSecondaryFieldName, &_secondary);
+ if (doc.hasField(kHostsFieldName)) {
+ BSONElement hostsElement;
+ status = bsonExtractTypedField(doc, kHostsFieldName, Array, &hostsElement);
if (!status.isOK()) {
return status;
}
- _isSecondarySet = true;
- if (doc.hasField(kInfoFieldName)) {
- if (_isMaster ||
- _secondary ||
- !doc.hasField(kIsReplicaSetFieldName) ||
- !doc[kIsReplicaSetFieldName].booleanSafe()) {
- return Status(ErrorCodes::FailedToParse,
- str::stream() << "Expected presence of \"" << kInfoFieldName <<
- "\" field to indicate no valid config loaded, but other "
- "fields weren't as we expected");
- }
- _configSet = false;
- return Status::OK();
- }
- else {
- if (doc.hasField(kIsReplicaSetFieldName)) {
- return Status(ErrorCodes::FailedToParse,
- str::stream() << "Found \"" << kIsReplicaSetFieldName <<
- "\" field which should indicate that no valid config "
- "is loaded, but we didn't also have an \"" <<
- kInfoFieldName << "\" field as we expected");
+ for (BSONObjIterator it(hostsElement.Obj()); it.more();) {
+ BSONElement hostElement = it.next();
+ if (hostElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kHostsFieldName
+ << "\" array of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(hostElement.type()));
}
+ _hosts.push_back(HostAndPort(hostElement.String()));
}
+ _hostsSet = true;
+ }
- status = bsonExtractStringField(doc, kSetNameFieldName, &_setName);
- if (!status.isOK()) {
- return status;
- }
- _setNameSet = true;
- status = bsonExtractIntegerField(doc, kSetVersionFieldName, &_setVersion);
+ if (doc.hasField(kPassivesFieldName)) {
+ BSONElement passivesElement;
+ status = bsonExtractTypedField(doc, kPassivesFieldName, Array, &passivesElement);
if (!status.isOK()) {
return status;
}
- _setVersionSet = true;
-
- if (doc.hasField(kHostsFieldName)) {
- BSONElement hostsElement;
- status = bsonExtractTypedField(doc, kHostsFieldName, Array, &hostsElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(hostsElement.Obj()); it.more();) {
- BSONElement hostElement = it.next();
- if (hostElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kHostsFieldName <<
- "\" array of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(hostElement.type()));
- }
- _hosts.push_back(HostAndPort(hostElement.String()));
- }
- _hostsSet = true;
- }
-
- if (doc.hasField(kPassivesFieldName)) {
- BSONElement passivesElement;
- status = bsonExtractTypedField(doc, kPassivesFieldName, Array, &passivesElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(passivesElement.Obj()); it.more();) {
- BSONElement passiveElement = it.next();
- if (passiveElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kPassivesFieldName <<
- "\" array of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(passiveElement.type()));
- }
- _passives.push_back(HostAndPort(passiveElement.String()));
- }
- _passivesSet = true;
- }
-
- if (doc.hasField(kArbitersFieldName)) {
- BSONElement arbitersElement;
- status = bsonExtractTypedField(doc, kArbitersFieldName, Array, &arbitersElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(arbitersElement.Obj()); it.more();) {
- BSONElement arbiterElement = it.next();
- if (arbiterElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kArbitersFieldName <<
- "\" array of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(arbiterElement.type()));
- }
- _arbiters.push_back(HostAndPort(arbiterElement.String()));
- }
- _arbitersSet = true;
- }
-
- if (doc.hasField(kPrimaryFieldName)) {
- std::string primaryString;
- status = bsonExtractStringField(doc, kPrimaryFieldName, &primaryString);
- if (!status.isOK()) {
- return status;
- }
- _primary = HostAndPort(primaryString);
- _primarySet = true;
- }
-
- if (doc.hasField(kArbiterOnlyFieldName)) {
- status = bsonExtractBooleanField(doc, kArbiterOnlyFieldName, &_arbiterOnly);
- if (!status.isOK()) {
- return status;
- }
- _arbiterOnlySet = true;
- }
-
- if (doc.hasField(kPassiveFieldName)) {
- status = bsonExtractBooleanField(doc, kPassiveFieldName, &_passive);
- if (!status.isOK()) {
- return status;
- }
- _passiveSet = true;
- }
-
- if (doc.hasField(kHiddenFieldName)) {
- status = bsonExtractBooleanField(doc, kHiddenFieldName, &_hidden);
- if (!status.isOK()) {
- return status;
- }
- _hiddenSet = true;
- }
-
- if (doc.hasField(kBuildIndexesFieldName)) {
- status = bsonExtractBooleanField(doc, kBuildIndexesFieldName, &_buildIndexes);
- if (!status.isOK()) {
- return status;
- }
- _buildIndexesSet = true;
- }
-
- if (doc.hasField(kSlaveDelayFieldName)) {
- long long slaveDelaySecs;
- status = bsonExtractIntegerField(doc, kSlaveDelayFieldName, &slaveDelaySecs);
- if (!status.isOK()) {
- return status;
+ for (BSONObjIterator it(passivesElement.Obj()); it.more();) {
+ BSONElement passiveElement = it.next();
+ if (passiveElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kPassivesFieldName
+ << "\" array of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(passiveElement.type()));
}
- _slaveDelaySet = true;
- _slaveDelay = Seconds(slaveDelaySecs);
+ _passives.push_back(HostAndPort(passiveElement.String()));
}
+ _passivesSet = true;
+ }
- if (doc.hasField(kTagsFieldName)) {
- BSONElement tagsElement;
- status = bsonExtractTypedField(doc, kTagsFieldName, Object, &tagsElement);
- if (!status.isOK()) {
- return status;
- }
- for (BSONObjIterator it(tagsElement.Obj()); it.more();) {
- BSONElement tagElement = it.next();
- if (tagElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Elements in \"" << kTagsFieldName << "\" obj "
- "of isMaster response must be of type " <<
- typeName(String) << " but found type " <<
- typeName(tagsElement.type()));
- }
- _tags[tagElement.fieldNameStringData().toString()] = tagElement.String();
- }
- _tagsSet = true;
+ if (doc.hasField(kArbitersFieldName)) {
+ BSONElement arbitersElement;
+ status = bsonExtractTypedField(doc, kArbitersFieldName, Array, &arbitersElement);
+ if (!status.isOK()) {
+ return status;
}
-
- if (doc.hasField(kElectionIdFieldName)) {
- BSONElement electionIdElem;
- status = bsonExtractTypedField(doc, kElectionIdFieldName, jstOID, &electionIdElem);
- if (!status.isOK()) {
- return status;
+ for (BSONObjIterator it(arbitersElement.Obj()); it.more();) {
+ BSONElement arbiterElement = it.next();
+ if (arbiterElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kArbitersFieldName
+ << "\" array of isMaster response must be of type "
+ << typeName(String) << " but found type "
+ << typeName(arbiterElement.type()));
}
- _electionId = electionIdElem.OID();
+ _arbiters.push_back(HostAndPort(arbiterElement.String()));
}
+ _arbitersSet = true;
+ }
- std::string meString;
- status = bsonExtractStringField(doc, kMeFieldName, &meString);
+ if (doc.hasField(kPrimaryFieldName)) {
+ std::string primaryString;
+ status = bsonExtractStringField(doc, kPrimaryFieldName, &primaryString);
if (!status.isOK()) {
return status;
}
- _me = HostAndPort(meString);
- _meSet = true;
-
- return Status::OK();
- }
-
- void IsMasterResponse::setIsMaster(bool isMaster) {
- _isMasterSet = true;
- _isMaster = isMaster;
- }
-
- void IsMasterResponse::setIsSecondary(bool secondary) {
- _isSecondarySet = true;
- _secondary = secondary;
- }
-
- void IsMasterResponse::setReplSetName(const std::string& setName) {
- _setNameSet = true;
- _setName = setName;
- }
-
- void IsMasterResponse::setReplSetVersion(long long version) {
- _setVersionSet = true;
- _setVersion = version;
- }
-
- void IsMasterResponse::addHost(const HostAndPort& host) {
- _hostsSet = true;
- _hosts.push_back(host);
- }
-
- void IsMasterResponse::addPassive(const HostAndPort& passive) {
- _passivesSet = true;
- _passives.push_back(passive);
- }
-
- void IsMasterResponse::addArbiter(const HostAndPort& arbiter) {
- _arbitersSet = true;
- _arbiters.push_back(arbiter);
- }
-
- void IsMasterResponse::setPrimary(const HostAndPort& primary) {
+ _primary = HostAndPort(primaryString);
_primarySet = true;
- _primary = primary;
}
- void IsMasterResponse::setIsArbiterOnly(bool arbiterOnly) {
+ if (doc.hasField(kArbiterOnlyFieldName)) {
+ status = bsonExtractBooleanField(doc, kArbiterOnlyFieldName, &_arbiterOnly);
+ if (!status.isOK()) {
+ return status;
+ }
_arbiterOnlySet = true;
- _arbiterOnly = arbiterOnly;
}
- void IsMasterResponse::setIsPassive(bool passive) {
+ if (doc.hasField(kPassiveFieldName)) {
+ status = bsonExtractBooleanField(doc, kPassiveFieldName, &_passive);
+ if (!status.isOK()) {
+ return status;
+ }
_passiveSet = true;
- _passive = passive;
}
- void IsMasterResponse::setIsHidden(bool hidden) {
+ if (doc.hasField(kHiddenFieldName)) {
+ status = bsonExtractBooleanField(doc, kHiddenFieldName, &_hidden);
+ if (!status.isOK()) {
+ return status;
+ }
_hiddenSet = true;
- _hidden = hidden;
}
- void IsMasterResponse::setShouldBuildIndexes(bool buildIndexes) {
+ if (doc.hasField(kBuildIndexesFieldName)) {
+ status = bsonExtractBooleanField(doc, kBuildIndexesFieldName, &_buildIndexes);
+ if (!status.isOK()) {
+ return status;
+ }
_buildIndexesSet = true;
- _buildIndexes = buildIndexes;
}
- void IsMasterResponse::setSlaveDelay(Seconds slaveDelay) {
+ if (doc.hasField(kSlaveDelayFieldName)) {
+ long long slaveDelaySecs;
+ status = bsonExtractIntegerField(doc, kSlaveDelayFieldName, &slaveDelaySecs);
+ if (!status.isOK()) {
+ return status;
+ }
_slaveDelaySet = true;
- _slaveDelay = slaveDelay;
+ _slaveDelay = Seconds(slaveDelaySecs);
}
- void IsMasterResponse::addTag(const std::string& tagKey, const std::string& tagValue) {
+ if (doc.hasField(kTagsFieldName)) {
+ BSONElement tagsElement;
+ status = bsonExtractTypedField(doc, kTagsFieldName, Object, &tagsElement);
+ if (!status.isOK()) {
+ return status;
+ }
+ for (BSONObjIterator it(tagsElement.Obj()); it.more();) {
+ BSONElement tagElement = it.next();
+ if (tagElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Elements in \"" << kTagsFieldName
+ << "\" obj "
+ "of isMaster response must be of type "
+ << typeName(String) << " but found type "
+                                             << typeName(tagElement.type()));
+ }
+ _tags[tagElement.fieldNameStringData().toString()] = tagElement.String();
+ }
_tagsSet = true;
- _tags[tagKey] = tagValue;
}
- void IsMasterResponse::setMe(const HostAndPort& me) {
- _meSet = true;
- _me = me;
+ if (doc.hasField(kElectionIdFieldName)) {
+ BSONElement electionIdElem;
+ status = bsonExtractTypedField(doc, kElectionIdFieldName, jstOID, &electionIdElem);
+ if (!status.isOK()) {
+ return status;
+ }
+ _electionId = electionIdElem.OID();
}
- void IsMasterResponse::setElectionId(const OID& electionId) {
- _electionId = electionId;
+ std::string meString;
+ status = bsonExtractStringField(doc, kMeFieldName, &meString);
+ if (!status.isOK()) {
+ return status;
}
-
- void IsMasterResponse::markAsNoConfig() { _configSet = false; }
-
- void IsMasterResponse::markAsShutdownInProgress() { _shutdownInProgress = true; }
-
-} // namespace repl
-} // namespace mongo
+ _me = HostAndPort(meString);
+ _meSet = true;
+
+ return Status::OK();
+}
+
+void IsMasterResponse::setIsMaster(bool isMaster) {
+ _isMasterSet = true;
+ _isMaster = isMaster;
+}
+
+void IsMasterResponse::setIsSecondary(bool secondary) {
+ _isSecondarySet = true;
+ _secondary = secondary;
+}
+
+void IsMasterResponse::setReplSetName(const std::string& setName) {
+ _setNameSet = true;
+ _setName = setName;
+}
+
+void IsMasterResponse::setReplSetVersion(long long version) {
+ _setVersionSet = true;
+ _setVersion = version;
+}
+
+void IsMasterResponse::addHost(const HostAndPort& host) {
+ _hostsSet = true;
+ _hosts.push_back(host);
+}
+
+void IsMasterResponse::addPassive(const HostAndPort& passive) {
+ _passivesSet = true;
+ _passives.push_back(passive);
+}
+
+void IsMasterResponse::addArbiter(const HostAndPort& arbiter) {
+ _arbitersSet = true;
+ _arbiters.push_back(arbiter);
+}
+
+void IsMasterResponse::setPrimary(const HostAndPort& primary) {
+ _primarySet = true;
+ _primary = primary;
+}
+
+void IsMasterResponse::setIsArbiterOnly(bool arbiterOnly) {
+ _arbiterOnlySet = true;
+ _arbiterOnly = arbiterOnly;
+}
+
+void IsMasterResponse::setIsPassive(bool passive) {
+ _passiveSet = true;
+ _passive = passive;
+}
+
+void IsMasterResponse::setIsHidden(bool hidden) {
+ _hiddenSet = true;
+ _hidden = hidden;
+}
+
+void IsMasterResponse::setShouldBuildIndexes(bool buildIndexes) {
+ _buildIndexesSet = true;
+ _buildIndexes = buildIndexes;
+}
+
+void IsMasterResponse::setSlaveDelay(Seconds slaveDelay) {
+ _slaveDelaySet = true;
+ _slaveDelay = slaveDelay;
+}
+
+void IsMasterResponse::addTag(const std::string& tagKey, const std::string& tagValue) {
+ _tagsSet = true;
+ _tags[tagKey] = tagValue;
+}
+
+void IsMasterResponse::setMe(const HostAndPort& me) {
+ _meSet = true;
+ _me = me;
+}
+
+void IsMasterResponse::setElectionId(const OID& electionId) {
+ _electionId = electionId;
+}
+
+void IsMasterResponse::markAsNoConfig() {
+ _configSet = false;
+}
+
+void IsMasterResponse::markAsShutdownInProgress() {
+ _shutdownInProgress = true;
+}
+
+} // namespace repl
+} // namespace mongo
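
The initialize() path above repeats one pattern per optional field: probe with hasField(), pull the value out with a typed bsonExtract*Field helper, then record it alongside a matching *Set flag so that addToBSON() can later emit only what was actually present. A minimal sketch of driving that parser, assuming only the mongo headers shown in this diff (the helper name reserializeIsMaster is illustrative, not an existing function):

    #include "mongo/db/jsobj.h"
    #include "mongo/db/repl/is_master_response.h"

    // Parse a raw isMaster reply, then re-serialize only the fields that were set.
    mongo::Status reserializeIsMaster(const mongo::BSONObj& reply, mongo::BSONObj* out) {
        mongo::repl::IsMasterResponse response;
        // Returns TypeMismatch if, e.g., a "tags" value is not a string (see above).
        mongo::Status status = response.initialize(reply);
        if (!status.isOK()) {
            return status;
        }
        *out = response.toBSON();  // round-trips through addToBSON()
        return mongo::Status::OK();
    }
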
diff --git a/src/mongo/db/repl/is_master_response.h b/src/mongo/db/repl/is_master_response.h
index dd0eda70e2b..7bfaf1ac0b2 100644
--- a/src/mongo/db/repl/is_master_response.h
+++ b/src/mongo/db/repl/is_master_response.h
@@ -38,179 +38,216 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class Status;
+class BSONObj;
+class BSONObjBuilder;
+class Status;
namespace repl {
+/**
+ * Response structure for the ismaster command. Only handles responses from nodes
+ * that are in replset mode.
+ */
+class IsMasterResponse {
+public:
+ IsMasterResponse();
+
/**
- * Response structure for the ismaster command. Only handles responses from nodes
- * that are in replset mode.
+ * Initializes this IsMasterResponse from the contents of "doc".
*/
- class IsMasterResponse {
- public:
- IsMasterResponse();
-
- /**
- * Initializes this IsMasterResponse from the contents of "doc".
- */
- Status initialize(const BSONObj& doc);
-
- /**
- * Appends all non-default values to "builder".
- * There are two values that are handled specially: if _inShutdown is true or _configSet
- * is false, we will add a standard response to "builder" indicating either that we are
- * in the middle of shutting down or we do not have a valid replica set config, ignoring
- * the values of all other member variables.
- */
- void addToBSON(BSONObjBuilder* builder) const;
+ Status initialize(const BSONObj& doc);
- /**
- * Returns a BSONObj consisting the results of calling addToBSON on an otherwise empty
- * BSONObjBuilder.
- */
- BSONObj toBSON() const;
-
-
- // ===================== Accessors for member variables ================================= //
-
- bool isMaster() const { return _isMaster; }
+ /**
+ * Appends all non-default values to "builder".
+     * There are two values that are handled specially: if _shutdownInProgress is true or _configSet
+ * is false, we will add a standard response to "builder" indicating either that we are
+ * in the middle of shutting down or we do not have a valid replica set config, ignoring
+ * the values of all other member variables.
+ */
+ void addToBSON(BSONObjBuilder* builder) const;
- bool isSecondary() const { return _secondary; }
+ /**
+     * Returns a BSONObj consisting of the results of calling addToBSON on an otherwise empty
+ * BSONObjBuilder.
+ */
+ BSONObj toBSON() const;
- const std::string& getReplSetName() const { return _setName; }
- long long getReplSetVersion() const { return _setVersion; }
+ // ===================== Accessors for member variables ================================= //
- const std::vector<HostAndPort>& getHosts() const { return _hosts; }
+ bool isMaster() const {
+ return _isMaster;
+ }
- const std::vector<HostAndPort>& getPassives() const { return _passives; }
+ bool isSecondary() const {
+ return _secondary;
+ }
- const std::vector<HostAndPort>& getArbiters() const { return _arbiters; }
+ const std::string& getReplSetName() const {
+ return _setName;
+ }
- const HostAndPort& getPrimary() const { return _primary; }
+ long long getReplSetVersion() const {
+ return _setVersion;
+ }
- bool hasPrimary() const { return _primarySet; }
+ const std::vector<HostAndPort>& getHosts() const {
+ return _hosts;
+ }
- bool isArbiterOnly() const { return _arbiterOnly; }
+ const std::vector<HostAndPort>& getPassives() const {
+ return _passives;
+ }
- bool isPassive() const { return _passive; }
+ const std::vector<HostAndPort>& getArbiters() const {
+ return _arbiters;
+ }
- bool isHidden() const { return _hidden; }
+ const HostAndPort& getPrimary() const {
+ return _primary;
+ }
- bool shouldBuildIndexes() const { return _buildIndexes; }
+ bool hasPrimary() const {
+ return _primarySet;
+ }
- Seconds getSlaveDelay() const { return _slaveDelay; }
+ bool isArbiterOnly() const {
+ return _arbiterOnly;
+ }
- const unordered_map<std::string, std::string> getTags() const { return _tags; }
+ bool isPassive() const {
+ return _passive;
+ }
- const HostAndPort& getMe() const { return _me; }
+ bool isHidden() const {
+ return _hidden;
+ }
- const OID& getElectionId() const { return _electionId; }
+ bool shouldBuildIndexes() const {
+ return _buildIndexes;
+ }
- /**
- * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
- * message to indicate that we have no replica set config.
- */
- bool isConfigSet() const { return _configSet; }
+ Seconds getSlaveDelay() const {
+ return _slaveDelay;
+ }
- /**
- * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
- * message to indicate that we are in the middle of shutting down.
- */
- bool isShutdownInProgress() const { return _shutdownInProgress; }
+ const unordered_map<std::string, std::string> getTags() const {
+ return _tags;
+ }
+ const HostAndPort& getMe() const {
+ return _me;
+ }
- // ===================== Mutators for member variables ================================= //
+ const OID& getElectionId() const {
+ return _electionId;
+ }
- void setIsMaster(bool isMaster);
+ /**
+ * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
+ * message to indicate that we have no replica set config.
+ */
+ bool isConfigSet() const {
+ return _configSet;
+ }
- void setIsSecondary(bool secondary);
+ /**
+ * If false, calls to toBSON/addToBSON will ignore all other fields and add a specific
+ * message to indicate that we are in the middle of shutting down.
+ */
+ bool isShutdownInProgress() const {
+ return _shutdownInProgress;
+ }
- void setReplSetName(const std::string& setName);
- void setReplSetVersion(long long version);
+ // ===================== Mutators for member variables ================================= //
- void addHost(const HostAndPort& host);
+ void setIsMaster(bool isMaster);
- void addPassive(const HostAndPort& passive);
+ void setIsSecondary(bool secondary);
- void addArbiter(const HostAndPort& arbiter);
+ void setReplSetName(const std::string& setName);
- void setPrimary(const HostAndPort& primary);
+ void setReplSetVersion(long long version);
- void setIsArbiterOnly(bool arbiterOnly);
+ void addHost(const HostAndPort& host);
- void setIsPassive(bool passive);
+ void addPassive(const HostAndPort& passive);
- void setIsHidden(bool hidden);
+ void addArbiter(const HostAndPort& arbiter);
- void setShouldBuildIndexes(bool buildIndexes);
+ void setPrimary(const HostAndPort& primary);
- void setSlaveDelay(Seconds slaveDelay);
+ void setIsArbiterOnly(bool arbiterOnly);
- void addTag(const std::string& tagKey, const std::string& tagValue);
+ void setIsPassive(bool passive);
- void setMe(const HostAndPort& me);
+ void setIsHidden(bool hidden);
- void setElectionId(const OID& electionId);
+ void setShouldBuildIndexes(bool buildIndexes);
- /**
- * Marks _configSet as false, which will cause future calls to toBSON/addToBSON to ignore
- * all other member variables and output a hardcoded response indicating that we have no
- * valid replica set config.
- */
- void markAsNoConfig();
+ void setSlaveDelay(Seconds slaveDelay);
- /**
- * Marks _shutdownInProgress as true, which will cause future calls to toBSON/addToBSON to
- * ignore all other member variables and output a hardcoded response indicating that we are
- * in the middle of shutting down.
- */
- void markAsShutdownInProgress();
+ void addTag(const std::string& tagKey, const std::string& tagValue);
- private:
+ void setMe(const HostAndPort& me);
- bool _isMaster;
- bool _isMasterSet;
- bool _secondary;
- bool _isSecondarySet;
- std::string _setName;
- bool _setNameSet;
- long long _setVersion;
- bool _setVersionSet;
- std::vector<HostAndPort> _hosts;
- bool _hostsSet;
- std::vector<HostAndPort> _passives;
- bool _passivesSet;
- std::vector<HostAndPort> _arbiters;
- bool _arbitersSet;
- HostAndPort _primary;
- bool _primarySet;
- bool _arbiterOnly;
- bool _arbiterOnlySet;
- bool _passive;
- bool _passiveSet;
- bool _hidden;
- bool _hiddenSet;
- bool _buildIndexes;
- bool _buildIndexesSet;
- Seconds _slaveDelay;
- bool _slaveDelaySet;
- unordered_map<std::string, std::string> _tags;
- bool _tagsSet;
- HostAndPort _me;
- bool _meSet;
- OID _electionId;
+ void setElectionId(const OID& electionId);
- // If _configSet is false this means we don't have a valid repl set config, so toBSON
- // will return a set of hardcoded values that indicate this.
- bool _configSet;
- // If _shutdownInProgress is true toBSON will return a set of hardcoded values to indicate
- // that we are mid shutdown
- bool _shutdownInProgress;
- };
+ /**
+ * Marks _configSet as false, which will cause future calls to toBSON/addToBSON to ignore
+ * all other member variables and output a hardcoded response indicating that we have no
+ * valid replica set config.
+ */
+ void markAsNoConfig();
-} // namespace repl
-} // namespace mongo
+ /**
+ * Marks _shutdownInProgress as true, which will cause future calls to toBSON/addToBSON to
+ * ignore all other member variables and output a hardcoded response indicating that we are
+ * in the middle of shutting down.
+ */
+ void markAsShutdownInProgress();
+
+private:
+ bool _isMaster;
+ bool _isMasterSet;
+ bool _secondary;
+ bool _isSecondarySet;
+ std::string _setName;
+ bool _setNameSet;
+ long long _setVersion;
+ bool _setVersionSet;
+ std::vector<HostAndPort> _hosts;
+ bool _hostsSet;
+ std::vector<HostAndPort> _passives;
+ bool _passivesSet;
+ std::vector<HostAndPort> _arbiters;
+ bool _arbitersSet;
+ HostAndPort _primary;
+ bool _primarySet;
+ bool _arbiterOnly;
+ bool _arbiterOnlySet;
+ bool _passive;
+ bool _passiveSet;
+ bool _hidden;
+ bool _hiddenSet;
+ bool _buildIndexes;
+ bool _buildIndexesSet;
+ Seconds _slaveDelay;
+ bool _slaveDelaySet;
+ unordered_map<std::string, std::string> _tags;
+ bool _tagsSet;
+ HostAndPort _me;
+ bool _meSet;
+ OID _electionId;
+
+ // If _configSet is false this means we don't have a valid repl set config, so toBSON
+ // will return a set of hardcoded values that indicate this.
+ bool _configSet;
+ // If _shutdownInProgress is true toBSON will return a set of hardcoded values to indicate
+ // that we are mid shutdown
+ bool _shutdownInProgress;
+};
+
+} // namespace repl
+} // namespace mongo
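
Two of the mutators declared above act as switches rather than field setters: markAsNoConfig() and markAsShutdownInProgress() make every subsequent toBSON()/addToBSON() call ignore the other members and emit a fixed reply. A sketch of assembling a response by hand under that contract; the set name and host below are made-up values for illustration:

    #include "mongo/db/repl/is_master_response.h"
    #include "mongo/util/net/hostandport.h"

    mongo::repl::IsMasterResponse makeResponse(bool haveConfig) {
        mongo::repl::IsMasterResponse response;
        if (!haveConfig) {
            // toBSON() now returns the hardcoded "no config" reply and skips
            // every other member, per the markAsNoConfig() contract above.
            response.markAsNoConfig();
            return response;
        }
        const mongo::HostAndPort self("node0.example.net", 27017);  // hypothetical host
        response.setReplSetName("rs0");                             // hypothetical set name
        response.setReplSetVersion(1);
        response.setIsMaster(true);
        response.setIsSecondary(false);
        response.setMe(self);
        response.addHost(self);
        response.setPrimary(self);
        return response;
    }
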
diff --git a/src/mongo/db/repl/isself.cpp b/src/mongo/db/repl/isself.cpp
index 423d3025a1e..57e4720375f 100644
--- a/src/mongo/db/repl/isself.cpp
+++ b/src/mongo/db/repl/isself.cpp
@@ -47,7 +47,8 @@
#include "mongo/util/scopeguard.h"
#include "mongo/util/log.h"
-#if defined(__linux__) || defined(__APPLE__) || defined(__freebsd__) || defined(__sunos__) || defined(__openbsd__)
+#if defined(__linux__) || defined(__APPLE__) || defined(__freebsd__) || defined(__sunos__) || \
+ defined(__openbsd__)
#define FASTPATH_UNIX 1
#endif
@@ -76,281 +77,269 @@
namespace mongo {
namespace repl {
- OID instanceId;
+OID instanceId;
- MONGO_INITIALIZER(GenerateInstanceId)(InitializerContext*) {
- instanceId = OID::gen();
- return Status::OK();
- }
+MONGO_INITIALIZER(GenerateInstanceId)(InitializerContext*) {
+ instanceId = OID::gen();
+ return Status::OK();
+}
namespace {
- /**
- * Helper to convert a message from a networking function to a string.
- * Needed because errnoWithDescription uses strerror on linux, when
- * we need gai_strerror.
- */
- std::string stringifyError(int code) {
+/**
+ * Helper to convert a message from a networking function to a string.
+ * Needed because errnoWithDescription uses strerror on linux, when
+ * we need gai_strerror.
+ */
+std::string stringifyError(int code) {
#if FASTPATH_UNIX
- return gai_strerror(code);
+ return gai_strerror(code);
#elif defined(_WIN32)
- // FormatMessage in errnoWithDescription works here on windows
- return errnoWithDescription(code);
+ // FormatMessage in errnoWithDescription works here on windows
+ return errnoWithDescription(code);
#endif
- }
-
- /**
- * Resolves a host and port to a list of IP addresses. This requires a syscall. If the
- * ipv6enabled parameter is true, both IPv6 and IPv4 addresses will be returned.
- */
- std::vector<std::string> getAddrsForHost(const std::string& iporhost,
- const int port,
- const bool ipv6enabled) {
- addrinfo* addrs = NULL;
- addrinfo hints = {0};
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_family = (ipv6enabled ? AF_UNSPEC : AF_INET);
-
- const std::string portNum = BSONObjBuilder::numStr(port);
-
- std::vector<std::string> out;
-
- int err = getaddrinfo(iporhost.c_str(), portNum.c_str(), &hints, &addrs);
+}
- if (err) {
- warning() << "getaddrinfo(\"" << iporhost << "\") failed: "
- << stringifyError(err) << std::endl;
- return out;
- }
+/**
+ * Resolves a host and port to a list of IP addresses. This requires a syscall. If the
+ * ipv6enabled parameter is true, both IPv6 and IPv4 addresses will be returned.
+ */
+std::vector<std::string> getAddrsForHost(const std::string& iporhost,
+ const int port,
+ const bool ipv6enabled) {
+ addrinfo* addrs = NULL;
+ addrinfo hints = {0};
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_family = (ipv6enabled ? AF_UNSPEC : AF_INET);
+
+ const std::string portNum = BSONObjBuilder::numStr(port);
+
+ std::vector<std::string> out;
+
+ int err = getaddrinfo(iporhost.c_str(), portNum.c_str(), &hints, &addrs);
+
+ if (err) {
+ warning() << "getaddrinfo(\"" << iporhost << "\") failed: " << stringifyError(err)
+ << std::endl;
+ return out;
+ }
- ON_BLOCK_EXIT(freeaddrinfo, addrs);
+ ON_BLOCK_EXIT(freeaddrinfo, addrs);
- for (addrinfo* addr = addrs; addr != NULL; addr = addr->ai_next) {
- int family = addr->ai_family;
- char host[NI_MAXHOST];
+ for (addrinfo* addr = addrs; addr != NULL; addr = addr->ai_next) {
+ int family = addr->ai_family;
+ char host[NI_MAXHOST];
- if (family == AF_INET || family == AF_INET6) {
- err = getnameinfo(addr->ai_addr, addr->ai_addrlen, host,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (err) {
- warning() << "getnameinfo() failed: " << stringifyError(err) << std::endl;
- continue;
- }
- out.push_back(host);
+ if (family == AF_INET || family == AF_INET6) {
+ err = getnameinfo(
+ addr->ai_addr, addr->ai_addrlen, host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+ if (err) {
+ warning() << "getnameinfo() failed: " << stringifyError(err) << std::endl;
+ continue;
}
-
+ out.push_back(host);
}
+ }
- if (shouldLog(logger::LogSeverity::Debug(2))) {
- StringBuilder builder;
- builder << "getAddrsForHost(\"" << iporhost << ":" << port << "\"):";
- for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
- builder << " [ " << *o << "]";
- }
- LOG(2) << builder.str();
+ if (shouldLog(logger::LogSeverity::Debug(2))) {
+ StringBuilder builder;
+ builder << "getAddrsForHost(\"" << iporhost << ":" << port << "\"):";
+ for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
+ builder << " [ " << *o << "]";
}
-
- return out;
+ LOG(2) << builder.str();
}
-} // namespace
+ return out;
+}
- bool isSelf(const HostAndPort& hostAndPort) {
+} // namespace
- // Fastpath: check if the host&port in question is bound to one
- // of the interfaces on this machine.
- // No need for ip match if the ports do not match
- if (hostAndPort.port() == serverGlobalParams.port) {
- std::vector<std::string> myAddrs = serverGlobalParams.bind_ip.empty() ?
- getBoundAddrs(IPv6Enabled()) :
- std::vector<std::string>();
+bool isSelf(const HostAndPort& hostAndPort) {
+ // Fastpath: check if the host&port in question is bound to one
+ // of the interfaces on this machine.
+ // No need for ip match if the ports do not match
+ if (hostAndPort.port() == serverGlobalParams.port) {
+ std::vector<std::string> myAddrs = serverGlobalParams.bind_ip.empty()
+ ? getBoundAddrs(IPv6Enabled())
+ : std::vector<std::string>();
+
+ if (!serverGlobalParams.bind_ip.empty()) {
+ boost::split(myAddrs, serverGlobalParams.bind_ip, boost::is_any_of(", "));
+ }
- if (!serverGlobalParams.bind_ip.empty()) {
- boost::split(myAddrs, serverGlobalParams.bind_ip, boost::is_any_of(", "));
- }
+ const std::vector<std::string> hostAddrs =
+ getAddrsForHost(hostAndPort.host(), hostAndPort.port(), IPv6Enabled());
- const std::vector<std::string> hostAddrs = getAddrsForHost(hostAndPort.host(),
- hostAndPort.port(),
- IPv6Enabled());
-
- for (std::vector<std::string>::const_iterator i = myAddrs.begin();
- i != myAddrs.end(); ++i) {
- for (std::vector<std::string>::const_iterator j = hostAddrs.begin();
- j != hostAddrs.end(); ++j) {
- if (*i == *j) {
- return true;
- }
+ for (std::vector<std::string>::const_iterator i = myAddrs.begin(); i != myAddrs.end();
+ ++i) {
+ for (std::vector<std::string>::const_iterator j = hostAddrs.begin();
+ j != hostAddrs.end();
+ ++j) {
+ if (*i == *j) {
+ return true;
}
}
}
+ }
+
+ // Ensure that the server is up and ready to accept incoming network requests.
+ const Listener* listener = Listener::getTimeTracker();
+ if (!listener) {
+ return false;
+ }
+ listener->waitUntilListening();
- // Ensure that the server is up and ready to accept incoming network requests.
- const Listener* listener = Listener::getTimeTracker();
- if (!listener) {
+ try {
+ DBClientConnection conn;
+ std::string errmsg;
+ conn.setSoTimeout(30); // 30 second timeout
+ if (!conn.connect(hostAndPort, errmsg)) {
return false;
}
- listener->waitUntilListening();
- try {
- DBClientConnection conn;
- std::string errmsg;
- conn.setSoTimeout(30); // 30 second timeout
- if (!conn.connect(hostAndPort, errmsg)) {
+ if (getGlobalAuthorizationManager()->isAuthEnabled() && isInternalAuthSet()) {
+ if (!authenticateInternalUser(&conn)) {
return false;
}
-
- if (getGlobalAuthorizationManager()->isAuthEnabled() && isInternalAuthSet()) {
- if (!authenticateInternalUser(&conn)) {
- return false;
- }
- }
- BSONObj out;
- bool ok = conn.simpleCommand("admin" , &out, "_isSelf");
- bool me = ok && out["id"].type() == jstOID && instanceId == out["id"].OID();
-
- return me;
- }
- catch (const std::exception& e) {
- warning() << "could't check isSelf (" << hostAndPort << ") " << e.what() << std::endl;
}
+ BSONObj out;
+ bool ok = conn.simpleCommand("admin", &out, "_isSelf");
+ bool me = ok && out["id"].type() == jstOID && instanceId == out["id"].OID();
- return false;
+ return me;
+ } catch (const std::exception& e) {
+        warning() << "couldn't check isSelf (" << hostAndPort << ") " << e.what() << std::endl;
}
- /**
- * Returns all the IP addresses bound to the network interfaces of this machine.
- * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
- * addresses will be returned.
- */
- std::vector<std::string> getBoundAddrs(const bool ipv6enabled) {
- std::vector<std::string> out;
+ return false;
+}
+
+/**
+ * Returns all the IP addresses bound to the network interfaces of this machine.
+ * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
+ * addresses will be returned.
+ */
+std::vector<std::string> getBoundAddrs(const bool ipv6enabled) {
+ std::vector<std::string> out;
#ifdef FASTPATH_UNIX
- ifaddrs* addrs;
+ ifaddrs* addrs;
- int err = getifaddrs(&addrs);
- if (err) {
- warning() << "getifaddrs failure: " << errnoWithDescription(err) << std::endl;
- return out;
- }
- ON_BLOCK_EXIT(freeifaddrs, addrs);
-
- // based on example code from linux getifaddrs manpage
- for (ifaddrs* addr = addrs; addr != NULL; addr = addr->ifa_next) {
- if (addr->ifa_addr == NULL) continue;
- int family = addr->ifa_addr->sa_family;
- char host[NI_MAXHOST];
-
- if (family == AF_INET || (ipv6enabled && (family == AF_INET6))) {
- err = getnameinfo(addr->ifa_addr,
- (family == AF_INET ? sizeof(struct sockaddr_in)
- : sizeof(struct sockaddr_in6)),
- host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (err) {
- warning() << "getnameinfo() failed: " << gai_strerror(err) << std::endl;
- continue;
- }
- out.push_back(host);
+ int err = getifaddrs(&addrs);
+ if (err) {
+ warning() << "getifaddrs failure: " << errnoWithDescription(err) << std::endl;
+ return out;
+ }
+ ON_BLOCK_EXIT(freeifaddrs, addrs);
+
+ // based on example code from linux getifaddrs manpage
+ for (ifaddrs* addr = addrs; addr != NULL; addr = addr->ifa_next) {
+ if (addr->ifa_addr == NULL)
+ continue;
+ int family = addr->ifa_addr->sa_family;
+ char host[NI_MAXHOST];
+
+ if (family == AF_INET || (ipv6enabled && (family == AF_INET6))) {
+ err = getnameinfo(
+ addr->ifa_addr,
+ (family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)),
+ host,
+ NI_MAXHOST,
+ NULL,
+ 0,
+ NI_NUMERICHOST);
+ if (err) {
+ warning() << "getnameinfo() failed: " << gai_strerror(err) << std::endl;
+ continue;
}
+ out.push_back(host);
}
+ }
#elif defined(_WIN32)
- // Start with the MS recommended 15KB buffer. Use multiple attempts
- // for the rare case that the adapter config changes between calls
+ // Start with the MS recommended 15KB buffer. Use multiple attempts
+ // for the rare case that the adapter config changes between calls
- ULONG adaptersLen = 15 * 1024;
- boost::scoped_array<char> buf(new char[adaptersLen]);
- IP_ADAPTER_ADDRESSES* adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
- DWORD err;
+ ULONG adaptersLen = 15 * 1024;
+ boost::scoped_array<char> buf(new char[adaptersLen]);
+ IP_ADAPTER_ADDRESSES* adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
+ DWORD err;
- ULONG family = ipv6enabled ? AF_UNSPEC : AF_INET;
+ ULONG family = ipv6enabled ? AF_UNSPEC : AF_INET;
- for (int tries = 0; tries < 3; ++tries) {
- err = GetAdaptersAddresses(family,
- GAA_FLAG_SKIP_ANYCAST | // only want unicast addrs
+ for (int tries = 0; tries < 3; ++tries) {
+ err = GetAdaptersAddresses(family,
+ GAA_FLAG_SKIP_ANYCAST | // only want unicast addrs
GAA_FLAG_SKIP_MULTICAST |
GAA_FLAG_SKIP_DNS_SERVER,
- NULL,
- adapters,
- &adaptersLen);
-
- if (err == ERROR_BUFFER_OVERFLOW) {
- // in this case, adaptersLen will be set to the size we need to allocate
- buf.reset(new char[adaptersLen]);
- adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
- }
- else {
- break; // only retry for incorrectly sized buffer
- }
+ NULL,
+ adapters,
+ &adaptersLen);
+
+ if (err == ERROR_BUFFER_OVERFLOW) {
+ // in this case, adaptersLen will be set to the size we need to allocate
+ buf.reset(new char[adaptersLen]);
+ adapters = reinterpret_cast<IP_ADAPTER_ADDRESSES*>(buf.get());
+ } else {
+ break; // only retry for incorrectly sized buffer
}
+ }
- if (err != NO_ERROR) {
- warning() << "GetAdaptersAddresses() failed: " << errnoWithDescription(err)
- << std::endl;
- return out;
- }
+ if (err != NO_ERROR) {
+ warning() << "GetAdaptersAddresses() failed: " << errnoWithDescription(err) << std::endl;
+ return out;
+ }
- for (IP_ADAPTER_ADDRESSES* adapter = adapters;
- adapter != NULL; adapter = adapter->Next) {
- for (IP_ADAPTER_UNICAST_ADDRESS* addr = adapter->FirstUnicastAddress;
- addr != NULL; addr = addr->Next) {
-
- short family =
- reinterpret_cast<SOCKADDR_STORAGE*>(addr->Address.lpSockaddr)->ss_family;
-
- if (family == AF_INET) {
- // IPv4
- SOCKADDR_IN* sock = reinterpret_cast<SOCKADDR_IN*>(addr->Address.lpSockaddr);
- char addrstr[INET_ADDRSTRLEN] = {0};
- boost::system::error_code ec;
- // Not all windows versions have inet_ntop
- boost::asio::detail::socket_ops::inet_ntop(AF_INET,
- &(sock->sin_addr),
- addrstr,
- INET_ADDRSTRLEN,
- 0,
- ec);
- if (ec) {
- warning() << "inet_ntop failed during IPv4 address conversion: "
- << ec.message() << std::endl;
- continue;
- }
- out.push_back(addrstr);
+ for (IP_ADAPTER_ADDRESSES* adapter = adapters; adapter != NULL; adapter = adapter->Next) {
+ for (IP_ADAPTER_UNICAST_ADDRESS* addr = adapter->FirstUnicastAddress; addr != NULL;
+ addr = addr->Next) {
+ short family = reinterpret_cast<SOCKADDR_STORAGE*>(addr->Address.lpSockaddr)->ss_family;
+
+ if (family == AF_INET) {
+ // IPv4
+ SOCKADDR_IN* sock = reinterpret_cast<SOCKADDR_IN*>(addr->Address.lpSockaddr);
+ char addrstr[INET_ADDRSTRLEN] = {0};
+ boost::system::error_code ec;
+ // Not all windows versions have inet_ntop
+ boost::asio::detail::socket_ops::inet_ntop(
+ AF_INET, &(sock->sin_addr), addrstr, INET_ADDRSTRLEN, 0, ec);
+ if (ec) {
+ warning() << "inet_ntop failed during IPv4 address conversion: " << ec.message()
+ << std::endl;
+ continue;
}
- else if (family == AF_INET6) {
- // IPv6
- SOCKADDR_IN6* sock = reinterpret_cast<SOCKADDR_IN6*>(addr->Address.lpSockaddr);
- char addrstr[INET6_ADDRSTRLEN] = {0};
- boost::system::error_code ec;
- boost::asio::detail::socket_ops::inet_ntop(AF_INET6,
- &(sock->sin6_addr),
- addrstr,
- INET6_ADDRSTRLEN,
- 0,
- ec);
- if (ec) {
- warning() << "inet_ntop failed during IPv6 address conversion: "
- << ec.message() << std::endl;
- continue;
- }
- out.push_back(addrstr);
+ out.push_back(addrstr);
+ } else if (family == AF_INET6) {
+ // IPv6
+ SOCKADDR_IN6* sock = reinterpret_cast<SOCKADDR_IN6*>(addr->Address.lpSockaddr);
+ char addrstr[INET6_ADDRSTRLEN] = {0};
+ boost::system::error_code ec;
+ boost::asio::detail::socket_ops::inet_ntop(
+ AF_INET6, &(sock->sin6_addr), addrstr, INET6_ADDRSTRLEN, 0, ec);
+ if (ec) {
+ warning() << "inet_ntop failed during IPv6 address conversion: " << ec.message()
+ << std::endl;
+ continue;
}
+ out.push_back(addrstr);
}
}
+ }
#endif // defined(_WIN32)
- if (shouldLog(logger::LogSeverity::Debug(2))) {
- StringBuilder builder;
- builder << "getBoundAddrs():";
- for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
- builder << " [ " << *o << "]";
- }
- LOG(2) << builder.str();
+ if (shouldLog(logger::LogSeverity::Debug(2))) {
+ StringBuilder builder;
+ builder << "getBoundAddrs():";
+ for (std::vector<std::string>::const_iterator o = out.begin(); o != out.end(); ++o) {
+ builder << " [ " << *o << "]";
}
- return out;
+ LOG(2) << builder.str();
}
+ return out;
+}
} // namespace repl
} // namespace mongo
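
isSelf() above is a two-stage check: when the ports match, it intersects the candidate's resolved addresses (getAddrsForHost) with the locally bound ones (getBoundAddrs); only if that finds nothing does it dial the host, run the _isSelf command, and compare the returned "id" against this process's instanceId. The intersection step reduces to plain standard C++, sketched here on its own:

    #include <algorithm>
    #include <string>
    #include <vector>

    // True when the two lists share any numeric address; isSelf() performs the
    // same O(n*m) scan with explicit iterators in the loop shown above.
    bool anyAddressShared(const std::vector<std::string>& myAddrs,
                          const std::vector<std::string>& hostAddrs) {
        for (const std::string& mine : myAddrs) {
            if (std::find(hostAddrs.begin(), hostAddrs.end(), mine) != hostAddrs.end()) {
                return true;
            }
        }
        return false;
    }
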
diff --git a/src/mongo/db/repl/isself.h b/src/mongo/db/repl/isself.h
index cbcbbd9f031..19b61bf47bd 100644
--- a/src/mongo/db/repl/isself.h
+++ b/src/mongo/db/repl/isself.h
@@ -34,30 +34,30 @@
#include "mongo/bson/oid.h"
namespace mongo {
- struct HostAndPort;
+struct HostAndPort;
namespace repl {
- /**
- * An identifier unique to this instance. Used by isSelf to see if we are talking
- * to ourself or someone else.
- */
- extern OID instanceId;
-
- /**
- * Returns true if "hostAndPort" identifies this instance.
- */
- bool isSelf(const HostAndPort& hostAndPort);
-
- /**
- * Returns all the IP addresses bound to the network interfaces of this machine.
- * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
- * addresses will be returned.
- *
- * Note: this only works on Linux and Windows. All calls should be properly ifdef'd,
- * otherwise an invariant will be triggered.
- */
- std::vector<std::string> getBoundAddrs(const bool ipv6enabled);
-
-} // namespace repl
-} // namespace mongo
+/**
+ * An identifier unique to this instance. Used by isSelf to see if we are talking
+ * to ourselves or to someone else.
+ */
+extern OID instanceId;
+
+/**
+ * Returns true if "hostAndPort" identifies this instance.
+ */
+bool isSelf(const HostAndPort& hostAndPort);
+
+/**
+ * Returns all the IP addresses bound to the network interfaces of this machine.
+ * This requires a syscall. If the ipv6enabled parameter is true, both IPv6 AND IPv4
+ * addresses will be returned.
+ *
+ * Note: this only works on Linux and Windows. All calls should be properly ifdef'd,
+ * otherwise an invariant will be triggered.
+ */
+std::vector<std::string> getBoundAddrs(const bool ipv6enabled);
+
+} // namespace repl
+} // namespace mongo
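
The note above is the operative constraint: getBoundAddrs() is only implemented on Linux and Windows, so callers are expected to sit behind the same platform ifdefs, while isSelf() is safe to call directly. A hedged usage sketch, assuming the repl library is linked (shouldSkipSeed is an illustrative name, not an existing helper):

    #include "mongo/db/repl/isself.h"
    #include "mongo/util/net/hostandport.h"

    // Skip seed-list entries that identify this instance; isSelf() may perform
    // network I/O on its slow path, per the contract documented above.
    bool shouldSkipSeed(const mongo::HostAndPort& seed) {
        return mongo::repl::isSelf(seed);
    }
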
diff --git a/src/mongo/db/repl/isself_test.cpp b/src/mongo/db/repl/isself_test.cpp
index b3af1721e62..b6a3e26f8e6 100644
--- a/src/mongo/db/repl/isself_test.cpp
+++ b/src/mongo/db/repl/isself_test.cpp
@@ -40,45 +40,41 @@ namespace repl {
namespace {
- using std::string;
+using std::string;
- TEST(IsSelf, DetectsSameHostIPv4) {
+TEST(IsSelf, DetectsSameHostIPv4) {
#if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
- bool wasEnabled = IPv6Enabled();
- enableIPv6(false);
- ON_BLOCK_EXIT(enableIPv6, wasEnabled);
- // first we get the addrs bound on this host
- const std::vector<std::string> addrs = getBoundAddrs(false);
- // Fastpath should agree with the result of getBoundAddrs
- // since it uses it...
- for (std::vector<string>::const_iterator it = addrs.begin();
- it != addrs.end(); ++it) {
-
- ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
- }
+ bool wasEnabled = IPv6Enabled();
+ enableIPv6(false);
+ ON_BLOCK_EXIT(enableIPv6, wasEnabled);
+ // first we get the addrs bound on this host
+ const std::vector<std::string> addrs = getBoundAddrs(false);
+ // Fastpath should agree with the result of getBoundAddrs
+ // since it uses it...
+ for (std::vector<string>::const_iterator it = addrs.begin(); it != addrs.end(); ++it) {
+ ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
+ }
#else
- ASSERT(true);
+ ASSERT(true);
#endif
- }
+}
- TEST(IsSelf, DetectsSameHostIPv6) {
+TEST(IsSelf, DetectsSameHostIPv6) {
#if defined(_WIN32) || defined(__linux__) || defined(__APPLE__)
- bool wasEnabled = IPv6Enabled();
- enableIPv6(true);
- ON_BLOCK_EXIT(enableIPv6, wasEnabled);
- // first we get the addrs bound on this host
- const std::vector<std::string> addrs = getBoundAddrs(true);
- // Fastpath should agree with the result of getBoundAddrs
- // since it uses it...
- for (std::vector<string>::const_iterator it = addrs.begin();
- it != addrs.end(); ++it) {
-
- ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
- }
+ bool wasEnabled = IPv6Enabled();
+ enableIPv6(true);
+ ON_BLOCK_EXIT(enableIPv6, wasEnabled);
+ // first we get the addrs bound on this host
+ const std::vector<std::string> addrs = getBoundAddrs(true);
+ // Fastpath should agree with the result of getBoundAddrs
+ // since it uses it...
+ for (std::vector<string>::const_iterator it = addrs.begin(); it != addrs.end(); ++it) {
+ ASSERT(isSelf(HostAndPort(*it, serverGlobalParams.port)));
+ }
#else
- ASSERT(true);
+ ASSERT(true);
#endif
- }
+}
} // namespace
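
Both tests above lean on the same guard idiom: flip the process-wide IPv6 flag, then let ON_BLOCK_EXIT restore the previous value even if an ASSERT throws mid-loop. The idiom in isolation, assuming mongo's scopeguard and socket headers provide ON_BLOCK_EXIT, IPv6Enabled(), and enableIPv6() as used in the diff:

    #include "mongo/util/net/sock.h"    // IPv6Enabled(), enableIPv6()
    #include "mongo/util/scopeguard.h"  // ON_BLOCK_EXIT

    void withIPv6Forced(bool enabled) {
        const bool wasEnabled = mongo::IPv6Enabled();
        mongo::enableIPv6(enabled);
        ON_BLOCK_EXIT(mongo::enableIPv6, wasEnabled);  // restores the old setting on any exit
        // ... exercise getBoundAddrs(enabled) / isSelf(...) here ...
    }
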
diff --git a/src/mongo/db/repl/master_slave.cpp b/src/mongo/db/repl/master_slave.cpp
index 7cc44b022f1..8844ae7828d 100644
--- a/src/mongo/db/repl/master_slave.cpp
+++ b/src/mongo/db/repl/master_slave.cpp
@@ -81,1319 +81,1292 @@ using std::vector;
namespace mongo {
namespace repl {
- void pretouchOperation(OperationContext* txn, const BSONObj& op);
- void pretouchN(vector<BSONObj>&, unsigned a, unsigned b);
+void pretouchOperation(OperationContext* txn, const BSONObj& op);
+void pretouchN(vector<BSONObj>&, unsigned a, unsigned b);
- /* if 1 sync() is running */
- volatile int syncing = 0;
- volatile int relinquishSyncingSome = 0;
+/* if 1 sync() is running */
+volatile int syncing = 0;
+volatile int relinquishSyncingSome = 0;
- static time_t lastForcedResync = 0;
+static time_t lastForcedResync = 0;
- /* output by the web console */
- const char *replInfo = "";
- struct ReplInfo {
- ReplInfo(const char *msg) {
- replInfo = msg;
- }
- ~ReplInfo() {
- replInfo = "?";
- }
- };
-
-
- ReplSource::ReplSource(OperationContext* txn) {
- nClonedThisPass = 0;
- ensureMe(txn);
+/* output by the web console */
+const char* replInfo = "";
+struct ReplInfo {
+ ReplInfo(const char* msg) {
+ replInfo = msg;
}
-
- ReplSource::ReplSource(OperationContext* txn, BSONObj o) : nClonedThisPass(0) {
- only = o.getStringField("only");
- hostName = o.getStringField("host");
- _sourceName = o.getStringField("source");
- uassert( 10118 , "'host' field not set in sources collection object", !hostName.empty() );
- uassert( 10119 , "only source='main' allowed for now with replication", sourceName() == "main" );
- BSONElement e = o.getField("syncedTo");
- if ( !e.eoo() ) {
- uassert( 10120 , "bad sources 'syncedTo' field value", e.type() == Date || e.type() == Timestamp );
- OpTime tmp( e.date() );
- syncedTo = tmp;
- }
-
- BSONObj dbsObj = o.getObjectField("dbsNextPass");
- if ( !dbsObj.isEmpty() ) {
- BSONObjIterator i(dbsObj);
- while ( 1 ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- addDbNextPass.insert( e.fieldName() );
- }
- }
-
- dbsObj = o.getObjectField("incompleteCloneDbs");
- if ( !dbsObj.isEmpty() ) {
- BSONObjIterator i(dbsObj);
- while ( 1 ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- incompleteCloneDbs.insert( e.fieldName() );
- }
- }
- ensureMe(txn);
+ ~ReplInfo() {
+ replInfo = "?";
+ }
+};
+
+
+ReplSource::ReplSource(OperationContext* txn) {
+ nClonedThisPass = 0;
+ ensureMe(txn);
+}
+
+ReplSource::ReplSource(OperationContext* txn, BSONObj o) : nClonedThisPass(0) {
+ only = o.getStringField("only");
+ hostName = o.getStringField("host");
+ _sourceName = o.getStringField("source");
+ uassert(10118, "'host' field not set in sources collection object", !hostName.empty());
+ uassert(10119, "only source='main' allowed for now with replication", sourceName() == "main");
+ BSONElement e = o.getField("syncedTo");
+ if (!e.eoo()) {
+ uassert(
+ 10120, "bad sources 'syncedTo' field value", e.type() == Date || e.type() == Timestamp);
+ OpTime tmp(e.date());
+ syncedTo = tmp;
}
- /* Turn our C++ Source object into a BSONObj */
- BSONObj ReplSource::jsobj() {
- BSONObjBuilder b;
- b.append("host", hostName);
- b.append("source", sourceName());
- if ( !only.empty() )
- b.append("only", only);
- if ( !syncedTo.isNull() )
- b.appendTimestamp("syncedTo", syncedTo.asDate());
-
- BSONObjBuilder dbsNextPassBuilder;
- int n = 0;
- for ( set<string>::iterator i = addDbNextPass.begin(); i != addDbNextPass.end(); i++ ) {
- n++;
- dbsNextPassBuilder.appendBool(*i, 1);
+ BSONObj dbsObj = o.getObjectField("dbsNextPass");
+ if (!dbsObj.isEmpty()) {
+ BSONObjIterator i(dbsObj);
+ while (1) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ addDbNextPass.insert(e.fieldName());
}
- if ( n )
- b.append("dbsNextPass", dbsNextPassBuilder.done());
+ }
- BSONObjBuilder incompleteCloneDbsBuilder;
- n = 0;
- for ( set<string>::iterator i = incompleteCloneDbs.begin(); i != incompleteCloneDbs.end(); i++ ) {
- n++;
- incompleteCloneDbsBuilder.appendBool(*i, 1);
+ dbsObj = o.getObjectField("incompleteCloneDbs");
+ if (!dbsObj.isEmpty()) {
+ BSONObjIterator i(dbsObj);
+ while (1) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ incompleteCloneDbs.insert(e.fieldName());
}
- if ( n )
- b.append("incompleteCloneDbs", incompleteCloneDbsBuilder.done());
-
- return b.obj();
}
+ ensureMe(txn);
+}
+
+/* Turn our C++ Source object into a BSONObj */
+BSONObj ReplSource::jsobj() {
+ BSONObjBuilder b;
+ b.append("host", hostName);
+ b.append("source", sourceName());
+ if (!only.empty())
+ b.append("only", only);
+ if (!syncedTo.isNull())
+ b.appendTimestamp("syncedTo", syncedTo.asDate());
+
+ BSONObjBuilder dbsNextPassBuilder;
+ int n = 0;
+ for (set<string>::iterator i = addDbNextPass.begin(); i != addDbNextPass.end(); i++) {
+ n++;
+ dbsNextPassBuilder.appendBool(*i, 1);
+ }
+ if (n)
+ b.append("dbsNextPass", dbsNextPassBuilder.done());
+
+ BSONObjBuilder incompleteCloneDbsBuilder;
+ n = 0;
+ for (set<string>::iterator i = incompleteCloneDbs.begin(); i != incompleteCloneDbs.end(); i++) {
+ n++;
+ incompleteCloneDbsBuilder.appendBool(*i, 1);
+ }
+ if (n)
+ b.append("incompleteCloneDbs", incompleteCloneDbsBuilder.done());
- void ReplSource::ensureMe(OperationContext* txn) {
- string myname = getHostName();
+ return b.obj();
+}
- // local.me is an identifier for a server for getLastError w:2+
- bool exists = Helpers::getSingleton(txn, "local.me", _me);
+void ReplSource::ensureMe(OperationContext* txn) {
+ string myname = getHostName();
- if (!exists || !_me.hasField("host") || _me["host"].String() != myname) {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
- WriteUnitOfWork wunit(txn);
- // clean out local.me
- Helpers::emptyCollection(txn, "local.me");
+ // local.me is an identifier for a server for getLastError w:2+
+ bool exists = Helpers::getSingleton(txn, "local.me", _me);
- // repopulate
- BSONObjBuilder b;
- b.appendOID("_id", 0, true);
- b.append("host", myname);
- _me = b.obj();
- Helpers::putSingleton(txn, "local.me", _me);
- wunit.commit();
- }
- _me = _me.getOwned();
- }
+ if (!exists || !_me.hasField("host") || _me["host"].String() != myname) {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dblk(txn->lockState(), "local", MODE_X);
+ WriteUnitOfWork wunit(txn);
+ // clean out local.me
+ Helpers::emptyCollection(txn, "local.me");
- void ReplSource::save(OperationContext* txn) {
+ // repopulate
BSONObjBuilder b;
- verify( !hostName.empty() );
- b.append("host", hostName);
- // todo: finish allowing multiple source configs.
- // this line doesn't work right when source is null, if that is allowed as it is now:
- //b.append("source", _sourceName);
- BSONObj pattern = b.done();
+ b.appendOID("_id", 0, true);
+ b.append("host", myname);
+ _me = b.obj();
+ Helpers::putSingleton(txn, "local.me", _me);
+ wunit.commit();
+ }
+ _me = _me.getOwned();
+}
- BSONObj o = jsobj();
- LOG( 1 ) << "Saving repl source: " << o << endl;
+void ReplSource::save(OperationContext* txn) {
+ BSONObjBuilder b;
+ verify(!hostName.empty());
+ b.append("host", hostName);
+ // todo: finish allowing multiple source configs.
+ // this line doesn't work right when source is null, if that is allowed as it is now:
+ // b.append("source", _sourceName);
+ BSONObj pattern = b.done();
- {
- OpDebug debug;
+ BSONObj o = jsobj();
+ LOG(1) << "Saving repl source: " << o << endl;
- Client::Context ctx(txn, "local.sources");
+ {
+ OpDebug debug;
- const NamespaceString requestNs("local.sources");
- UpdateRequest request(requestNs);
+ Client::Context ctx(txn, "local.sources");
- request.setQuery(pattern);
- request.setUpdates(o);
- request.setUpsert();
+ const NamespaceString requestNs("local.sources");
+ UpdateRequest request(requestNs);
- UpdateResult res = update(txn, ctx.db(), request, &debug);
+ request.setQuery(pattern);
+ request.setUpdates(o);
+ request.setUpsert();
- verify( ! res.modifiers );
- verify( res.numMatched == 1 );
- }
- }
+ UpdateResult res = update(txn, ctx.db(), request, &debug);
- static void addSourceToList(OperationContext* txn,
- ReplSource::SourceVector &v,
- ReplSource& s,
- ReplSource::SourceVector &old) {
- if ( !s.syncedTo.isNull() ) { // Don't reuse old ReplSource if there was a forced resync.
- for ( ReplSource::SourceVector::iterator i = old.begin(); i != old.end(); ) {
- if ( s == **i ) {
- v.push_back(*i);
- old.erase(i);
- return;
- }
- i++;
+ verify(!res.modifiers);
+ verify(res.numMatched == 1);
+ }
+}
+
+static void addSourceToList(OperationContext* txn,
+ ReplSource::SourceVector& v,
+ ReplSource& s,
+ ReplSource::SourceVector& old) {
+ if (!s.syncedTo.isNull()) { // Don't reuse old ReplSource if there was a forced resync.
+ for (ReplSource::SourceVector::iterator i = old.begin(); i != old.end();) {
+ if (s == **i) {
+ v.push_back(*i);
+ old.erase(i);
+ return;
}
+ i++;
}
-
- v.push_back( boost::shared_ptr< ReplSource >( new ReplSource( s ) ) );
}
- /* we reuse our existing objects so that we can keep our existing connection
- and cursor in effect.
- */
- void ReplSource::loadAll(OperationContext* txn, SourceVector &v) {
- const char* localSources = "local.sources";
- Client::Context ctx(txn, localSources);
- SourceVector old = v;
- v.clear();
-
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if (!replSettings.source.empty()) {
- // --source <host> specified.
- // check that no items are in sources other than that
- // add if missing
- int n = 0;
- auto_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- localSources,
- ctx.db()->getCollection(localSources)));
- BSONObj obj;
- PlanExecutor::ExecState state;
- while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
- n++;
- ReplSource tmp(txn, obj);
- if (tmp.hostName != replSettings.source) {
- log() << "repl: --source " << replSettings.source << " != " << tmp.hostName
- << " from local.sources collection" << endl;
- log() << "repl: for instructions on changing this slave's source, see:" << endl;
- log() << "http://dochub.mongodb.org/core/masterslave" << endl;
- log() << "repl: terminating mongod after 30 seconds" << endl;
- sleepsecs(30);
- dbexit( EXIT_REPLICATION_ERROR );
- }
- if (tmp.only != replSettings.only) {
- log() << "--only " << replSettings.only << " != " << tmp.only
- << " from local.sources collection" << endl;
- log() << "terminating after 30 seconds" << endl;
- sleepsecs(30);
- dbexit( EXIT_REPLICATION_ERROR );
- }
- }
- uassert(17065, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
- uassert( 10002 , "local.sources collection corrupt?", n<2 );
- if ( n == 0 ) {
- // source missing. add.
- ReplSource s(txn);
- s.hostName = replSettings.source;
- s.only = replSettings.only;
- s.save(txn);
- }
- }
- else {
- try {
- massert(10384 , "--only requires use of --source", replSettings.only.empty());
- }
- catch ( ... ) {
- dbexit( EXIT_BADOPTIONS );
- }
- }
+ v.push_back(boost::shared_ptr<ReplSource>(new ReplSource(s)));
+}
- auto_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- localSources,
- ctx.db()->getCollection(localSources)));
+/* we reuse our existing objects so that we can keep our existing connection
+ and cursor in effect.
+*/
+void ReplSource::loadAll(OperationContext* txn, SourceVector& v) {
+ const char* localSources = "local.sources";
+ Client::Context ctx(txn, localSources);
+ SourceVector old = v;
+ v.clear();
+
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (!replSettings.source.empty()) {
+ // --source <host> specified.
+ // check that no items are in sources other than that
+ // add if missing
+ int n = 0;
+ auto_ptr<PlanExecutor> exec(InternalPlanner::collectionScan(
+ txn, localSources, ctx.db()->getCollection(localSources)));
BSONObj obj;
PlanExecutor::ExecState state;
while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+ n++;
ReplSource tmp(txn, obj);
- if ( tmp.syncedTo.isNull() ) {
- DBDirectClient c(txn);
- BSONObj op = c.findOne( "local.oplog.$main", QUERY( "op" << NE << "n" ).sort( BSON( "$natural" << -1 ) ) );
- if ( !op.isEmpty() ) {
- tmp.syncedTo = op[ "ts" ].date();
- }
+ if (tmp.hostName != replSettings.source) {
+ log() << "repl: --source " << replSettings.source << " != " << tmp.hostName
+ << " from local.sources collection" << endl;
+ log() << "repl: for instructions on changing this slave's source, see:" << endl;
+ log() << "http://dochub.mongodb.org/core/masterslave" << endl;
+ log() << "repl: terminating mongod after 30 seconds" << endl;
+ sleepsecs(30);
+ dbexit(EXIT_REPLICATION_ERROR);
+ }
+ if (tmp.only != replSettings.only) {
+ log() << "--only " << replSettings.only << " != " << tmp.only
+ << " from local.sources collection" << endl;
+ log() << "terminating after 30 seconds" << endl;
+ sleepsecs(30);
+ dbexit(EXIT_REPLICATION_ERROR);
}
- addSourceToList(txn, v, tmp, old);
}
- uassert(17066, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
- }
-
- bool ReplSource::throttledForceResyncDead( OperationContext* txn, const char *requester ) {
- if ( time( 0 ) - lastForcedResync > 600 ) {
- forceResyncDead( txn, requester );
- lastForcedResync = time( 0 );
- return true;
+ uassert(17065, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
+ uassert(10002, "local.sources collection corrupt?", n < 2);
+ if (n == 0) {
+ // source missing. add.
+ ReplSource s(txn);
+ s.hostName = replSettings.source;
+ s.only = replSettings.only;
+ s.save(txn);
+ }
+ } else {
+ try {
+ massert(10384, "--only requires use of --source", replSettings.only.empty());
+ } catch (...) {
+ dbexit(EXIT_BADOPTIONS);
}
- return false;
}
- void ReplSource::forceResyncDead( OperationContext* txn, const char *requester ) {
- if ( !replAllDead )
- return;
- SourceVector sources;
- ReplSource::loadAll(txn, sources);
- for( SourceVector::iterator i = sources.begin(); i != sources.end(); ++i ) {
- log() << requester << " forcing resync from " << (*i)->hostName << endl;
- (*i)->forceResync( txn, requester );
+ auto_ptr<PlanExecutor> exec(
+ InternalPlanner::collectionScan(txn, localSources, ctx.db()->getCollection(localSources)));
+ BSONObj obj;
+ PlanExecutor::ExecState state;
+ while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+ ReplSource tmp(txn, obj);
+ if (tmp.syncedTo.isNull()) {
+ DBDirectClient c(txn);
+ BSONObj op = c.findOne("local.oplog.$main",
+ QUERY("op" << NE << "n").sort(BSON("$natural" << -1)));
+ if (!op.isEmpty()) {
+ tmp.syncedTo = op["ts"].date();
+ }
}
- replAllDead = 0;
+ addSourceToList(txn, v, tmp, old);
}
+ uassert(17066, "Internal error reading from local.sources", PlanExecutor::IS_EOF == state);
+}
- bool replHandshake(DBClientConnection *conn, const OID& myRID) {
- string myname = getHostName();
-
- BSONObjBuilder cmd;
- cmd.append("handshake", myRID);
-
- BSONObj res;
- bool ok = conn->runCommand( "admin" , cmd.obj() , res );
- // ignoring for now on purpose for older versions
- LOG( ok ? 1 : 0 ) << "replHandshake res not: " << ok << " res: " << res << endl;
+bool ReplSource::throttledForceResyncDead(OperationContext* txn, const char* requester) {
+ if (time(0) - lastForcedResync > 600) {
+ forceResyncDead(txn, requester);
+ lastForcedResync = time(0);
return true;
}
+ return false;
+}
- bool ReplSource::_connect(OplogReader* reader, const HostAndPort& host, const OID& myRID) {
- if (reader->conn()) {
- return true;
- }
+void ReplSource::forceResyncDead(OperationContext* txn, const char* requester) {
+ if (!replAllDead)
+ return;
+ SourceVector sources;
+ ReplSource::loadAll(txn, sources);
+ for (SourceVector::iterator i = sources.begin(); i != sources.end(); ++i) {
+ log() << requester << " forcing resync from " << (*i)->hostName << endl;
+ (*i)->forceResync(txn, requester);
+ }
+ replAllDead = 0;
+}
- if (!reader->connect(host)) {
- return false;
- }
+bool replHandshake(DBClientConnection* conn, const OID& myRID) {
+ string myname = getHostName();
- if (!replHandshake(reader->conn(), myRID)) {
- return false;
- }
+ BSONObjBuilder cmd;
+ cmd.append("handshake", myRID);
+
+ BSONObj res;
+ bool ok = conn->runCommand("admin", cmd.obj(), res);
+    // deliberately ignoring the command result, for compatibility with older versions
+    LOG(ok ? 1 : 0) << "replHandshake ok: " << ok << " res: " << res << endl;
+ return true;
+}
+bool ReplSource::_connect(OplogReader* reader, const HostAndPort& host, const OID& myRID) {
+ if (reader->conn()) {
return true;
}
+ if (!reader->connect(host)) {
+ return false;
+ }
- void ReplSource::forceResync( OperationContext* txn, const char *requester ) {
- BSONObj info;
- {
- // This is always a GlobalWrite lock (so no ns/db used from the context)
- invariant(txn->lockState()->isW());
- Lock::TempRelease tempRelease(txn->lockState());
+ if (!replHandshake(reader->conn(), myRID)) {
+ return false;
+ }
- if (!_connect(&oplogReader, HostAndPort(hostName),
- getGlobalReplicationCoordinator()->getMyRID())) {
- msgassertedNoTrace( 14051 , "unable to connect to resync");
- }
- /* todo use getDatabaseNames() method here */
- bool ok = oplogReader.conn()->runCommand("admin",
- BSON("listDatabases" << 1),
- info,
- QueryOption_SlaveOk);
- massert( 10385 , "Unable to get database list", ok );
+ return true;
+}
+
+
+void ReplSource::forceResync(OperationContext* txn, const char* requester) {
+ BSONObj info;
+ {
+ // This is always a GlobalWrite lock (so no ns/db used from the context)
+ invariant(txn->lockState()->isW());
+ Lock::TempRelease tempRelease(txn->lockState());
+
+ if (!_connect(&oplogReader,
+ HostAndPort(hostName),
+ getGlobalReplicationCoordinator()->getMyRID())) {
+ msgassertedNoTrace(14051, "unable to connect to resync");
}
+ /* todo use getDatabaseNames() method here */
+ bool ok = oplogReader.conn()->runCommand(
+ "admin", BSON("listDatabases" << 1), info, QueryOption_SlaveOk);
+ massert(10385, "Unable to get database list", ok);
+ }
- BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
- while( i.moreWithEOO() ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- string name = e.embeddedObject().getField( "name" ).valuestr();
- if ( !e.embeddedObject().getBoolField( "empty" ) ) {
- if ( name != "local" ) {
- if ( only.empty() || only == name ) {
- resyncDrop( txn, name );
- }
+ BSONObjIterator i(info.getField("databases").embeddedObject());
+ while (i.moreWithEOO()) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ string name = e.embeddedObject().getField("name").valuestr();
+ if (!e.embeddedObject().getBoolField("empty")) {
+ if (name != "local") {
+ if (only.empty() || only == name) {
+ resyncDrop(txn, name);
}
}
}
- syncedTo = OpTime();
- addDbNextPass.clear();
- save(txn);
}
-
- void ReplSource::resyncDrop( OperationContext* txn, const string& db ) {
- log() << "resync: dropping database " << db;
- Client::Context ctx(txn, db);
- dropDatabase(txn, ctx.db());
+ syncedTo = OpTime();
+ addDbNextPass.clear();
+ save(txn);
+}
+
+void ReplSource::resyncDrop(OperationContext* txn, const string& db) {
+ log() << "resync: dropping database " << db;
+ Client::Context ctx(txn, db);
+ dropDatabase(txn, ctx.db());
+}
+
+/* grab initial copy of a database from the master */
+void ReplSource::resync(OperationContext* txn, const std::string& dbName) {
+ const std::string db(dbName); // need local copy of the name, we're dropping the original
+ resyncDrop(txn, db);
+
+ {
+ log() << "resync: cloning database " << db << " to get an initial copy" << endl;
+ ReplInfo r("resync: cloning a database");
+ string errmsg;
+ int errCode = 0;
+ CloneOptions cloneOptions;
+ cloneOptions.fromDB = db;
+ cloneOptions.logForRepl = false;
+ cloneOptions.slaveOk = true;
+ cloneOptions.useReplAuth = true;
+ cloneOptions.snapshot = true;
+ cloneOptions.mayYield = true;
+ cloneOptions.mayBeInterrupted = false;
+
+ Cloner cloner;
+ bool ok = cloner.go(txn, db, hostName.c_str(), cloneOptions, NULL, errmsg, &errCode);
+
+ if (!ok) {
+ if (errCode == DatabaseDifferCaseCode) {
+ resyncDrop(txn, db);
+ log() << "resync: database " << db
+ << " not valid on the master due to a name conflict, dropping." << endl;
+ return;
+ } else {
+ log() << "resync of " << db << " from " << hostName << " failed " << errmsg << endl;
+ throw SyncException();
+ }
+ }
}
- /* grab initial copy of a database from the master */
- void ReplSource::resync(OperationContext* txn, const std::string& dbName) {
- const std::string db(dbName); // need local copy of the name, we're dropping the original
- resyncDrop( txn, db );
+ log() << "resync: done with initial clone for db: " << db << endl;
- {
- log() << "resync: cloning database " << db << " to get an initial copy" << endl;
- ReplInfo r("resync: cloning a database");
- string errmsg;
- int errCode = 0;
- CloneOptions cloneOptions;
- cloneOptions.fromDB = db;
- cloneOptions.logForRepl = false;
- cloneOptions.slaveOk = true;
- cloneOptions.useReplAuth = true;
- cloneOptions.snapshot = true;
- cloneOptions.mayYield = true;
- cloneOptions.mayBeInterrupted = false;
-
- Cloner cloner;
- bool ok = cloner.go(txn,
- db,
- hostName.c_str(),
- cloneOptions,
- NULL,
- errmsg,
- &errCode);
-
- if ( !ok ) {
- if ( errCode == DatabaseDifferCaseCode ) {
- resyncDrop( txn, db );
- log() << "resync: database " << db << " not valid on the master due to a name conflict, dropping." << endl;
- return;
- }
- else {
- log() << "resync of " << db << " from " << hostName << " failed " << errmsg << endl;
- throw SyncException();
- }
- }
- }
+ return;
+}
- log() << "resync: done with initial clone for db: " << db << endl;
+static DatabaseIgnorer ___databaseIgnorer;
- return;
- }
-
- static DatabaseIgnorer ___databaseIgnorer;
-
- void DatabaseIgnorer::doIgnoreUntilAfter( const string &db, const OpTime &futureOplogTime ) {
- if ( futureOplogTime > _ignores[ db ] ) {
- _ignores[ db ] = futureOplogTime;
- }
+void DatabaseIgnorer::doIgnoreUntilAfter(const string& db, const OpTime& futureOplogTime) {
+ if (futureOplogTime > _ignores[db]) {
+ _ignores[db] = futureOplogTime;
}
+}
- bool DatabaseIgnorer::ignoreAt( const string &db, const OpTime &currentOplogTime ) {
- if ( _ignores[ db ].isNull() ) {
- return false;
- }
- if ( _ignores[ db ] >= currentOplogTime ) {
- return true;
- } else {
- // The ignore state has expired, so clear it.
- _ignores.erase( db );
- return false;
- }
+bool DatabaseIgnorer::ignoreAt(const string& db, const OpTime& currentOplogTime) {
+ if (_ignores[db].isNull()) {
+ return false;
+ }
+ if (_ignores[db] >= currentOplogTime) {
+ return true;
+ } else {
+ // The ignore state has expired, so clear it.
+ _ignores.erase(db);
+ return false;
+ }
+}
+
+bool ReplSource::handleDuplicateDbName(OperationContext* txn,
+ const BSONObj& op,
+ const char* ns,
+ const char* db) {
+ // We are already locked at this point
+ if (dbHolder().get(txn, ns) != NULL) {
+ // Database is already present.
+ return true;
+ }
+ BSONElement ts = op.getField("ts");
+ if ((ts.type() == Date || ts.type() == Timestamp) &&
+ ___databaseIgnorer.ignoreAt(db, ts.date())) {
+ // Database is ignored due to a previous indication that it is
+ // missing from master after optime "ts".
+ return false;
+ }
+ if (Database::duplicateUncasedName(db).empty()) {
+ // No duplicate database names are present.
+ return true;
}
- bool ReplSource::handleDuplicateDbName( OperationContext* txn,
- const BSONObj &op,
- const char* ns,
- const char* db ) {
- // We are already locked at this point
- if (dbHolder().get(txn, ns) != NULL) {
- // Database is already present.
- return true;
- }
- BSONElement ts = op.getField( "ts" );
- if ( ( ts.type() == Date || ts.type() == Timestamp ) && ___databaseIgnorer.ignoreAt( db, ts.date() ) ) {
- // Database is ignored due to a previous indication that it is
- // missing from master after optime "ts".
- return false;
- }
- if (Database::duplicateUncasedName(db).empty()) {
- // No duplicate database names are present.
- return true;
+ OpTime lastTime;
+ bool dbOk = false;
+ {
+ // This is always a GlobalWrite lock (so no ns/db used from the context)
+ invariant(txn->lockState()->isW());
+ Lock::TempRelease(txn->lockState());
+
+ // We always log an operation after executing it (never before), so
+ // a database list will always be valid as of an oplog entry generated
+ // before it was retrieved.
+
+ BSONObj last =
+ oplogReader.findOne(this->ns().c_str(), Query().sort(BSON("$natural" << -1)));
+ if (!last.isEmpty()) {
+ BSONElement ts = last.getField("ts");
+ massert(
+ 14032, "Invalid 'ts' in remote log", ts.type() == Date || ts.type() == Timestamp);
+ lastTime = OpTime(ts.date());
}
-
- OpTime lastTime;
- bool dbOk = false;
- {
- // This is always a GlobalWrite lock (so no ns/db used from the context)
- invariant(txn->lockState()->isW());
- Lock::TempRelease(txn->lockState());
-
- // We always log an operation after executing it (never before), so
- // a database list will always be valid as of an oplog entry generated
- // before it was retrieved.
-
- BSONObj last = oplogReader.findOne( this->ns().c_str(), Query().sort( BSON( "$natural" << -1 ) ) );
- if ( !last.isEmpty() ) {
- BSONElement ts = last.getField( "ts" );
- massert( 14032, "Invalid 'ts' in remote log", ts.type() == Date || ts.type() == Timestamp );
- lastTime = OpTime( ts.date() );
- }
- BSONObj info;
- bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info );
- massert( 14033, "Unable to get database list", ok );
- BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
- while( i.more() ) {
- BSONElement e = i.next();
-
- const char * name = e.embeddedObject().getField( "name" ).valuestr();
- if ( strcasecmp( name, db ) != 0 )
- continue;
-
- if ( strcmp( name, db ) == 0 ) {
- // The db exists on master, still need to check that no conflicts exist there.
- dbOk = true;
- continue;
- }
-
- // The master has a db name that conflicts with the requested name.
- dbOk = false;
- break;
+ BSONObj info;
+ bool ok = oplogReader.conn()->runCommand("admin", BSON("listDatabases" << 1), info);
+ massert(14033, "Unable to get database list", ok);
+ BSONObjIterator i(info.getField("databases").embeddedObject());
+ while (i.more()) {
+ BSONElement e = i.next();
+
+ const char* name = e.embeddedObject().getField("name").valuestr();
+ if (strcasecmp(name, db) != 0)
+ continue;
+
+ if (strcmp(name, db) == 0) {
+ // The db exists on master, still need to check that no conflicts exist there.
+ dbOk = true;
+ continue;
}
+
+ // The master has a db name that conflicts with the requested name.
+ dbOk = false;
+ break;
}
-
- if ( !dbOk ) {
- ___databaseIgnorer.doIgnoreUntilAfter( db, lastTime );
- incompleteCloneDbs.erase(db);
- addDbNextPass.erase(db);
- return false;
- }
-
- // Check for duplicates again, since we released the lock above.
- set< string > duplicates;
- Database::duplicateUncasedName(db, &duplicates);
-
- // The database is present on the master and no conflicting databases
- // are present on the master. Drop any local conflicts.
- for( set< string >::const_iterator i = duplicates.begin(); i != duplicates.end(); ++i ) {
- ___databaseIgnorer.doIgnoreUntilAfter( *i, lastTime );
- incompleteCloneDbs.erase(*i);
- addDbNextPass.erase(*i);
-
- Client::Context ctx(txn, *i);
- dropDatabase(txn, ctx.db());
- }
-
- massert(14034, "Duplicate database names present after attempting to delete duplicates",
- Database::duplicateUncasedName(db).empty());
- return true;
}
- void ReplSource::applyOperation(OperationContext* txn, Database* db, const BSONObj& op) {
- try {
- bool failedUpdate = applyOperation_inlock( txn, db, op );
- if (failedUpdate) {
- Sync sync(hostName);
- if (sync.shouldRetry(txn, op)) {
- uassert(15914,
- "Failure retrying initial sync update",
- !applyOperation_inlock(txn, db, op));
- }
+ if (!dbOk) {
+ ___databaseIgnorer.doIgnoreUntilAfter(db, lastTime);
+ incompleteCloneDbs.erase(db);
+ addDbNextPass.erase(db);
+ return false;
+ }
+
+ // Check for duplicates again, since we released the lock above.
+ set<string> duplicates;
+ Database::duplicateUncasedName(db, &duplicates);
+
+ // The database is present on the master and no conflicting databases
+ // are present on the master. Drop any local conflicts.
+ for (set<string>::const_iterator i = duplicates.begin(); i != duplicates.end(); ++i) {
+ ___databaseIgnorer.doIgnoreUntilAfter(*i, lastTime);
+ incompleteCloneDbs.erase(*i);
+ addDbNextPass.erase(*i);
+
+ Client::Context ctx(txn, *i);
+ dropDatabase(txn, ctx.db());
+ }
+
+ massert(14034,
+ "Duplicate database names present after attempting to delete duplicates",
+ Database::duplicateUncasedName(db).empty());
+ return true;
+}
+
+void ReplSource::applyOperation(OperationContext* txn, Database* db, const BSONObj& op) {
+ try {
+ bool failedUpdate = applyOperation_inlock(txn, db, op);
+ if (failedUpdate) {
+ Sync sync(hostName);
+ if (sync.shouldRetry(txn, op)) {
+ uassert(15914,
+ "Failure retrying initial sync update",
+ !applyOperation_inlock(txn, db, op));
}
}
- catch ( UserException& e ) {
- log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;;
- }
- catch ( DBException& e ) {
- log() << "sync: caught db exception " << e << " while applying op: " << op << endl;;
- }
-
+    } catch (UserException& e) {
+        log() << "sync: caught user assertion " << e << " while applying op: " << op << endl;
+    } catch (DBException& e) {
+        log() << "sync: caught db exception " << e << " while applying op: " << op << endl;
}
+}
- /* local.$oplog.main is of the form:
- { ts: ..., op: <optype>, ns: ..., o: <obj> , o2: <extraobj>, b: <boolflag> }
- ...
- see logOp() comments.
+/* local.$oplog.main is of the form:
+ { ts: ..., op: <optype>, ns: ..., o: <obj> , o2: <extraobj>, b: <boolflag> }
+ ...
+ see logOp() comments.
- @param alreadyLocked caller already put us in write lock if true
- */
- void ReplSource::_sync_pullOpLog_applyOperation(OperationContext* txn, BSONObj& op, bool alreadyLocked) {
- LOG(6) << "processing op: " << op << endl;
+ @param alreadyLocked caller already put us in write lock if true
+*/
+void ReplSource::_sync_pullOpLog_applyOperation(OperationContext* txn,
+ BSONObj& op,
+ bool alreadyLocked) {
+ LOG(6) << "processing op: " << op << endl;
- if( op.getStringField("op")[0] == 'n' )
- return;
+ if (op.getStringField("op")[0] == 'n')
+ return;
- char clientName[MaxDatabaseNameLen];
- const char *ns = op.getStringField("ns");
- nsToDatabase(ns, clientName);
+ char clientName[MaxDatabaseNameLen];
+ const char* ns = op.getStringField("ns");
+ nsToDatabase(ns, clientName);
- if ( *ns == '.' ) {
- log() << "skipping bad op in oplog: " << op.toString() << endl;
- return;
- }
- else if ( *ns == 0 ) {
- /*if( op.getStringField("op")[0] != 'n' )*/ {
- log() << "halting replication, bad op in oplog:\n " << op.toString() << endl;
- replAllDead = "bad object in oplog";
- throw SyncException();
- }
- //ns = "local.system.x";
- //nsToDatabase(ns, clientName);
+ if (*ns == '.') {
+ log() << "skipping bad op in oplog: " << op.toString() << endl;
+ return;
+ } else if (*ns == 0) {
+ /*if( op.getStringField("op")[0] != 'n' )*/ {
+ log() << "halting replication, bad op in oplog:\n " << op.toString() << endl;
+ replAllDead = "bad object in oplog";
+ throw SyncException();
}
+ // ns = "local.system.x";
+ // nsToDatabase(ns, clientName);
+ }
- if ( !only.empty() && only != clientName )
- return;
+ if (!only.empty() && only != clientName)
+ return;
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if (replSettings.pretouch &&
- !alreadyLocked/*doesn't make sense if in write lock already*/) {
- if (replSettings.pretouch > 1) {
- /* note: this is bad - should be put in ReplSource. but this is first test... */
- static int countdown;
- verify( countdown >= 0 );
- if( countdown > 0 ) {
- countdown--; // was pretouched on a prev pass
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (replSettings.pretouch && !alreadyLocked /*doesn't make sense if in write lock already*/) {
+ if (replSettings.pretouch > 1) {
+ /* note: this is bad - should be put in ReplSource. but this is first test... */
+ static int countdown;
+ verify(countdown >= 0);
+ if (countdown > 0) {
+ countdown--; // was pretouched on a prev pass
+ } else {
+ const int m = 4;
+ if (tp.get() == 0) {
+ int nthr = min(8, replSettings.pretouch);
+ nthr = max(nthr, 1);
+ tp.reset(new ThreadPool(nthr));
}
- else {
- const int m = 4;
- if( tp.get() == 0 ) {
- int nthr = min(8, replSettings.pretouch);
- nthr = max(nthr, 1);
- tp.reset( new ThreadPool(nthr) );
- }
- vector<BSONObj> v;
- oplogReader.peek(v, replSettings.pretouch);
- unsigned a = 0;
- while( 1 ) {
- if( a >= v.size() ) break;
- unsigned b = a + m - 1; // v[a..b]
- if( b >= v.size() ) b = v.size() - 1;
- tp->schedule(pretouchN, v, a, b);
- DEV cout << "pretouch task: " << a << ".." << b << endl;
- a += m;
- }
- // we do one too...
- pretouchOperation(txn, op);
- tp->join();
- countdown = v.size();
+ vector<BSONObj> v;
+ oplogReader.peek(v, replSettings.pretouch);
+ unsigned a = 0;
+ while (1) {
+ if (a >= v.size())
+ break;
+ unsigned b = a + m - 1; // v[a..b]
+ if (b >= v.size())
+ b = v.size() - 1;
+ tp->schedule(pretouchN, v, a, b);
+ DEV cout << "pretouch task: " << a << ".." << b << endl;
+ a += m;
}
- }
- else {
+ // we do one too...
pretouchOperation(txn, op);
+ tp->join();
+ countdown = v.size();
}
+ } else {
+ pretouchOperation(txn, op);
}
+ }
- scoped_ptr<Lock::GlobalWrite> lk(alreadyLocked ? 0 : new Lock::GlobalWrite(txn->lockState()));
+ scoped_ptr<Lock::GlobalWrite> lk(alreadyLocked ? 0 : new Lock::GlobalWrite(txn->lockState()));
- if ( replAllDead ) {
- // hmmm why is this check here and not at top of this function? does it get set between top and here?
- log() << "replAllDead, throwing SyncException: " << replAllDead << endl;
- throw SyncException();
- }
+ if (replAllDead) {
+ // hmmm why is this check here and not at top of this function? does it get set between top and here?
+ log() << "replAllDead, throwing SyncException: " << replAllDead << endl;
+ throw SyncException();
+ }
- if (!handleDuplicateDbName(txn, op, ns, clientName)) {
- return;
- }
+ if (!handleDuplicateDbName(txn, op, ns, clientName)) {
+ return;
+ }
- // This code executes on the slaves only, so it doesn't need to be sharding-aware since
- // mongos will not send requests there. That's why the last argument is false (do not do
- // version checking).
- Client::Context ctx(txn, ns, false);
- ctx.getClient()->curop()->reset();
+ // This code executes on the slaves only, so it doesn't need to be sharding-aware since
+ // mongos will not send requests there. That's why the last argument is false (do not do
+ // version checking).
+ Client::Context ctx(txn, ns, false);
+ ctx.getClient()->curop()->reset();
- bool empty = !ctx.db()->getDatabaseCatalogEntry()->hasUserData();
- bool incompleteClone = incompleteCloneDbs.count( clientName ) != 0;
+ bool empty = !ctx.db()->getDatabaseCatalogEntry()->hasUserData();
+ bool incompleteClone = incompleteCloneDbs.count(clientName) != 0;
- LOG(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty << ", incompleteClone: " << incompleteClone << endl;
+ LOG(6) << "ns: " << ns << ", justCreated: " << ctx.justCreated() << ", empty: " << empty
+ << ", incompleteClone: " << incompleteClone << endl;
- // always apply admin command command
- // this is a bit hacky -- the semantics of replication/commands aren't well specified
- if ( strcmp( clientName, "admin" ) == 0 && *op.getStringField( "op" ) == 'c' ) {
- applyOperation(txn, ctx.db(), op);
- return;
- }
+    // always apply admin commands
+ // this is a bit hacky -- the semantics of replication/commands aren't well specified
+ if (strcmp(clientName, "admin") == 0 && *op.getStringField("op") == 'c') {
+ applyOperation(txn, ctx.db(), op);
+ return;
+ }
- if ( ctx.justCreated() || empty || incompleteClone ) {
- // we must add to incomplete list now that setClient has been called
- incompleteCloneDbs.insert( clientName );
- if ( nClonedThisPass ) {
- /* we only clone one database per pass, even if a lot need done. This helps us
- avoid overflowing the master's transaction log by doing too much work before going
- back to read more transactions. (Imagine a scenario of slave startup where we try to
- clone 100 databases in one pass.)
- */
- addDbNextPass.insert( clientName );
- }
- else {
- if ( incompleteClone ) {
- log() << "An earlier initial clone of '" << clientName << "' did not complete, now resyncing." << endl;
- }
- save(txn);
- Client::Context ctx(txn, ns);
- nClonedThisPass++;
- resync(txn, ctx.db()->name());
- addDbNextPass.erase(clientName);
- incompleteCloneDbs.erase( clientName );
+ if (ctx.justCreated() || empty || incompleteClone) {
+ // we must add to incomplete list now that setClient has been called
+ incompleteCloneDbs.insert(clientName);
+ if (nClonedThisPass) {
+            /* we only clone one database per pass, even if many need to be done. This helps us
+ avoid overflowing the master's transaction log by doing too much work before going
+ back to read more transactions. (Imagine a scenario of slave startup where we try to
+ clone 100 databases in one pass.)
+ */
+ addDbNextPass.insert(clientName);
+ } else {
+ if (incompleteClone) {
+ log() << "An earlier initial clone of '" << clientName
+ << "' did not complete, now resyncing." << endl;
}
save(txn);
+ Client::Context ctx(txn, ns);
+ nClonedThisPass++;
+ resync(txn, ctx.db()->name());
+ addDbNextPass.erase(clientName);
+ incompleteCloneDbs.erase(clientName);
}
- else {
- applyOperation(txn, ctx.db(), op);
- addDbNextPass.erase( clientName );
- }
+ save(txn);
+ } else {
+ applyOperation(txn, ctx.db(), op);
+ addDbNextPass.erase(clientName);
}
+}
- void ReplSource::syncToTailOfRemoteLog() {
- string _ns = ns();
- BSONObjBuilder b;
- if ( !only.empty() ) {
- b.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta( only ));
- }
- BSONObj last = oplogReader.findOne( _ns.c_str(), Query( b.done() ).sort( BSON( "$natural" << -1 ) ) );
- if ( !last.isEmpty() ) {
- BSONElement ts = last.getField( "ts" );
- massert( 10386 , "non Date ts found: " + last.toString(), ts.type() == Date || ts.type() == Timestamp );
- syncedTo = OpTime( ts.date() );
- }
+void ReplSource::syncToTailOfRemoteLog() {
+ string _ns = ns();
+ BSONObjBuilder b;
+ if (!only.empty()) {
+ b.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta(only));
}
+ BSONObj last = oplogReader.findOne(_ns.c_str(), Query(b.done()).sort(BSON("$natural" << -1)));
+ if (!last.isEmpty()) {
+ BSONElement ts = last.getField("ts");
+ massert(10386,
+ "non Date ts found: " + last.toString(),
+ ts.type() == Date || ts.type() == Timestamp);
+ syncedTo = OpTime(ts.date());
+ }
+}
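+// Editor's sketch (illustrative): with only == "accounts" this issues, roughly,
+//     local.oplog.$main.find({ns: /^accounts/}).sort({$natural: -1}).limit(1)
+// i.e. "fetch the newest oplog entry for the watched db" to seed syncedTo.
+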
- class ReplApplyBatchSize : public ServerParameter {
- public:
- ReplApplyBatchSize()
- : ServerParameter( ServerParameterSet::getGlobal(), "replApplyBatchSize" ),
- _value( 1 ) {
- }
+class ReplApplyBatchSize : public ServerParameter {
+public:
+ ReplApplyBatchSize()
+ : ServerParameter(ServerParameterSet::getGlobal(), "replApplyBatchSize"), _value(1) {}
+
+ int get() const {
+ return _value;
+ }
+
+ virtual void append(OperationContext* txn, BSONObjBuilder& b, const string& name) {
+ b.append(name, _value);
+ }
- int get() const { return _value; }
+    virtual Status set(const BSONElement& newValueElement) {
+        return set(newValueElement.numberInt());
+    }
- virtual void append(OperationContext* txn, BSONObjBuilder& b, const string& name) {
- b.append( name, _value );
+ virtual Status set(int b) {
+ if (b < 1 || b > 1024) {
+            return Status(ErrorCodes::BadValue, "replApplyBatchSize has to be >= 1 and <= 1024");
}
- virtual Status set( const BSONElement& newValuElement ) {
- return set( newValuElement.numberInt() );
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (replSettings.slavedelay != 0 && b > 1) {
+ return Status(ErrorCodes::BadValue, "can't use a batch size > 1 with slavedelay");
+ }
+ if (!replSettings.slave) {
+ return Status(ErrorCodes::BadValue,
+ "can't set replApplyBatchSize on a non-slave machine");
}
- virtual Status set( int b ) {
- if( b < 1 || b > 1024 ) {
- return Status( ErrorCodes::BadValue,
- "replApplyBatchSize has to be >= 1 and < 1024" );
- }
+ _value = b;
+ return Status::OK();
+ }
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if ( replSettings.slavedelay != 0 && b > 1 ) {
- return Status( ErrorCodes::BadValue,
- "can't use a batch size > 1 with slavedelay" );
- }
- if ( ! replSettings.slave ) {
- return Status( ErrorCodes::BadValue,
- "can't set replApplyBatchSize on a non-slave machine" );
- }
+ virtual Status setFromString(const string& str) {
+ return set(atoi(str.c_str()));
+ }
- _value = b;
- return Status::OK();
- }
+ int _value;
- virtual Status setFromString( const string& str ) {
- return set( atoi( str.c_str() ) );
- }
+} replApplyBatchSize;
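+// Editor's note (illustrative): being a ServerParameter, this can be changed at
+// runtime on a --slave node, e.g. from the shell:
+//     db.adminCommand({setParameter: 1, replApplyBatchSize: 8})
+// set() above rejects values outside [1, 1024], any value > 1 while slavedelay
+// is in effect, and any attempt on a non-slave node.
+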
- int _value;
-
- } replApplyBatchSize;
-
- /* slave: pull some data from the master's oplog
- note: not yet in db mutex at this point.
- @return -1 error
- 0 ok, don't sleep
- 1 ok, sleep
- */
- int ReplSource::_sync_pullOpLog(OperationContext* txn, int& nApplied) {
- int okResultCode = 1;
- string ns = string("local.oplog.$") + sourceName();
- LOG(2) << "repl: sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n';
-
- bool tailing = true;
- oplogReader.tailCheck();
-
- bool initial = syncedTo.isNull();
-
- if ( !oplogReader.haveCursor() || initial ) {
- if ( initial ) {
- // Important to grab last oplog timestamp before listing databases.
- syncToTailOfRemoteLog();
- BSONObj info;
- bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info );
- massert( 10389 , "Unable to get database list", ok );
- BSONObjIterator i( info.getField( "databases" ).embeddedObject() );
- while( i.moreWithEOO() ) {
- BSONElement e = i.next();
- if ( e.eoo() )
- break;
- string name = e.embeddedObject().getField( "name" ).valuestr();
- if ( !e.embeddedObject().getBoolField( "empty" ) ) {
- if ( name != "local" ) {
- if ( only.empty() || only == name ) {
- LOG( 2 ) << "adding to 'addDbNextPass': " << name << endl;
- addDbNextPass.insert( name );
- }
+/* slave: pull some data from the master's oplog
+ note: not yet in db mutex at this point.
+ @return -1 error
+ 0 ok, don't sleep
+ 1 ok, sleep
+*/
+int ReplSource::_sync_pullOpLog(OperationContext* txn, int& nApplied) {
+ int okResultCode = 1;
+ string ns = string("local.oplog.$") + sourceName();
+ LOG(2) << "repl: sync_pullOpLog " << ns << " syncedTo:" << syncedTo.toStringLong() << '\n';
+
+ bool tailing = true;
+ oplogReader.tailCheck();
+
+ bool initial = syncedTo.isNull();
+
+ if (!oplogReader.haveCursor() || initial) {
+ if (initial) {
+ // Important to grab last oplog timestamp before listing databases.
+ syncToTailOfRemoteLog();
+ BSONObj info;
+ bool ok = oplogReader.conn()->runCommand("admin", BSON("listDatabases" << 1), info);
+ massert(10389, "Unable to get database list", ok);
+ BSONObjIterator i(info.getField("databases").embeddedObject());
+ while (i.moreWithEOO()) {
+ BSONElement e = i.next();
+ if (e.eoo())
+ break;
+ string name = e.embeddedObject().getField("name").valuestr();
+ if (!e.embeddedObject().getBoolField("empty")) {
+ if (name != "local") {
+ if (only.empty() || only == name) {
+ LOG(2) << "adding to 'addDbNextPass': " << name << endl;
+ addDbNextPass.insert(name);
}
}
}
- // obviously global isn't ideal, but non-repl set is old so
- // keeping it simple
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- save(txn);
}
+ // obviously global isn't ideal, but non-repl set is old so
+ // keeping it simple
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ save(txn);
+ }
- BSONObjBuilder gte;
- gte.appendTimestamp("$gte", syncedTo.asDate());
- BSONObjBuilder query;
- query.append("ts", gte.done());
- if ( !only.empty() ) {
- // note we may here skip a LOT of data table scanning, a lot of work for the master.
- // maybe append "\\." here?
- query.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta( only ));
- }
- BSONObj queryObj = query.done();
- // e.g. queryObj = { ts: { $gte: syncedTo } }
+ BSONObjBuilder gte;
+ gte.appendTimestamp("$gte", syncedTo.asDate());
+ BSONObjBuilder query;
+ query.append("ts", gte.done());
+ if (!only.empty()) {
+            // note we may skip a LOT of data table scanning here, sparing the master a lot of work.
+ // maybe append "\\." here?
+ query.appendRegex("ns", string("^") + pcrecpp::RE::QuoteMeta(only));
+ }
+ BSONObj queryObj = query.done();
+ // e.g. queryObj = { ts: { $gte: syncedTo } }
+
+ oplogReader.tailingQuery(ns.c_str(), queryObj);
+ tailing = false;
+ } else {
+ LOG(2) << "repl: tailing=true\n";
+ }
+
+ if (!oplogReader.haveCursor()) {
+ log() << "repl: dbclient::query returns null (conn closed?)" << endl;
+ oplogReader.resetConnection();
+ return -1;
+ }
- oplogReader.tailingQuery(ns.c_str(), queryObj);
- tailing = false;
+ // show any deferred database creates from a previous pass
+ {
+ set<string>::iterator i = addDbNextPass.begin();
+ if (i != addDbNextPass.end()) {
+ BSONObjBuilder b;
+ b.append("ns", *i + '.');
+ b.append("op", "db");
+ BSONObj op = b.done();
+ _sync_pullOpLog_applyOperation(txn, op, false);
}
- else {
- LOG(2) << "repl: tailing=true\n";
+ }
+
+ if (!oplogReader.more()) {
+ if (tailing) {
+ LOG(2) << "repl: tailing & no new activity\n";
+ okResultCode = 0; // don't sleep
+
+ } else {
+ log() << "repl: " << ns << " oplog is empty" << endl;
+ }
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ save(txn);
}
+ return okResultCode;
+ }
- if( !oplogReader.haveCursor() ) {
- log() << "repl: dbclient::query returns null (conn closed?)" << endl;
- oplogReader.resetConnection();
- return -1;
+ OpTime nextOpTime;
+ {
+ BSONObj op = oplogReader.next();
+ BSONElement ts = op.getField("ts");
+ if (ts.type() != Date && ts.type() != Timestamp) {
+ string err = op.getStringField("$err");
+ if (!err.empty()) {
+ // 13051 is "tailable cursor requested on non capped collection"
+ if (op.getIntField("code") == 13051) {
+ log() << "trying to slave off of a non-master" << '\n';
+ massert(13344, "trying to slave off of a non-master", false);
+ } else {
+ log() << "repl: $err reading remote oplog: " + err << '\n';
+ massert(10390, "got $err reading remote oplog", false);
+ }
+ } else {
+ log() << "repl: bad object read from remote oplog: " << op.toString() << '\n';
+ massert(10391, "repl: bad object read from remote oplog", false);
+ }
}
- // show any deferred database creates from a previous pass
- {
- set<string>::iterator i = addDbNextPass.begin();
- if ( i != addDbNextPass.end() ) {
- BSONObjBuilder b;
- b.append("ns", *i + '.');
- b.append("op", "db");
- BSONObj op = b.done();
- _sync_pullOpLog_applyOperation(txn, op, false);
+ nextOpTime = OpTime(ts.date());
+ LOG(2) << "repl: first op time received: " << nextOpTime.toString() << '\n';
+ if (initial) {
+ LOG(1) << "repl: initial run\n";
+ }
+ if (tailing) {
+ if (!(syncedTo < nextOpTime)) {
+ log() << "repl ASSERTION failed : syncedTo < nextOpTime" << endl;
+ log() << "repl syncTo: " << syncedTo.toStringLong() << endl;
+ log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl;
+ verify(false);
}
+ oplogReader.putBack(op); // op will be processed in the loop below
+ nextOpTime = OpTime(); // will reread the op below
+ } else if (nextOpTime != syncedTo) { // didn't get what we queried for - error
+ log() << "repl: nextOpTime " << nextOpTime.toStringLong() << ' '
+ << ((nextOpTime < syncedTo) ? "<??" : ">") << " syncedTo "
+ << syncedTo.toStringLong() << '\n'
+ << "repl: time diff: " << (nextOpTime.getSecs() - syncedTo.getSecs()) << "sec\n"
+ << "repl: tailing: " << tailing << '\n'
+ << "repl: data too stale, halting replication" << endl;
+ replInfo = replAllDead = "data too stale halted replication";
+ verify(syncedTo < nextOpTime);
+ throw SyncException();
+ } else {
+            /* nextOpTime == syncedTo, so the first op was applied previously or it is
+               the first op of the initial query and need not be applied. */
}
+ }
- if ( !oplogReader.more() ) {
- if ( tailing ) {
- LOG(2) << "repl: tailing & no new activity\n";
- okResultCode = 0; // don't sleep
+ // apply operations
+ {
+ int n = 0;
+ time_t saveLast = time(0);
+ while (1) {
+ // we need "&& n" to assure we actually process at least one op to get a sync
+ // point recorded in the first place.
+ const bool moreInitialSyncsPending = !addDbNextPass.empty() && n;
- }
- else {
- log() << "repl: " << ns << " oplog is empty" << endl;
- }
- {
+ if (moreInitialSyncsPending || !oplogReader.more()) {
ScopedTransaction transaction(txn, MODE_X);
Lock::GlobalWrite lk(txn->lockState());
- save(txn);
- }
- return okResultCode;
- }
- OpTime nextOpTime;
- {
- BSONObj op = oplogReader.next();
- BSONElement ts = op.getField("ts");
- if ( ts.type() != Date && ts.type() != Timestamp ) {
- string err = op.getStringField("$err");
- if ( !err.empty() ) {
- // 13051 is "tailable cursor requested on non capped collection"
- if (op.getIntField("code") == 13051) {
- log() << "trying to slave off of a non-master" << '\n';
- massert( 13344 , "trying to slave off of a non-master", false );
- }
- else {
- log() << "repl: $err reading remote oplog: " + err << '\n';
- massert( 10390 , "got $err reading remote oplog", false );
- }
+ if (tailing) {
+ okResultCode = 0; // don't sleep
}
- else {
- log() << "repl: bad object read from remote oplog: " << op.toString() << '\n';
- massert( 10391 , "repl: bad object read from remote oplog", false);
- }
- }
- nextOpTime = OpTime( ts.date() );
- LOG(2) << "repl: first op time received: " << nextOpTime.toString() << '\n';
- if ( initial ) {
- LOG(1) << "repl: initial run\n";
- }
- if( tailing ) {
- if( !( syncedTo < nextOpTime ) ) {
- log() << "repl ASSERTION failed : syncedTo < nextOpTime" << endl;
- log() << "repl syncTo: " << syncedTo.toStringLong() << endl;
- log() << "repl nextOpTime: " << nextOpTime.toStringLong() << endl;
- verify(false);
- }
- oplogReader.putBack( op ); // op will be processed in the loop below
- nextOpTime = OpTime(); // will reread the op below
- }
- else if ( nextOpTime != syncedTo ) { // didn't get what we queried for - error
- log()
- << "repl: nextOpTime " << nextOpTime.toStringLong() << ' '
- << ((nextOpTime < syncedTo) ? "<??" : ">")
- << " syncedTo " << syncedTo.toStringLong() << '\n'
- << "repl: time diff: " << (nextOpTime.getSecs() - syncedTo.getSecs())
- << "sec\n"
- << "repl: tailing: " << tailing << '\n'
- << "repl: data too stale, halting replication" << endl;
- replInfo = replAllDead = "data too stale halted replication";
- verify( syncedTo < nextOpTime );
- throw SyncException();
+ syncedTo = nextOpTime;
+ save(txn); // note how far we are synced up to now
+ nApplied = n;
+ break;
}
- else {
- /* t == syncedTo, so the first op was applied previously or it is the first op of initial query and need not be applied. */
+
+ OCCASIONALLY if (n > 0 && (n > 100000 || time(0) - saveLast > 60)) {
+ // periodically note our progress, in case we are doing a lot of work and crash
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ syncedTo = nextOpTime;
+ // can't update local log ts since there are pending operations from our peer
+ save(txn);
+ log() << "repl: checkpoint applied " << n << " operations" << endl;
+ log() << "repl: syncedTo: " << syncedTo.toStringLong() << endl;
+ saveLast = time(0);
+ n = 0;
}
- }
- // apply operations
- {
- int n = 0;
- time_t saveLast = time(0);
- while ( 1 ) {
- // we need "&& n" to assure we actually process at least one op to get a sync
- // point recorded in the first place.
- const bool moreInitialSyncsPending = !addDbNextPass.empty() && n;
-
- if ( moreInitialSyncsPending || !oplogReader.more() ) {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
-
- if (tailing) {
- okResultCode = 0; // don't sleep
- }
+ BSONObj op = oplogReader.next();
- syncedTo = nextOpTime;
- save(txn); // note how far we are synced up to now
- nApplied = n;
- break;
+ int b = replApplyBatchSize.get();
+ bool justOne = b == 1;
+ scoped_ptr<Lock::GlobalWrite> lk(justOne ? 0 : new Lock::GlobalWrite(txn->lockState()));
+ while (1) {
+ BSONElement ts = op.getField("ts");
+ if (!(ts.type() == Date || ts.type() == Timestamp)) {
+ log() << "sync error: problem querying remote oplog record" << endl;
+ log() << "op: " << op.toString() << endl;
+ log() << "halting replication" << endl;
+ replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
+ throw SyncException();
}
-
- OCCASIONALLY if( n > 0 && ( n > 100000 || time(0) - saveLast > 60 ) ) {
- // periodically note our progress, in case we are doing a lot of work and crash
+ OpTime last = nextOpTime;
+ nextOpTime = OpTime(ts.date());
+ if (!(last < nextOpTime)) {
+ log() << "sync error: last applied optime at slave >= nextOpTime from master"
+ << endl;
+ log() << " last: " << last.toStringLong() << endl;
+ log() << " nextOpTime: " << nextOpTime.toStringLong() << endl;
+ log() << " halting replication" << endl;
+ replInfo = replAllDead = "sync error last >= nextOpTime";
+ uassert(
+ 10123,
+ "replication error last applied optime at slave >= nextOpTime from master",
+ false);
+ }
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (replSettings.slavedelay &&
+ (unsigned(time(0)) < nextOpTime.getSecs() + replSettings.slavedelay)) {
+ verify(justOne);
+ oplogReader.putBack(op);
+ _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1;
ScopedTransaction transaction(txn, MODE_X);
Lock::GlobalWrite lk(txn->lockState());
- syncedTo = nextOpTime;
- // can't update local log ts since there are pending operations from our peer
- save(txn);
- log() << "repl: checkpoint applied " << n << " operations" << endl;
+ if (n > 0) {
+ syncedTo = last;
+ save(txn);
+ }
+ log() << "repl: applied " << n << " operations" << endl;
log() << "repl: syncedTo: " << syncedTo.toStringLong() << endl;
- saveLast = time(0);
- n = 0;
+ log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl;
+ return okResultCode;
}
- BSONObj op = oplogReader.next();
-
- int b = replApplyBatchSize.get();
- bool justOne = b == 1;
- scoped_ptr<Lock::GlobalWrite> lk(justOne ? 0 : new Lock::GlobalWrite(txn->lockState()));
- while( 1 ) {
-
- BSONElement ts = op.getField("ts");
- if( !( ts.type() == Date || ts.type() == Timestamp ) ) {
- log() << "sync error: problem querying remote oplog record" << endl;
- log() << "op: " << op.toString() << endl;
- log() << "halting replication" << endl;
- replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
- throw SyncException();
- }
- OpTime last = nextOpTime;
- nextOpTime = OpTime( ts.date() );
- if ( !( last < nextOpTime ) ) {
- log() << "sync error: last applied optime at slave >= nextOpTime from master" << endl;
- log() << " last: " << last.toStringLong() << endl;
- log() << " nextOpTime: " << nextOpTime.toStringLong() << endl;
- log() << " halting replication" << endl;
- replInfo = replAllDead = "sync error last >= nextOpTime";
- uassert( 10123 , "replication error last applied optime at slave >= nextOpTime from master", false);
- }
- const ReplSettings& replSettings =
- getGlobalReplicationCoordinator()->getSettings();
- if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) {
- verify( justOne );
- oplogReader.putBack( op );
- _sleepAdviceTime = nextOpTime.getSecs() + replSettings.slavedelay + 1;
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- if ( n > 0 ) {
- syncedTo = last;
- save(txn);
- }
- log() << "repl: applied " << n << " operations" << endl;
- log() << "repl: syncedTo: " << syncedTo.toStringLong() << endl;
- log() << "waiting until: " << _sleepAdviceTime << " to continue" << endl;
- return okResultCode;
- }
-
- _sync_pullOpLog_applyOperation(txn, op, !justOne);
- n++;
+ _sync_pullOpLog_applyOperation(txn, op, !justOne);
+ n++;
- if( --b == 0 )
- break;
- // if to here, we are doing mulpile applications in a singel write lock acquisition
- if( !oplogReader.moreInCurrentBatch() ) {
- // break if no more in batch so we release lock while reading from the master
- break;
- }
- op = oplogReader.next();
+ if (--b == 0)
+ break;
+            // if we get here, we are doing multiple applications in a single write lock acquisition
+ if (!oplogReader.moreInCurrentBatch()) {
+ // break if no more in batch so we release lock while reading from the master
+ break;
}
+ op = oplogReader.next();
}
}
-
- return okResultCode;
}
+ return okResultCode;
+}
- /* note: not yet in mutex at this point.
- returns >= 0 if ok. return -1 if you want to reconnect.
- return value of zero indicates no sleep necessary before next call
- */
- int ReplSource::sync(OperationContext* txn, int& nApplied) {
- _sleepAdviceTime = 0;
- ReplInfo r("sync");
- if (!serverGlobalParams.quiet) {
- LogstreamBuilder l = log();
- l << "repl: syncing from ";
- if( sourceName() != "main" ) {
- l << "source:" << sourceName() << ' ';
- }
- l << "host:" << hostName << endl;
- }
- nClonedThisPass = 0;
-
- // FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName.
- if ((string("localhost") == hostName || string("127.0.0.1") == hostName) &&
- serverGlobalParams.port == ServerGlobalParams::DefaultDBPort) {
- log() << "repl: can't sync from self (localhost). sources configuration may be wrong." << endl;
- sleepsecs(5);
- return -1;
- }
- if ( !_connect(&oplogReader,
- HostAndPort(hostName),
- getGlobalReplicationCoordinator()->getMyRID()) ) {
- LOG(4) << "repl: can't connect to sync source" << endl;
- return -1;
- }
+/* note: not yet in mutex at this point.
+ returns >= 0 if ok. return -1 if you want to reconnect.
+ return value of zero indicates no sleep necessary before next call
+*/
+int ReplSource::sync(OperationContext* txn, int& nApplied) {
+ _sleepAdviceTime = 0;
+ ReplInfo r("sync");
+ if (!serverGlobalParams.quiet) {
+ LogstreamBuilder l = log();
+ l << "repl: syncing from ";
+ if (sourceName() != "main") {
+ l << "source:" << sourceName() << ' ';
+ }
+ l << "host:" << hostName << endl;
+ }
+ nClonedThisPass = 0;
+
+ // FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName.
+ if ((string("localhost") == hostName || string("127.0.0.1") == hostName) &&
+ serverGlobalParams.port == ServerGlobalParams::DefaultDBPort) {
+ log() << "repl: can't sync from self (localhost). sources configuration may be wrong."
+ << endl;
+ sleepsecs(5);
+ return -1;
+ }
- return _sync_pullOpLog(txn, nApplied);
+ if (!_connect(
+ &oplogReader, HostAndPort(hostName), getGlobalReplicationCoordinator()->getMyRID())) {
+ LOG(4) << "repl: can't connect to sync source" << endl;
+ return -1;
}
- /* --------------------------------------------------------------*/
+ return _sync_pullOpLog(txn, nApplied);
+}
- static bool _replMainStarted = false;
+/* --------------------------------------------------------------*/
- /*
- TODO:
- _ source has autoptr to the cursor
- _ reuse that cursor when we can
- */
+static bool _replMainStarted = false;
- /* returns: # of seconds to sleep before next pass
- 0 = no sleep recommended
- 1 = special sentinel indicating adaptive sleep recommended
- */
- int _replMain(OperationContext* txn, ReplSource::SourceVector& sources, int& nApplied) {
- {
- ReplInfo r("replMain load sources");
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- ReplSource::loadAll(txn, sources);
+/*
+TODO:
+_ source has autoptr to the cursor
+_ reuse that cursor when we can
+*/
- // only need this param for initial reset
- _replMainStarted = true;
- }
+/* returns: # of seconds to sleep before next pass
+ 0 = no sleep recommended
+ 1 = special sentinel indicating adaptive sleep recommended
+*/
+int _replMain(OperationContext* txn, ReplSource::SourceVector& sources, int& nApplied) {
+ {
+ ReplInfo r("replMain load sources");
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ ReplSource::loadAll(txn, sources);
- if ( sources.empty() ) {
- /* replication is not configured yet (for --slave) in local.sources. Poll for config it
- every 20 seconds.
- */
- log() << "no source given, add a master to local.sources to start replication" << endl;
- return 20;
- }
+ // only need this param for initial reset
+ _replMainStarted = true;
+ }
- int sleepAdvice = 1;
- for ( ReplSource::SourceVector::iterator i = sources.begin(); i != sources.end(); i++ ) {
- ReplSource *s = i->get();
- int res = -1;
- try {
- res = s->sync(txn, nApplied);
- bool moreToSync = s->haveMoreDbsToSync();
- if( res < 0 ) {
- sleepAdvice = 3;
- }
- else if( moreToSync ) {
- sleepAdvice = 0;
- }
- else if ( s->sleepAdvice() ) {
- sleepAdvice = s->sleepAdvice();
- }
- else
- sleepAdvice = res;
- }
- catch ( const SyncException& ) {
- log() << "caught SyncException" << endl;
- return 10;
- }
- catch ( AssertionException& e ) {
- if ( e.severe() ) {
- log() << "replMain AssertionException " << e.what() << endl;
- return 60;
- }
- else {
- log() << "repl: AssertionException " << e.what() << endl;
- }
- replInfo = "replMain caught AssertionException";
- }
- catch ( const DBException& e ) {
- log() << "repl: DBException " << e.what() << endl;
- replInfo = "replMain caught DBException";
- }
- catch ( const std::exception &e ) {
- log() << "repl: std::exception " << e.what() << endl;
- replInfo = "replMain caught std::exception";
- }
- catch ( ... ) {
- log() << "unexpected exception during replication. replication will halt" << endl;
- replAllDead = "caught unexpected exception during replication";
- }
- if ( res < 0 )
- s->oplogReader.resetConnection();
- }
- return sleepAdvice;
+ if (sources.empty()) {
+        /* replication is not configured yet (for --slave) in local.sources. Poll for it
+           every 20 seconds.
+ */
+ log() << "no source given, add a master to local.sources to start replication" << endl;
+ return 20;
}
- static void replMain(OperationContext* txn) {
- ReplSource::SourceVector sources;
- while ( 1 ) {
- int s = 0;
- {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- if ( replAllDead ) {
- // throttledForceResyncDead can throw
- if ( !getGlobalReplicationCoordinator()->getSettings().autoresync ||
- !ReplSource::throttledForceResyncDead( txn, "auto" ) ) {
- log() << "all sources dead: " << replAllDead << ", sleeping for 5 seconds" << endl;
- break;
- }
- }
- verify( syncing == 0 ); // i.e., there is only one sync thread running. we will want to change/fix this.
- syncing++;
+ int sleepAdvice = 1;
+ for (ReplSource::SourceVector::iterator i = sources.begin(); i != sources.end(); i++) {
+ ReplSource* s = i->get();
+ int res = -1;
+ try {
+ res = s->sync(txn, nApplied);
+ bool moreToSync = s->haveMoreDbsToSync();
+ if (res < 0) {
+ sleepAdvice = 3;
+ } else if (moreToSync) {
+ sleepAdvice = 0;
+ } else if (s->sleepAdvice()) {
+ sleepAdvice = s->sleepAdvice();
+ } else
+ sleepAdvice = res;
+ } catch (const SyncException&) {
+ log() << "caught SyncException" << endl;
+ return 10;
+ } catch (AssertionException& e) {
+ if (e.severe()) {
+ log() << "replMain AssertionException " << e.what() << endl;
+ return 60;
+ } else {
+ log() << "repl: AssertionException " << e.what() << endl;
}
+ replInfo = "replMain caught AssertionException";
+ } catch (const DBException& e) {
+ log() << "repl: DBException " << e.what() << endl;
+ replInfo = "replMain caught DBException";
+ } catch (const std::exception& e) {
+ log() << "repl: std::exception " << e.what() << endl;
+ replInfo = "replMain caught std::exception";
+ } catch (...) {
+ log() << "unexpected exception during replication. replication will halt" << endl;
+ replAllDead = "caught unexpected exception during replication";
+ }
+ if (res < 0)
+ s->oplogReader.resetConnection();
+ }
+ return sleepAdvice;
+}
- try {
- int nApplied = 0;
- s = _replMain(txn, sources, nApplied);
- if( s == 1 ) {
- if( nApplied == 0 ) s = 2;
- else if( nApplied > 100 ) {
- // sleep very little - just enough that we aren't truly hammering master
- sleepmillis(75);
- s = 0;
- }
+static void replMain(OperationContext* txn) {
+ ReplSource::SourceVector sources;
+ while (1) {
+ int s = 0;
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ if (replAllDead) {
+ // throttledForceResyncDead can throw
+ if (!getGlobalReplicationCoordinator()->getSettings().autoresync ||
+ !ReplSource::throttledForceResyncDead(txn, "auto")) {
+ log() << "all sources dead: " << replAllDead << ", sleeping for 5 seconds"
+ << endl;
+ break;
}
}
- catch (...) {
- log() << "caught exception in _replMain" << endl;
- s = 4;
- }
-
- {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- verify( syncing == 1 );
- syncing--;
- }
-
- if( relinquishSyncingSome ) {
- relinquishSyncingSome = 0;
- s = 1; // sleep before going back in to syncing=1
- }
+            // i.e., there is only one sync thread running. we will want to change/fix this.
+            verify(syncing == 0);
+ syncing++;
+ }
- if ( s ) {
- stringstream ss;
- ss << "repl: sleep " << s << " sec before next pass";
- string msg = ss.str();
- if (!serverGlobalParams.quiet)
- log() << msg << endl;
- ReplInfo r(msg.c_str());
- sleepsecs(s);
+ try {
+ int nApplied = 0;
+ s = _replMain(txn, sources, nApplied);
+ if (s == 1) {
+ if (nApplied == 0)
+ s = 2;
+ else if (nApplied > 100) {
+ // sleep very little - just enough that we aren't truly hammering master
+ sleepmillis(75);
+ s = 0;
+ }
}
+ } catch (...) {
+ log() << "caught exception in _replMain" << endl;
+ s = 4;
}
- }
-
- static void replMasterThread() {
- sleepsecs(4);
- Client::initThread("replmaster");
- int toSleep = 10;
- while( 1 ) {
- sleepsecs(toSleep);
- // Write a keep-alive like entry to the log. This will make things like
- // printReplicationStatus() and printSlaveReplicationStatus() stay up-to-date even
- // when things are idle.
- OperationContextImpl txn;
- txn.getClient()->getAuthorizationSession()->grantInternalAuthorization();
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ verify(syncing == 1);
+ syncing--;
+ }
- Lock::GlobalWrite globalWrite(txn.lockState(), 1);
- if (globalWrite.isLocked()) {
- toSleep = 10;
+ if (relinquishSyncingSome) {
+ relinquishSyncingSome = 0;
+ s = 1; // sleep before going back in to syncing=1
+ }
- try {
- WriteUnitOfWork wuow(&txn);
- logKeepalive(&txn);
- wuow.commit();
- }
- catch (...) {
- log() << "caught exception in replMasterThread()" << endl;
- }
- }
- else {
- LOG(5) << "couldn't logKeepalive" << endl;
- toSleep = 1;
- }
+ if (s) {
+ stringstream ss;
+ ss << "repl: sleep " << s << " sec before next pass";
+ string msg = ss.str();
+ if (!serverGlobalParams.quiet)
+ log() << msg << endl;
+ ReplInfo r(msg.c_str());
+ sleepsecs(s);
}
}
-
- static void replSlaveThread() {
- sleepsecs(1);
- Client::initThread("replslave");
-
+}
+
+static void replMasterThread() {
+ sleepsecs(4);
+ Client::initThread("replmaster");
+ int toSleep = 10;
+ while (1) {
+ sleepsecs(toSleep);
+
+ // Write a keep-alive like entry to the log. This will make things like
+ // printReplicationStatus() and printSlaveReplicationStatus() stay up-to-date even
+ // when things are idle.
OperationContextImpl txn;
txn.getClient()->getAuthorizationSession()->grantInternalAuthorization();
- while ( 1 ) {
+ Lock::GlobalWrite globalWrite(txn.lockState(), 1);
+ if (globalWrite.isLocked()) {
+ toSleep = 10;
+
try {
- replMain(&txn);
- sleepsecs(5);
- }
- catch ( AssertionException& ) {
- ReplInfo r("Assertion in replSlaveThread(): sleeping 5 minutes before retry");
- log() << "Assertion in replSlaveThread(): sleeping 5 minutes before retry" << endl;
- sleepsecs(300);
- }
- catch ( DBException& e ) {
- log() << "exception in replSlaveThread(): " << e.what()
- << ", sleeping 5 minutes before retry" << endl;
- sleepsecs(300);
- }
- catch ( ... ) {
- log() << "error in replSlaveThread(): sleeping 5 minutes before retry" << endl;
- sleepsecs(300);
+ WriteUnitOfWork wuow(&txn);
+ logKeepalive(&txn);
+ wuow.commit();
+ } catch (...) {
+ log() << "caught exception in replMasterThread()" << endl;
}
+ } else {
+ LOG(5) << "couldn't logKeepalive" << endl;
+ toSleep = 1;
}
}
+}
- void startMasterSlave(OperationContext* txn) {
-
- oldRepl();
-
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- if( !replSettings.slave && !replSettings.master )
- return;
+static void replSlaveThread() {
+ sleepsecs(1);
+ Client::initThread("replslave");
- txn->getClient()->getAuthorizationSession()->grantInternalAuthorization();
+ OperationContextImpl txn;
+ txn.getClient()->getAuthorizationSession()->grantInternalAuthorization();
- {
- ReplSource temp(txn); // Ensures local.me is populated
+ while (1) {
+ try {
+ replMain(&txn);
+ sleepsecs(5);
+ } catch (AssertionException&) {
+ ReplInfo r("Assertion in replSlaveThread(): sleeping 5 minutes before retry");
+ log() << "Assertion in replSlaveThread(): sleeping 5 minutes before retry" << endl;
+ sleepsecs(300);
+ } catch (DBException& e) {
+ log() << "exception in replSlaveThread(): " << e.what()
+ << ", sleeping 5 minutes before retry" << endl;
+ sleepsecs(300);
+ } catch (...) {
+ log() << "error in replSlaveThread(): sleeping 5 minutes before retry" << endl;
+ sleepsecs(300);
}
+ }
+}
- if ( replSettings.slave ) {
- verify( replSettings.slave == SimpleSlave );
- LOG(1) << "slave=true" << endl;
- boost::thread repl_thread(replSlaveThread);
- }
+void startMasterSlave(OperationContext* txn) {
+ oldRepl();
- if ( replSettings.master ) {
- LOG(1) << "master=true" << endl;
- createOplog(txn);
- boost::thread t(replMasterThread);
- }
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ if (!replSettings.slave && !replSettings.master)
+ return;
- if (replSettings.fastsync) {
- while(!_replMainStarted) // don't allow writes until we've set up from log
- sleepmillis( 50 );
- }
- }
- int _dummy_z;
+ txn->getClient()->getAuthorizationSession()->grantInternalAuthorization();
- void pretouchN(vector<BSONObj>& v, unsigned a, unsigned b) {
- Client *c = currentClient.get();
- if( c == 0 ) {
- Client::initThread("pretouchN");
- c = &cc();
- }
+ {
+ ReplSource temp(txn); // Ensures local.me is populated
+ }
- OperationContextImpl txn; // XXX
- ScopedTransaction transaction(&txn, MODE_S);
- Lock::GlobalRead lk(txn.lockState());
-
- for( unsigned i = a; i <= b; i++ ) {
- const BSONObj& op = v[i];
- const char *which = "o";
- const char *opType = op.getStringField("op");
- if ( *opType == 'i' )
- ;
- else if( *opType == 'u' )
- which = "o2";
- else
- continue;
- /* todo : other operations */
+ if (replSettings.slave) {
+ verify(replSettings.slave == SimpleSlave);
+ LOG(1) << "slave=true" << endl;
+ boost::thread repl_thread(replSlaveThread);
+ }
- try {
- BSONObj o = op.getObjectField(which);
- BSONElement _id;
- if( o.getObjectID(_id) ) {
- const char *ns = op.getStringField("ns");
- BSONObjBuilder b;
- b.append(_id);
- BSONObj result;
- Client::Context ctx(&txn, ns);
- if( Helpers::findById(&txn, ctx.db(), ns, b.done(), result) )
- _dummy_z += result.objsize(); // touch
- }
- }
- catch( DBException& e ) {
- log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' ' << e.toString() << endl;
- }
- }
+ if (replSettings.master) {
+ LOG(1) << "master=true" << endl;
+ createOplog(txn);
+ boost::thread t(replMasterThread);
}
- void pretouchOperation(OperationContext* txn, const BSONObj& op) {
+ if (replSettings.fastsync) {
+ while (!_replMainStarted) // don't allow writes until we've set up from log
+ sleepmillis(50);
+ }
+}
+int _dummy_z;
+
+void pretouchN(vector<BSONObj>& v, unsigned a, unsigned b) {
+ Client* c = currentClient.get();
+ if (c == 0) {
+ Client::initThread("pretouchN");
+ c = &cc();
+ }
- if (txn->lockState()->isWriteLocked()) {
- return; // no point pretouching if write locked. not sure if this will ever fire, but just in case.
- }
+ OperationContextImpl txn; // XXX
+ ScopedTransaction transaction(&txn, MODE_S);
+ Lock::GlobalRead lk(txn.lockState());
- const char *which = "o";
- const char *opType = op.getStringField("op");
- if ( *opType == 'i' )
+ for (unsigned i = a; i <= b; i++) {
+ const BSONObj& op = v[i];
+ const char* which = "o";
+ const char* opType = op.getStringField("op");
+ if (*opType == 'i')
;
- else if( *opType == 'u' )
+ else if (*opType == 'u')
which = "o2";
else
- return;
+ continue;
/* todo : other operations */
try {
BSONObj o = op.getObjectField(which);
BSONElement _id;
- if( o.getObjectID(_id) ) {
- const char *ns = op.getStringField("ns");
+ if (o.getObjectID(_id)) {
+ const char* ns = op.getStringField("ns");
BSONObjBuilder b;
b.append(_id);
BSONObj result;
- AutoGetCollectionForRead ctx(txn, ns );
- if (Helpers::findById(txn, ctx.getDb(), ns, b.done(), result)) {
- _dummy_z += result.objsize(); // touch
- }
+ Client::Context ctx(&txn, ns);
+ if (Helpers::findById(&txn, ctx.db(), ns, b.done(), result))
+ _dummy_z += result.objsize(); // touch
}
+ } catch (DBException& e) {
+ log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' '
+ << e.toString() << endl;
}
- catch( DBException& ) {
- log() << "ignoring assertion in pretouchOperation()" << endl;
+ }
+}
+
+void pretouchOperation(OperationContext* txn, const BSONObj& op) {
+ if (txn->lockState()->isWriteLocked()) {
+ return; // no point pretouching if write locked. not sure if this will ever fire, but just in case.
+ }
+
+ const char* which = "o";
+ const char* opType = op.getStringField("op");
+ if (*opType == 'i')
+ ;
+ else if (*opType == 'u')
+ which = "o2";
+ else
+ return;
+ /* todo : other operations */
+
+ try {
+ BSONObj o = op.getObjectField(which);
+ BSONElement _id;
+ if (o.getObjectID(_id)) {
+ const char* ns = op.getStringField("ns");
+ BSONObjBuilder b;
+ b.append(_id);
+ BSONObj result;
+ AutoGetCollectionForRead ctx(txn, ns);
+ if (Helpers::findById(txn, ctx.getDb(), ns, b.done(), result)) {
+ _dummy_z += result.objsize(); // touch
+ }
}
+ } catch (DBException&) {
+ log() << "ignoring assertion in pretouchOperation()" << endl;
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/master_slave.h b/src/mongo/db/repl/master_slave.h
index 117e26d08a1..74e509302f7 100644
--- a/src/mongo/db/repl/master_slave.h
+++ b/src/mongo/db/repl/master_slave.h
@@ -42,154 +42,165 @@
*/
namespace mongo {
- namespace threadpool {
- class ThreadPool;
- }
+namespace threadpool {
+class ThreadPool;
+}
- class Database;
- class OperationContext;
+class Database;
+class OperationContext;
namespace repl {
- // Main entry point for master/slave at startup time.
- void startMasterSlave(OperationContext* txn);
+// Main entry point for master/slave at startup time.
+void startMasterSlave(OperationContext* txn);
+
+// externed for use with resync.cpp
+extern volatile int relinquishSyncingSome;
+extern volatile int syncing;
+
+extern const char* replInfo;
+
+/* A replication exception */
+class SyncException : public DBException {
+public:
+ SyncException() : DBException("sync exception", 10001) {}
+};
+
+/* A Source is a source from which we can pull (replicate) data.
+ stored in collection local.sources.
- // externed for use with resync.cpp
- extern volatile int relinquishSyncingSome;
- extern volatile int syncing;
+ Can be a group of things to replicate for several databases.
- extern const char *replInfo;
+ { host: ..., source: ..., only: ..., syncedTo: ..., dbsNextPass: { ... }, incompleteCloneDbs: { ... } }
- /* A replication exception */
- class SyncException : public DBException {
- public:
- SyncException() : DBException( "sync exception" , 10001 ) {}
- };
+ 'source' defaults to 'main'; support for multiple source names is
+ not done (always use main for now).
+*/
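+/* Editor's sketch of one concrete local.sources document (illustrative values):
+     { host: "master.example.net:27017", source: "main",
+       syncedTo: Timestamp(1438100000, 1) }
+*/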
+class ReplSource {
+ boost::shared_ptr<threadpool::ThreadPool> tp;
- /* A Source is a source from which we can pull (replicate) data.
- stored in collection local.sources.
+ void resync(OperationContext* txn, const std::string& dbName);
- Can be a group of things to replicate for several databases.
+ /** @param alreadyLocked caller already put us in write lock if true */
+ void _sync_pullOpLog_applyOperation(OperationContext* txn, BSONObj& op, bool alreadyLocked);
- { host: ..., source: ..., only: ..., syncedTo: ..., dbsNextPass: { ... }, incompleteCloneDbs: { ... } }
+ /* pull some operations from the master's oplog, and apply them.
+ calls sync_pullOpLog_applyOperation
+ */
+ int _sync_pullOpLog(OperationContext* txn, int& nApplied);
- 'source' defaults to 'main'; support for multiple source names is
- not done (always use main for now).
+    /* we only clone one database per pass, even if many need to be done. This helps us
+ avoid overflowing the master's transaction log by doing too much work before going
+ back to read more transactions. (Imagine a scenario of slave startup where we try to
+ clone 100 databases in one pass.)
*/
- class ReplSource {
- boost::shared_ptr<threadpool::ThreadPool> tp;
-
- void resync(OperationContext* txn, const std::string& dbName);
-
- /** @param alreadyLocked caller already put us in write lock if true */
- void _sync_pullOpLog_applyOperation(OperationContext* txn, BSONObj& op, bool alreadyLocked);
-
- /* pull some operations from the master's oplog, and apply them.
- calls sync_pullOpLog_applyOperation
- */
- int _sync_pullOpLog(OperationContext* txn, int& nApplied);
-
- /* we only clone one database per pass, even if a lot need done. This helps us
- avoid overflowing the master's transaction log by doing too much work before going
- back to read more transactions. (Imagine a scenario of slave startup where we try to
- clone 100 databases in one pass.)
- */
- std::set<std::string> addDbNextPass;
-
- std::set<std::string> incompleteCloneDbs;
-
- /// TODO(spencer): Remove this once the LegacyReplicationCoordinator is gone.
- BSONObj _me;
-
- void resyncDrop( OperationContext* txn, const std::string& db );
- // call without the db mutex
- void syncToTailOfRemoteLog();
- std::string ns() const { return std::string( "local.oplog.$" ) + sourceName(); }
- unsigned _sleepAdviceTime;
-
- /**
- * If 'db' is a new database and its name would conflict with that of
- * an existing database, synchronize these database names with the
- * master.
- * @return true iff an op with the specified ns may be applied.
- */
- bool handleDuplicateDbName( OperationContext* txn,
- const BSONObj &op,
- const char* ns,
- const char* db );
-
- // populates _me so that it can be passed to oplogreader for handshakes
- /// TODO(spencer): Remove this function once the LegacyReplicationCoordinator is gone.
- void ensureMe(OperationContext* txn);
-
- void forceResync(OperationContext* txn, const char *requester);
-
- bool _connect(OplogReader* reader, const HostAndPort& host, const OID& myRID);
- public:
- OplogReader oplogReader;
-
- void applyOperation(OperationContext* txn, Database* db, const BSONObj& op);
- std::string hostName; // ip addr or hostname plus optionally, ":<port>"
- std::string _sourceName; // a logical source name.
- std::string sourceName() const { return _sourceName.empty() ? "main" : _sourceName; }
- std::string only; // only a certain db. note that in the sources collection, this may not be changed once you start replicating.
-
- /* the last time point we have already synced up to (in the remote/master's oplog). */
- OpTime syncedTo;
-
- int nClonedThisPass;
-
- typedef std::vector< boost::shared_ptr< ReplSource > > SourceVector;
- static void loadAll(OperationContext* txn, SourceVector&);
-
- explicit ReplSource(OperationContext* txn, BSONObj);
- // This is not the constructor you are looking for. Always prefer the version that takes
- // a BSONObj. This is public only as a hack so that the ReplicationCoordinator can find
- // out the process's RID in master/slave setups.
- ReplSource(OperationContext* txn);
-
- /* -1 = error */
- int sync(OperationContext* txn, int& nApplied);
-
- void save(OperationContext* txn); // write ourself to local.sources
-
- // make a jsobj from our member fields of the form
- // { host: ..., source: ..., syncedTo: ... }
- BSONObj jsobj();
-
- bool operator==(const ReplSource&r) const {
- return hostName == r.hostName && sourceName() == r.sourceName();
- }
- std::string toString() const { return sourceName() + "@" + hostName; }
-
- bool haveMoreDbsToSync() const { return !addDbNextPass.empty(); }
- int sleepAdvice() const {
- if ( !_sleepAdviceTime )
- return 0;
- int wait = _sleepAdviceTime - unsigned( time( 0 ) );
- return wait > 0 ? wait : 0;
- }
-
- static bool throttledForceResyncDead( OperationContext* txn, const char *requester );
- static void forceResyncDead( OperationContext* txn, const char *requester );
- };
+ std::set<std::string> addDbNextPass;
+
+ std::set<std::string> incompleteCloneDbs;
+
+ /// TODO(spencer): Remove this once the LegacyReplicationCoordinator is gone.
+ BSONObj _me;
+
+ void resyncDrop(OperationContext* txn, const std::string& db);
+ // call without the db mutex
+ void syncToTailOfRemoteLog();
+ std::string ns() const {
+ return std::string("local.oplog.$") + sourceName();
+ }
+ unsigned _sleepAdviceTime;
/**
- * Helper class used to set and query an ignore state for a named database.
- * The ignore state will expire after a specified OpTime.
+ * If 'db' is a new database and its name would conflict with that of
+ * an existing database, synchronize these database names with the
+ * master.
+ * @return true iff an op with the specified ns may be applied.
*/
- class DatabaseIgnorer {
- public:
- /** Indicate that operations for 'db' should be ignored until after 'futureOplogTime' */
- void doIgnoreUntilAfter( const std::string &db, const OpTime &futureOplogTime );
- /**
- * Query ignore state of 'db'; if 'currentOplogTime' is after the ignore
- * limit, the ignore state will be cleared.
- */
- bool ignoreAt( const std::string &db, const OpTime &currentOplogTime );
- private:
- std::map< std::string, OpTime > _ignores;
- };
-
-} // namespace repl
-} // namespace mongo
+ bool handleDuplicateDbName(OperationContext* txn,
+ const BSONObj& op,
+ const char* ns,
+ const char* db);
+
+ // populates _me so that it can be passed to oplogreader for handshakes
+ /// TODO(spencer): Remove this function once the LegacyReplicationCoordinator is gone.
+ void ensureMe(OperationContext* txn);
+
+ void forceResync(OperationContext* txn, const char* requester);
+
+ bool _connect(OplogReader* reader, const HostAndPort& host, const OID& myRID);
+
+public:
+ OplogReader oplogReader;
+
+ void applyOperation(OperationContext* txn, Database* db, const BSONObj& op);
+ std::string hostName; // ip addr or hostname plus optionally, ":<port>"
+ std::string _sourceName; // a logical source name.
+ std::string sourceName() const {
+ return _sourceName.empty() ? "main" : _sourceName;
+ }
+ std::string
+ only; // only a certain db. note that in the sources collection, this may not be changed once you start replicating.
+
+ /* the last time point we have already synced up to (in the remote/master's oplog). */
+ OpTime syncedTo;
+
+ int nClonedThisPass;
+
+ typedef std::vector<boost::shared_ptr<ReplSource>> SourceVector;
+ static void loadAll(OperationContext* txn, SourceVector&);
+
+ explicit ReplSource(OperationContext* txn, BSONObj);
+ // This is not the constructor you are looking for. Always prefer the version that takes
+ // a BSONObj. This is public only as a hack so that the ReplicationCoordinator can find
+ // out the process's RID in master/slave setups.
+ ReplSource(OperationContext* txn);
+
+ /* -1 = error */
+ int sync(OperationContext* txn, int& nApplied);
+
+ void save(OperationContext* txn); // write ourself to local.sources
+
+ // make a jsobj from our member fields of the form
+ // { host: ..., source: ..., syncedTo: ... }
+ BSONObj jsobj();
+
+ bool operator==(const ReplSource& r) const {
+ return hostName == r.hostName && sourceName() == r.sourceName();
+ }
+ std::string toString() const {
+ return sourceName() + "@" + hostName;
+ }
+
+ bool haveMoreDbsToSync() const {
+ return !addDbNextPass.empty();
+ }
+ int sleepAdvice() const {
+ if (!_sleepAdviceTime)
+ return 0;
+ int wait = _sleepAdviceTime - unsigned(time(0));
+ return wait > 0 ? wait : 0;
+ }
+
+ static bool throttledForceResyncDead(OperationContext* txn, const char* requester);
+ static void forceResyncDead(OperationContext* txn, const char* requester);
+};
+
+/**
+ * Helper class used to set and query an ignore state for a named database.
+ * The ignore state will expire after a specified OpTime.
+ */
+class DatabaseIgnorer {
+public:
+ /** Indicate that operations for 'db' should be ignored until after 'futureOplogTime' */
+ void doIgnoreUntilAfter(const std::string& db, const OpTime& futureOplogTime);
+ /**
+ * Query ignore state of 'db'; if 'currentOplogTime' is after the ignore
+ * limit, the ignore state will be cleared.
+ */
+ bool ignoreAt(const std::string& db, const OpTime& currentOplogTime);
+
+private:
+ std::map<std::string, OpTime> _ignores;
+};
+
+} // namespace repl
+} // namespace mongo
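
A quick illustration of the DatabaseIgnorer contract declared above, since it is easy to misread: doIgnoreUntilAfter() can only extend an ignore window, and ignoreAt() clears the state as a side effect once the oplog time passes the limit. This is a minimal standalone sketch of the documented behavior, using a plain std::map and an integer stand-in for OpTime rather than the real MongoDB types:

    #include <iostream>
    #include <map>
    #include <string>

    typedef long long OpTime;  // integer stand-in; any ordered timestamp works

    class DatabaseIgnorerSketch {
    public:
        // Assumption: a later call may extend, but never shrink, a window.
        void doIgnoreUntilAfter(const std::string& db, const OpTime& futureOplogTime) {
            OpTime& limit = _ignores[db];
            if (futureOplogTime > limit)
                limit = futureOplogTime;
        }
        // True while ops for 'db' should be ignored; once 'currentOplogTime'
        // passes the limit, the entry is erased and stays cleared.
        bool ignoreAt(const std::string& db, const OpTime& currentOplogTime) {
            std::map<std::string, OpTime>::iterator it = _ignores.find(db);
            if (it == _ignores.end())
                return false;
            if (currentOplogTime > it->second) {
                _ignores.erase(it);
                return false;
            }
            return true;
        }

    private:
        std::map<std::string, OpTime> _ignores;
    };

    int main() {
        DatabaseIgnorerSketch ignorer;
        ignorer.doIgnoreUntilAfter("test", 100);
        std::cout << ignorer.ignoreAt("test", 50) << "\n";   // 1: inside the window
        std::cout << ignorer.ignoreAt("test", 150) << "\n";  // 0: window expired
        std::cout << ignorer.ignoreAt("test", 60) << "\n";   // 0: state was cleared
    }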
diff --git a/src/mongo/db/repl/member_config.cpp b/src/mongo/db/repl/member_config.cpp
index 6f3bcf40501..3828185094a 100644
--- a/src/mongo/db/repl/member_config.cpp
+++ b/src/mongo/db/repl/member_config.cpp
@@ -40,280 +40,267 @@
namespace mongo {
namespace repl {
- const std::string MemberConfig::kIdFieldName = "_id";
- const std::string MemberConfig::kVotesFieldName = "votes";
- const std::string MemberConfig::kPriorityFieldName = "priority";
- const std::string MemberConfig::kHostFieldName = "host";
- const std::string MemberConfig::kHiddenFieldName = "hidden";
- const std::string MemberConfig::kSlaveDelayFieldName = "slaveDelay";
- const std::string MemberConfig::kArbiterOnlyFieldName = "arbiterOnly";
- const std::string MemberConfig::kBuildIndexesFieldName = "buildIndexes";
- const std::string MemberConfig::kTagsFieldName = "tags";
- const std::string MemberConfig::kInternalVoterTagName = "$voter";
- const std::string MemberConfig::kInternalElectableTagName = "$electable";
- const std::string MemberConfig::kInternalAllTagName = "$all";
+const std::string MemberConfig::kIdFieldName = "_id";
+const std::string MemberConfig::kVotesFieldName = "votes";
+const std::string MemberConfig::kPriorityFieldName = "priority";
+const std::string MemberConfig::kHostFieldName = "host";
+const std::string MemberConfig::kHiddenFieldName = "hidden";
+const std::string MemberConfig::kSlaveDelayFieldName = "slaveDelay";
+const std::string MemberConfig::kArbiterOnlyFieldName = "arbiterOnly";
+const std::string MemberConfig::kBuildIndexesFieldName = "buildIndexes";
+const std::string MemberConfig::kTagsFieldName = "tags";
+const std::string MemberConfig::kInternalVoterTagName = "$voter";
+const std::string MemberConfig::kInternalElectableTagName = "$electable";
+const std::string MemberConfig::kInternalAllTagName = "$all";
namespace {
- const std::string kLegalMemberConfigFieldNames[] = {
- MemberConfig::kIdFieldName,
- MemberConfig::kVotesFieldName,
- MemberConfig::kPriorityFieldName,
- MemberConfig::kHostFieldName,
- MemberConfig::kHiddenFieldName,
- MemberConfig::kSlaveDelayFieldName,
- MemberConfig::kArbiterOnlyFieldName,
- MemberConfig::kBuildIndexesFieldName,
- MemberConfig::kTagsFieldName
- };
-
- const int kVotesFieldDefault = 1;
- const double kPriorityFieldDefault = 1.0;
- const Seconds kSlaveDelayFieldDefault(0);
- const bool kArbiterOnlyFieldDefault = false;
- const bool kHiddenFieldDefault = false;
- const bool kBuildIndexesFieldDefault = true;
-
- const Seconds kMaxSlaveDelay(3600 * 24 * 366);
+const std::string kLegalMemberConfigFieldNames[] = {MemberConfig::kIdFieldName,
+ MemberConfig::kVotesFieldName,
+ MemberConfig::kPriorityFieldName,
+ MemberConfig::kHostFieldName,
+ MemberConfig::kHiddenFieldName,
+ MemberConfig::kSlaveDelayFieldName,
+ MemberConfig::kArbiterOnlyFieldName,
+ MemberConfig::kBuildIndexesFieldName,
+ MemberConfig::kTagsFieldName};
+
+const int kVotesFieldDefault = 1;
+const double kPriorityFieldDefault = 1.0;
+const Seconds kSlaveDelayFieldDefault(0);
+const bool kArbiterOnlyFieldDefault = false;
+const bool kHiddenFieldDefault = false;
+const bool kBuildIndexesFieldDefault = true;
+
+const Seconds kMaxSlaveDelay(3600 * 24 * 366);
} // namespace
- Status MemberConfig::initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig) {
- Status status = bsonCheckOnlyHasFields(
- "replica set member configuration", mcfg, kLegalMemberConfigFieldNames);
- if (!status.isOK())
- return status;
-
- //
- // Parse _id field.
- //
- BSONElement idElement = mcfg[kIdFieldName];
- if (idElement.eoo()) {
- return Status(ErrorCodes::NoSuchKey, str::stream() << kIdFieldName <<
- " field is missing");
- }
- if (!idElement.isNumber()) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kIdFieldName <<
- " field has non-numeric type " << typeName(idElement.type()));
- }
- _id = idElement.numberInt();
-
- //
- // Parse h field.
- //
- std::string hostAndPortString;
- status = bsonExtractStringField(mcfg, kHostFieldName, &hostAndPortString);
- if (!status.isOK())
- return status;
- boost::trim(hostAndPortString);
- status = _host.initialize(hostAndPortString);
- if (!status.isOK())
- return status;
- if (!_host.hasPort()) {
- // make port explicit even if default.
- _host = HostAndPort(_host.host(), _host.port());
- }
+Status MemberConfig::initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig) {
+ Status status = bsonCheckOnlyHasFields(
+ "replica set member configuration", mcfg, kLegalMemberConfigFieldNames);
+ if (!status.isOK())
+ return status;
- //
- // Parse votes field.
- //
- BSONElement votesElement = mcfg[kVotesFieldName];
- if (votesElement.eoo()) {
- _votes = kVotesFieldDefault;
- }
- else if (votesElement.isNumber()) {
- _votes = votesElement.numberInt();
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kVotesFieldName <<
- " field value has non-numeric type " <<
- typeName(votesElement.type()));
- }
+ //
+ // Parse _id field.
+ //
+ BSONElement idElement = mcfg[kIdFieldName];
+ if (idElement.eoo()) {
+ return Status(ErrorCodes::NoSuchKey, str::stream() << kIdFieldName << " field is missing");
+ }
+ if (!idElement.isNumber()) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kIdFieldName << " field has non-numeric type "
+ << typeName(idElement.type()));
+ }
+ _id = idElement.numberInt();
- //
- // Parse priority field.
- //
- BSONElement priorityElement = mcfg[kPriorityFieldName];
- if (priorityElement.eoo()) {
- _priority = kPriorityFieldDefault;
- }
- else if (priorityElement.isNumber()) {
- _priority = priorityElement.numberDouble();
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kPriorityFieldName <<
- " field has non-numeric type " << typeName(priorityElement.type()));
- }
+ //
+ // Parse h field.
+ //
+ std::string hostAndPortString;
+ status = bsonExtractStringField(mcfg, kHostFieldName, &hostAndPortString);
+ if (!status.isOK())
+ return status;
+ boost::trim(hostAndPortString);
+ status = _host.initialize(hostAndPortString);
+ if (!status.isOK())
+ return status;
+ if (!_host.hasPort()) {
+ // make port explicit even if default.
+ _host = HostAndPort(_host.host(), _host.port());
+ }
- //
- // Parse arbiterOnly field.
- //
- status = bsonExtractBooleanFieldWithDefault(mcfg,
- kArbiterOnlyFieldName,
- kArbiterOnlyFieldDefault,
- &_arbiterOnly);
- if (!status.isOK())
- return status;
-
- //
- // Parse slaveDelay field.
- //
- BSONElement slaveDelayElement = mcfg[kSlaveDelayFieldName];
- if (slaveDelayElement.eoo()) {
- _slaveDelay = kSlaveDelayFieldDefault;
- }
- else if (slaveDelayElement.isNumber()) {
- _slaveDelay = Seconds(slaveDelayElement.numberInt());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << kSlaveDelayFieldName <<
- " field value has non-numeric type " <<
- typeName(slaveDelayElement.type()));
- }
+ //
+ // Parse votes field.
+ //
+ BSONElement votesElement = mcfg[kVotesFieldName];
+ if (votesElement.eoo()) {
+ _votes = kVotesFieldDefault;
+ } else if (votesElement.isNumber()) {
+ _votes = votesElement.numberInt();
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kVotesFieldName << " field value has non-numeric type "
+ << typeName(votesElement.type()));
+ }
+
+ //
+ // Parse priority field.
+ //
+ BSONElement priorityElement = mcfg[kPriorityFieldName];
+ if (priorityElement.eoo()) {
+ _priority = kPriorityFieldDefault;
+ } else if (priorityElement.isNumber()) {
+ _priority = priorityElement.numberDouble();
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kPriorityFieldName << " field has non-numeric type "
+ << typeName(priorityElement.type()));
+ }
+
+ //
+ // Parse arbiterOnly field.
+ //
+ status = bsonExtractBooleanFieldWithDefault(
+ mcfg, kArbiterOnlyFieldName, kArbiterOnlyFieldDefault, &_arbiterOnly);
+ if (!status.isOK())
+ return status;
- //
- // Parse hidden field.
- //
- status = bsonExtractBooleanFieldWithDefault(mcfg,
- kHiddenFieldName,
- kHiddenFieldDefault,
- &_hidden);
- if (!status.isOK())
- return status;
-
- //
- // Parse buildIndexes field.
- //
- status = bsonExtractBooleanFieldWithDefault(mcfg,
- kBuildIndexesFieldName,
- kBuildIndexesFieldDefault,
- &_buildIndexes);
- if (!status.isOK())
- return status;
-
- //
- // Parse "tags" field.
- //
- _tags.clear();
- BSONElement tagsElement;
- status = bsonExtractTypedField(mcfg, kTagsFieldName, Object, &tagsElement);
- if (status.isOK()) {
- for (BSONObj::iterator tagIter(tagsElement.Obj()); tagIter.more();) {
- const BSONElement& tag = tagIter.next();
- if (tag.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "tags." <<
- tag.fieldName() << " field has non-string value of type " <<
- typeName(tag.type()));
- }
- _tags.push_back(tagConfig->makeTag(tag.fieldNameStringData(),
- tag.valueStringData()));
+ //
+ // Parse slaveDelay field.
+ //
+ BSONElement slaveDelayElement = mcfg[kSlaveDelayFieldName];
+ if (slaveDelayElement.eoo()) {
+ _slaveDelay = kSlaveDelayFieldDefault;
+ } else if (slaveDelayElement.isNumber()) {
+ _slaveDelay = Seconds(slaveDelayElement.numberInt());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << kSlaveDelayFieldName << " field value has non-numeric type "
+ << typeName(slaveDelayElement.type()));
+ }
+
+ //
+ // Parse hidden field.
+ //
+ status =
+ bsonExtractBooleanFieldWithDefault(mcfg, kHiddenFieldName, kHiddenFieldDefault, &_hidden);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse buildIndexes field.
+ //
+ status = bsonExtractBooleanFieldWithDefault(
+ mcfg, kBuildIndexesFieldName, kBuildIndexesFieldDefault, &_buildIndexes);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse "tags" field.
+ //
+ _tags.clear();
+ BSONElement tagsElement;
+ status = bsonExtractTypedField(mcfg, kTagsFieldName, Object, &tagsElement);
+ if (status.isOK()) {
+ for (BSONObj::iterator tagIter(tagsElement.Obj()); tagIter.more();) {
+ const BSONElement& tag = tagIter.next();
+ if (tag.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "tags." << tag.fieldName()
+ << " field has non-string value of type "
+ << typeName(tag.type()));
}
+ _tags.push_back(tagConfig->makeTag(tag.fieldNameStringData(), tag.valueStringData()));
}
- else if (ErrorCodes::NoSuchKey != status) {
- return status;
- }
+ } else if (ErrorCodes::NoSuchKey != status) {
+ return status;
+ }
- //
- // Add internal tags based on other member properties.
- //
-
- // Add a voter tag if this non-arbiter member votes; use _id for uniqueness.
- const std::string id = str::stream() << _id;
- if (isVoter() && !_arbiterOnly) {
- _tags.push_back(tagConfig->makeTag(kInternalVoterTagName, id));
- }
+ //
+ // Add internal tags based on other member properties.
+ //
- // Add an electable tag if this member is electable.
- if (isElectable()) {
- _tags.push_back(tagConfig->makeTag(kInternalElectableTagName, id));
- }
+ // Add a voter tag if this non-arbiter member votes; use _id for uniqueness.
+ const std::string id = str::stream() << _id;
+ if (isVoter() && !_arbiterOnly) {
+ _tags.push_back(tagConfig->makeTag(kInternalVoterTagName, id));
+ }
- // Add a tag for generic counting of this node.
- if (!_arbiterOnly) {
- _tags.push_back(tagConfig->makeTag(kInternalAllTagName, id));
- }
+ // Add an electable tag if this member is electable.
+ if (isElectable()) {
+ _tags.push_back(tagConfig->makeTag(kInternalElectableTagName, id));
+ }
- return Status::OK();
+ // Add a tag for generic counting of this node.
+ if (!_arbiterOnly) {
+ _tags.push_back(tagConfig->makeTag(kInternalAllTagName, id));
}
- Status MemberConfig::validate() const {
- if (_id < 0 || _id > 255) {
- return Status(ErrorCodes::BadValue, str::stream() << kIdFieldName <<
- " field value of " << _id << " is out of range.");
- }
+ return Status::OK();
+}
- if (_priority < 0 || _priority > 1000) {
- return Status(ErrorCodes::BadValue, str::stream() << kPriorityFieldName <<
- " field value of " << _priority << " is out of range");
- }
- if (_votes != 0 && _votes != 1) {
- return Status(ErrorCodes::BadValue, str::stream() << kVotesFieldName <<
- " field value is " << _votes << " but must be 0 or 1");
- }
- if (_arbiterOnly) {
- if (!_tags.empty()) {
- return Status(ErrorCodes::BadValue, "Cannot set tags on arbiters.");
- }
- if (!isVoter()) {
- return Status(ErrorCodes::BadValue, "Arbiter must vote (cannot have 0 votes)");
- }
- }
- if (_slaveDelay < Seconds(0) || _slaveDelay > kMaxSlaveDelay) {
- return Status(ErrorCodes::BadValue, str::stream() << kSlaveDelayFieldName <<
- " field value of " << _slaveDelay.total_seconds() <<
- " seconds is out of range");
- }
- if (_slaveDelay > Seconds(0) && _priority != 0) {
- return Status(ErrorCodes::BadValue, "slaveDelay requires priority be zero");
- }
- if (_hidden && _priority != 0) {
- return Status(ErrorCodes::BadValue, "priority must be 0 when hidden=true");
+Status MemberConfig::validate() const {
+ if (_id < 0 || _id > 255) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kIdFieldName << " field value of " << _id
+ << " is out of range.");
+ }
+
+ if (_priority < 0 || _priority > 1000) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kPriorityFieldName << " field value of " << _priority
+ << " is out of range");
+ }
+ if (_votes != 0 && _votes != 1) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kVotesFieldName << " field value is " << _votes
+ << " but must be 0 or 1");
+ }
+ if (_arbiterOnly) {
+ if (!_tags.empty()) {
+ return Status(ErrorCodes::BadValue, "Cannot set tags on arbiters.");
}
- if (!_buildIndexes && _priority != 0) {
- return Status(ErrorCodes::BadValue, "priority must be 0 when buildIndexes=false");
+ if (!isVoter()) {
+ return Status(ErrorCodes::BadValue, "Arbiter must vote (cannot have 0 votes)");
}
- return Status::OK();
}
+ if (_slaveDelay < Seconds(0) || _slaveDelay > kMaxSlaveDelay) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kSlaveDelayFieldName << " field value of "
+ << _slaveDelay.total_seconds() << " seconds is out of range");
+ }
+ if (_slaveDelay > Seconds(0) && _priority != 0) {
+ return Status(ErrorCodes::BadValue, "slaveDelay requires priority be zero");
+ }
+ if (_hidden && _priority != 0) {
+ return Status(ErrorCodes::BadValue, "priority must be 0 when hidden=true");
+ }
+ if (!_buildIndexes && _priority != 0) {
+ return Status(ErrorCodes::BadValue, "priority must be 0 when buildIndexes=false");
+ }
+ return Status::OK();
+}
- bool MemberConfig::hasTags(const ReplicaSetTagConfig& tagConfig) const {
- for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin();
- tag != _tags.end();
- tag++) {
- std::string tagKey = tagConfig.getTagKey(*tag);
- if (tagKey[0] == '$') {
- // Filter out internal tags
- continue;
- }
- return true;
+bool MemberConfig::hasTags(const ReplicaSetTagConfig& tagConfig) const {
+ for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin(); tag != _tags.end();
+ tag++) {
+ std::string tagKey = tagConfig.getTagKey(*tag);
+ if (tagKey[0] == '$') {
+ // Filter out internal tags
+ continue;
}
- return false;
+ return true;
}
+ return false;
+}
- BSONObj MemberConfig::toBSON(const ReplicaSetTagConfig& tagConfig) const {
- BSONObjBuilder configBuilder;
- configBuilder.append("_id", _id);
- configBuilder.append("host", _host.toString());
- configBuilder.append("arbiterOnly", _arbiterOnly);
- configBuilder.append("buildIndexes", _buildIndexes);
- configBuilder.append("hidden", _hidden);
- configBuilder.append("priority", _priority);
-
- BSONObjBuilder tags(configBuilder.subobjStart("tags"));
- for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin();
- tag != _tags.end();
- tag++) {
- std::string tagKey = tagConfig.getTagKey(*tag);
- if (tagKey[0] == '$') {
- // Filter out internal tags
- continue;
- }
- tags.append(tagKey, tagConfig.getTagValue(*tag));
- }
- tags.done();
+BSONObj MemberConfig::toBSON(const ReplicaSetTagConfig& tagConfig) const {
+ BSONObjBuilder configBuilder;
+ configBuilder.append("_id", _id);
+ configBuilder.append("host", _host.toString());
+ configBuilder.append("arbiterOnly", _arbiterOnly);
+ configBuilder.append("buildIndexes", _buildIndexes);
+ configBuilder.append("hidden", _hidden);
+ configBuilder.append("priority", _priority);
- configBuilder.append("slaveDelay", _slaveDelay.total_seconds());
- configBuilder.append("votes", getNumVotes());
- return configBuilder.obj();
+ BSONObjBuilder tags(configBuilder.subobjStart("tags"));
+ for (std::vector<ReplicaSetTag>::const_iterator tag = _tags.begin(); tag != _tags.end();
+ tag++) {
+ std::string tagKey = tagConfig.getTagKey(*tag);
+ if (tagKey[0] == '$') {
+ // Filter out internal tags
+ continue;
+ }
+ tags.append(tagKey, tagConfig.getTagValue(*tag));
}
+ tags.done();
+
+ configBuilder.append("slaveDelay", _slaveDelay.total_seconds());
+ configBuilder.append("votes", getNumVotes());
+ return configBuilder.obj();
+}
} // namespace repl
} // namespace mongo
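
One thing the code above makes easy to miss: initialize() enforces only field types, defaults, and the legal-field-name list, while all range and cross-field rules (_id in 0..255, priority in 0..1000, votes 0 or 1, slaveDelay forcing priority 0, and so on) are deferred to validate(), so both calls must succeed before a MemberConfig is trustworthy. A sketch of the intended two-phase call pattern, using the API exactly as declared in this file; the host name is illustrative and the include set is an assumption:

    #include "mongo/db/jsobj.h"                 // BSON() stream builder
    #include "mongo/db/repl/member_config.h"
    #include "mongo/db/repl/replica_set_tag.h"  // assumed home of ReplicaSetTagConfig

    namespace mongo {
    namespace repl {

    // Build a hidden, delayed, non-electable secondary; both phases must pass.
    Status makeDelayedSecondary(MemberConfig* mc, ReplicaSetTagConfig* tagConfig) {
        Status status = mc->initialize(BSON("_id" << 3 << "host"
                                                  << "delayed.example.com:27017"
                                                  << "priority" << 0 << "hidden" << true
                                                  << "slaveDelay" << 3600),
                                       tagConfig);
        if (!status.isOK())
            return status;      // type mismatches and unknown fields stop here
        return mc->validate();  // out-of-range and conflicting fields stop here
    }

    }  // namespace repl
    }  // namespace mongo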
diff --git a/src/mongo/db/repl/member_config.h b/src/mongo/db/repl/member_config.h
index f980a8e2bc8..694a8941f8e 100644
--- a/src/mongo/db/repl/member_config.h
+++ b/src/mongo/db/repl/member_config.h
@@ -38,143 +38,168 @@
namespace mongo {
- class BSONObj;
+class BSONObj;
namespace repl {
+/**
+ * Representation of the configuration information about a particular member of a replica set.
+ */
+class MemberConfig {
+public:
+ typedef std::vector<ReplicaSetTag>::const_iterator TagIterator;
+
+ static const std::string kIdFieldName;
+ static const std::string kVotesFieldName;
+ static const std::string kPriorityFieldName;
+ static const std::string kHostFieldName;
+ static const std::string kHiddenFieldName;
+ static const std::string kSlaveDelayFieldName;
+ static const std::string kArbiterOnlyFieldName;
+ static const std::string kBuildIndexesFieldName;
+ static const std::string kTagsFieldName;
+ static const std::string kInternalVoterTagName;
+ static const std::string kInternalElectableTagName;
+ static const std::string kInternalAllTagName;
+
+ /**
+ * Default constructor, produces a MemberConfig in an undefined state.
+ * Must successfully call initialize() before calling validate() or the
+ * accessors.
+ */
+ MemberConfig() : _slaveDelay(0) {}
+
+ /**
+ * Initializes this MemberConfig from the contents of "mcfg".
+ *
+ * If "mcfg" describes any tags, builds ReplicaSetTags for this
+ * configuration using "tagConfig" as the tag's namespace. This may
+ * have the effect of altering "tagConfig" when "mcfg" describes a
+ * tag not previously added to "tagConfig".
+ */
+ Status initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig);
+
+ /**
+ * Performs basic consistency checks on the member configuration.
+ */
+ Status validate() const;
+
+ /**
+ * Gets the identifier for this member, unique within a ReplicaSetConfig.
+ */
+ int getId() const {
+ return _id;
+ }
+
+ /**
+ * Gets the canonical name of this member, by which other members and clients
+ * will contact it.
+ */
+ const HostAndPort& getHostAndPort() const {
+ return _host;
+ }
+
+ /**
+ * Gets this member's priority. Higher means more likely to be elected
+ * primary.
+ */
+ double getPriority() const {
+ return _priority;
+ }
+
/**
- * Representation of the configuration information about a particular member of a replica set.
- */
- class MemberConfig {
- public:
- typedef std::vector<ReplicaSetTag>::const_iterator TagIterator;
-
- static const std::string kIdFieldName;
- static const std::string kVotesFieldName;
- static const std::string kPriorityFieldName;
- static const std::string kHostFieldName;
- static const std::string kHiddenFieldName;
- static const std::string kSlaveDelayFieldName;
- static const std::string kArbiterOnlyFieldName;
- static const std::string kBuildIndexesFieldName;
- static const std::string kTagsFieldName;
- static const std::string kInternalVoterTagName;
- static const std::string kInternalElectableTagName;
- static const std::string kInternalAllTagName;
-
- /**
- * Default constructor, produces a MemberConfig in an undefined state.
- * Must successfully call initialize() before calling validate() or the
- * accessors.
- */
- MemberConfig() : _slaveDelay(0) {}
-
- /**
- * Initializes this MemberConfig from the contents of "mcfg".
- *
- * If "mcfg" describes any tags, builds ReplicaSetTags for this
- * configuration using "tagConfig" as the tag's namespace. This may
- * have the effect of altering "tagConfig" when "mcfg" describes a
- * tag not previously added to "tagConfig".
- */
- Status initialize(const BSONObj& mcfg, ReplicaSetTagConfig* tagConfig);
-
- /**
- * Performs basic consistency checks on the member configuration.
- */
- Status validate() const;
-
- /**
- * Gets the identifier for this member, unique within a ReplicaSetConfig.
- */
- int getId() const { return _id; }
-
- /**
- * Gets the canonical name of this member, by which other members and clients
- * will contact it.
- */
- const HostAndPort& getHostAndPort() const { return _host; }
-
- /**
- * Gets this member's priority. Higher means more likely to be elected
- * primary.
- */
- double getPriority() const { return _priority; }
-
- /**
- * Gets the amount of time behind the primary that this member will attempt to
- * remain. Zero seconds means stay as caught up as possible.
- */
- Seconds getSlaveDelay() const { return _slaveDelay; }
-
- /**
- * Returns true if this member may vote in elections.
- */
- bool isVoter() const { return _votes != 0; }
-
- /**
- * Returns the number of votes that this member gets.
- */
- int getNumVotes() const { return isVoter() ? 1 : 0; }
-
- /**
- * Returns true if this member is an arbiter (is not data-bearing).
- */
- bool isArbiter() const { return _arbiterOnly; }
-
- /**
- * Returns true if this member is hidden (not reported by isMaster, not electable).
- */
- bool isHidden() const { return _hidden; }
-
- /**
- * Returns true if this member should build secondary indexes.
- */
- bool shouldBuildIndexes() const { return _buildIndexes; }
-
- /**
- * Gets the number of replica set tags, including internal '$' tags, for this member.
- */
- size_t getNumTags() const { return _tags.size(); }
-
- /**
- * Returns true if this MemberConfig has any non-internal tags, using "tagConfig" to
- * determine the internal property of the tags.
- */
- bool hasTags(const ReplicaSetTagConfig& tagConfig) const;
-
- /**
- * Gets a begin iterator over the tags for this member.
- */
- TagIterator tagsBegin() const { return _tags.begin(); }
-
- /**
- * Gets an end iterator over the tags for this member.
- */
- TagIterator tagsEnd() const { return _tags.end(); }
-
- /**
- * Returns true if this represents the configuration of an electable member.
- */
- bool isElectable() const { return !isArbiter() && getPriority() > 0; }
-
- /**
- * Returns the member config as a BSONObj, using "tagConfig" to generate the tag subdoc.
- */
- BSONObj toBSON(const ReplicaSetTagConfig& tagConfig) const;
-
- private:
-
- int _id;
- HostAndPort _host;
- double _priority; // 0 means can never be primary
- int _votes; // Can this member vote? Only 0 and 1 are valid. Default 1.
- bool _arbiterOnly;
- Seconds _slaveDelay;
- bool _hidden; // if set, don't advertise to drivers in isMaster.
- bool _buildIndexes; // if false, do not create any non-_id indexes
- std::vector<ReplicaSetTag> _tags; // tagging for data center, rack, etc.
- };
+ * Gets the amount of time behind the primary that this member will attempt to
+ * remain. Zero seconds means stay as caught up as possible.
+ */
+ Seconds getSlaveDelay() const {
+ return _slaveDelay;
+ }
+
+ /**
+ * Returns true if this member may vote in elections.
+ */
+ bool isVoter() const {
+ return _votes != 0;
+ }
+
+ /**
+ * Returns the number of votes that this member gets.
+ */
+ int getNumVotes() const {
+ return isVoter() ? 1 : 0;
+ }
+
+ /**
+ * Returns true if this member is an arbiter (is not data-bearing).
+ */
+ bool isArbiter() const {
+ return _arbiterOnly;
+ }
+
+ /**
+ * Returns true if this member is hidden (not reported by isMaster, not electable).
+ */
+ bool isHidden() const {
+ return _hidden;
+ }
+
+ /**
+ * Returns true if this member should build secondary indexes.
+ */
+ bool shouldBuildIndexes() const {
+ return _buildIndexes;
+ }
+
+ /**
+ * Gets the number of replica set tags, including internal '$' tags, for this member.
+ */
+ size_t getNumTags() const {
+ return _tags.size();
+ }
+
+ /**
+ * Returns true if this MemberConfig has any non-internal tags, using "tagConfig" to
+ * determine the internal property of the tags.
+ */
+ bool hasTags(const ReplicaSetTagConfig& tagConfig) const;
+
+ /**
+ * Gets a begin iterator over the tags for this member.
+ */
+ TagIterator tagsBegin() const {
+ return _tags.begin();
+ }
+
+ /**
+ * Gets an end iterator over the tags for this member.
+ */
+ TagIterator tagsEnd() const {
+ return _tags.end();
+ }
+
+ /**
+ * Returns true if this represents the configuration of an electable member.
+ */
+ bool isElectable() const {
+ return !isArbiter() && getPriority() > 0;
+ }
+
+ /**
+ * Returns the member config as a BSONObj, using "tagConfig" to generate the tag subdoc.
+ */
+ BSONObj toBSON(const ReplicaSetTagConfig& tagConfig) const;
+
+private:
+ int _id;
+ HostAndPort _host;
+ double _priority; // 0 means can never be primary
+ int _votes; // Can this member vote? Only 0 and 1 are valid. Default 1.
+ bool _arbiterOnly;
+ Seconds _slaveDelay;
+ bool _hidden; // if set, don't advertise to drivers in isMaster.
+ bool _buildIndexes; // if false, do not create any non-_id indexes
+ std::vector<ReplicaSetTag> _tags; // tagging for data center, rack, etc.
+};
} // namespace repl
} // namespace mongo
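
A subtlety of this header: getNumTags() includes the internal '$'-prefixed tags that initialize() adds ($voter, $electable, $all), so a member with two user tags reports five. Callers that want only user-visible tags must filter the same way hasTags() and toBSON() do; a short sketch under that assumption:

    #include <iostream>
    #include <string>

    #include "mongo/db/repl/member_config.h"
    #include "mongo/db/repl/replica_set_tag.h"  // assumed home of ReplicaSetTagConfig

    namespace mongo {
    namespace repl {

    // Print only the user-visible tags of a member, skipping internal '$' tags.
    void printVisibleTags(const MemberConfig& mc, const ReplicaSetTagConfig& tagConfig) {
        for (MemberConfig::TagIterator it = mc.tagsBegin(); it != mc.tagsEnd(); ++it) {
            const std::string tagKey = tagConfig.getTagKey(*it);
            if (tagKey[0] == '$')
                continue;  // $voter, $electable, $all are bookkeeping, not user data
            std::cout << tagKey << " -> " << tagConfig.getTagValue(*it) << "\n";
        }
    }

    }  // namespace repl
    }  // namespace mongo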
diff --git a/src/mongo/db/repl/member_config_test.cpp b/src/mongo/db/repl/member_config_test.cpp
index 9d3a0f7b276..98e57e7d998 100644
--- a/src/mongo/db/repl/member_config_test.cpp
+++ b/src/mongo/db/repl/member_config_test.cpp
@@ -38,324 +38,417 @@ namespace mongo {
namespace repl {
namespace {
- TEST(MemberConfig, ParseMinimalMemberConfigAndCheckDefaults) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(0, mc.getId());
- ASSERT_EQUALS(HostAndPort("localhost", 12345), mc.getHostAndPort());
- ASSERT_EQUALS(1.0, mc.getPriority());
- ASSERT_EQUALS(0, mc.getSlaveDelay().total_seconds());
- ASSERT_TRUE(mc.isVoter());
- ASSERT_FALSE(mc.isHidden());
- ASSERT_FALSE(mc.isArbiter());
- ASSERT_TRUE(mc.shouldBuildIndexes());
- ASSERT_EQUALS(3U, mc.getNumTags());
- ASSERT_OK(mc.validate());
- }
-
- TEST(MemberConfig, ParseFailsWithIllegalFieldName) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::BadValue,
- mc.initialize(BSON("_id" << 0 << "host" << "localhost" << "frim" << 1),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseFailsWithMissingIdField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("host" << "localhost:12345"),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseFailsWithBadIdField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << "0" << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << Date_t(0) << "host" << "localhost:12345"),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseFailsWithMissingHostField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("_id" << 0), &tagConfig));
- }
-
-
- TEST(MemberConfig, ParseFailsWithBadHostField) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, mc.initialize(BSON("_id" << 0 << "host" << 0),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::FailedToParse, mc.initialize(BSON("_id" << 0 << "host" << ""),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::FailedToParse,
- mc.initialize(BSON("_id" << 0 << "host" << "myhost:zabc"), &tagConfig));
- }
-
- TEST(MemberConfig, ParseArbiterOnly) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "arbiterOnly" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.isArbiter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "arbiterOnly" << false),
- &tagConfig));
- ASSERT_TRUE(!mc.isArbiter());
- }
-
- TEST(MemberConfig, ParseHidden) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "hidden" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.isHidden());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "hidden" << false),
- &tagConfig));
- ASSERT_TRUE(!mc.isHidden());
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << 0 << "host" << "h" << "hidden" << "1.0"),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseBuildIndexes) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "buildIndexes" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.shouldBuildIndexes());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "buildIndexes" << false),
- &tagConfig));
- ASSERT_TRUE(!mc.shouldBuildIndexes());
- }
-
- TEST(MemberConfig, ParseVotes) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.0),
- &tagConfig));
- ASSERT_TRUE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0),
- &tagConfig));
- ASSERT_FALSE(mc.isVoter());
-
- // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.5),
- &tagConfig));
- ASSERT_TRUE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0.5),
- &tagConfig));
- ASSERT_FALSE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << -0.5),
- &tagConfig));
- ASSERT_FALSE(mc.isVoter());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 2),
- &tagConfig));
-
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << Date_t(2)),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParsePriority) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1),
- &tagConfig));
- ASSERT_EQUALS(1.0, mc.getPriority());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0),
- &tagConfig));
- ASSERT_EQUALS(0.0, mc.getPriority());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 100.8),
- &tagConfig));
- ASSERT_EQUALS(100.8, mc.getPriority());
-
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << Date_t(2)),
- &tagConfig));
- }
-
- TEST(MemberConfig, ParseSlaveDelay) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "slaveDelay" << 100),
- &tagConfig));
- ASSERT_EQUALS(100, mc.getSlaveDelay().total_seconds());
- }
-
- TEST(MemberConfig, ParseTags) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "tags" << BSON("k1" << "v1" << "k2" << "v2")),
- &tagConfig));
- ASSERT_EQUALS(5U, mc.getNumTags());
- ASSERT_EQUALS(5, std::distance(mc.tagsBegin(), mc.tagsEnd()));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k1", "v1")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k2", "v2")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$voter",
- "0")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$electable",
- "0")));
- ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$all",
- "0")));
- }
-
- TEST(MemberConfig, ValidateFailsWithIdOutOfRange) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << -1 << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 256 << "host" << "localhost:12345"),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidateVotes) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_TRUE(mc.isVoter());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_FALSE(mc.isVoter());
-
- // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 1.5),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_TRUE(mc.isVoter());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 0.5),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_FALSE(mc.isVoter());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << -0.5),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_FALSE(mc.isVoter());
-
- // Invalid values
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << 2),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "votes" << -1),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityRanges) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1000),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << -1),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1001),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidateSlaveDelays) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << 0),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << 3600 * 10),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << -1),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 0 <<
- "slaveDelay" << 3600 * 24 * 400),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityAndSlaveDelayRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "slaveDelay" << 60),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityAndHiddenRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "hidden" << true),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "hidden" << false),
- &tagConfig));
- ASSERT_OK(mc.validate());
- }
-
- TEST(MemberConfig, ValidatePriorityAndBuildIndexesRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "buildIndexes" << false),
- &tagConfig));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" << "priority" << 1 <<
- "buildIndexes" << true),
- &tagConfig));
- ASSERT_OK(mc.validate());
- }
-
- TEST(MemberConfig, ValidateArbiterVotesRelationship) {
- ReplicaSetTagConfig tagConfig;
- MemberConfig mc;
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 1 << "arbiterOnly" << true),
- &tagConfig));
- ASSERT_OK(mc.validate());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 0 << "arbiterOnly" << false),
- &tagConfig));
- ASSERT_OK(mc.validate());
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 1 << "arbiterOnly" << false),
- &tagConfig));
- ASSERT_OK(mc.validate());
-
- ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host" << "h" <<
- "votes" << 0 << "arbiterOnly" << true),
- &tagConfig));
- ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
- }
+TEST(MemberConfig, ParseMinimalMemberConfigAndCheckDefaults) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(0, mc.getId());
+ ASSERT_EQUALS(HostAndPort("localhost", 12345), mc.getHostAndPort());
+ ASSERT_EQUALS(1.0, mc.getPriority());
+ ASSERT_EQUALS(0, mc.getSlaveDelay().total_seconds());
+ ASSERT_TRUE(mc.isVoter());
+ ASSERT_FALSE(mc.isHidden());
+ ASSERT_FALSE(mc.isArbiter());
+ ASSERT_TRUE(mc.shouldBuildIndexes());
+ ASSERT_EQUALS(3U, mc.getNumTags());
+ ASSERT_OK(mc.validate());
+}
+
+TEST(MemberConfig, ParseFailsWithIllegalFieldName) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "localhost"
+ << "frim" << 1),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseFailsWithMissingIdField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey,
+ mc.initialize(BSON("host"
+ << "localhost:12345"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseFailsWithBadIdField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey,
+ mc.initialize(BSON("host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id"
+ << "0"
+ << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << Date_t(0) << "host"
+ << "localhost:12345"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseFailsWithMissingHostField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, mc.initialize(BSON("_id" << 0), &tagConfig));
+}
+
+
+TEST(MemberConfig, ParseFailsWithBadHostField) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host" << 0), &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << ""),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "myhost:zabc"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseArbiterOnly) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "arbiterOnly" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.isArbiter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "arbiterOnly" << false),
+ &tagConfig));
+ ASSERT_TRUE(!mc.isArbiter());
+}
+
+TEST(MemberConfig, ParseHidden) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "hidden" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.isHidden());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "hidden" << false),
+ &tagConfig));
+ ASSERT_TRUE(!mc.isHidden());
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "hidden"
+ << "1.0"),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseBuildIndexes) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "buildIndexes" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.shouldBuildIndexes());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "buildIndexes" << false),
+ &tagConfig));
+ ASSERT_TRUE(!mc.shouldBuildIndexes());
+}
+
+TEST(MemberConfig, ParseVotes) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.0),
+ &tagConfig));
+ ASSERT_TRUE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0),
+ &tagConfig));
+ ASSERT_FALSE(mc.isVoter());
+
+ // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.5),
+ &tagConfig));
+ ASSERT_TRUE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0.5),
+ &tagConfig));
+ ASSERT_FALSE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << -0.5),
+ &tagConfig));
+ ASSERT_FALSE(mc.isVoter());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 2),
+ &tagConfig));
+
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << Date_t(2)),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParsePriority) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1),
+ &tagConfig));
+ ASSERT_EQUALS(1.0, mc.getPriority());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0),
+ &tagConfig));
+ ASSERT_EQUALS(0.0, mc.getPriority());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 100.8),
+ &tagConfig));
+ ASSERT_EQUALS(100.8, mc.getPriority());
+
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << Date_t(2)),
+ &tagConfig));
+}
+
+TEST(MemberConfig, ParseSlaveDelay) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "slaveDelay" << 100),
+ &tagConfig));
+ ASSERT_EQUALS(100, mc.getSlaveDelay().total_seconds());
+}
+
+TEST(MemberConfig, ParseTags) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "tags" << BSON("k1"
+ << "v1"
+ << "k2"
+ << "v2")),
+ &tagConfig));
+ ASSERT_EQUALS(5U, mc.getNumTags());
+ ASSERT_EQUALS(5, std::distance(mc.tagsBegin(), mc.tagsEnd()));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k1", "v1")));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("k2", "v2")));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$voter", "0")));
+ ASSERT_EQUALS(1,
+ std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$electable", "0")));
+ ASSERT_EQUALS(1, std::count(mc.tagsBegin(), mc.tagsEnd(), tagConfig.findTag("$all", "0")));
+}
+
+TEST(MemberConfig, ValidateFailsWithIdOutOfRange) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << -1 << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 256 << "host"
+ << "localhost:12345"),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidateVotes) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_TRUE(mc.isVoter());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_FALSE(mc.isVoter());
+
+ // For backwards compatibility, truncate 1.X to 1, and 0.X to 0 (and -0.X to 0).
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1.5),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_TRUE(mc.isVoter());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0.5),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_FALSE(mc.isVoter());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << -0.5),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_FALSE(mc.isVoter());
+
+ // Invalid values
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 2),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << -1),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityRanges) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1000),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << -1),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1001),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidateSlaveDelays) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << 0),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << 3600 * 10),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << -1),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 0 << "slaveDelay" << 3600 * 24 * 400),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityAndSlaveDelayRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "slaveDelay" << 60),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityAndHiddenRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "hidden" << true),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "hidden" << false),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+}
+
+TEST(MemberConfig, ValidatePriorityAndBuildIndexesRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "buildIndexes" << false),
+ &tagConfig));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "priority" << 1 << "buildIndexes" << true),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+}
+
+TEST(MemberConfig, ValidateArbiterVotesRelationship) {
+ ReplicaSetTagConfig tagConfig;
+ MemberConfig mc;
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1 << "arbiterOnly" << true),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0 << "arbiterOnly" << false),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 1 << "arbiterOnly" << false),
+ &tagConfig));
+ ASSERT_OK(mc.validate());
+
+ ASSERT_OK(mc.initialize(BSON("_id" << 0 << "host"
+ << "h"
+ << "votes" << 0 << "arbiterOnly" << true),
+ &tagConfig));
+ ASSERT_EQUALS(ErrorCodes::BadValue, mc.validate());
+}
} // namespace
} // namespace repl
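
The ParseVotes and ValidateVotes tests above pin down a backwards-compatibility quirk worth spelling out: votes is read via numberInt(), which truncates a double toward zero, so 1.5 parses as a legal voter, 0.5 and -0.5 parse as legal non-voters, and 2 parses fine but is rejected later by validate(). A standalone model of that arithmetic, in plain C++ rather than the MongoDB types:

    #include <iostream>

    // static_cast<int> truncates toward zero, matching the numberInt()
    // behavior the backwards-compatibility comments above rely on.
    int truncatedVotes(double raw) {
        return static_cast<int>(raw);
    }

    int main() {
        const double samples[] = {1.0, 1.5, 0.5, -0.5, 2.0};
        for (int i = 0; i < 5; ++i) {
            int votes = truncatedVotes(samples[i]);
            std::cout << samples[i] << " -> votes=" << votes
                      << " voter=" << (votes != 0)                // isVoter()
                      << " valid=" << (votes == 0 || votes == 1)  // validate() rule
                      << "\n";
        }
    }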
diff --git a/src/mongo/db/repl/member_heartbeat_data.cpp b/src/mongo/db/repl/member_heartbeat_data.cpp
index 8ca22c40649..ff1d5882be6 100644
--- a/src/mongo/db/repl/member_heartbeat_data.cpp
+++ b/src/mongo/db/repl/member_heartbeat_data.cpp
@@ -39,74 +39,68 @@
namespace mongo {
namespace repl {
- MemberHeartbeatData::MemberHeartbeatData() :
- _health(-1),
- _upSince(0),
- _lastHeartbeat(0),
- _lastHeartbeatRecv(0),
- _authIssue(false) {
-
- _lastResponse.setState(MemberState::RS_UNKNOWN);
- _lastResponse.setElectionTime(OpTime());
- _lastResponse.setOpTime(OpTime());
+MemberHeartbeatData::MemberHeartbeatData()
+ : _health(-1), _upSince(0), _lastHeartbeat(0), _lastHeartbeatRecv(0), _authIssue(false) {
+ _lastResponse.setState(MemberState::RS_UNKNOWN);
+ _lastResponse.setElectionTime(OpTime());
+ _lastResponse.setOpTime(OpTime());
+}
+
+void MemberHeartbeatData::setUpValues(Date_t now,
+ const HostAndPort& host,
+ ReplSetHeartbeatResponse hbResponse) {
+ _health = 1;
+ if (_upSince == 0) {
+ _upSince = now;
}
-
- void MemberHeartbeatData::setUpValues(Date_t now,
- const HostAndPort& host,
- ReplSetHeartbeatResponse hbResponse) {
- _health = 1;
- if (_upSince == 0) {
- _upSince = now;
- }
- _authIssue = false;
- _lastHeartbeat = now;
- if (!hbResponse.hasState()) {
- hbResponse.setState(MemberState::RS_UNKNOWN);
- }
- if (!hbResponse.hasElectionTime()) {
- hbResponse.setElectionTime(_lastResponse.getElectionTime());
- }
- if (!hbResponse.hasOpTime()) {
- hbResponse.setOpTime(_lastResponse.getOpTime());
- }
-
- // Log if the state changes
- if (_lastResponse.getState() != hbResponse.getState()){
- log() << "Member " << host.toString() << " is now in state "
- << hbResponse.getState().toString() << rsLog;
- }
-
- _lastResponse = hbResponse;
+ _authIssue = false;
+ _lastHeartbeat = now;
+ if (!hbResponse.hasState()) {
+ hbResponse.setState(MemberState::RS_UNKNOWN);
}
-
- void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeatMessage) {
-
- _health = 0;
- _upSince = 0;
- _lastHeartbeat = now;
- _authIssue = false;
-
- _lastResponse = ReplSetHeartbeatResponse();
- _lastResponse.setState(MemberState::RS_DOWN);
- _lastResponse.setElectionTime(OpTime());
- _lastResponse.setOpTime(OpTime());
- _lastResponse.setHbMsg(heartbeatMessage);
- _lastResponse.setSyncingTo("");
+ if (!hbResponse.hasElectionTime()) {
+ hbResponse.setElectionTime(_lastResponse.getElectionTime());
+ }
+ if (!hbResponse.hasOpTime()) {
+ hbResponse.setOpTime(_lastResponse.getOpTime());
}
- void MemberHeartbeatData::setAuthIssue(Date_t now) {
- _health = 0; // set health to 0 so that this doesn't count towards majority.
- _upSince = 0;
- _lastHeartbeat = now;
- _authIssue = true;
-
- _lastResponse = ReplSetHeartbeatResponse();
- _lastResponse.setState(MemberState::RS_UNKNOWN);
- _lastResponse.setElectionTime(OpTime());
- _lastResponse.setOpTime(OpTime());
- _lastResponse.setHbMsg("");
- _lastResponse.setSyncingTo("");
+ // Log if the state changes
+ if (_lastResponse.getState() != hbResponse.getState()) {
+ log() << "Member " << host.toString() << " is now in state "
+ << hbResponse.getState().toString() << rsLog;
}
-} // namespace repl
-} // namespace mongo
+ _lastResponse = hbResponse;
+}
+
+void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeatMessage) {
+ _health = 0;
+ _upSince = 0;
+ _lastHeartbeat = now;
+ _authIssue = false;
+
+ _lastResponse = ReplSetHeartbeatResponse();
+ _lastResponse.setState(MemberState::RS_DOWN);
+ _lastResponse.setElectionTime(OpTime());
+ _lastResponse.setOpTime(OpTime());
+ _lastResponse.setHbMsg(heartbeatMessage);
+ _lastResponse.setSyncingTo("");
+}
+
+void MemberHeartbeatData::setAuthIssue(Date_t now) {
+ _health = 0; // set health to 0 so that this doesn't count towards majority.
+ _upSince = 0;
+ _lastHeartbeat = now;
+ _authIssue = true;
+
+ _lastResponse = ReplSetHeartbeatResponse();
+ _lastResponse.setState(MemberState::RS_UNKNOWN);
+ _lastResponse.setElectionTime(OpTime());
+ _lastResponse.setOpTime(OpTime());
+ _lastResponse.setHbMsg("");
+ _lastResponse.setSyncingTo("");
+}
+
+} // namespace repl
+} // namespace mongo
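
The setUpValues() logic above has one subtle rule worth calling out: fields absent from a new heartbeat response (state, election time, optime) are carried forward from the previous response, and a log line is emitted only when the state actually changes. Below is a minimal standalone sketch of that carry-forward behavior; HbResponse and MemberData are simplified stand-ins for illustration, not the real ReplSetHeartbeatResponse and MemberHeartbeatData types.

#include <iostream>
#include <optional>
#include <string>

struct HbResponse {
    std::optional<std::string> state;  // stand-in for MemberState
    std::optional<long long> opTime;   // stand-in for repl::OpTime
};

struct MemberData {
    int health = -1;  // -1 = not checked yet, 0 = down, 1 = up
    HbResponse last;

    void setUpValues(HbResponse r) {
        health = 1;
        // Fields absent from the new response are carried forward from the last one.
        if (!r.state)
            r.state = last.state.value_or("UNKNOWN");
        if (!r.opTime)
            r.opTime = last.opTime.value_or(0);
        if (last.state != r.state)  // log only on actual state changes
            std::cout << "member is now in state " << *r.state << '\n';
        last = std::move(r);
    }

    void setDownValues() {
        health = 0;
        last = HbResponse{std::string("DOWN"), 0};
    }
};

int main() {
    MemberData m;
    m.setUpValues({std::string("SECONDARY"), 42});  // prints the state change
    m.setUpValues({std::nullopt, std::nullopt});    // carries SECONDARY/42 forward, no log
    m.setDownValues();
    std::cout << "health=" << m.health << '\n';     // health=0
}
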
diff --git a/src/mongo/db/repl/member_heartbeat_data.h b/src/mongo/db/repl/member_heartbeat_data.h
index 624c572d33f..c54ffb02385 100644
--- a/src/mongo/db/repl/member_heartbeat_data.h
+++ b/src/mongo/db/repl/member_heartbeat_data.h
@@ -36,77 +36,103 @@
namespace mongo {
namespace repl {
+/**
+ * This class contains the data returned from a heartbeat command for one member
+ * of a replica set.
+ **/
+class MemberHeartbeatData {
+public:
+ MemberHeartbeatData();
+
+ MemberState getState() const {
+ return _lastResponse.getState();
+ }
+ int getHealth() const {
+ return _health;
+ }
+ Date_t getUpSince() const {
+ return _upSince;
+ }
+ Date_t getLastHeartbeat() const {
+ return _lastHeartbeat;
+ }
+ Date_t getLastHeartbeatRecv() const {
+ return _lastHeartbeatRecv;
+ }
+ void setLastHeartbeatRecv(Date_t newHeartbeatRecvTime) {
+ _lastHeartbeatRecv = newHeartbeatRecvTime;
+ }
+ const std::string& getLastHeartbeatMsg() const {
+ return _lastResponse.getHbMsg();
+ }
+ const std::string& getSyncSource() const {
+ return _lastResponse.getSyncingTo();
+ }
+ OpTime getOpTime() const {
+ return _lastResponse.getOpTime();
+ }
+ int getConfigVersion() const {
+ return _lastResponse.getVersion();
+ }
+ bool hasAuthIssue() const {
+ return _authIssue;
+ }
+
+ OpTime getElectionTime() const {
+ return _lastResponse.getElectionTime();
+ }
+
+    // Returns true if the last heartbeat data explicitly stated that the node
+ // is not electable.
+ bool isUnelectable() const {
+ return _lastResponse.hasIsElectable() && !_lastResponse.isElectable();
+ }
+
+ // Was this member up for the last heartbeat?
+ bool up() const {
+ return _health > 0;
+ }
+    // Was this member up for the last heartbeat
+ // (or we haven't received the first heartbeat yet)
+ bool maybeUp() const {
+ return _health != 0;
+ }
+
+ /**
+ * Sets values in this object from the results of a successful heartbeat command.
+ */
+ void setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse hbResponse);
+
+ /**
+     * Sets values in this object from the results of an erroring/failed heartbeat command.
+     * _authIssue is set to false, _health is set to 0, _state is set to RS_DOWN, and
+ * other values are set as specified.
+ */
+ void setDownValues(Date_t now, const std::string& heartbeatMessage);
+
/**
- * This class contains the data returned from a heartbeat command for one member
- * of a replica set.
- **/
- class MemberHeartbeatData {
- public:
- MemberHeartbeatData();
-
- MemberState getState() const { return _lastResponse.getState(); }
- int getHealth() const { return _health; }
- Date_t getUpSince() const { return _upSince; }
- Date_t getLastHeartbeat() const { return _lastHeartbeat; }
- Date_t getLastHeartbeatRecv() const { return _lastHeartbeatRecv; }
- void setLastHeartbeatRecv(Date_t newHeartbeatRecvTime) {
- _lastHeartbeatRecv = newHeartbeatRecvTime;
- }
- const std::string& getLastHeartbeatMsg() const { return _lastResponse.getHbMsg(); }
- const std::string& getSyncSource() const { return _lastResponse.getSyncingTo(); }
- OpTime getOpTime() const { return _lastResponse.getOpTime(); }
- int getConfigVersion() const { return _lastResponse.getVersion(); }
- bool hasAuthIssue() const { return _authIssue; }
-
- OpTime getElectionTime() const { return _lastResponse.getElectionTime(); }
-
- // Returns true if the last heartbeat data explicilty stated that the node
- // is not electable.
- bool isUnelectable() const {
- return _lastResponse.hasIsElectable() && !_lastResponse.isElectable();
- }
-
- // Was this member up for the last heartbeat?
- bool up() const { return _health > 0; }
- // Was this member up for the last hearbeeat
- // (or we haven't received the first heartbeat yet)
- bool maybeUp() const { return _health != 0; }
-
- /**
- * Sets values in this object from the results of a successful heartbeat command.
- */
- void setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse hbResponse);
-
- /**
- * Sets values in this object from the results of a erroring/failed heartbeat command.
- * _authIssues is set to false, _health is set to 0, _state is set to RS_DOWN, and
- * other values are set as specified.
- */
- void setDownValues(Date_t now, const std::string& heartbeatMessage);
-
- /**
- * Sets values in this object that indicate there was an auth issue on the last heartbeat
- * command.
- */
- void setAuthIssue(Date_t now);
-
- private:
- // -1 = not checked yet, 0 = member is down/unreachable, 1 = member is up
- int _health;
-
- // Time of first successful heartbeat, if currently still up
- Date_t _upSince;
- // This is the last time we got a response from a heartbeat request to a given member.
- Date_t _lastHeartbeat;
- // This is the last time we got a heartbeat request from a given member.
- Date_t _lastHeartbeatRecv;
-
- // Did the last heartbeat show a failure to authenticate?
- bool _authIssue;
-
- // The last heartbeat response we received.
- ReplSetHeartbeatResponse _lastResponse;
- };
-
-} // namespace repl
-} // namespace mongo
+ * Sets values in this object that indicate there was an auth issue on the last heartbeat
+ * command.
+ */
+ void setAuthIssue(Date_t now);
+
+private:
+ // -1 = not checked yet, 0 = member is down/unreachable, 1 = member is up
+ int _health;
+
+ // Time of first successful heartbeat, if currently still up
+ Date_t _upSince;
+ // This is the last time we got a response from a heartbeat request to a given member.
+ Date_t _lastHeartbeat;
+ // This is the last time we got a heartbeat request from a given member.
+ Date_t _lastHeartbeatRecv;
+
+ // Did the last heartbeat show a failure to authenticate?
+ bool _authIssue;
+
+ // The last heartbeat response we received.
+ ReplSetHeartbeatResponse _lastResponse;
+};
+
+} // namespace repl
+} // namespace mongo
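
The tri-state _health field documented above is exactly what separates up() from maybeUp(): before the first heartbeat arrives, a member is not up, but it may be. A small self-contained sketch of just that tri-state follows; the Health type is illustrative, not the real class.

#include <cassert>

struct Health {
    int h = -1;  // -1 = not checked yet, 0 = down, 1 = up
    bool up() const { return h > 0; }
    bool maybeUp() const { return h != 0; }  // also true while still unchecked
};

int main() {
    Health x;
    assert(!x.up() && x.maybeUp());   // no heartbeat yet: not up, but maybe up
    x.h = 0;                          // marked down
    assert(!x.up() && !x.maybeUp());
    x.h = 1;                          // successful heartbeat
    assert(x.up() && x.maybeUp());
}
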
diff --git a/src/mongo/db/repl/member_state.h b/src/mongo/db/repl/member_state.h
index c3e3ffd292b..4adf7516845 100644
--- a/src/mongo/db/repl/member_state.h
+++ b/src/mongo/db/repl/member_state.h
@@ -36,65 +36,97 @@ namespace mongo {
namespace repl {
- /*
- RS_STARTUP serving still starting up, or still trying to initiate the set
- RS_PRIMARY this server thinks it is primary
- RS_SECONDARY this server thinks it is a secondary (slave mode)
- RS_RECOVERING recovering/resyncing; after recovery usually auto-transitions to secondary
- RS_STARTUP2 loaded config, still determining who is primary
+/*
+ RS_STARTUP serving still starting up, or still trying to initiate the set
+ RS_PRIMARY this server thinks it is primary
+ RS_SECONDARY this server thinks it is a secondary (slave mode)
+ RS_RECOVERING recovering/resyncing; after recovery usually auto-transitions to secondary
+ RS_STARTUP2 loaded config, still determining who is primary
- State -> integer mappings are reserved forever. Do not change them or delete them, except
- to update RS_MAX when introducing new states.
- */
- struct MemberState {
- enum MS {
- RS_STARTUP = 0,
- RS_PRIMARY = 1,
- RS_SECONDARY = 2,
- RS_RECOVERING = 3,
- RS_STARTUP2 = 5,
- RS_UNKNOWN = 6, /* remote node not yet reached */
- RS_ARBITER = 7,
- RS_DOWN = 8, /* node not reachable for a report */
- RS_ROLLBACK = 9,
- RS_REMOVED = 10, /* node removed from replica set */
- RS_MAX = 10
- } s;
+ State -> integer mappings are reserved forever. Do not change them or delete them, except
+ to update RS_MAX when introducing new states.
+*/
+struct MemberState {
+ enum MS {
+ RS_STARTUP = 0,
+ RS_PRIMARY = 1,
+ RS_SECONDARY = 2,
+ RS_RECOVERING = 3,
+ RS_STARTUP2 = 5,
+ RS_UNKNOWN = 6, /* remote node not yet reached */
+ RS_ARBITER = 7,
+ RS_DOWN = 8, /* node not reachable for a report */
+ RS_ROLLBACK = 9,
+ RS_REMOVED = 10, /* node removed from replica set */
+ RS_MAX = 10
+ } s;
- MemberState(MS ms = RS_UNKNOWN) : s(ms) { }
- explicit MemberState(int ms) : s((MS) ms) { }
+ MemberState(MS ms = RS_UNKNOWN) : s(ms) {}
+ explicit MemberState(int ms) : s((MS)ms) {}
- bool startup() const { return s == RS_STARTUP; }
- bool primary() const { return s == RS_PRIMARY; }
- bool secondary() const { return s == RS_SECONDARY; }
- bool recovering() const { return s == RS_RECOVERING; }
- bool startup2() const { return s == RS_STARTUP2; }
- bool rollback() const { return s == RS_ROLLBACK; }
- bool readable() const { return s == RS_PRIMARY || s == RS_SECONDARY; }
- bool removed() const { return s == RS_REMOVED; }
- bool arbiter() const { return s == RS_ARBITER; }
+ bool startup() const {
+ return s == RS_STARTUP;
+ }
+ bool primary() const {
+ return s == RS_PRIMARY;
+ }
+ bool secondary() const {
+ return s == RS_SECONDARY;
+ }
+ bool recovering() const {
+ return s == RS_RECOVERING;
+ }
+ bool startup2() const {
+ return s == RS_STARTUP2;
+ }
+ bool rollback() const {
+ return s == RS_ROLLBACK;
+ }
+ bool readable() const {
+ return s == RS_PRIMARY || s == RS_SECONDARY;
+ }
+ bool removed() const {
+ return s == RS_REMOVED;
+ }
+ bool arbiter() const {
+ return s == RS_ARBITER;
+ }
- std::string toString() const;
+ std::string toString() const;
- bool operator==(const MemberState& r) const { return s == r.s; }
- bool operator!=(const MemberState& r) const { return s != r.s; }
- };
+ bool operator==(const MemberState& r) const {
+ return s == r.s;
+ }
+ bool operator!=(const MemberState& r) const {
+ return s != r.s;
+ }
+};
- inline std::string MemberState::toString() const {
- switch ( s ) {
- case RS_STARTUP: return "STARTUP";
- case RS_PRIMARY: return "PRIMARY";
- case RS_SECONDARY: return "SECONDARY";
- case RS_RECOVERING: return "RECOVERING";
- case RS_STARTUP2: return "STARTUP2";
- case RS_ARBITER: return "ARBITER";
- case RS_DOWN: return "DOWN";
- case RS_ROLLBACK: return "ROLLBACK";
- case RS_UNKNOWN: return "UNKNOWN";
- case RS_REMOVED: return "REMOVED";
- }
- return "";
+inline std::string MemberState::toString() const {
+ switch (s) {
+ case RS_STARTUP:
+ return "STARTUP";
+ case RS_PRIMARY:
+ return "PRIMARY";
+ case RS_SECONDARY:
+ return "SECONDARY";
+ case RS_RECOVERING:
+ return "RECOVERING";
+ case RS_STARTUP2:
+ return "STARTUP2";
+ case RS_ARBITER:
+ return "ARBITER";
+ case RS_DOWN:
+ return "DOWN";
+ case RS_ROLLBACK:
+ return "ROLLBACK";
+ case RS_UNKNOWN:
+ return "UNKNOWN";
+ case RS_REMOVED:
+ return "REMOVED";
}
+ return "";
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
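
Because the state-to-integer mappings above are reserved forever, an integer received over the wire or read from disk can always be mapped back to a state safely. A trimmed standalone sketch of that contract, covering only a subset of the states for illustration:

#include <iostream>

enum MS { RS_STARTUP = 0, RS_PRIMARY = 1, RS_SECONDARY = 2, RS_DOWN = 8 };

const char* toString(MS s) {
    switch (s) {
        case RS_STARTUP:   return "STARTUP";
        case RS_PRIMARY:   return "PRIMARY";
        case RS_SECONDARY: return "SECONDARY";
        case RS_DOWN:      return "DOWN";
    }
    return "";
}

bool readable(MS s) { return s == RS_PRIMARY || s == RS_SECONDARY; }

int main() {
    MS s = static_cast<MS>(2);  // integer received over the wire
    std::cout << toString(s) << " readable=" << readable(s) << '\n';  // SECONDARY readable=1
}
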
diff --git a/src/mongo/db/repl/minvalid.cpp b/src/mongo/db/repl/minvalid.cpp
index 18235cc178c..ec39364ea13 100644
--- a/src/mongo/db/repl/minvalid.cpp
+++ b/src/mongo/db/repl/minvalid.cpp
@@ -45,61 +45,65 @@ namespace mongo {
namespace repl {
namespace {
- const char* initialSyncFlagString = "doingInitialSync";
- const BSONObj initialSyncFlag(BSON(initialSyncFlagString << true));
- const char* minvalidNS = "local.replset.minvalid";
-} // namespace
+const char* initialSyncFlagString = "doingInitialSync";
+const BSONObj initialSyncFlag(BSON(initialSyncFlagString << true));
+const char* minvalidNS = "local.replset.minvalid";
+} // namespace
- void clearInitialSyncFlag(OperationContext* txn) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lk(txn->lockState(), "local", MODE_X);
- Helpers::putSingleton(txn, minvalidNS, BSON("$unset" << initialSyncFlag));
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "clearInitialSyncFlags", minvalidNS);
+void clearInitialSyncFlag(OperationContext* txn) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_X);
+ Helpers::putSingleton(txn, minvalidNS, BSON("$unset" << initialSyncFlag));
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "clearInitialSyncFlags", minvalidNS);
+}
- void setInitialSyncFlag(OperationContext* txn) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lk(txn->lockState(), "local", MODE_X);
- Helpers::putSingleton(txn, minvalidNS, BSON("$set" << initialSyncFlag));
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "setInitialSyncFlags", minvalidNS);
+void setInitialSyncFlag(OperationContext* txn) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_X);
+ Helpers::putSingleton(txn, minvalidNS, BSON("$set" << initialSyncFlag));
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "setInitialSyncFlags", minvalidNS);
+}
- bool getInitialSyncFlag() {
- OperationContextImpl txn;
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(&txn, MODE_IX);
- Lock::DBLock lk(txn.lockState(), "local", MODE_X);
- BSONObj mv;
- bool found = Helpers::getSingleton( &txn, minvalidNS, mv);
- if (found) {
- return mv[initialSyncFlagString].trueValue();
- }
- return false;
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(&txn, "getInitialSyncFlags", minvalidNS);
+bool getInitialSyncFlag() {
+ OperationContextImpl txn;
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(&txn, MODE_IX);
+ Lock::DBLock lk(txn.lockState(), "local", MODE_X);
+ BSONObj mv;
+ bool found = Helpers::getSingleton(&txn, minvalidNS, mv);
+ if (found) {
+ return mv[initialSyncFlagString].trueValue();
+ }
+ return false;
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(&txn, "getInitialSyncFlags", minvalidNS);
+}
- void setMinValid(OperationContext* ctx, OpTime ts) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(ctx, MODE_IX);
- Lock::DBLock lk(ctx->lockState(), "local", MODE_X);
- Helpers::putSingleton(ctx, minvalidNS, BSON("$set" << BSON("ts" << ts)));
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(ctx, "setMinValid", minvalidNS);
+void setMinValid(OperationContext* ctx, OpTime ts) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(ctx, MODE_IX);
+ Lock::DBLock lk(ctx->lockState(), "local", MODE_X);
+ Helpers::putSingleton(ctx, minvalidNS, BSON("$set" << BSON("ts" << ts)));
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(ctx, "setMinValid", minvalidNS);
+}
- OpTime getMinValid(OperationContext* txn) {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IS);
- Lock::DBLock lk(txn->lockState(), "local", MODE_S);
- BSONObj mv;
- bool found = Helpers::getSingleton(txn, minvalidNS, mv);
- if (found) {
- return mv["ts"]._opTime();
- }
- return OpTime();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "getMinValid", minvalidNS);
+OpTime getMinValid(OperationContext* txn) {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IS);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_S);
+ BSONObj mv;
+ bool found = Helpers::getSingleton(txn, minvalidNS, mv);
+ if (found) {
+ return mv["ts"]._opTime();
+ }
+ return OpTime();
}
-
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "getMinValid", minvalidNS);
+}
}
}
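
Each helper above wraps its storage operation in MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN/END, which retries the body when a write conflict aborts it. Here is a hedged sketch of that retry shape in plain C++; WriteConflictException and writeConflictRetry below are stand-ins for illustration, not the real macro expansion or MongoDB symbols.

#include <iostream>

struct WriteConflictException {};  // stand-in for the storage engine's exception

template <typename Body>
auto writeConflictRetry(const char* opName, Body body) -> decltype(body()) {
    for (int attempt = 0;; ++attempt) {
        try {
            return body();  // success: return the body's result
        } catch (const WriteConflictException&) {
            std::cerr << opName << ": write conflict, retrying (attempt "
                      << attempt + 1 << ")\n";
        }
    }
}

int main() {
    int tries = 0;
    int v = writeConflictRetry("setMinValid", [&] {
        if (++tries < 3) throw WriteConflictException{};  // simulate two conflicts
        return 42;
    });
    std::cout << "succeeded after " << tries << " tries, value " << v << '\n';
}
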
diff --git a/src/mongo/db/repl/minvalid.h b/src/mongo/db/repl/minvalid.h
index 7bbe7c39c69..36edeafad6b 100644
--- a/src/mongo/db/repl/minvalid.h
+++ b/src/mongo/db/repl/minvalid.h
@@ -29,39 +29,39 @@
#pragma once
namespace mongo {
- class BSONObj;
- class OperationContext;
- class OpTime;
+class BSONObj;
+class OperationContext;
+class OpTime;
namespace repl {
- /**
- * Helper functions for maintaining local.replset.minvalid collection contents.
- *
- * When a member reaches its minValid optime it is in a consistent state. Thus, minValid is
- * set as the last step in initial sync. At the beginning of initial sync, _initialSyncFlag
- * is appended onto minValid to indicate that initial sync was started but has not yet
- * completed.
- * minValid is also used during "normal" sync: the last op in each batch is used to set
- * minValid, to indicate that we are in a consistent state when the batch has been fully
- * applied.
- */
+/**
+ * Helper functions for maintaining local.replset.minvalid collection contents.
+ *
+ * When a member reaches its minValid optime it is in a consistent state. Thus, minValid is
+ * set as the last step in initial sync. At the beginning of initial sync, _initialSyncFlag
+ * is appended onto minValid to indicate that initial sync was started but has not yet
+ * completed.
+ * minValid is also used during "normal" sync: the last op in each batch is used to set
+ * minValid, to indicate that we are in a consistent state when the batch has been fully
+ * applied.
+ */
- /**
- * The initial sync flag is used to durably record the state of an initial sync; its boolean
- * value is true when an initial sync is in progress and hasn't yet completed. The flag
- * is stored as part of the local.replset.minvalid collection.
- */
- void clearInitialSyncFlag(OperationContext* txn);
- void setInitialSyncFlag(OperationContext* txn);
- bool getInitialSyncFlag();
+/**
+ * The initial sync flag is used to durably record the state of an initial sync; its boolean
+ * value is true when an initial sync is in progress and hasn't yet completed. The flag
+ * is stored as part of the local.replset.minvalid collection.
+ */
+void clearInitialSyncFlag(OperationContext* txn);
+void setInitialSyncFlag(OperationContext* txn);
+bool getInitialSyncFlag();
- /**
- * The minValid optime value is the earliest (minimum) OpTime that must be applied in order to
- * consider the dataset consistent. Do not allow client reads if our last applied operation is
- * before the minValid time.
- */
- void setMinValid(OperationContext* ctx, OpTime ts);
- OpTime getMinValid(OperationContext* txn);
+/**
+ * The minValid optime value is the earliest (minimum) OpTime that must be applied in order to
+ * consider the dataset consistent. Do not allow client reads if our last applied operation is
+ * before the minValid time.
+ */
+void setMinValid(OperationContext* ctx, OpTime ts);
+OpTime getMinValid(OperationContext* txn);
}
}
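
The comments above fix an ordering: the initial-sync flag is set first, minValid is written as data becomes consistent, and the flag is cleared only as the final step. A minimal sketch of that protocol, with a std::map standing in for the local.replset.minvalid singleton document; the helper signatures here are hypothetical simplifications of the real OperationContext-based API.

#include <cassert>
#include <map>
#include <string>

std::map<std::string, long long> minvalidDoc;  // stand-in for the singleton document

void setInitialSyncFlag() { minvalidDoc["doingInitialSync"] = 1; }
void clearInitialSyncFlag() { minvalidDoc.erase("doingInitialSync"); }
bool getInitialSyncFlag() { return minvalidDoc.count("doingInitialSync") != 0; }
void setMinValid(long long ts) { minvalidDoc["ts"] = ts; }
long long getMinValid() {
    auto it = minvalidDoc.find("ts");
    return it == minvalidDoc.end() ? 0 : it->second;
}

int main() {
    setInitialSyncFlag();  // 1. mark sync in progress (durably, in the real code)
    // ... clone data, apply oplog ...
    setMinValid(12345);    // 2. record the optime that makes the dataset consistent
    clearInitialSyncFlag();  // 3. last step: initial sync finished
    assert(!getInitialSyncFlag() && getMinValid() == 12345);
}
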
diff --git a/src/mongo/db/repl/multicmd.cpp b/src/mongo/db/repl/multicmd.cpp
index cb0a41345f6..706645d9796 100644
--- a/src/mongo/db/repl/multicmd.cpp
+++ b/src/mongo/db/repl/multicmd.cpp
@@ -39,17 +39,16 @@ namespace mongo {
namespace repl {
- void _MultiCommandJob::run() {
- try {
- ScopedConn c(d.toHost);
- LOG(1) << "multiCommand running on host " << d.toHost;
- d.ok = c.runCommand("admin", cmd, d.result);
- LOG(1) << "multiCommand response: " << d.result;
- }
- catch (const DBException& e) {
- LOG(1) << "dev caught " << e.what() << " on multiCommand to " << d.toHost;
- }
+void _MultiCommandJob::run() {
+ try {
+ ScopedConn c(d.toHost);
+ LOG(1) << "multiCommand running on host " << d.toHost;
+ d.ok = c.runCommand("admin", cmd, d.result);
+ LOG(1) << "multiCommand response: " << d.result;
+ } catch (const DBException& e) {
+ LOG(1) << "dev caught " << e.what() << " on multiCommand to " << d.toHost;
}
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/multicmd.h b/src/mongo/db/repl/multicmd.h
index fa4519d4f68..677f3da481b 100644
--- a/src/mongo/db/repl/multicmd.h
+++ b/src/mongo/db/repl/multicmd.h
@@ -39,46 +39,49 @@
namespace mongo {
namespace repl {
- struct Target {
- Target(std::string hostport) : toHost(hostport), ok(false) { }
- //Target() : ok(false) { }
- const std::string toHost;
- bool ok;
- BSONObj result;
- };
+struct Target {
+ Target(std::string hostport) : toHost(hostport), ok(false) {}
+ // Target() : ok(false) { }
+ const std::string toHost;
+ bool ok;
+ BSONObj result;
+};
- /** send a command to several servers in parallel. waits for all to complete before
- returning.
-
- in: Target::toHost
- out: Target::result and Target::ok
- */
- void multiCommand(BSONObj cmd, std::list<Target>& L);
+/** send a command to several servers in parallel. waits for all to complete before
+ returning.
- class _MultiCommandJob : public BackgroundJob {
- public:
- BSONObj& cmd;
- Target& d;
- _MultiCommandJob(BSONObj& _cmd, Target& _d) : cmd(_cmd), d(_d) { }
+ in: Target::toHost
+ out: Target::result and Target::ok
+*/
+void multiCommand(BSONObj cmd, std::list<Target>& L);
+
+class _MultiCommandJob : public BackgroundJob {
+public:
+ BSONObj& cmd;
+ Target& d;
+ _MultiCommandJob(BSONObj& _cmd, Target& _d) : cmd(_cmd), d(_d) {}
- private:
- std::string name() const { return "MultiCommandJob"; }
- void run();
- };
+private:
+ std::string name() const {
+ return "MultiCommandJob";
+ }
+ void run();
+};
- inline void multiCommand(BSONObj cmd, std::list<Target>& L) {
- std::list< boost::shared_ptr<BackgroundJob> > jobs;
+inline void multiCommand(BSONObj cmd, std::list<Target>& L) {
+ std::list<boost::shared_ptr<BackgroundJob>> jobs;
- for( std::list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
- Target& d = *i;
- _MultiCommandJob *j = new _MultiCommandJob(cmd, d);
- jobs.push_back( boost::shared_ptr<BackgroundJob>(j) );
- j->go();
- }
+ for (std::list<Target>::iterator i = L.begin(); i != L.end(); i++) {
+ Target& d = *i;
+ _MultiCommandJob* j = new _MultiCommandJob(cmd, d);
+ jobs.push_back(boost::shared_ptr<BackgroundJob>(j));
+ j->go();
+ }
- for( std::list< boost::shared_ptr<BackgroundJob> >::iterator i = jobs.begin(); i != jobs.end(); i++ ) {
- (*i)->wait();
- }
+ for (std::list<boost::shared_ptr<BackgroundJob>>::iterator i = jobs.begin(); i != jobs.end();
+ i++) {
+ (*i)->wait();
}
-} // namespace repl
-} // namespace mongo
+}
+} // namespace repl
+} // namespace mongo
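
multiCommand() above is a classic fan-out/join: start one background job per target, then wait for all of them before returning results through Target::ok and Target::result. The sketch below shows the same pattern with std::thread in place of BackgroundJob; runOne() is a fake stand-in for ScopedConn::runCommand, and the names are illustrative.

#include <iostream>
#include <list>
#include <string>
#include <thread>
#include <vector>

struct Target {
    std::string toHost;
    bool ok = false;
    std::string result;
};

void runOne(const std::string& cmd, Target& t) {  // stand-in for running the command remotely
    t.result = "ran '" + cmd + "' on " + t.toHost;
    t.ok = true;
}

void multiCommand(const std::string& cmd, std::list<Target>& targets) {
    std::vector<std::thread> jobs;
    for (Target& t : targets)  // start all jobs in parallel...
        jobs.emplace_back(runOne, cmd, std::ref(t));
    for (std::thread& j : jobs)  // ...then wait for every one before returning
        j.join();
}

int main() {
    std::list<Target> targets{{"h1:27017"}, {"h2:27017"}};
    multiCommand("replSetFresh", targets);
    for (const Target& t : targets)
        std::cout << t.ok << " " << t.result << '\n';
}
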
diff --git a/src/mongo/db/repl/network_interface_impl.cpp b/src/mongo/db/repl/network_interface_impl.cpp
index fa758c7606f..754714c4614 100644
--- a/src/mongo/db/repl/network_interface_impl.cpp
+++ b/src/mongo/db/repl/network_interface_impl.cpp
@@ -56,594 +56,573 @@ namespace repl {
namespace {
- const size_t kMinThreads = 1;
- const size_t kMaxThreads = 51; // Set to 1 + max repl set size, for heartbeat + wiggle room.
- const Seconds kMaxIdleThreadAge(30);
- const Seconds kMaxConnectionAge(30);
+const size_t kMinThreads = 1;
+const size_t kMaxThreads = 51; // Set to 1 + max repl set size, for heartbeat + wiggle room.
+const Seconds kMaxIdleThreadAge(30);
+const Seconds kMaxConnectionAge(30);
} // namespace
- /**
- * Private pool of connections used by the network interface.
- *
- * Methods of the pool may be called from any thread, as they are synchronized internally.
- */
- class NetworkInterfaceImpl::ConnectionPool {
- MONGO_DISALLOW_COPYING(ConnectionPool);
- public:
- struct ConnectionInfo;
+/**
+ * Private pool of connections used by the network interface.
+ *
+ * Methods of the pool may be called from any thread, as they are synchronized internally.
+ */
+class NetworkInterfaceImpl::ConnectionPool {
+ MONGO_DISALLOW_COPYING(ConnectionPool);
- typedef stdx::list<ConnectionInfo> ConnectionList;
- typedef unordered_map<HostAndPort, ConnectionList> HostConnectionMap;
+public:
+ struct ConnectionInfo;
- /**
- * RAII class for connections from the pool. To use the connection pool, instantiate one of
- * these with a pointer to the pool, the identity of the target node and the timeout for
- * network operations, use it like a pointer to a connection, and then call done() on
- * successful completion. Failure to call done() will lead to the connection being reaped
- * when the holder goes out of scope.
- */
- class ConnectionPtr {
- MONGO_DISALLOW_COPYING(ConnectionPtr);
- public:
- /**
- * Constructs a ConnectionPtr referring to a connection to "target" drawn from "pool",
- * with the network timeout set to "timeout".
- *
- * Throws DBExceptions if the connection cannot be established.
- */
- ConnectionPtr(ConnectionPool* pool,
- const HostAndPort& target,
- Date_t now,
- Milliseconds timeout) :
- _pool(pool), _connInfo(pool->acquireConnection(target, now, timeout)) {}
-
- /**
- * Destructor reaps the connection if it wasn't already returned to the pool by calling
- * done().
- */
- ~ConnectionPtr() { if (_pool) _pool->destroyConnection(_connInfo); }
-
- /**
- * Releases the connection back to the pool from which it was drawn.
- */
- void done(Date_t now) { _pool->releaseConnection(_connInfo, now); _pool = NULL; }
-
- DBClientConnection& operator*();
- DBClientConnection* operator->();
-
- private:
- ConnectionPool* _pool;
- const ConnectionList::iterator _connInfo;
- };
-
- ConnectionPool();
- ~ConnectionPool();
+ typedef stdx::list<ConnectionInfo> ConnectionList;
+ typedef unordered_map<HostAndPort, ConnectionList> HostConnectionMap;
- /**
- * Acquires a connection to "target" with the given "timeout", or throws a DBException.
- * Intended for use by ConnectionPtr.
- */
- ConnectionList::iterator acquireConnection(
- const HostAndPort& target, Date_t now, Milliseconds timeout);
+ /**
+ * RAII class for connections from the pool. To use the connection pool, instantiate one of
+ * these with a pointer to the pool, the identity of the target node and the timeout for
+ * network operations, use it like a pointer to a connection, and then call done() on
+ * successful completion. Failure to call done() will lead to the connection being reaped
+ * when the holder goes out of scope.
+ */
+ class ConnectionPtr {
+ MONGO_DISALLOW_COPYING(ConnectionPtr);
+ public:
/**
- * Releases a connection back into the pool.
- * Intended for use by ConnectionPtr.
- * Call this for connections that can safely be reused.
+ * Constructs a ConnectionPtr referring to a connection to "target" drawn from "pool",
+ * with the network timeout set to "timeout".
+ *
+ * Throws DBExceptions if the connection cannot be established.
*/
- void releaseConnection(ConnectionList::iterator iter, Date_t now);
+ ConnectionPtr(ConnectionPool* pool,
+ const HostAndPort& target,
+ Date_t now,
+ Milliseconds timeout)
+ : _pool(pool), _connInfo(pool->acquireConnection(target, now, timeout)) {}
/**
- * Destroys a connection previously acquired from the pool.
- * Intended for use by ConnectionPtr.
- * Call this for connections that cannot be reused.
+ * Destructor reaps the connection if it wasn't already returned to the pool by calling
+ * done().
*/
- void destroyConnection(ConnectionList::iterator);
+ ~ConnectionPtr() {
+ if (_pool)
+ _pool->destroyConnection(_connInfo);
+ }
/**
- * Closes all connections currently in use, to ensure that the network threads
- * terminate promptly during shutdown.
+ * Releases the connection back to the pool from which it was drawn.
*/
- void closeAllInUseConnections();
+ void done(Date_t now) {
+ _pool->releaseConnection(_connInfo, now);
+ _pool = NULL;
+ }
- /**
- * Reaps all connections in the pool that are too old as of "now".
- */
- void cleanUpOlderThan(Date_t now);
+ DBClientConnection& operator*();
+ DBClientConnection* operator->();
private:
- /**
- * Returns true if the given connection is young enough to keep in the pool.
- */
- bool shouldKeepConnection(Date_t now, const ConnectionInfo& connInfo) const;
-
- /**
- * Implementation of cleanUpOlderThan which assumes that _mutex is already held.
- */
- void cleanUpOlderThan_inlock(Date_t now);
+ ConnectionPool* _pool;
+ const ConnectionList::iterator _connInfo;
+ };
- /**
- * Reaps connections in "hostConns" that are too old or have been in the pool too long as of
- * "now". Expects _mutex to be held.
- */
- void cleanUpOlderThan_inlock(Date_t now, ConnectionList* hostConns);
+ ConnectionPool();
+ ~ConnectionPool();
- /**
- * Destroys the connection associated with "iter" and removes "iter" fron connList.
- */
- static void destroyConnection_inlock(ConnectionList* connList,
- ConnectionList::iterator iter);
+ /**
+ * Acquires a connection to "target" with the given "timeout", or throws a DBException.
+ * Intended for use by ConnectionPtr.
+ */
+ ConnectionList::iterator acquireConnection(const HostAndPort& target,
+ Date_t now,
+ Milliseconds timeout);
- // Mutex guarding members of the connection pool
- boost::mutex _mutex;
+ /**
+ * Releases a connection back into the pool.
+ * Intended for use by ConnectionPtr.
+ * Call this for connections that can safely be reused.
+ */
+ void releaseConnection(ConnectionList::iterator iter, Date_t now);
- // Map from HostAndPort to idle connections.
- HostConnectionMap _connections;
+ /**
+ * Destroys a connection previously acquired from the pool.
+ * Intended for use by ConnectionPtr.
+ * Call this for connections that cannot be reused.
+ */
+ void destroyConnection(ConnectionList::iterator);
- // List of non-idle connections.
- ConnectionList _inUseConnections;
- };
+ /**
+ * Closes all connections currently in use, to ensure that the network threads
+ * terminate promptly during shutdown.
+ */
+ void closeAllInUseConnections();
/**
- * Information about a connection in the pool.
+ * Reaps all connections in the pool that are too old as of "now".
*/
- struct NetworkInterfaceImpl::ConnectionPool::ConnectionInfo {
- ConnectionInfo() : conn(NULL), creationDate(0ULL) {}
- ConnectionInfo(DBClientConnection* theConn, Date_t date) :
- conn(theConn),
- creationDate(date) {}
+ void cleanUpOlderThan(Date_t now);
- // A connection in the pool.
- DBClientConnection* conn;
+private:
+ /**
+ * Returns true if the given connection is young enough to keep in the pool.
+ */
+ bool shouldKeepConnection(Date_t now, const ConnectionInfo& connInfo) const;
- // The date at which the connection was created.
- Date_t creationDate;
- };
+ /**
+ * Implementation of cleanUpOlderThan which assumes that _mutex is already held.
+ */
+ void cleanUpOlderThan_inlock(Date_t now);
- DBClientConnection& NetworkInterfaceImpl::ConnectionPool::ConnectionPtr::operator*() {
- return *_connInfo->conn;
- }
+ /**
+ * Reaps connections in "hostConns" that are too old or have been in the pool too long as of
+ * "now". Expects _mutex to be held.
+ */
+ void cleanUpOlderThan_inlock(Date_t now, ConnectionList* hostConns);
- DBClientConnection* NetworkInterfaceImpl::ConnectionPool::ConnectionPtr::operator->() {
- return _connInfo->conn;
- }
+ /**
+     * Destroys the connection associated with "iter" and removes "iter" from connList.
+ */
+ static void destroyConnection_inlock(ConnectionList* connList, ConnectionList::iterator iter);
- NetworkInterfaceImpl::ConnectionPool::ConnectionPool() {}
+ // Mutex guarding members of the connection pool
+ boost::mutex _mutex;
- NetworkInterfaceImpl::ConnectionPool::~ConnectionPool() {
- cleanUpOlderThan(Date_t(~0ULL));
- invariant(_connections.empty());
- invariant(_inUseConnections.empty());
- }
+ // Map from HostAndPort to idle connections.
+ HostConnectionMap _connections;
- void NetworkInterfaceImpl::ConnectionPool::cleanUpOlderThan(Date_t now) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- cleanUpOlderThan_inlock(now);
- }
+ // List of non-idle connections.
+ ConnectionList _inUseConnections;
+};
- void NetworkInterfaceImpl::ConnectionPool::cleanUpOlderThan_inlock(Date_t now) {
- HostConnectionMap::iterator hostConns = _connections.begin();
- while (hostConns != _connections.end()) {
- cleanUpOlderThan_inlock(now, &hostConns->second);
- if (hostConns->second.empty()) {
- _connections.erase(hostConns++);
- }
- else {
- ++hostConns;
- }
+/**
+ * Information about a connection in the pool.
+ */
+struct NetworkInterfaceImpl::ConnectionPool::ConnectionInfo {
+ ConnectionInfo() : conn(NULL), creationDate(0ULL) {}
+ ConnectionInfo(DBClientConnection* theConn, Date_t date) : conn(theConn), creationDate(date) {}
+
+ // A connection in the pool.
+ DBClientConnection* conn;
+
+ // The date at which the connection was created.
+ Date_t creationDate;
+};
+
+DBClientConnection& NetworkInterfaceImpl::ConnectionPool::ConnectionPtr::operator*() {
+ return *_connInfo->conn;
+}
+
+DBClientConnection* NetworkInterfaceImpl::ConnectionPool::ConnectionPtr::operator->() {
+ return _connInfo->conn;
+}
+
+NetworkInterfaceImpl::ConnectionPool::ConnectionPool() {}
+
+NetworkInterfaceImpl::ConnectionPool::~ConnectionPool() {
+ cleanUpOlderThan(Date_t(~0ULL));
+ invariant(_connections.empty());
+ invariant(_inUseConnections.empty());
+}
+
+void NetworkInterfaceImpl::ConnectionPool::cleanUpOlderThan(Date_t now) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ cleanUpOlderThan_inlock(now);
+}
+
+void NetworkInterfaceImpl::ConnectionPool::cleanUpOlderThan_inlock(Date_t now) {
+ HostConnectionMap::iterator hostConns = _connections.begin();
+ while (hostConns != _connections.end()) {
+ cleanUpOlderThan_inlock(now, &hostConns->second);
+ if (hostConns->second.empty()) {
+ _connections.erase(hostConns++);
+ } else {
+ ++hostConns;
}
}
-
- void NetworkInterfaceImpl::ConnectionPool::cleanUpOlderThan_inlock(
- Date_t now,
- ConnectionList* hostConns) {
- ConnectionList::iterator iter = hostConns->begin();
- while (iter != hostConns->end()) {
- if (shouldKeepConnection(now, *iter)) {
- ++iter;
- }
- else {
- destroyConnection_inlock(hostConns, iter++);
- }
+}
+
+void NetworkInterfaceImpl::ConnectionPool::cleanUpOlderThan_inlock(Date_t now,
+ ConnectionList* hostConns) {
+ ConnectionList::iterator iter = hostConns->begin();
+ while (iter != hostConns->end()) {
+ if (shouldKeepConnection(now, *iter)) {
+ ++iter;
+ } else {
+ destroyConnection_inlock(hostConns, iter++);
}
}
+}
- bool NetworkInterfaceImpl::ConnectionPool::shouldKeepConnection(
- const Date_t now,
- const ConnectionInfo& connInfo) const {
-
- const Date_t expirationDate =
- connInfo.creationDate + kMaxConnectionAge.total_milliseconds();
- if (expirationDate <= now) {
- return false;
- }
- return true;
+bool NetworkInterfaceImpl::ConnectionPool::shouldKeepConnection(
+ const Date_t now, const ConnectionInfo& connInfo) const {
+ const Date_t expirationDate = connInfo.creationDate + kMaxConnectionAge.total_milliseconds();
+ if (expirationDate <= now) {
+ return false;
}
-
- void NetworkInterfaceImpl::ConnectionPool::closeAllInUseConnections() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- for (ConnectionList::iterator iter = _inUseConnections.begin();
- iter != _inUseConnections.end();
- ++iter) {
-
- iter->conn->port().shutdown();
- }
+ return true;
+}
+
+void NetworkInterfaceImpl::ConnectionPool::closeAllInUseConnections() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ for (ConnectionList::iterator iter = _inUseConnections.begin(); iter != _inUseConnections.end();
+ ++iter) {
+ iter->conn->port().shutdown();
}
-
- NetworkInterfaceImpl::ConnectionPool::ConnectionList::iterator
- NetworkInterfaceImpl::ConnectionPool::acquireConnection(
- const HostAndPort& target,
- Date_t now,
- Milliseconds timeout) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- for (HostConnectionMap::iterator hostConns;
- ((hostConns = _connections.find(target)) != _connections.end());) {
-
- cleanUpOlderThan_inlock(now, &hostConns->second);
- if (hostConns->second.empty()) {
- break;
- }
- _inUseConnections.splice(_inUseConnections.begin(),
- hostConns->second,
- hostConns->second.begin());
- const ConnectionList::iterator candidate = _inUseConnections.begin();
- lk.unlock();
- try {
- if (candidate->conn->isStillConnected()) {
- // setSoTimeout takes a double representing the number of seconds for send and
- // receive timeouts. Thus, we must take total_milliseconds() and divide by
- // 1000.0 to get the number of seconds with a fractional part.
- candidate->conn->setSoTimeout(timeout.total_milliseconds() / 1000.0);
- return candidate;
- }
- }
- catch (...) {
- lk.lock();
- destroyConnection_inlock(&_inUseConnections, candidate);
- throw;
+}
+
+NetworkInterfaceImpl::ConnectionPool::ConnectionList::iterator
+NetworkInterfaceImpl::ConnectionPool::acquireConnection(const HostAndPort& target,
+ Date_t now,
+ Milliseconds timeout) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ for (HostConnectionMap::iterator hostConns;
+ ((hostConns = _connections.find(target)) != _connections.end());) {
+ cleanUpOlderThan_inlock(now, &hostConns->second);
+ if (hostConns->second.empty()) {
+ break;
+ }
+ _inUseConnections.splice(
+ _inUseConnections.begin(), hostConns->second, hostConns->second.begin());
+ const ConnectionList::iterator candidate = _inUseConnections.begin();
+ lk.unlock();
+ try {
+ if (candidate->conn->isStillConnected()) {
+ // setSoTimeout takes a double representing the number of seconds for send and
+ // receive timeouts. Thus, we must take total_milliseconds() and divide by
+ // 1000.0 to get the number of seconds with a fractional part.
+ candidate->conn->setSoTimeout(timeout.total_milliseconds() / 1000.0);
+ return candidate;
}
+ } catch (...) {
lk.lock();
destroyConnection_inlock(&_inUseConnections, candidate);
- }
-
- // No idle connection in the pool; make a new one.
- lk.unlock();
- std::auto_ptr<DBClientConnection> conn(new DBClientConnection);
- // setSoTimeout takes a double representing the number of seconds for send and receive
- // timeouts. Thus, we must take total_milliseconds() and divide by 1000.0 to get the number
- // of seconds with a fractional part.
- conn->setSoTimeout(timeout.total_milliseconds() / 1000.0);
- std::string errmsg;
- uassert(18915,
- str::stream() << "Failed attempt to connect to " << target.toString() << "; " <<
- errmsg,
- conn->connect(target, errmsg));
- conn->port().tag |= ScopedConn::keepOpen;
- if (getGlobalAuthorizationManager()->isAuthEnabled()) {
- uassert(ErrorCodes::AuthenticationFailed,
- "Missing credentials for authenticating as internal user",
- isInternalAuthSet());
- conn->auth(getInternalUserAuthParamsWithFallback());
+ throw;
}
lk.lock();
- return _inUseConnections.insert(_inUseConnections.begin(),
- ConnectionInfo(conn.release(), now));
+ destroyConnection_inlock(&_inUseConnections, candidate);
}
- void NetworkInterfaceImpl::ConnectionPool::releaseConnection(ConnectionList::iterator iter,
- const Date_t now) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (!shouldKeepConnection(now, *iter)) {
- destroyConnection_inlock(&_inUseConnections, iter);
- return;
- }
- ConnectionList& hostConns = _connections[iter->conn->getServerHostAndPort()];
- cleanUpOlderThan_inlock(now, &hostConns);
- hostConns.splice(hostConns.begin(), _inUseConnections, iter);
+ // No idle connection in the pool; make a new one.
+ lk.unlock();
+ std::auto_ptr<DBClientConnection> conn(new DBClientConnection);
+ // setSoTimeout takes a double representing the number of seconds for send and receive
+ // timeouts. Thus, we must take total_milliseconds() and divide by 1000.0 to get the number
+ // of seconds with a fractional part.
+ conn->setSoTimeout(timeout.total_milliseconds() / 1000.0);
+ std::string errmsg;
+ uassert(18915,
+ str::stream() << "Failed attempt to connect to " << target.toString() << "; " << errmsg,
+ conn->connect(target, errmsg));
+ conn->port().tag |= ScopedConn::keepOpen;
+ if (getGlobalAuthorizationManager()->isAuthEnabled()) {
+ uassert(ErrorCodes::AuthenticationFailed,
+ "Missing credentials for authenticating as internal user",
+ isInternalAuthSet());
+ conn->auth(getInternalUserAuthParamsWithFallback());
}
-
- void NetworkInterfaceImpl::ConnectionPool::destroyConnection(ConnectionList::iterator iter) {
- boost::lock_guard<boost::mutex> lk(_mutex);
+ lk.lock();
+ return _inUseConnections.insert(_inUseConnections.begin(), ConnectionInfo(conn.release(), now));
+}
+
+void NetworkInterfaceImpl::ConnectionPool::releaseConnection(ConnectionList::iterator iter,
+ const Date_t now) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (!shouldKeepConnection(now, *iter)) {
destroyConnection_inlock(&_inUseConnections, iter);
+ return;
}
-
- void NetworkInterfaceImpl::ConnectionPool::destroyConnection_inlock(
- ConnectionList* connList, ConnectionList::iterator iter) {
- delete iter->conn;
- connList->erase(iter);
- }
-
- NetworkInterfaceImpl::NetworkInterfaceImpl() :
- _numIdleThreads(0),
- _nextThreadId(0),
- _lastFullUtilizationDate(),
- _isExecutorRunnable(false),
- _inShutdown(false),
- _numActiveNetworkRequests(0) {
- _connPool.reset(new ConnectionPool());
- }
-
- NetworkInterfaceImpl::~NetworkInterfaceImpl() { }
-
- std::string NetworkInterfaceImpl::getDiagnosticString() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- str::stream output;
- output << "NetworkImpl";
- output << " threads:" << _threads.size();
- output << " inShutdown:" << _inShutdown;
- output << " active:" << _numActiveNetworkRequests;
- output << " pending:" << _pending.size();
- output << " execRunable:" << _isExecutorRunnable;
- return output;
-
+ ConnectionList& hostConns = _connections[iter->conn->getServerHostAndPort()];
+ cleanUpOlderThan_inlock(now, &hostConns);
+ hostConns.splice(hostConns.begin(), _inUseConnections, iter);
+}
+
+void NetworkInterfaceImpl::ConnectionPool::destroyConnection(ConnectionList::iterator iter) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ destroyConnection_inlock(&_inUseConnections, iter);
+}
+
+void NetworkInterfaceImpl::ConnectionPool::destroyConnection_inlock(ConnectionList* connList,
+ ConnectionList::iterator iter) {
+ delete iter->conn;
+ connList->erase(iter);
+}
+
+NetworkInterfaceImpl::NetworkInterfaceImpl()
+ : _numIdleThreads(0),
+ _nextThreadId(0),
+ _lastFullUtilizationDate(),
+ _isExecutorRunnable(false),
+ _inShutdown(false),
+ _numActiveNetworkRequests(0) {
+ _connPool.reset(new ConnectionPool());
+}
+
+NetworkInterfaceImpl::~NetworkInterfaceImpl() {}
+
+std::string NetworkInterfaceImpl::getDiagnosticString() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ str::stream output;
+ output << "NetworkImpl";
+ output << " threads:" << _threads.size();
+ output << " inShutdown:" << _inShutdown;
+ output << " active:" << _numActiveNetworkRequests;
+ output << " pending:" << _pending.size();
+ output << " execRunable:" << _isExecutorRunnable;
+ return output;
+}
+
+void NetworkInterfaceImpl::_startNewNetworkThread_inlock() {
+ if (_inShutdown) {
+ LOG(1) << "Not starting new replication networking thread while shutting down replication.";
+ return;
}
-
- void NetworkInterfaceImpl::_startNewNetworkThread_inlock() {
- if (_inShutdown) {
- LOG(1) <<
- "Not starting new replication networking thread while shutting down replication.";
- return;
- }
- if (_threads.size() >= kMaxThreads) {
- LOG(1) << "Not starting new replication networking thread because " << kMaxThreads <<
- " are already running; " << _numIdleThreads << " threads are idle and " <<
- _pending.size() << " network requests are waiting for a thread to serve them.";
- return;
- }
- const std::string threadName(str::stream() << "ReplExecNetThread-" << _nextThreadId++);
- try {
- _threads.push_back(
- boost::make_shared<boost::thread>(
- stdx::bind(&NetworkInterfaceImpl::_requestProcessorThreadBody,
- this,
- threadName)));
- ++_numIdleThreads;
- }
- catch (const std::exception& ex) {
- error() << "Failed to start " << threadName << "; " << _threads.size() <<
- " other network threads still running; caught exception: " << ex.what();
- }
+ if (_threads.size() >= kMaxThreads) {
+ LOG(1) << "Not starting new replication networking thread because " << kMaxThreads
+ << " are already running; " << _numIdleThreads << " threads are idle and "
+ << _pending.size() << " network requests are waiting for a thread to serve them.";
+ return;
}
-
- void NetworkInterfaceImpl::startup() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(!_inShutdown);
- if (!_threads.empty()) {
- return;
- }
- for (size_t i = 0; i < kMinThreads; ++i) {
- _startNewNetworkThread_inlock();
- }
+ const std::string threadName(str::stream() << "ReplExecNetThread-" << _nextThreadId++);
+ try {
+ _threads.push_back(boost::make_shared<boost::thread>(
+ stdx::bind(&NetworkInterfaceImpl::_requestProcessorThreadBody, this, threadName)));
+ ++_numIdleThreads;
+ } catch (const std::exception& ex) {
+ error() << "Failed to start " << threadName << "; " << _threads.size()
+ << " other network threads still running; caught exception: " << ex.what();
}
+}
- void NetworkInterfaceImpl::shutdown() {
- using std::swap;
- boost::unique_lock<boost::mutex> lk(_mutex);
- _inShutdown = true;
- _hasPending.notify_all();
- ThreadList threadsToJoin;
- swap(threadsToJoin, _threads);
- lk.unlock();
- _connPool->closeAllInUseConnections();
- std::for_each(threadsToJoin.begin(),
- threadsToJoin.end(),
- stdx::bind(&boost::thread::join, stdx::placeholders::_1));
+void NetworkInterfaceImpl::startup() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(!_inShutdown);
+ if (!_threads.empty()) {
+ return;
}
-
- void NetworkInterfaceImpl::signalWorkAvailable() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- _signalWorkAvailable_inlock();
+ for (size_t i = 0; i < kMinThreads; ++i) {
+ _startNewNetworkThread_inlock();
}
-
- void NetworkInterfaceImpl::_signalWorkAvailable_inlock() {
- if (!_isExecutorRunnable) {
- _isExecutorRunnable = true;
- _isExecutorRunnableCondition.notify_one();
- }
+}
+
+void NetworkInterfaceImpl::shutdown() {
+ using std::swap;
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ _inShutdown = true;
+ _hasPending.notify_all();
+ ThreadList threadsToJoin;
+ swap(threadsToJoin, _threads);
+ lk.unlock();
+ _connPool->closeAllInUseConnections();
+ std::for_each(threadsToJoin.begin(),
+ threadsToJoin.end(),
+ stdx::bind(&boost::thread::join, stdx::placeholders::_1));
+}
+
+void NetworkInterfaceImpl::signalWorkAvailable() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _signalWorkAvailable_inlock();
+}
+
+void NetworkInterfaceImpl::_signalWorkAvailable_inlock() {
+ if (!_isExecutorRunnable) {
+ _isExecutorRunnable = true;
+ _isExecutorRunnableCondition.notify_one();
}
+}
- void NetworkInterfaceImpl::waitForWork() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- while (!_isExecutorRunnable) {
- _isExecutorRunnableCondition.wait(lk);
- }
- _isExecutorRunnable = false;
+void NetworkInterfaceImpl::waitForWork() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ while (!_isExecutorRunnable) {
+ _isExecutorRunnableCondition.wait(lk);
}
-
- void NetworkInterfaceImpl::waitForWorkUntil(Date_t when) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- while (!_isExecutorRunnable) {
- const Milliseconds waitTime(when - now());
- if (waitTime <= Milliseconds(0)) {
- break;
- }
- _isExecutorRunnableCondition.timed_wait(lk, waitTime);
+ _isExecutorRunnable = false;
+}
+
+void NetworkInterfaceImpl::waitForWorkUntil(Date_t when) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ while (!_isExecutorRunnable) {
+ const Milliseconds waitTime(when - now());
+ if (waitTime <= Milliseconds(0)) {
+ break;
}
- _isExecutorRunnable = false;
- }
-
- void NetworkInterfaceImpl::_requestProcessorThreadBody(
- NetworkInterfaceImpl* net,
- const std::string& threadName) {
- setThreadName(threadName);
- LOG(1) << "thread starting";
- net->_consumeNetworkRequests();
-
- // At this point, another thread may have destroyed "net", if this thread chose to detach
- // itself and remove itself from net->_threads before releasing net->_mutex. Do not access
- // member variables of "net" from here, on.
- LOG(1) << "thread shutting down";
+ _isExecutorRunnableCondition.timed_wait(lk, waitTime);
}
-
- void NetworkInterfaceImpl::_consumeNetworkRequests() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- while (!_inShutdown) {
- if (_pending.empty()) {
- if (_threads.size() > kMinThreads) {
- const Date_t nowDate = now();
- const Date_t nextThreadRetirementDate =
- _lastFullUtilizationDate + kMaxIdleThreadAge.total_milliseconds();
- if (nowDate > nextThreadRetirementDate) {
- _lastFullUtilizationDate = nowDate;
- break;
- }
+ _isExecutorRunnable = false;
+}
+
+void NetworkInterfaceImpl::_requestProcessorThreadBody(NetworkInterfaceImpl* net,
+ const std::string& threadName) {
+ setThreadName(threadName);
+ LOG(1) << "thread starting";
+ net->_consumeNetworkRequests();
+
+ // At this point, another thread may have destroyed "net", if this thread chose to detach
+ // itself and remove itself from net->_threads before releasing net->_mutex. Do not access
+    // member variables of "net" from here on.
+ LOG(1) << "thread shutting down";
+}
+
+void NetworkInterfaceImpl::_consumeNetworkRequests() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ while (!_inShutdown) {
+ if (_pending.empty()) {
+ if (_threads.size() > kMinThreads) {
+ const Date_t nowDate = now();
+ const Date_t nextThreadRetirementDate =
+ _lastFullUtilizationDate + kMaxIdleThreadAge.total_milliseconds();
+ if (nowDate > nextThreadRetirementDate) {
+ _lastFullUtilizationDate = nowDate;
+ break;
}
- _hasPending.timed_wait(lk, kMaxIdleThreadAge);
- continue;
}
- CommandData todo = _pending.front();
- _pending.pop_front();
- ++_numActiveNetworkRequests;
- --_numIdleThreads;
- lk.unlock();
- ResponseStatus result = _runCommand(todo.request);
- LOG(2) << "Network status of sending " << todo.request.cmdObj.firstElementFieldName() <<
- " to " << todo.request.target << " was " << result.getStatus();
- todo.onFinish(result);
- lk.lock();
- --_numActiveNetworkRequests;
- ++_numIdleThreads;
- _signalWorkAvailable_inlock();
+ _hasPending.timed_wait(lk, kMaxIdleThreadAge);
+ continue;
}
+ CommandData todo = _pending.front();
+ _pending.pop_front();
+ ++_numActiveNetworkRequests;
--_numIdleThreads;
- if (_inShutdown) {
- return;
- }
- // This thread is ending because it was idle for too long.
- // Find self in _threads, remove self from _threads, detach self.
- for (size_t i = 0; i < _threads.size(); ++i) {
- if (_threads[i]->get_id() != boost::this_thread::get_id()) {
- continue;
- }
- _threads[i]->detach();
- _threads[i].swap(_threads.back());
- _threads.pop_back();
- return;
- }
- severe().stream() << "Could not find this thread, with id " <<
- boost::this_thread::get_id() << " in the replication networking thread pool";
- fassertFailedNoTrace(28581);
- }
-
- void NetworkInterfaceImpl::startCommand(
- const ReplicationExecutor::CallbackHandle& cbHandle,
- const ReplicationExecutor::RemoteCommandRequest& request,
- const RemoteCommandCompletionFn& onFinish) {
- LOG(2) << "Scheduling " << request.cmdObj.firstElementFieldName() << " to " <<
- request.target;
- boost::lock_guard<boost::mutex> lk(_mutex);
- _pending.push_back(CommandData());
- CommandData& cd = _pending.back();
- cd.cbHandle = cbHandle;
- cd.request = request;
- cd.onFinish = onFinish;
- if (_numIdleThreads < _pending.size()) {
- _startNewNetworkThread_inlock();
- }
- if (_numIdleThreads <= _pending.size()) {
- _lastFullUtilizationDate = curTimeMillis64();
- }
- _hasPending.notify_one();
- }
-
- void NetworkInterfaceImpl::cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- CommandDataList::iterator iter;
- for (iter = _pending.begin(); iter != _pending.end(); ++iter) {
- if (iter->cbHandle == cbHandle) {
- break;
- }
- }
- if (iter == _pending.end()) {
- return;
- }
- const RemoteCommandCompletionFn onFinish = iter->onFinish;
- LOG(2) << "Canceled sending " << iter->request.cmdObj.firstElementFieldName() << " to " <<
- iter->request.target;
- _pending.erase(iter);
lk.unlock();
- onFinish(ResponseStatus(ErrorCodes::CallbackCanceled, "Callback canceled"));
+ ResponseStatus result = _runCommand(todo.request);
+ LOG(2) << "Network status of sending " << todo.request.cmdObj.firstElementFieldName()
+ << " to " << todo.request.target << " was " << result.getStatus();
+ todo.onFinish(result);
lk.lock();
+ --_numActiveNetworkRequests;
+ ++_numIdleThreads;
_signalWorkAvailable_inlock();
}
-
- Date_t NetworkInterfaceImpl::now() {
- return curTimeMillis64();
+ --_numIdleThreads;
+ if (_inShutdown) {
+ return;
}
-
- namespace {
-
- /**
- * Calculates the timeout for a network operation expiring at "expDate", given
- * that it is now "nowDate".
- *
- * Returns 0 to indicate no expiration date, a number of milliseconds until "expDate", or
- * ErrorCodes::ExceededTimeLimit if "expDate" is not later than "nowDate".
- *
- * TODO: Change return type to StatusWith<Milliseconds> once Milliseconds supports default
- * construction or StatusWith<T> supports not constructing T when the result is a non-OK
- * status.
- */
- StatusWith<int64_t> getTimeoutMillis(const Date_t expDate, const Date_t nowDate) {
- if (expDate == ReplicationExecutor::kNoExpirationDate) {
- return StatusWith<int64_t>(0);
- }
- if (expDate <= nowDate) {
- return StatusWith<int64_t>(
- ErrorCodes::ExceededTimeLimit,
- str::stream() << "Went to run command, but it was too late. "
- "Expiration was set to " << dateToISOStringUTC(expDate));
- }
- return StatusWith<int64_t>(expDate.asInt64() - nowDate.asInt64());
+ // This thread is ending because it was idle for too long.
+ // Find self in _threads, remove self from _threads, detach self.
+ for (size_t i = 0; i < _threads.size(); ++i) {
+ if (_threads[i]->get_id() != boost::this_thread::get_id()) {
+ continue;
}
+ _threads[i]->detach();
+ _threads[i].swap(_threads.back());
+ _threads.pop_back();
+ return;
+ }
+ severe().stream() << "Could not find this thread, with id " << boost::this_thread::get_id()
+ << " in the replication networking thread pool";
+ fassertFailedNoTrace(28581);
+}
+
+void NetworkInterfaceImpl::startCommand(const ReplicationExecutor::CallbackHandle& cbHandle,
+ const ReplicationExecutor::RemoteCommandRequest& request,
+ const RemoteCommandCompletionFn& onFinish) {
+ LOG(2) << "Scheduling " << request.cmdObj.firstElementFieldName() << " to " << request.target;
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _pending.push_back(CommandData());
+ CommandData& cd = _pending.back();
+ cd.cbHandle = cbHandle;
+ cd.request = request;
+ cd.onFinish = onFinish;
+ if (_numIdleThreads < _pending.size()) {
+ _startNewNetworkThread_inlock();
+ }
+ if (_numIdleThreads <= _pending.size()) {
+ _lastFullUtilizationDate = curTimeMillis64();
+ }
+ _hasPending.notify_one();
+}
+
+void NetworkInterfaceImpl::cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ CommandDataList::iterator iter;
+ for (iter = _pending.begin(); iter != _pending.end(); ++iter) {
+ if (iter->cbHandle == cbHandle) {
+ break;
+ }
+ }
+ if (iter == _pending.end()) {
+ return;
+ }
+ const RemoteCommandCompletionFn onFinish = iter->onFinish;
+ LOG(2) << "Canceled sending " << iter->request.cmdObj.firstElementFieldName() << " to "
+ << iter->request.target;
+ _pending.erase(iter);
+ lk.unlock();
+ onFinish(ResponseStatus(ErrorCodes::CallbackCanceled, "Callback canceled"));
+ lk.lock();
+ _signalWorkAvailable_inlock();
+}
+
+Date_t NetworkInterfaceImpl::now() {
+ return curTimeMillis64();
+}
- } //namespace
+namespace {
- ResponseStatus NetworkInterfaceImpl::_runCommand(
- const ReplicationExecutor::RemoteCommandRequest& request) {
+/**
+ * Calculates the timeout for a network operation expiring at "expDate", given
+ * that it is now "nowDate".
+ *
+ * Returns 0 to indicate no expiration date, a number of milliseconds until "expDate", or
+ * ErrorCodes::ExceededTimeLimit if "expDate" is not later than "nowDate".
+ *
+ * TODO: Change return type to StatusWith<Milliseconds> once Milliseconds supports default
+ * construction or StatusWith<T> supports not constructing T when the result is a non-OK
+ * status.
+ */
+StatusWith<int64_t> getTimeoutMillis(const Date_t expDate, const Date_t nowDate) {
+ if (expDate == ReplicationExecutor::kNoExpirationDate) {
+ return StatusWith<int64_t>(0);
+ }
+ if (expDate <= nowDate) {
+ return StatusWith<int64_t>(ErrorCodes::ExceededTimeLimit,
+ str::stream() << "Went to run command, but it was too late. "
+ "Expiration was set to "
+ << dateToISOStringUTC(expDate));
+ }
+ return StatusWith<int64_t>(expDate.asInt64() - nowDate.asInt64());
+}
- try {
- BSONObj output;
+} // namespace
- const Date_t requestStartDate = now();
- StatusWith<int64_t> timeoutMillis = getTimeoutMillis(request.expirationDate,
- requestStartDate);
- if (!timeoutMillis.isOK()) {
- return ResponseStatus(timeoutMillis.getStatus());
- }
+ResponseStatus NetworkInterfaceImpl::_runCommand(
+ const ReplicationExecutor::RemoteCommandRequest& request) {
+ try {
+ BSONObj output;
- ConnectionPool::ConnectionPtr conn(_connPool.get(),
- request.target,
- requestStartDate,
- Milliseconds(timeoutMillis.getValue()));
- conn->runCommand(request.dbname, request.cmdObj, output);
- const Date_t requestFinishDate = now();
- conn.done(requestFinishDate);
- return ResponseStatus(Response(output,
- Milliseconds(requestFinishDate - requestStartDate)));
+ const Date_t requestStartDate = now();
+ StatusWith<int64_t> timeoutMillis =
+ getTimeoutMillis(request.expirationDate, requestStartDate);
+ if (!timeoutMillis.isOK()) {
+ return ResponseStatus(timeoutMillis.getStatus());
}
- catch (const DBException& ex) {
- return ResponseStatus(ex.toStatus());
- }
- catch (const std::exception& ex) {
- return ResponseStatus(
- ErrorCodes::UnknownError,
- mongoutils::str::stream() <<
- "Sending command " << request.cmdObj << " on database " << request.dbname <<
- " over network to " << request.target.toString() << " received exception " <<
- ex.what());
- }
- }
- void NetworkInterfaceImpl::runCallbackWithGlobalExclusiveLock(
- const stdx::function<void (OperationContext*)>& callback) {
- Client::initThreadIfNotAlready();
- OperationContextImpl txn;
- ScopedTransaction transaction(&txn, MODE_X);
- Lock::GlobalWrite lk(txn.lockState());
- callback(&txn);
+ ConnectionPool::ConnectionPtr conn(_connPool.get(),
+ request.target,
+ requestStartDate,
+ Milliseconds(timeoutMillis.getValue()));
+ conn->runCommand(request.dbname, request.cmdObj, output);
+ const Date_t requestFinishDate = now();
+ conn.done(requestFinishDate);
+ return ResponseStatus(Response(output, Milliseconds(requestFinishDate - requestStartDate)));
+ } catch (const DBException& ex) {
+ return ResponseStatus(ex.toStatus());
+ } catch (const std::exception& ex) {
+ return ResponseStatus(ErrorCodes::UnknownError,
+ mongoutils::str::stream()
+ << "Sending command " << request.cmdObj << " on database "
+ << request.dbname << " over network to "
+ << request.target.toString() << " received exception "
+ << ex.what());
}
+}
+
+void NetworkInterfaceImpl::runCallbackWithGlobalExclusiveLock(
+ const stdx::function<void(OperationContext*)>& callback) {
+ Client::initThreadIfNotAlready();
+ OperationContextImpl txn;
+ ScopedTransaction transaction(&txn, MODE_X);
+ Lock::GlobalWrite lk(txn.lockState());
+ callback(&txn);
+}
} // namespace repl
-} // namespace mongo
+} // namespace mongo
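
The getTimeoutMillis() helper above reduces to three cases: no expiration, already
expired, and a positive remaining-time count. A minimal standalone sketch of that
math, with plain int64 millisecond timestamps standing in for Date_t and -1
standing in for the ErrorCodes::ExceededTimeLimit error path (the sentinel value
used for kNoExpirationDate is assumed here, not taken from the tree):

    #include <cstdint>
    #include <iostream>

    const int64_t kNoExpirationDate = 0;  // assumed sentinel, for illustration only

    int64_t timeoutMillis(int64_t expDate, int64_t nowDate) {
        if (expDate == kNoExpirationDate)
            return 0;   // 0 tells the connection pool "no timeout"
        if (expDate <= nowDate)
            return -1;  // the real helper returns ErrorCodes::ExceededTimeLimit here
        return expDate - nowDate;
    }

    int main() {
        std::cout << timeoutMillis(kNoExpirationDate, 5000) << "\n";  // 0: never expires
        std::cout << timeoutMillis(6500, 5000) << "\n";               // 1500 ms remaining
        std::cout << timeoutMillis(4000, 5000) << "\n";               // -1: already expired
    }
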
diff --git a/src/mongo/db/repl/network_interface_impl.h b/src/mongo/db/repl/network_interface_impl.h
index 29011f32762..41d9a0003e7 100644
--- a/src/mongo/db/repl/network_interface_impl.h
+++ b/src/mongo/db/repl/network_interface_impl.h
@@ -42,132 +42,131 @@
namespace mongo {
namespace repl {
+/**
+ * Implementation of the network interface used by the ReplicationExecutor inside mongod.
+ *
+ * This implementation manages a dynamically sized group of worker threads for performing
+ * network operations. The minimum and maximum number of threads is set at compile time, and
+ * the exact number of threads is adjusted dynamically, using the following two rules.
+ *
+ * 1.) If the number of worker threads is less than the maximum, there are no idle worker
+ * threads, and the client enqueues a new network operation via startCommand(), the network
+ * interface spins up a new worker thread. This decision is made on the assumption that
+ * spinning up a new thread takes less time than the round trip for processing a remote
+ * command, and so this minimizes wait time.
+ *
+ * 2.) If the number of worker threads has exceeded the peak number of scheduled outstanding
+ * network commands continuously for a period of time (kMaxIdleThreadAge), one thread is
+ * retired from the pool and the monitoring of idle threads is reset. This means that at most
+ * one thread retires every kMaxIdleThreadAge units of time. The value of kMaxIdleThreadAge is
+ * set to be much longer than the expected interval between new requests, averaging out
+ * short-duration periods of idleness, such as those between heartbeats.
+ *
+ * The implementation also manages a pool of network connections to recently contacted remote
+ * nodes. The size of this pool is not bounded, but connections are retired unconditionally
+ * after they have been connected for a certain maximum period.
+ */
+class NetworkInterfaceImpl : public ReplicationExecutor::NetworkInterface {
+public:
+ explicit NetworkInterfaceImpl();
+ virtual ~NetworkInterfaceImpl();
+ virtual std::string getDiagnosticString();
+ virtual void startup();
+ virtual void shutdown();
+ virtual void waitForWork();
+ virtual void waitForWorkUntil(Date_t when);
+ virtual void signalWorkAvailable();
+ virtual Date_t now();
+ virtual void startCommand(const ReplicationExecutor::CallbackHandle& cbHandle,
+ const ReplicationExecutor::RemoteCommandRequest& request,
+ const RemoteCommandCompletionFn& onFinish);
+ virtual void cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle);
+ virtual void runCallbackWithGlobalExclusiveLock(
+ const stdx::function<void(OperationContext*)>& callback);
+
+ std::string getNextCallbackWithGlobalLockThreadName();
+
+private:
+ class ConnectionPool;
+
/**
- * Implementation of the network interface used by the ReplicationExecutor inside mongod.
- *
- * This implementation manages a dynamically sized group of worker threads for performing
- * network operations. The minimum and maximum number of threads is set at compile time, and
- * the exact number of threads is adjusted dynamically, using the following two rules.
- *
- * 1.) If the number of worker threads is less than the maximum, there are no idle worker
- * threads, and the client enqueues a new network operation via startCommand(), the network
- * interface spins up a new worker thread. This decision is made on the assumption that
- * spinning up a new thread takes less time than the round trip for processing a remote
- * command, and so this minimizes wait time.
- *
- * 2.) If the number of worker threads has exceeded the peak number of scheduled outstanding
- * network commands continuously for a period of time (kMaxIdleThreadAge), one thread is
- * retired from the pool and the monitoring of idle threads is reset. This means that at most
- * one thread retires every kMaxIdleThreadAge units of time. The value of kMaxIdleThreadAge is
- * set to be much longer than the expected interval between new requests, averaging out
- * short-duration periods of idleness, such as those between heartbeats.
- *
- * The implementation also manages a pool of network connections to recently contacted remote
- * nodes. The size of this pool is not bounded, but connections are retired unconditionally
- * after they have been connected for a certain maximum period.
+ * Information describing an in-flight command.
*/
- class NetworkInterfaceImpl : public ReplicationExecutor::NetworkInterface {
- public:
- explicit NetworkInterfaceImpl();
- virtual ~NetworkInterfaceImpl();
- virtual std::string getDiagnosticString();
- virtual void startup();
- virtual void shutdown();
- virtual void waitForWork();
- virtual void waitForWorkUntil(Date_t when);
- virtual void signalWorkAvailable();
- virtual Date_t now();
- virtual void startCommand(
- const ReplicationExecutor::CallbackHandle& cbHandle,
- const ReplicationExecutor::RemoteCommandRequest& request,
- const RemoteCommandCompletionFn& onFinish);
- virtual void cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle);
- virtual void runCallbackWithGlobalExclusiveLock(
- const stdx::function<void (OperationContext*)>& callback);
-
- std::string getNextCallbackWithGlobalLockThreadName();
-
- private:
- class ConnectionPool;
-
- /**
- * Information describing an in-flight command.
- */
- struct CommandData {
- ReplicationExecutor::CallbackHandle cbHandle;
- ReplicationExecutor::RemoteCommandRequest request;
- RemoteCommandCompletionFn onFinish;
- };
- typedef stdx::list<CommandData> CommandDataList;
- typedef std::vector<boost::shared_ptr<boost::thread> > ThreadList;
-
- /**
- * Thread body for threads that synchronously perform network requests from
- * the _pending list.
- */
- static void _requestProcessorThreadBody(NetworkInterfaceImpl* net,
- const std::string& threadName);
-
- /**
- * Run loop that iteratively consumes network requests in a request processor thread.
- */
- void _consumeNetworkRequests();
-
- /**
- * Synchronously invokes the command described by "request".
- */
- ResponseStatus _runCommand(const ReplicationExecutor::RemoteCommandRequest& request);
-
- /**
- * Notifies the network threads that there is work available.
- */
- void _signalWorkAvailable_inlock();
-
- /**
- * Starts a new network thread.
- */
- void _startNewNetworkThread_inlock();
-
- // Mutex guarding the state of the network interface, except for the pool pointed to by
- // _connPool.
- boost::mutex _mutex;
-
- // Condition signaled to indicate that there is work in the _pending queue.
- boost::condition_variable _hasPending;
-
- // Queue of yet-to-be-executed network operations.
- CommandDataList _pending;
-
- // List of threads serving as the worker pool.
- ThreadList _threads;
-
- // Count of idle threads.
- size_t _numIdleThreads;
-
- // Id counter for assigning thread names
- size_t _nextThreadId;
-
- // The last time that _pending.size() + _numActiveNetworkRequests grew to be at least
- // _threads.size().
- Date_t _lastFullUtilizationDate;
-
- // Condition signaled to indicate that the executor, blocked in waitForWorkUntil or
- // waitForWork, should wake up.
- boost::condition_variable _isExecutorRunnableCondition;
-
- // Flag indicating whether or not the executor associated with this interface is runnable.
- bool _isExecutorRunnable;
-
- // Flag indicating when this interface is being shut down (because shutdown() has executed).
- bool _inShutdown;
-
- // Pool of connections to remote nodes, used by the worker threads to execute network
- // requests.
- boost::scoped_ptr<ConnectionPool> _connPool; // (R)
-
- // Number of active network requests
- size_t _numActiveNetworkRequests;
+ struct CommandData {
+ ReplicationExecutor::CallbackHandle cbHandle;
+ ReplicationExecutor::RemoteCommandRequest request;
+ RemoteCommandCompletionFn onFinish;
};
+ typedef stdx::list<CommandData> CommandDataList;
+ typedef std::vector<boost::shared_ptr<boost::thread>> ThreadList;
+
+ /**
+ * Thread body for threads that synchronously perform network requests from
+ * the _pending list.
+ */
+ static void _requestProcessorThreadBody(NetworkInterfaceImpl* net,
+ const std::string& threadName);
+
+ /**
+ * Run loop that iteratively consumes network requests in a request processor thread.
+ */
+ void _consumeNetworkRequests();
+
+ /**
+ * Synchronously invokes the command described by "request".
+ */
+ ResponseStatus _runCommand(const ReplicationExecutor::RemoteCommandRequest& request);
+
+ /**
+ * Notifies the network threads that there is work available.
+ */
+ void _signalWorkAvailable_inlock();
+
+ /**
+ * Starts a new network thread.
+ */
+ void _startNewNetworkThread_inlock();
+
+ // Mutex guarding the state of the network interface, except for the pool pointed to by
+ // _connPool.
+ boost::mutex _mutex;
+
+ // Condition signaled to indicate that there is work in the _pending queue.
+ boost::condition_variable _hasPending;
+
+ // Queue of yet-to-be-executed network operations.
+ CommandDataList _pending;
+
+ // List of threads serving as the worker pool.
+ ThreadList _threads;
+
+ // Count of idle threads.
+ size_t _numIdleThreads;
+
+ // Id counter for assigning thread names
+ size_t _nextThreadId;
+
+ // The last time that _pending.size() + _numActiveNetworkRequests grew to be at least
+ // _threads.size().
+ Date_t _lastFullUtilizationDate;
+
+ // Condition signaled to indicate that the executor, blocked in waitForWorkUntil or
+ // waitForWork, should wake up.
+ boost::condition_variable _isExecutorRunnableCondition;
+
+ // Flag indicating whether or not the executor associated with this interface is runnable.
+ bool _isExecutorRunnable;
+
+ // Flag indicating when this interface is being shut down (because shutdown() has executed).
+ bool _inShutdown;
+
+ // Pool of connections to remote nodes, used by the worker threads to execute network
+ // requests.
+ boost::scoped_ptr<ConnectionPool> _connPool; // (R)
+
+ // Number of active network requests
+ size_t _numActiveNetworkRequests;
+};
} // namespace repl
-} // namespace mongo
+} // namespace mongo
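
The two sizing rules in the NetworkInterfaceImpl class comment boil down to a pair
of comparisons against the pending-queue depth and the last full-utilization time.
A sketch of those predicates, assuming placeholder values for the compile-time
constants (the real kMaxThreads and kMaxIdleThreadAge live in the .cpp and are not
shown in this diff):

    #include <cstddef>
    #include <cstdint>

    const size_t kMaxThreads = 50;                      // assumed cap
    const int64_t kMaxIdleThreadAgeMillis = 30 * 1000;  // assumed retirement age

    // Rule 1: spin up a worker when queued operations outnumber idle workers.
    bool shouldStartWorker(size_t numThreads, size_t numIdleThreads, size_t numPending) {
        return numThreads < kMaxThreads && numIdleThreads < numPending;
    }

    // Rule 2: retire one worker once the pool has gone without full utilization
    // for longer than the maximum idle age.
    bool shouldRetireWorker(int64_t nowMillis, int64_t lastFullUtilizationMillis) {
        return nowMillis - lastFullUtilizationMillis > kMaxIdleThreadAgeMillis;
    }
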
diff --git a/src/mongo/db/repl/network_interface_mock.cpp b/src/mongo/db/repl/network_interface_mock.cpp
index f0fd79fe667..a996340af50 100644
--- a/src/mongo/db/repl/network_interface_mock.cpp
+++ b/src/mongo/db/repl/network_interface_mock.cpp
@@ -38,374 +38,357 @@
namespace mongo {
namespace repl {
- NetworkInterfaceMock::NetworkInterfaceMock()
- : _waitingToRunMask(0),
- _currentlyRunning(kNoThread),
- _hasStarted(false),
- _inShutdown(false),
- _executorNextWakeupDate(~0ULL) {
-
- StatusWith<Date_t> initialNow = dateFromISOString("2014-08-01T00:00:00Z");
- fassert(18653, initialNow.getStatus());
- _now = initialNow.getValue();
- }
-
- NetworkInterfaceMock::~NetworkInterfaceMock() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(!_hasStarted || _inShutdown);
- invariant(_scheduled.empty());
- invariant(_blackHoled.empty());
- }
-
- std::string NetworkInterfaceMock::getDiagnosticString() {
- // TODO something better.
- return "NetworkInterfaceMock diagnostics here";
- }
-
- Date_t NetworkInterfaceMock::now() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- return _now_inlock();
- }
-
- void NetworkInterfaceMock::runCallbackWithGlobalExclusiveLock(
- const stdx::function<void (OperationContext* txn)>& callback) {
-
- OperationContextNoop txn;
- callback(&txn);
- }
-
- void NetworkInterfaceMock::startCommand(
- const ReplicationExecutor::CallbackHandle& cbHandle,
- const ReplicationExecutor::RemoteCommandRequest& request,
- const RemoteCommandCompletionFn& onFinish) {
-
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(!_inShutdown);
- const Date_t now = _now_inlock();
- NetworkOperationIterator insertBefore = _unscheduled.begin();
- while ((insertBefore != _unscheduled.end()) &&
- (insertBefore->getNextConsiderationDate() <= now)) {
-
- ++insertBefore;
- }
- _unscheduled.insert(insertBefore, NetworkOperation(cbHandle, request, now, onFinish));
- }
-
- static bool findAndCancelIf(
- const stdx::function<bool (const NetworkInterfaceMock::NetworkOperation&)>& matchFn,
- NetworkInterfaceMock::NetworkOperationList* other,
- NetworkInterfaceMock::NetworkOperationList* scheduled,
- const Date_t now) {
- const NetworkInterfaceMock::NetworkOperationIterator noi =
- std::find_if(other->begin(), other->end(), matchFn);
- if (noi == other->end()) {
- return false;
- }
- scheduled->splice(scheduled->begin(), *other, noi);
- noi->setResponse(now, ResponseStatus(ErrorCodes::CallbackCanceled,
- "Network operation canceled"));
- return true;
- }
-
- void NetworkInterfaceMock::cancelCommand(
- const ReplicationExecutor::CallbackHandle& cbHandle) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(!_inShutdown);
- stdx::function<bool (const NetworkOperation&)> matchesHandle = stdx::bind(
- &NetworkOperation::isForCallback,
- stdx::placeholders::_1,
- cbHandle);
- const Date_t now = _now_inlock();
- if (findAndCancelIf(matchesHandle, &_unscheduled, &_scheduled, now)) {
- return;
- }
- if (findAndCancelIf(matchesHandle, &_blackHoled, &_scheduled, now)) {
- return;
- }
- if (findAndCancelIf(matchesHandle, &_scheduled, &_scheduled, now)) {
- return;
- }
- // No not-in-progress network command matched cbHandle. Oh, well.
- }
-
- void NetworkInterfaceMock::startup() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(!_hasStarted);
- _hasStarted = true;
- _inShutdown = false;
- invariant(_currentlyRunning == kNoThread);
- _currentlyRunning = kExecutorThread;
- }
-
- void NetworkInterfaceMock::shutdown() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_hasStarted);
- invariant(!_inShutdown);
- _inShutdown = true;
- NetworkOperationList todo;
- todo.splice(todo.end(), _scheduled);
- todo.splice(todo.end(), _unscheduled);
- todo.splice(todo.end(), _processing);
- todo.splice(todo.end(), _blackHoled);
-
- const Date_t now = _now_inlock();
- _waitingToRunMask |= kExecutorThread; // Prevents network thread from scheduling.
- lk.unlock();
- for (NetworkOperationIterator iter = todo.begin(); iter != todo.end(); ++iter) {
- iter->setResponse(now, ResponseStatus(ErrorCodes::ShutdownInProgress,
- "Shutting down mock network"));
- iter->finishResponse();
- }
- lk.lock();
- invariant(_currentlyRunning == kExecutorThread);
- _currentlyRunning = kNoThread;
- _waitingToRunMask = kNetworkThread;
- _shouldWakeNetworkCondition.notify_one();
- }
-
- void NetworkInterfaceMock::enterNetwork() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- while (!_isNetworkThreadRunnable_inlock()) {
- _shouldWakeNetworkCondition.wait(lk);
- }
- _currentlyRunning = kNetworkThread;
- _waitingToRunMask &= ~kNetworkThread;
- }
-
- void NetworkInterfaceMock::exitNetwork() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (_currentlyRunning != kNetworkThread) {
- return;
- }
- _currentlyRunning = kNoThread;
- if (_isExecutorThreadRunnable_inlock()) {
- _shouldWakeExecutorCondition.notify_one();
- }
- _waitingToRunMask |= kNetworkThread;
+NetworkInterfaceMock::NetworkInterfaceMock()
+ : _waitingToRunMask(0),
+ _currentlyRunning(kNoThread),
+ _hasStarted(false),
+ _inShutdown(false),
+ _executorNextWakeupDate(~0ULL) {
+ StatusWith<Date_t> initialNow = dateFromISOString("2014-08-01T00:00:00Z");
+ fassert(18653, initialNow.getStatus());
+ _now = initialNow.getValue();
+}
+
+NetworkInterfaceMock::~NetworkInterfaceMock() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(!_hasStarted || _inShutdown);
+ invariant(_scheduled.empty());
+ invariant(_blackHoled.empty());
+}
+
+std::string NetworkInterfaceMock::getDiagnosticString() {
+ // TODO something better.
+ return "NetworkInterfaceMock diagnostics here";
+}
+
+Date_t NetworkInterfaceMock::now() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return _now_inlock();
+}
+
+void NetworkInterfaceMock::runCallbackWithGlobalExclusiveLock(
+ const stdx::function<void(OperationContext* txn)>& callback) {
+ OperationContextNoop txn;
+ callback(&txn);
+}
+
+void NetworkInterfaceMock::startCommand(const ReplicationExecutor::CallbackHandle& cbHandle,
+ const ReplicationExecutor::RemoteCommandRequest& request,
+ const RemoteCommandCompletionFn& onFinish) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(!_inShutdown);
+ const Date_t now = _now_inlock();
+ NetworkOperationIterator insertBefore = _unscheduled.begin();
+ while ((insertBefore != _unscheduled.end()) &&
+ (insertBefore->getNextConsiderationDate() <= now)) {
+ ++insertBefore;
+ }
+ _unscheduled.insert(insertBefore, NetworkOperation(cbHandle, request, now, onFinish));
+}
+
+static bool findAndCancelIf(
+ const stdx::function<bool(const NetworkInterfaceMock::NetworkOperation&)>& matchFn,
+ NetworkInterfaceMock::NetworkOperationList* other,
+ NetworkInterfaceMock::NetworkOperationList* scheduled,
+ const Date_t now) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi =
+ std::find_if(other->begin(), other->end(), matchFn);
+ if (noi == other->end()) {
+ return false;
+ }
+ scheduled->splice(scheduled->begin(), *other, noi);
+ noi->setResponse(now,
+ ResponseStatus(ErrorCodes::CallbackCanceled, "Network operation canceled"));
+ return true;
+}
+
+void NetworkInterfaceMock::cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(!_inShutdown);
+ stdx::function<bool(const NetworkOperation&)> matchesHandle =
+ stdx::bind(&NetworkOperation::isForCallback, stdx::placeholders::_1, cbHandle);
+ const Date_t now = _now_inlock();
+ if (findAndCancelIf(matchesHandle, &_unscheduled, &_scheduled, now)) {
+ return;
+ }
+ if (findAndCancelIf(matchesHandle, &_blackHoled, &_scheduled, now)) {
+ return;
+ }
+ if (findAndCancelIf(matchesHandle, &_scheduled, &_scheduled, now)) {
+ return;
+ }
+ // No not-in-progress network command matched cbHandle. Oh, well.
+}
+
+void NetworkInterfaceMock::startup() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(!_hasStarted);
+ _hasStarted = true;
+ _inShutdown = false;
+ invariant(_currentlyRunning == kNoThread);
+ _currentlyRunning = kExecutorThread;
+}
+
+void NetworkInterfaceMock::shutdown() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_hasStarted);
+ invariant(!_inShutdown);
+ _inShutdown = true;
+ NetworkOperationList todo;
+ todo.splice(todo.end(), _scheduled);
+ todo.splice(todo.end(), _unscheduled);
+ todo.splice(todo.end(), _processing);
+ todo.splice(todo.end(), _blackHoled);
+
+ const Date_t now = _now_inlock();
+ _waitingToRunMask |= kExecutorThread; // Prevents network thread from scheduling.
+ lk.unlock();
+ for (NetworkOperationIterator iter = todo.begin(); iter != todo.end(); ++iter) {
+ iter->setResponse(
+ now, ResponseStatus(ErrorCodes::ShutdownInProgress, "Shutting down mock network"));
+ iter->finishResponse();
+ }
+ lk.lock();
+ invariant(_currentlyRunning == kExecutorThread);
+ _currentlyRunning = kNoThread;
+ _waitingToRunMask = kNetworkThread;
+ _shouldWakeNetworkCondition.notify_one();
+}
+
+void NetworkInterfaceMock::enterNetwork() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ while (!_isNetworkThreadRunnable_inlock()) {
+ _shouldWakeNetworkCondition.wait(lk);
+ }
+ _currentlyRunning = kNetworkThread;
+ _waitingToRunMask &= ~kNetworkThread;
+}
+
+void NetworkInterfaceMock::exitNetwork() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (_currentlyRunning != kNetworkThread) {
+ return;
+ }
+ _currentlyRunning = kNoThread;
+ if (_isExecutorThreadRunnable_inlock()) {
+ _shouldWakeExecutorCondition.notify_one();
}
+ _waitingToRunMask |= kNetworkThread;
+}
- bool NetworkInterfaceMock::hasReadyRequests() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- return _hasReadyRequests_inlock();
- }
+bool NetworkInterfaceMock::hasReadyRequests() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ return _hasReadyRequests_inlock();
+}
- bool NetworkInterfaceMock::_hasReadyRequests_inlock() {
- if (_unscheduled.empty())
- return false;
- if (_unscheduled.front().getNextConsiderationDate() > _now_inlock()) {
- return false;
- }
- return true;
+bool NetworkInterfaceMock::_hasReadyRequests_inlock() {
+ if (_unscheduled.empty())
+ return false;
+ if (_unscheduled.front().getNextConsiderationDate() > _now_inlock()) {
+ return false;
}
+ return true;
+}
- NetworkInterfaceMock::NetworkOperationIterator NetworkInterfaceMock::getNextReadyRequest() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- while (!_hasReadyRequests_inlock()) {
- _waitingToRunMask |= kExecutorThread;
- _runReadyNetworkOperations_inlock(&lk);
- }
- invariant(_hasReadyRequests_inlock());
- _processing.splice(_processing.begin(), _unscheduled, _unscheduled.begin());
- return _processing.begin();
+NetworkInterfaceMock::NetworkOperationIterator NetworkInterfaceMock::getNextReadyRequest() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ while (!_hasReadyRequests_inlock()) {
+ _waitingToRunMask |= kExecutorThread;
+ _runReadyNetworkOperations_inlock(&lk);
}
-
- void NetworkInterfaceMock::scheduleResponse(
- NetworkOperationIterator noi,
- Date_t when,
- const ResponseStatus& response) {
-
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- NetworkOperationIterator insertBefore = _scheduled.begin();
- while ((insertBefore != _scheduled.end()) && (insertBefore->getResponseDate() <= when)) {
- ++insertBefore;
+ invariant(_hasReadyRequests_inlock());
+ _processing.splice(_processing.begin(), _unscheduled, _unscheduled.begin());
+ return _processing.begin();
+}
+
+void NetworkInterfaceMock::scheduleResponse(NetworkOperationIterator noi,
+ Date_t when,
+ const ResponseStatus& response) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ NetworkOperationIterator insertBefore = _scheduled.begin();
+ while ((insertBefore != _scheduled.end()) && (insertBefore->getResponseDate() <= when)) {
+ ++insertBefore;
+ }
+ noi->setResponse(when, response);
+ _scheduled.splice(insertBefore, _processing, noi);
+}
+
+void NetworkInterfaceMock::blackHole(NetworkOperationIterator noi) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ _blackHoled.splice(_blackHoled.end(), _processing, noi);
+}
+
+void NetworkInterfaceMock::requeueAt(NetworkOperationIterator noi, Date_t dontAskUntil) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ invariant(noi->getNextConsiderationDate() < dontAskUntil);
+ invariant(_now_inlock() < dontAskUntil);
+ NetworkOperationIterator insertBefore = _unscheduled.begin();
+ for (; insertBefore != _unscheduled.end(); ++insertBefore) {
+ if (insertBefore->getNextConsiderationDate() >= dontAskUntil) {
+ break;
}
- noi->setResponse(when, response);
- _scheduled.splice(insertBefore, _processing, noi);
- }
-
- void NetworkInterfaceMock::blackHole(NetworkOperationIterator noi) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- _blackHoled.splice(_blackHoled.end(), _processing, noi);
}
+ noi->setNextConsiderationDate(dontAskUntil);
+ _unscheduled.splice(insertBefore, _processing, noi);
+}
- void NetworkInterfaceMock::requeueAt(NetworkOperationIterator noi, Date_t dontAskUntil) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- invariant(noi->getNextConsiderationDate() < dontAskUntil);
- invariant(_now_inlock() < dontAskUntil);
- NetworkOperationIterator insertBefore = _unscheduled.begin();
- for (; insertBefore != _unscheduled.end(); ++insertBefore) {
- if (insertBefore->getNextConsiderationDate() >= dontAskUntil) {
- break;
- }
+void NetworkInterfaceMock::runUntil(Date_t until) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ invariant(until > _now_inlock());
+ while (until > _now_inlock()) {
+ _runReadyNetworkOperations_inlock(&lk);
+ if (_hasReadyRequests_inlock()) {
+ break;
}
- noi->setNextConsiderationDate(dontAskUntil);
- _unscheduled.splice(insertBefore, _processing, noi);
- }
-
- void NetworkInterfaceMock::runUntil(Date_t until) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- invariant(until > _now_inlock());
- while (until > _now_inlock()) {
- _runReadyNetworkOperations_inlock(&lk);
- if (_hasReadyRequests_inlock()) {
- break;
- }
- Date_t newNow = _executorNextWakeupDate;
- if (!_scheduled.empty() && _scheduled.front().getResponseDate() < newNow) {
- newNow = _scheduled.front().getResponseDate();
- }
- if (until < newNow) {
- newNow = until;
- }
- invariant(_now_inlock() <= newNow);
- _now = newNow;
- _waitingToRunMask |= kExecutorThread;
+ Date_t newNow = _executorNextWakeupDate;
+ if (!_scheduled.empty() && _scheduled.front().getResponseDate() < newNow) {
+ newNow = _scheduled.front().getResponseDate();
}
- _runReadyNetworkOperations_inlock(&lk);
- }
-
- void NetworkInterfaceMock::runReadyNetworkOperations() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kNetworkThread);
- _runReadyNetworkOperations_inlock(&lk);
- }
-
- void NetworkInterfaceMock::waitForWork() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kExecutorThread);
- _waitForWork_inlock(&lk);
- }
-
- void NetworkInterfaceMock::waitForWorkUntil(Date_t when) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_currentlyRunning == kExecutorThread);
- _executorNextWakeupDate = when;
- if (_executorNextWakeupDate <= _now_inlock()) {
- return;
+ if (until < newNow) {
+ newNow = until;
}
- _waitForWork_inlock(&lk);
- }
-
- void NetworkInterfaceMock::signalWorkAvailable() {
- boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_now_inlock() <= newNow);
+ _now = newNow;
_waitingToRunMask |= kExecutorThread;
- if (_currentlyRunning == kNoThread) {
- _shouldWakeExecutorCondition.notify_one();
- }
}
-
- void NetworkInterfaceMock::_runReadyNetworkOperations_inlock(
- boost::unique_lock<boost::mutex>* lk) {
- while (!_scheduled.empty() && _scheduled.front().getResponseDate() <= _now_inlock()) {
- invariant(_currentlyRunning == kNetworkThread);
- NetworkOperation op = _scheduled.front();
- _scheduled.pop_front();
- _waitingToRunMask |= kExecutorThread;
- lk->unlock();
- op.finishResponse();
- lk->lock();
- }
- invariant(_currentlyRunning == kNetworkThread);
- if (!(_waitingToRunMask & kExecutorThread)) {
- return;
- }
+ _runReadyNetworkOperations_inlock(&lk);
+}
+
+void NetworkInterfaceMock::runReadyNetworkOperations() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kNetworkThread);
+ _runReadyNetworkOperations_inlock(&lk);
+}
+
+void NetworkInterfaceMock::waitForWork() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kExecutorThread);
+ _waitForWork_inlock(&lk);
+}
+
+void NetworkInterfaceMock::waitForWorkUntil(Date_t when) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_currentlyRunning == kExecutorThread);
+ _executorNextWakeupDate = when;
+ if (_executorNextWakeupDate <= _now_inlock()) {
+ return;
+ }
+ _waitForWork_inlock(&lk);
+}
+
+void NetworkInterfaceMock::signalWorkAvailable() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _waitingToRunMask |= kExecutorThread;
+ if (_currentlyRunning == kNoThread) {
_shouldWakeExecutorCondition.notify_one();
- _currentlyRunning = kNoThread;
- while (!_isNetworkThreadRunnable_inlock()) {
- _shouldWakeNetworkCondition.wait(*lk);
- }
- _currentlyRunning = kNetworkThread;
- _waitingToRunMask &= ~kNetworkThread;
}
+}
- void NetworkInterfaceMock::_waitForWork_inlock(boost::unique_lock<boost::mutex>* lk) {
- if (_waitingToRunMask & kExecutorThread) {
- _waitingToRunMask &= ~kExecutorThread;
- return;
- }
- _currentlyRunning = kNoThread;
- while (!_isExecutorThreadRunnable_inlock()) {
- _waitingToRunMask |= kNetworkThread;
- _shouldWakeNetworkCondition.notify_one();
- _shouldWakeExecutorCondition.wait(*lk);
- }
- _currentlyRunning = kExecutorThread;
- _waitingToRunMask &= ~kExecutorThread;
- }
-
- bool NetworkInterfaceMock::_isNetworkThreadRunnable_inlock() {
- if (_currentlyRunning != kNoThread) {
- return false;
- }
- if (_waitingToRunMask != kNetworkThread) {
- return false;
- }
- return true;
- }
-
- bool NetworkInterfaceMock::_isExecutorThreadRunnable_inlock() {
- if (_currentlyRunning != kNoThread) {
- return false;
- }
- return _waitingToRunMask & kExecutorThread;
- }
-
- static const StatusWith<ReplicationExecutor::RemoteCommandResponse> kUnsetResponse(
- ErrorCodes::InternalError,
- "NetworkOperation::_response never set");
-
- NetworkInterfaceMock::NetworkOperation::NetworkOperation()
- : _requestDate(),
- _nextConsiderationDate(),
- _responseDate(),
- _request(),
- _response(kUnsetResponse),
- _onFinish() {
- }
-
- NetworkInterfaceMock::NetworkOperation::NetworkOperation(
- const ReplicationExecutor::CallbackHandle& cbHandle,
- const ReplicationExecutor::RemoteCommandRequest& theRequest,
- Date_t theRequestDate,
- const RemoteCommandCompletionFn& onFinish)
- : _requestDate(theRequestDate),
- _nextConsiderationDate(theRequestDate),
- _responseDate(),
- _cbHandle(cbHandle),
- _request(theRequest),
- _response(kUnsetResponse),
- _onFinish(onFinish) {
+void NetworkInterfaceMock::_runReadyNetworkOperations_inlock(boost::unique_lock<boost::mutex>* lk) {
+ while (!_scheduled.empty() && _scheduled.front().getResponseDate() <= _now_inlock()) {
+ invariant(_currentlyRunning == kNetworkThread);
+ NetworkOperation op = _scheduled.front();
+ _scheduled.pop_front();
+ _waitingToRunMask |= kExecutorThread;
+ lk->unlock();
+ op.finishResponse();
+ lk->lock();
}
-
- NetworkInterfaceMock::NetworkOperation::~NetworkOperation() {}
-
- void NetworkInterfaceMock::NetworkOperation::setNextConsiderationDate(
- Date_t nextConsiderationDate) {
-
- invariant(nextConsiderationDate > _nextConsiderationDate);
- _nextConsiderationDate = nextConsiderationDate;
+ invariant(_currentlyRunning == kNetworkThread);
+ if (!(_waitingToRunMask & kExecutorThread)) {
+ return;
}
-
- void NetworkInterfaceMock::NetworkOperation::setResponse(
- Date_t responseDate,
- const ResponseStatus& response) {
-
- invariant(responseDate >= _requestDate);
- _responseDate = responseDate;
- _response = response;
+ _shouldWakeExecutorCondition.notify_one();
+ _currentlyRunning = kNoThread;
+ while (!_isNetworkThreadRunnable_inlock()) {
+ _shouldWakeNetworkCondition.wait(*lk);
}
+ _currentlyRunning = kNetworkThread;
+ _waitingToRunMask &= ~kNetworkThread;
+}
- void NetworkInterfaceMock::NetworkOperation::finishResponse() {
- invariant(_onFinish);
- _onFinish(_response);
- _onFinish = RemoteCommandCompletionFn();
+void NetworkInterfaceMock::_waitForWork_inlock(boost::unique_lock<boost::mutex>* lk) {
+ if (_waitingToRunMask & kExecutorThread) {
+ _waitingToRunMask &= ~kExecutorThread;
+ return;
}
+ _currentlyRunning = kNoThread;
+ while (!_isExecutorThreadRunnable_inlock()) {
+ _waitingToRunMask |= kNetworkThread;
+ _shouldWakeNetworkCondition.notify_one();
+ _shouldWakeExecutorCondition.wait(*lk);
+ }
+ _currentlyRunning = kExecutorThread;
+ _waitingToRunMask &= ~kExecutorThread;
+}
+
+bool NetworkInterfaceMock::_isNetworkThreadRunnable_inlock() {
+ if (_currentlyRunning != kNoThread) {
+ return false;
+ }
+ if (_waitingToRunMask != kNetworkThread) {
+ return false;
+ }
+ return true;
+}
+
+bool NetworkInterfaceMock::_isExecutorThreadRunnable_inlock() {
+ if (_currentlyRunning != kNoThread) {
+ return false;
+ }
+ return _waitingToRunMask & kExecutorThread;
+}
+
+static const StatusWith<ReplicationExecutor::RemoteCommandResponse> kUnsetResponse(
+ ErrorCodes::InternalError, "NetworkOperation::_response never set");
+
+NetworkInterfaceMock::NetworkOperation::NetworkOperation()
+ : _requestDate(),
+ _nextConsiderationDate(),
+ _responseDate(),
+ _request(),
+ _response(kUnsetResponse),
+ _onFinish() {}
+
+NetworkInterfaceMock::NetworkOperation::NetworkOperation(
+ const ReplicationExecutor::CallbackHandle& cbHandle,
+ const ReplicationExecutor::RemoteCommandRequest& theRequest,
+ Date_t theRequestDate,
+ const RemoteCommandCompletionFn& onFinish)
+ : _requestDate(theRequestDate),
+ _nextConsiderationDate(theRequestDate),
+ _responseDate(),
+ _cbHandle(cbHandle),
+ _request(theRequest),
+ _response(kUnsetResponse),
+ _onFinish(onFinish) {}
+
+NetworkInterfaceMock::NetworkOperation::~NetworkOperation() {}
+
+void NetworkInterfaceMock::NetworkOperation::setNextConsiderationDate(
+ Date_t nextConsiderationDate) {
+ invariant(nextConsiderationDate > _nextConsiderationDate);
+ _nextConsiderationDate = nextConsiderationDate;
+}
+
+void NetworkInterfaceMock::NetworkOperation::setResponse(Date_t responseDate,
+ const ResponseStatus& response) {
+ invariant(responseDate >= _requestDate);
+ _responseDate = responseDate;
+ _response = response;
+}
+
+void NetworkInterfaceMock::NetworkOperation::finishResponse() {
+ invariant(_onFinish);
+ _onFinish(_response);
+ _onFinish = RemoteCommandCompletionFn();
+}
} // namespace repl
} // namespace mongo
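
One detail of runUntil() worth calling out: the mock's clock never ticks, it jumps
straight to the earliest event that could matter. A sketch of that clock-advance
step, with hypothetical millisecond timestamps in place of Date_t:

    #include <algorithm>
    #include <cstdint>

    int64_t nextVirtualNow(int64_t executorNextWakeup,  // _executorNextWakeupDate
                           bool hasScheduledResponse,   // !_scheduled.empty()
                           int64_t firstResponseDate,   // _scheduled.front().getResponseDate()
                           int64_t until) {             // caller-supplied bound
        int64_t newNow = executorNextWakeup;
        if (hasScheduledResponse && firstResponseDate < newNow)
            newNow = firstResponseDate;
        return std::min(newNow, until);  // never run past the requested date
    }
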
diff --git a/src/mongo/db/repl/network_interface_mock.h b/src/mongo/db/repl/network_interface_mock.h
index fd2ad52746d..269eec7fcfc 100644
--- a/src/mongo/db/repl/network_interface_mock.h
+++ b/src/mongo/db/repl/network_interface_mock.h
@@ -38,292 +38,297 @@
namespace mongo {
namespace repl {
+/**
+ * Mock network implementation for use in unit tests.
+ *
+ * To use, construct a new instance on the heap, and keep a pointer to it. Pass
+ * the pointer to the instance into the ReplicationExecutor constructor, transferring
+ * ownership. Start the executor's run() method in a separate thread, schedule the
+ * work you want to test into the executor, then while the test is still going, iterate
+ * through the ready network requests, servicing them and advancing time as needed.
+ *
+ * The mock has a fully virtualized notion of time and the network. When the
+ * replication executor under test schedules a network operation, the startCommand
+ * method of this class adds an entry to the _unscheduled queue for immediate consideration.
+ * The test driver loop, when it examines the request, may schedule a response, ask the
+ * interface to redeliver the request at a later virtual time, or to swallow the virtual
+ * request until the end of the simulation. The test driver loop can also instruct the
+ * interface to run forward through virtual time until there are operations ready to
+ * consider, via runUntil.
+ *
+ * The thread acting as the "network" and the executor run thread are highly synchronized
+ * by this code, allowing for deterministic control of operation interleaving.
+ */
+class NetworkInterfaceMock : public ReplicationExecutor::NetworkInterface {
+public:
+ class NetworkOperation;
+ typedef stdx::list<NetworkOperation> NetworkOperationList;
+ typedef NetworkOperationList::iterator NetworkOperationIterator;
+
+ NetworkInterfaceMock();
+ virtual ~NetworkInterfaceMock();
+ virtual std::string getDiagnosticString();
+
+ ////////////////////////////////////////////////////////////////////////////////
+ //
+ // ReplicationExecutor::NetworkInterface methods
+ //
+ ////////////////////////////////////////////////////////////////////////////////
+
+ virtual void startup();
+ virtual void shutdown();
+ virtual void waitForWork();
+ virtual void waitForWorkUntil(Date_t when);
+ virtual void signalWorkAvailable();
+ virtual Date_t now();
+ virtual void startCommand(const ReplicationExecutor::CallbackHandle& cbHandle,
+ const ReplicationExecutor::RemoteCommandRequest& request,
+ const RemoteCommandCompletionFn& onFinish);
+ virtual void cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle);
+ virtual void runCallbackWithGlobalExclusiveLock(
+ const stdx::function<void(OperationContext*)>& callback);
+
+
+ ////////////////////////////////////////////////////////////////////////////////
+ //
+ // Methods for simulating network operations and the passage of time.
+ //
+ // Methods in this section are to be called by the thread currently simulating
+ // the network.
+ //
+ ////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Causes the currently running (non-executor) thread to assume the mantle of the network
+ * simulation thread.
+ *
+ * Call this before calling any of the other methods in this section.
+ */
+ void enterNetwork();
+
+ /**
+ * Causes the currently running thread to drop the mantle of "network simulation thread".
+ *
+ * Call this before calling any methods that might block waiting for the replication
+ * executor thread.
+ */
+ void exitNetwork();
+
+ /**
+ * Returns true if there are unscheduled network requests to be processed.
+ */
+ bool hasReadyRequests();
+
+ /**
+ * Gets the next unscheduled request to process, blocking until one is available.
+ *
+ * Will not return until the executor thread is blocked in waitForWorkUntil or waitForWork.
+ */
+ NetworkOperationIterator getNextReadyRequest();
+
+ /**
+ * Schedules "response" in response to "noi" at virtual time "when".
+ */
+ void scheduleResponse(NetworkOperationIterator noi,
+ Date_t when,
+ const ResponseStatus& response);
+
+ /**
+ * Swallows "noi", causing the network interface to not respond to it until
+ * shutdown() is called.
+ */
+ void blackHole(NetworkOperationIterator noi);
+
+ /**
+ * Defers decision making on "noi" until virtual time "dontAskUntil". Use
+ * this when getNextReadyRequest() returns a request you want to deal with
+ * after looking at other requests.
+ */
+ void requeueAt(NetworkOperationIterator noi, Date_t dontAskUntil);
+
/**
- * Mock network implementation for use in unit tests.
+ * Runs the simulator forward until now() == until or hasReadyRequests() is true.
*
- * To use, construct a new instance on the heap, and keep a pointer to it. Pass
- * the pointer to the instance into the ReplicationExecutor constructor, transferring
- * ownership. Start the executor's run() method in a separate thread, schedule the
- * work you want to test into the executor, then while the test is still going, iterate
- * through the ready network requests, servicing them and advancing time as needed.
+ * Will not return until the executor thread is blocked in waitForWorkUntil or waitForWork.
+ */
+ void runUntil(Date_t until);
+
+ /**
+ * Processes all ready, scheduled network operations.
*
- * The mock has a fully virtualized notion of time and the network. When the
- * replication executor under test schedules a network operation, the startCommand
- * method of this class adds an entry to the _unscheduled queue for immediate consideration.
- * The test driver loop, when it examines the request, may schedule a response, ask the
- * interface to redeliver the request at a later virtual time, or to swallow the virtual
- * request until the end of the simulation. The test driver loop can also instruct the
- * interface to run forward through virtual time until there are operations ready to
- * consider, via runUntil.
+ * Will not return until the executor thread is blocked in waitForWorkUntil or waitForWork.
+ */
+ void runReadyNetworkOperations();
+
+private:
+ /**
+ * Type used to identify which thread (network mock or executor) is currently executing.
*
- * The thread acting as the "network" and the executor run thread are highly synchronized
- * by this code, allowing for deterministic control of operation interleaving.
+ * Values are used in a bitmask, as well.
+ */
+ enum ThreadType { kNoThread = 0, kExecutorThread = 1, kNetworkThread = 2 };
+
+ /**
+ * Returns the current virtualized time.
+ */
+ Date_t _now_inlock() const {
+ return _now;
+ }
+
+ /**
+ * Implementation of waitForWork*.
+ */
+ void _waitForWork_inlock(boost::unique_lock<boost::mutex>* lk);
+
+ /**
+ * Returns true if there are ready requests for the network thread to service.
+ */
+ bool _hasReadyRequests_inlock();
+
+ /**
+ * Returns true if the network thread could run right now.
+ */
+ bool _isNetworkThreadRunnable_inlock();
+
+ /**
+ * Returns true if the executor thread could run right now.
+ */
+ bool _isExecutorThreadRunnable_inlock();
+
+ /**
+ * Runs all ready network operations, called while holding "lk". May drop and
+ * reacquire "lk" several times, but will not return until the executor has blocked
+ * in waitFor*.
+ */
+ void _runReadyNetworkOperations_inlock(boost::unique_lock<boost::mutex>* lk);
+
+ // Mutex that synchronizes access to mutable data in this class and its subclasses.
+ // Fields guarded by the mutex are labeled (M), below, and those that are read-only
+ // in multi-threaded execution, and so unsynchronized, are labeled (R).
+ boost::mutex _mutex;
+
+ // Condition signaled to indicate that the network processing thread should wake up.
+ boost::condition_variable _shouldWakeNetworkCondition; // (M)
+
+ // Condition signaled to indicate that the executor run thread should wake up.
+ boost::condition_variable _shouldWakeExecutorCondition; // (M)
+
+ // Bitmask indicating which threads are runnable.
+ int _waitingToRunMask; // (M)
+
+ // Indicator of which thread, if any, is currently running.
+ ThreadType _currentlyRunning; // (M)
+
+ // The current time reported by this instance of NetworkInterfaceMock.
+ Date_t _now; // (M)
+
+ // Set to true by "startUp()"
+ bool _hasStarted; // (M)
+
+ // Set to true by "shutDown()".
+ bool _inShutdown; // (M)
+
+ // Next date that the executor expects to wake up at (due to a scheduleWorkAt() call).
+ Date_t _executorNextWakeupDate; // (M)
+
+ // List of network operations whose responses haven't been scheduled or blackholed. This is
+ // where network requests are first queued. It is sorted by
+ // NetworkOperation::_nextConsiderationDate, which is set to now() when startCommand() is
+ // called, and adjusted by requeueAt().
+ NetworkOperationList _unscheduled; // (M)
+
+ // List of network operations that have been returned by getNextReadyRequest() but not
+ // yet scheduled, black-holed or requeued.
+ NetworkOperationList _processing; // (M)
+
+ // List of network operations whose responses have been scheduled but not delivered, sorted
+ // by NetworkOperation::_responseDate. These operations will have their responses delivered
+ // when now() == getResponseDate().
+ NetworkOperationList _scheduled; // (M)
+
+ // List of network operations that will not be responded to until shutdown() is called.
+ NetworkOperationList _blackHoled; // (M)
+
+ // Pointer to the executor into which this mock is installed. Used to signal the executor
+ // when the clock changes.
+ ReplicationExecutor* _executor; // (R)
+};
+
+/**
+ * Representation of an in-progress network operation.
+ */
+class NetworkInterfaceMock::NetworkOperation {
+public:
+ NetworkOperation();
+ NetworkOperation(const ReplicationExecutor::CallbackHandle& cbHandle,
+ const ReplicationExecutor::RemoteCommandRequest& theRequest,
+ Date_t theRequestDate,
+ const RemoteCommandCompletionFn& onFinish);
+ ~NetworkOperation();
+
+ /**
+ * Adjusts the stored virtual time at which this entry will be subject to consideration
+ * by the test harness.
+ */
+ void setNextConsiderationDate(Date_t nextConsiderationDate);
+
+ /**
+ * Sets the response and the virtual time at which it will be delivered.
+ */
+ void setResponse(Date_t responseDate, const ResponseStatus& response);
+
+ /**
+ * Predicate that returns true if cbHandle equals the executor's handle for this network
+ * operation. Used for searching lists of NetworkOperations.
+ */
+ bool isForCallback(const ReplicationExecutor::CallbackHandle& cbHandle) const {
+ return cbHandle == _cbHandle;
+ }
+
+ /**
+ * Gets the request that initiated this operation.
+ */
+ const ReplicationExecutor::RemoteCommandRequest& getRequest() const {
+ return _request;
+ }
+
+ /**
+ * Gets the virtual time at which the operation was started.
+ */
+ Date_t getRequestDate() const {
+ return _requestDate;
+ }
+
+ /**
+ * Gets the virtual time at which the test harness should next consider what to do
+ * with this request.
+ */
+ Date_t getNextConsiderationDate() const {
+ return _nextConsiderationDate;
+ }
+
+ /**
+ * After setResponse() has been called, returns the virtual time at which
+ * the response should be delivered.
*/
- class NetworkInterfaceMock : public ReplicationExecutor::NetworkInterface {
- public:
- class NetworkOperation;
- typedef stdx::list<NetworkOperation> NetworkOperationList;
- typedef NetworkOperationList::iterator NetworkOperationIterator;
-
- NetworkInterfaceMock();
- virtual ~NetworkInterfaceMock();
- virtual std::string getDiagnosticString();
-
- ////////////////////////////////////////////////////////////////////////////////
- //
- // ReplicationExecutor::NetworkInterface methods
- //
- ////////////////////////////////////////////////////////////////////////////////
-
- virtual void startup();
- virtual void shutdown();
- virtual void waitForWork();
- virtual void waitForWorkUntil(Date_t when);
- virtual void signalWorkAvailable();
- virtual Date_t now();
- virtual void startCommand(const ReplicationExecutor::CallbackHandle& cbHandle,
- const ReplicationExecutor::RemoteCommandRequest& request,
- const RemoteCommandCompletionFn& onFinish);
- virtual void cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle);
- virtual void runCallbackWithGlobalExclusiveLock(
- const stdx::function<void (OperationContext*)>& callback);
-
-
- ////////////////////////////////////////////////////////////////////////////////
- //
- // Methods for simulating network operations and the passage of time.
- //
- // Methods in this section are to be called by the thread currently simulating
- // the network.
- //
- ////////////////////////////////////////////////////////////////////////////////
-
- /**
- * Causes the currently running (non-executor) thread to assume the mantle of the network
- * simulation thread.
- *
- * Call this before calling any of the other methods in this section.
- */
- void enterNetwork();
-
- /**
- * Causes the currently running thread to drop the mantle of "network simulation thread".
- *
- * Call this before calling any methods that might block waiting for the replication
- * executor thread.
- */
- void exitNetwork();
-
- /**
- * Returns true if there are unscheduled network requests to be processed.
- */
- bool hasReadyRequests();
-
- /**
- * Gets the next unscheduled request to process, blocking until one is available.
- *
- * Will not return until the executor thread is blocked in waitForWorkUntil or waitForWork.
- */
- NetworkOperationIterator getNextReadyRequest();
-
- /**
- * Schedules "response" in response to "noi" at virtual time "when".
- */
- void scheduleResponse(
- NetworkOperationIterator noi,
- Date_t when,
- const ResponseStatus& response);
-
- /**
- * Swallows "noi", causing the network interface to not respond to it until
- * shutdown() is called.
- */
- void blackHole(NetworkOperationIterator noi);
-
- /**
- * Defers decision making on "noi" until virtual time "dontAskUntil". Use
- * this when getNextReadyRequest() returns a request you want to deal with
- * after looking at other requests.
- */
- void requeueAt(NetworkOperationIterator noi, Date_t dontAskUntil);
-
- /**
- * Runs the simulator forward until now() == until or hasReadyRequests() is true.
- *
- * Will not return until the executor thread is blocked in waitForWorkUntil or waitForWork.
- */
- void runUntil(Date_t until);
-
- /**
- * Processes all ready, scheduled network operations.
- *
- * Will not return until the executor thread is blocked in waitForWorkUntil or waitForWork.
- */
- void runReadyNetworkOperations();
-
- private:
- /**
- * Type used to identify which thread (network mock or executor) is currently executing.
- *
- * Values are used in a bitmask, as well.
- */
- enum ThreadType {
- kNoThread = 0,
- kExecutorThread = 1,
- kNetworkThread = 2
- };
-
- /**
- * Returns the current virtualized time.
- */
- Date_t _now_inlock() const { return _now; }
-
- /**
- * Implementation of waitForWork*.
- */
- void _waitForWork_inlock(boost::unique_lock<boost::mutex>* lk);
-
- /**
- * Returns true if there are ready requests for the network thread to service.
- */
- bool _hasReadyRequests_inlock();
-
- /**
- * Returns true if the network thread could run right now.
- */
- bool _isNetworkThreadRunnable_inlock();
-
- /**
- * Returns true if the executor thread could run right now.
- */
- bool _isExecutorThreadRunnable_inlock();
-
- /**
- * Runs all ready network operations, called while holding "lk". May drop and
- * reacquire "lk" several times, but will not return until the executor has blocked
- * in waitFor*.
- */
- void _runReadyNetworkOperations_inlock(boost::unique_lock<boost::mutex>* lk);
-
- // Mutex that synchronizes access to mutable data in this class and its subclasses.
- // Fields guarded by the mutex are labeled (M), below, and those that are read-only
- // in multi-threaded execution, and so unsynchronized, are labeled (R).
- boost::mutex _mutex;
-
- // Condition signaled to indicate that the network processing thread should wake up.
- boost::condition_variable _shouldWakeNetworkCondition; // (M)
-
- // Condition signaled to indicate that the executor run thread should wake up.
- boost::condition_variable _shouldWakeExecutorCondition; // (M)
-
- // Bitmask indicating which threads are runnable.
- int _waitingToRunMask; // (M)
-
- // Indicator of which thread, if any, is currently running.
- ThreadType _currentlyRunning; // (M)
-
- // The current time reported by this instance of NetworkInterfaceMock.
- Date_t _now; // (M)
-
- // Set to true by "startUp()"
- bool _hasStarted; // (M)
-
- // Set to true by "shutDown()".
- bool _inShutdown; // (M)
-
- // Next date that the executor expects to wake up at (due to a scheduleWorkAt() call).
- Date_t _executorNextWakeupDate; // (M)
-
- // List of network operations whose responses haven't been scheduled or blackholed. This is
- // where network requests are first queued. It is sorted by
- // NetworkOperation::_nextConsiderationDate, which is set to now() when startCommand() is
- // called, and adjusted by requeueAt().
- NetworkOperationList _unscheduled; // (M)
-
- // List of network operations that have been returned by getNextReadyRequest() but not
- // yet scheduled, black-holed or requeued.
- NetworkOperationList _processing; // (M)
-
- // List of network operations whose responses have been scheduled but not delivered, sorted
- // by NetworkOperation::_responseDate. These operations will have their responses delivered
- // when now() == getResponseDate().
- NetworkOperationList _scheduled; // (M)
-
- // List of network operations that will not be responded to until shutdown() is called.
- NetworkOperationList _blackHoled; // (M)
-
- // Pointer to the executor into which this mock is installed. Used to signal the executor
- // when the clock changes.
- ReplicationExecutor* _executor; // (R)
- };
+ Date_t getResponseDate() const {
+ return _responseDate;
+ }
/**
- * Representation of an in-progress network operation.
+ * Delivers the response, by invoking the onFinish callback passed into the constructor.
*/
- class NetworkInterfaceMock::NetworkOperation {
- public:
- NetworkOperation();
- NetworkOperation(const ReplicationExecutor::CallbackHandle& cbHandle,
- const ReplicationExecutor::RemoteCommandRequest& theRequest,
- Date_t theRequestDate,
- const RemoteCommandCompletionFn& onFinish);
- ~NetworkOperation();
-
- /**
- * Adjusts the stored virtual time at which this entry will be subject to consideration
- * by the test harness.
- */
- void setNextConsiderationDate(Date_t nextConsiderationDate);
-
- /**
- * Sets the response and the virtual time at which it will be delivered.
- */
- void setResponse(Date_t responseDate, const ResponseStatus& response);
-
- /**
- * Predicate that returns true if cbHandle equals the executor's handle for this network
- * operation. Used for searching lists of NetworkOperations.
- */
- bool isForCallback(const ReplicationExecutor::CallbackHandle& cbHandle) const {
- return cbHandle == _cbHandle;
- }
-
- /**
- * Gets the request that initiated this operation.
- */
- const ReplicationExecutor::RemoteCommandRequest& getRequest() const { return _request; }
-
- /**
- * Gets the virtual time at which the operation was started.
- */
- Date_t getRequestDate() const { return _requestDate; }
-
- /**
- * Gets the virtual time at which the test harness should next consider what to do
- * with this request.
- */
- Date_t getNextConsiderationDate() const { return _nextConsiderationDate; }
-
- /**
- * After setResponse() has been called, returns the virtual time at which
- * the response should be delivered.
- */
- Date_t getResponseDate() const { return _responseDate; }
-
- /**
- * Delivers the response, by invoking the onFinish callback passed into the constructor.
- */
- void finishResponse();
-
- private:
- Date_t _requestDate;
- Date_t _nextConsiderationDate;
- Date_t _responseDate;
- ReplicationExecutor::CallbackHandle _cbHandle;
- ReplicationExecutor::RemoteCommandRequest _request;
- ResponseStatus _response;
- RemoteCommandCompletionFn _onFinish;
- };
+ void finishResponse();
+
+private:
+ Date_t _requestDate;
+ Date_t _nextConsiderationDate;
+ Date_t _responseDate;
+ ReplicationExecutor::CallbackHandle _cbHandle;
+ ReplicationExecutor::RemoteCommandRequest _request;
+ ResponseStatus _response;
+ RemoteCommandCompletionFn _onFinish;
+};
} // namespace repl
} // namespace mongo
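
Putting the workflow from the class comment together, a hypothetical test-driver
fragment might look like the following. It assumes the executor's run() method is
already live on another thread, and "response" is whatever ResponseStatus the test
wants delivered; only the NetworkInterfaceMock calls mirror this header.

    #include "mongo/db/repl/network_interface_mock.h"

    namespace mongo {
    namespace repl {

    void answerOneRequest(NetworkInterfaceMock* net, const ResponseStatus& response) {
        net->enterNetwork();  // assume the mantle of the network simulation thread
        NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        const Date_t when(net->now().asInt64() + 10);  // 10ms of virtual time
        net->scheduleResponse(noi, when, response);
        net->runUntil(when);  // delivers the response once now() == when
        net->exitNetwork();   // hand control back to the executor thread
    }

    }  // namespace repl
    }  // namespace mongo
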
diff --git a/src/mongo/db/repl/operation_context_repl_mock.cpp b/src/mongo/db/repl/operation_context_repl_mock.cpp
index b5c44316261..f5ac1a7e5ce 100644
--- a/src/mongo/db/repl/operation_context_repl_mock.cpp
+++ b/src/mongo/db/repl/operation_context_repl_mock.cpp
@@ -36,10 +36,9 @@
namespace mongo {
namespace repl {
- OperationContextReplMock::OperationContextReplMock()
- : _lockState(new MMAPV1LockerImpl()) { }
+OperationContextReplMock::OperationContextReplMock() : _lockState(new MMAPV1LockerImpl()) {}
- OperationContextReplMock::~OperationContextReplMock() {}
+OperationContextReplMock::~OperationContextReplMock() {}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/operation_context_repl_mock.h b/src/mongo/db/repl/operation_context_repl_mock.h
index 660fbdfc5db..4ba3a4b4bf3 100644
--- a/src/mongo/db/repl/operation_context_repl_mock.h
+++ b/src/mongo/db/repl/operation_context_repl_mock.h
@@ -34,23 +34,25 @@
namespace mongo {
- class Locker;
+class Locker;
namespace repl {
- /**
- * Mock implementation of OperationContext that can be used with real instances of LockManager.
- */
- class OperationContextReplMock : public OperationContextNoop {
- public:
- OperationContextReplMock();
- virtual ~OperationContextReplMock();
-
- virtual Locker* lockState() const { return _lockState.get(); }
-
- private:
- boost::scoped_ptr<Locker> _lockState;
- };
+/**
+ * Mock implementation of OperationContext that can be used with real instances of LockManager.
+ */
+class OperationContextReplMock : public OperationContextNoop {
+public:
+ OperationContextReplMock();
+ virtual ~OperationContextReplMock();
+
+ virtual Locker* lockState() const {
+ return _lockState.get();
+ }
+
+private:
+ boost::scoped_ptr<Locker> _lockState;
+};
} // namespace repl
} // namespace mongo
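
A minimal usage sketch for this mock (the calls match those made elsewhere in this patch): because lockState() returns a real MMAPV1LockerImpl, code under test can take genuine lock-manager locks through it.

    OperationContextReplMock txn;

    // Takes an actual MODE_IX database lock on "local" through the mock's
    // MMAPV1LockerImpl, exactly as production code does with a real txn.
    Lock::DBLock lk(txn.lockState(), "local", MODE_IX);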
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 53757442e49..df2c04b8e22 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -73,645 +73,629 @@
namespace mongo {
- using std::endl;
- using std::stringstream;
+using std::endl;
+using std::stringstream;
namespace repl {
namespace {
- // cached copies of these...so don't rename them, drop them, etc.!!!
- Database* localDB = NULL;
- Collection* localOplogMainCollection = 0;
- Collection* localOplogRSCollection = 0;
-
- // Synchronizes the section where a new OpTime is generated and when it actually
- // appears in the oplog.
- mongo::mutex newOpMutex("oplogNewOp");
- boost::condition newOptimeNotifier;
-
- // so we can fail the same way
- void checkOplogInsert( StatusWith<RecordId> result ) {
- massert( 17322,
- str::stream() << "write to oplog failed: " << result.getStatus().toString(),
- result.isOK() );
- }
-
-
- /**
- * Allocates an optime for a new entry in the oplog, and updates the replication coordinator to
- * reflect that new optime. Returns the new optime and the correct value of the "h" field for
- * the new oplog entry.
- *
- * NOTE: From the time this function returns to the time that the new oplog entry is written
-     * to the storage system, all errors must be considered fatal. This is because this
- * function registers the new optime with the storage system and the replication coordinator,
- * and provides no facility to revert those registrations on rollback.
- */
- std::pair<OpTime, long long> getNextOpTime(OperationContext* txn,
- Collection* oplog,
- const char* ns,
- ReplicationCoordinator* replCoord,
- const char* opstr) {
- mutex::scoped_lock lk(newOpMutex);
- OpTime ts = getNextGlobalOptime();
- newOptimeNotifier.notify_all();
-
- fassert(28560, oplog->getRecordStore()->oplogDiskLocRegister(txn, ts));
+// cached copies of these...so don't rename them, drop them, etc.!!!
+Database* localDB = NULL;
+Collection* localOplogMainCollection = 0;
+Collection* localOplogRSCollection = 0;
- long long hashNew;
+// Synchronizes the section where a new OpTime is generated and when it actually
+// appears in the oplog.
+mongo::mutex newOpMutex("oplogNewOp");
+boost::condition newOptimeNotifier;
- if (replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet) {
+// so we can fail the same way
+void checkOplogInsert(StatusWith<RecordId> result) {
+ massert(17322,
+ str::stream() << "write to oplog failed: " << result.getStatus().toString(),
+ result.isOK());
+}
- hashNew = BackgroundSync::get()->getLastAppliedHash();
- // Check to make sure logOp() is legal at this point.
- if (*opstr == 'n') {
- // 'n' operations are always logged
- invariant(*ns == '\0');
-
- // 'n' operations do not advance the hash, since they are not rolled back
- }
- else {
- // Advance the hash
- hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
-
- BackgroundSync::get()->setLastAppliedHash(hashNew);
- }
- }
- else {
- hashNew = 0;
+/**
+ * Allocates an optime for a new entry in the oplog, and updates the replication coordinator to
+ * reflect that new optime. Returns the new optime and the correct value of the "h" field for
+ * the new oplog entry.
+ *
+ * NOTE: From the time this function returns to the time that the new oplog entry is written
+ * to the storage system, all errors must be considered fatal. This is because this
+ * function registers the new optime with the storage system and the replication coordinator,
+ * and provides no facility to revert those registrations on rollback.
+ */
+std::pair<OpTime, long long> getNextOpTime(OperationContext* txn,
+ Collection* oplog,
+ const char* ns,
+ ReplicationCoordinator* replCoord,
+ const char* opstr) {
+ mutex::scoped_lock lk(newOpMutex);
+ OpTime ts = getNextGlobalOptime();
+ newOptimeNotifier.notify_all();
+
+ fassert(28560, oplog->getRecordStore()->oplogDiskLocRegister(txn, ts));
+
+ long long hashNew;
+
+ if (replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet) {
+ hashNew = BackgroundSync::get()->getLastAppliedHash();
+
+ // Check to make sure logOp() is legal at this point.
+ if (*opstr == 'n') {
+ // 'n' operations are always logged
+ invariant(*ns == '\0');
+
+ // 'n' operations do not advance the hash, since they are not rolled back
+ } else {
+ // Advance the hash
+ hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
+
+ BackgroundSync::get()->setLastAppliedHash(hashNew);
}
-
- replCoord->setMyLastOptime(ts);
- return std::pair<OpTime,long long>(ts, hashNew);
+ } else {
+ hashNew = 0;
}
- /**
-     * This allows us to stream the oplog entry directly into the data region;
-     * the main goal is to avoid copying the o portion,
-     * which can be very large
- * TODO: can have this build the entire doc
- */
- class OplogDocWriter : public DocWriter {
- public:
- OplogDocWriter( const BSONObj& frame, const BSONObj& oField )
- : _frame( frame ), _oField( oField ) {
- }
+ replCoord->setMyLastOptime(ts);
+ return std::pair<OpTime, long long>(ts, hashNew);
+}
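
Isolated from the surrounding bookkeeping, the hash advancement in getNextOpTime() is this recurrence (a restatement for clarity; 'n' entries skip it because no-ops are never rolled back):

    long long advanceOplogHash(long long prevHash, long long ts, int myId) {
        // Fold the new optime and this member's id into the running hash;
        // matches the update performed under newOpMutex above.
        return (prevHash * 131 + ts) * 17 + myId;
    }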
- ~OplogDocWriter(){}
+/**
+ * This allows us to stream the oplog entry directly into the data region;
+ * the main goal is to avoid copying the o portion,
+ * which can be very large
+ * TODO: can have this build the entire doc
+ */
+class OplogDocWriter : public DocWriter {
+public:
+ OplogDocWriter(const BSONObj& frame, const BSONObj& oField) : _frame(frame), _oField(oField) {}
- void writeDocument( char* start ) const {
- char* buf = start;
+ ~OplogDocWriter() {}
- memcpy( buf, _frame.objdata(), _frame.objsize() - 1 ); // don't copy final EOO
+ void writeDocument(char* start) const {
+ char* buf = start;
- reinterpret_cast<int*>( buf )[0] = documentSize();
+ memcpy(buf, _frame.objdata(), _frame.objsize() - 1); // don't copy final EOO
- buf += ( _frame.objsize() - 1 );
- buf[0] = (char)Object;
- buf[1] = 'o';
- buf[2] = 0;
- memcpy( buf+3, _oField.objdata(), _oField.objsize() );
- buf += 3 + _oField.objsize();
- buf[0] = EOO;
+ reinterpret_cast<int*>(buf)[0] = documentSize();
- verify( static_cast<size_t>( ( buf + 1 ) - start ) == documentSize() ); // DEV?
- }
+ buf += (_frame.objsize() - 1);
+ buf[0] = (char)Object;
+ buf[1] = 'o';
+ buf[2] = 0;
+ memcpy(buf + 3, _oField.objdata(), _oField.objsize());
+ buf += 3 + _oField.objsize();
+ buf[0] = EOO;
- size_t documentSize() const {
- return _frame.objsize() + _oField.objsize() + 1 /* type */ + 2 /* "o" */;
- }
+ verify(static_cast<size_t>((buf + 1) - start) == documentSize()); // DEV?
+ }
- private:
- BSONObj _frame;
- BSONObj _oField;
- };
-
- /* we write to local.oplog.rs:
- { ts : ..., h: ..., v: ..., op: ..., etc }
- ts: an OpTime timestamp
- h: hash
- v: version
- op:
- "i" insert
- "u" update
- "d" delete
- "c" db cmd
- "db" declares presence of a database (ns is set to the db name + '.')
- "n" no op
-
- bb param:
- if not null, specifies a boolean to pass along to the other side as b: param.
- used for "justOne" or "upsert" flags on 'd', 'u'
+ size_t documentSize() const {
+ return _frame.objsize() + _oField.objsize() + 1 /* type */ + 2 /* "o" */;
+ }
- */
+private:
+ BSONObj _frame;
+ BSONObj _oField;
+};
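
The document writeDocument() produces is the frame minus its trailing EOO, then a one-byte Object type tag, the two-byte field name "o" plus NUL, the o-field document, and a final EOO. documentSize() is just that sum, restated here for clarity:

    // frame.objsize() - 1 (drop frame's EOO) + 1 (type byte) + 2 ("o" + NUL)
    //   + oField.objsize() + 1 (final EOO)
    // == frame.objsize() + oField.objsize() + 1 /* type */ + 2 /* "o" */
    size_t oplogDocSize(const BSONObj& frame, const BSONObj& oField) {
        return frame.objsize() + oField.objsize() + 1 + 2;
    }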
+
+/* we write to local.oplog.rs:
+ { ts : ..., h: ..., v: ..., op: ..., etc }
+ ts: an OpTime timestamp
+ h: hash
+ v: version
+ op:
+ "i" insert
+ "u" update
+ "d" delete
+ "c" db cmd
+ "db" declares presence of a database (ns is set to the db name + '.')
+ "n" no op
+
+ bb param:
+ if not null, specifies a boolean to pass along to the other side as b: param.
+ used for "justOne" or "upsert" flags on 'd', 'u'
- void _logOpRS(OperationContext* txn,
- const char *opstr,
- const char *ns,
- const char *logNS,
- const BSONObj& obj,
- BSONObj *o2,
- bool *bb,
- bool fromMigrate ) {
- if ( strncmp(ns, "local.", 6) == 0 ) {
- return;
- }
-
- Lock::DBLock lk(txn->lockState(), "local", MODE_IX);
- Lock::OplogIntentWriteLock oplogLk(txn->lockState());
+*/
- DEV verify( logNS == 0 ); // check this was never a master/slave master
+void _logOpRS(OperationContext* txn,
+ const char* opstr,
+ const char* ns,
+ const char* logNS,
+ const BSONObj& obj,
+ BSONObj* o2,
+ bool* bb,
+ bool fromMigrate) {
+ if (strncmp(ns, "local.", 6) == 0) {
+ return;
+ }
- if ( localOplogRSCollection == 0 ) {
- Client::Context ctx(txn, rsoplog);
- localDB = ctx.db();
- invariant( localDB );
- localOplogRSCollection = localDB->getCollection( rsoplog );
- massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
- }
+ Lock::DBLock lk(txn->lockState(), "local", MODE_IX);
+ Lock::OplogIntentWriteLock oplogLk(txn->lockState());
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (ns[0] && !replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
- severe() << "replSet error : logOp() but can't accept write to collection " << ns;
- fassertFailed(17405);
- }
+ DEV verify(logNS == 0); // check this was never a master/slave master
- oplogLk.serializeIfNeeded();
- std::pair<OpTime, long long> slot = getNextOpTime(txn,
- localOplogRSCollection,
- ns,
- replCoord,
- opstr);
-
- /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
- instead we do a single copy to the destination position in the memory mapped file.
- */
-
- BSONObjBuilder b(256);
- b.appendTimestamp("ts", slot.first.asDate());
- b.append("h", slot.second);
- b.append("v", OPLOG_VERSION);
- b.append("op", opstr);
- b.append("ns", ns);
- if (fromMigrate)
- b.appendBool("fromMigrate", true);
- if ( bb )
- b.appendBool("b", *bb);
- if ( o2 )
- b.append("o2", *o2);
- BSONObj partial = b.done();
-
- OplogDocWriter writer( partial, obj );
- checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );
-
- txn->getClient()->setLastOp(slot.first);
+ if (localOplogRSCollection == 0) {
+ Client::Context ctx(txn, rsoplog);
+ localDB = ctx.db();
+ invariant(localDB);
+ localOplogRSCollection = localDB->getCollection(rsoplog);
+ massert(13347,
+ "local.oplog.rs missing. did you drop it? if so restart server",
+ localOplogRSCollection);
}
- void _logOpOld(OperationContext* txn,
- const char *opstr,
- const char *ns,
- const char *logNS,
- const BSONObj& obj,
- BSONObj *o2,
- bool *bb,
- bool fromMigrate ) {
-
-
- if ( strncmp(ns, "local.", 6) == 0 ) {
- return;
- }
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (ns[0] && !replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
+ severe() << "replSet error : logOp() but can't accept write to collection " << ns;
+ fassertFailed(17405);
+ }
- Lock::DBLock lk(txn->lockState(), "local", MODE_IX);
+ oplogLk.serializeIfNeeded();
+ std::pair<OpTime, long long> slot =
+ getNextOpTime(txn, localOplogRSCollection, ns, replCoord, opstr);
- if( logNS == 0 ) {
- logNS = "local.oplog.$main";
- }
+ /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
+ instead we do a single copy to the destination position in the memory mapped file.
+ */
- Lock::CollectionLock lk2(txn->lockState(), logNS, MODE_IX);
+ BSONObjBuilder b(256);
+ b.appendTimestamp("ts", slot.first.asDate());
+ b.append("h", slot.second);
+ b.append("v", OPLOG_VERSION);
+ b.append("op", opstr);
+ b.append("ns", ns);
+ if (fromMigrate)
+ b.appendBool("fromMigrate", true);
+ if (bb)
+ b.appendBool("b", *bb);
+ if (o2)
+ b.append("o2", *o2);
+ BSONObj partial = b.done();
+
+ OplogDocWriter writer(partial, obj);
+ checkOplogInsert(localOplogRSCollection->insertDocument(txn, &writer, false));
+
+ txn->getClient()->setLastOp(slot.first);
+}
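
Filled in, a single replica-set insert written by _logOpRS() looks roughly like this (values are illustrative; the o field is streamed in by OplogDocWriter rather than copied through the builder):

    { ts: Timestamp(1438128999, 1),           // slot.first, from getNextOpTime()
      h: NumberLong("-3896291913621188822"),  // slot.second, the rolling hash
      v: 2,                                   // OPLOG_VERSION
      op: "i", ns: "test.coll",
      o: { _id: ObjectId("..."), x: 1 } }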
+
+void _logOpOld(OperationContext* txn,
+ const char* opstr,
+ const char* ns,
+ const char* logNS,
+ const BSONObj& obj,
+ BSONObj* o2,
+ bool* bb,
+ bool fromMigrate) {
+ if (strncmp(ns, "local.", 6) == 0) {
+ return;
+ }
- if (localOplogMainCollection == 0) {
- Client::Context ctx(txn, logNS);
- localDB = ctx.db();
- invariant(localDB);
- localOplogMainCollection = localDB->getCollection(logNS);
- invariant(localOplogMainCollection);
- }
+ Lock::DBLock lk(txn->lockState(), "local", MODE_IX);
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- std::pair<OpTime,long long> slot = getNextOpTime(txn,
- localOplogMainCollection,
- ns,
- replCoord,
- opstr);
-
- /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
- instead we do a single copy to the destination position in the memory mapped file.
- */
-
- BSONObjBuilder b(256);
- b.appendTimestamp("ts", slot.first.asDate());
- b.append("op", opstr);
- b.append("ns", ns);
- if (fromMigrate)
- b.appendBool("fromMigrate", true);
- if ( bb )
- b.appendBool("b", *bb);
- if ( o2 )
- b.append("o2", *o2);
- BSONObj partial = b.done(); // partial is everything except the o:... part.
-
- OplogDocWriter writer( partial, obj );
- checkOplogInsert( localOplogMainCollection->insertDocument( txn, &writer, false ) );
-
- txn->getClient()->setLastOp(slot.first);
+ if (logNS == 0) {
+ logNS = "local.oplog.$main";
}
- void (*_logOp)(OperationContext* txn,
- const char *opstr,
- const char *ns,
- const char *logNS,
- const BSONObj& obj,
- BSONObj *o2,
- bool *bb,
- bool fromMigrate ) = _logOpRS;
-} // namespace
-
- void oldRepl() { _logOp = _logOpOld; }
+ Lock::CollectionLock lk2(txn->lockState(), logNS, MODE_IX);
- void logKeepalive(OperationContext* txn) {
- _logOp(txn, "n", "", 0, BSONObj(), 0, 0, false);
- }
- void logOpComment(OperationContext* txn, const BSONObj& obj) {
- _logOp(txn, "n", "", 0, obj, 0, 0, false);
- }
- void logOpInitiate(OperationContext* txn, const BSONObj& obj) {
- _logOpRS(txn, "n", "", 0, obj, 0, 0, false);
+ if (localOplogMainCollection == 0) {
+ Client::Context ctx(txn, logNS);
+ localDB = ctx.db();
+ invariant(localDB);
+ localOplogMainCollection = localDB->getCollection(logNS);
+ invariant(localOplogMainCollection);
}
-    /* @param opstr:
- c userCreateNS
- i insert
- n no-op / keepalive
- d delete / remove
- u update
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ std::pair<OpTime, long long> slot =
+ getNextOpTime(txn, localOplogMainCollection, ns, replCoord, opstr);
+
+ /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
+ instead we do a single copy to the destination position in the memory mapped file.
*/
- void logOp(OperationContext* txn,
+
+ BSONObjBuilder b(256);
+ b.appendTimestamp("ts", slot.first.asDate());
+ b.append("op", opstr);
+ b.append("ns", ns);
+ if (fromMigrate)
+ b.appendBool("fromMigrate", true);
+ if (bb)
+ b.appendBool("b", *bb);
+ if (o2)
+ b.append("o2", *o2);
+ BSONObj partial = b.done(); // partial is everything except the o:... part.
+
+ OplogDocWriter writer(partial, obj);
+ checkOplogInsert(localOplogMainCollection->insertDocument(txn, &writer, false));
+
+ txn->getClient()->setLastOp(slot.first);
+}
+
+void (*_logOp)(OperationContext* txn,
const char* opstr,
const char* ns,
+ const char* logNS,
const BSONObj& obj,
- BSONObj* patt,
- bool* b,
- bool fromMigrate) {
+ BSONObj* o2,
+ bool* bb,
+ bool fromMigrate) = _logOpRS;
+} // namespace
- if ( getGlobalReplicationCoordinator()->isReplEnabled() ) {
- _logOp(txn, opstr, ns, 0, obj, patt, b, fromMigrate);
- }
- ensureShardVersionOKOrThrow(ns);
-
- //
- // rollback-safe logOp listeners
- //
- getGlobalAuthorizationManager()->logOp(txn, opstr, ns, obj, patt, b);
- logOpForSharding(txn, opstr, ns, obj, patt, fromMigrate);
- logOpForDbHash(txn, ns);
- if ( strstr( ns, ".system.js" ) ) {
- Scope::storedFuncMod(txn);
- }
+void oldRepl() {
+ _logOp = _logOpOld;
+}
+
+void logKeepalive(OperationContext* txn) {
+ _logOp(txn, "n", "", 0, BSONObj(), 0, 0, false);
+}
+void logOpComment(OperationContext* txn, const BSONObj& obj) {
+ _logOp(txn, "n", "", 0, obj, 0, 0, false);
+}
+void logOpInitiate(OperationContext* txn, const BSONObj& obj) {
+ _logOpRS(txn, "n", "", 0, obj, 0, 0, false);
+}
+
+/* @param opstr:
+ c userCreateNS
+ i insert
+ n no-op / keepalive
+ d delete / remove
+ u update
+*/
+void logOp(OperationContext* txn,
+ const char* opstr,
+ const char* ns,
+ const BSONObj& obj,
+ BSONObj* patt,
+ bool* b,
+ bool fromMigrate) {
+ if (getGlobalReplicationCoordinator()->isReplEnabled()) {
+ _logOp(txn, opstr, ns, 0, obj, patt, b, fromMigrate);
}
+ ensureShardVersionOKOrThrow(ns);
+
+ //
+ // rollback-safe logOp listeners
+ //
+ getGlobalAuthorizationManager()->logOp(txn, opstr, ns, obj, patt, b);
+ logOpForSharding(txn, opstr, ns, obj, patt, fromMigrate);
+ logOpForDbHash(txn, ns);
+ if (strstr(ns, ".system.js")) {
+ Scope::storedFuncMod(txn);
+ }
+}
+
+OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ OpTime lastOptime;
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ lastOptime = replCoord->getMyLastOptime();
+ invariant(!ops.empty());
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lk(txn->lockState(), "local", MODE_X);
+
+ if (localOplogRSCollection == 0) {
+ Client::Context ctx(txn, rsoplog);
- OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- OpTime lastOptime;
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- lastOptime = replCoord->getMyLastOptime();
- invariant(!ops.empty());
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lk(txn->lockState(), "local", MODE_X);
-
- if ( localOplogRSCollection == 0 ) {
- Client::Context ctx(txn, rsoplog);
-
- localDB = ctx.db();
- verify( localDB );
- localOplogRSCollection = localDB->getCollection(rsoplog);
- massert(13389,
- "local.oplog.rs missing. did you drop it? if so restart server",
- localOplogRSCollection);
- }
+ localDB = ctx.db();
+ verify(localDB);
+ localOplogRSCollection = localDB->getCollection(rsoplog);
+ massert(13389,
+ "local.oplog.rs missing. did you drop it? if so restart server",
+ localOplogRSCollection);
+ }
- Client::Context ctx(txn, rsoplog, localDB);
- WriteUnitOfWork wunit(txn);
+ Client::Context ctx(txn, rsoplog, localDB);
+ WriteUnitOfWork wunit(txn);
- for (std::deque<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- const BSONObj& op = *it;
- const OpTime ts = op["ts"]._opTime();
+ for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ const BSONObj& op = *it;
+ const OpTime ts = op["ts"]._opTime();
- checkOplogInsert(localOplogRSCollection->insertDocument(txn, op, false));
+ checkOplogInsert(localOplogRSCollection->insertDocument(txn, op, false));
- if (!(lastOptime < ts)) {
- severe() << "replication oplog stream went back in time. "
- "previous timestamp: " << lastOptime << " newest timestamp: " << ts
- << ". Op being applied: " << op;
- fassertFailedNoTrace(18905);
- }
- lastOptime = ts;
+ if (!(lastOptime < ts)) {
+ severe() << "replication oplog stream went back in time. "
+ "previous timestamp: " << lastOptime << " newest timestamp: " << ts
+ << ". Op being applied: " << op;
+ fassertFailedNoTrace(18905);
}
- wunit.commit();
+ lastOptime = ts;
+ }
+ wunit.commit();
- BackgroundSync* bgsync = BackgroundSync::get();
- // Keep this up-to-date, in case we step up to primary.
- long long hash = ops.back()["h"].numberLong();
- bgsync->setLastAppliedHash(hash);
+ BackgroundSync* bgsync = BackgroundSync::get();
+ // Keep this up-to-date, in case we step up to primary.
+ long long hash = ops.back()["h"].numberLong();
+ bgsync->setLastAppliedHash(hash);
- ctx.getClient()->setLastOp(lastOptime);
+ ctx.getClient()->setLastOp(lastOptime);
- replCoord->setMyLastOptime(lastOptime);
- setNewOptime(lastOptime);
+ replCoord->setMyLastOptime(lastOptime);
+ setNewOptime(lastOptime);
- return lastOptime;
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "write oplog entry", rsoplog);
+ return lastOptime;
}
-
- void createOplog(OperationContext* txn) {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
-
- const char * ns = "local.oplog.$main";
-
- const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
- bool rs = !replSettings.replSet.empty();
- if( rs )
- ns = rsoplog;
-
- Client::Context ctx(txn, ns);
- Collection* collection = ctx.db()->getCollection( ns );
-
- if ( collection ) {
-
- if (replSettings.oplogSize != 0) {
- const CollectionOptions oplogOpts =
- collection->getCatalogEntry()->getCollectionOptions(txn);
-
- int o = (int)(oplogOpts.cappedSize / ( 1024 * 1024 ) );
- int n = (int)(replSettings.oplogSize / (1024 * 1024));
- if ( n != o ) {
- stringstream ss;
- ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog";
- log() << ss.str() << endl;
- throw UserException( 13257 , ss.str() );
- }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "write oplog entry", rsoplog);
+}
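
The calling contract above: the batch must be non-empty and strictly ascending in ts, or the monotonicity fassert (18905) trips. An illustrative wrapper (the helper name is ours, not part of this patch):

    OpTime flushAppliedBatch(OperationContext* txn, const std::deque<BSONObj>& ops) {
        // Precondition: !ops.empty(), and each op's "ts" is strictly greater
        // than the previous one (and than our current last optime).
        OpTime last = writeOpsToOplog(txn, ops);
        // 'last' equals ops.back()["ts"] and has been registered with the
        // replication coordinator and the global optime.
        return last;
    }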
+
+void createOplog(OperationContext* txn) {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+
+ const char* ns = "local.oplog.$main";
+
+ const ReplSettings& replSettings = getGlobalReplicationCoordinator()->getSettings();
+ bool rs = !replSettings.replSet.empty();
+ if (rs)
+ ns = rsoplog;
+
+ Client::Context ctx(txn, ns);
+ Collection* collection = ctx.db()->getCollection(ns);
+
+ if (collection) {
+ if (replSettings.oplogSize != 0) {
+ const CollectionOptions oplogOpts =
+ collection->getCatalogEntry()->getCollectionOptions(txn);
+
+ int o = (int)(oplogOpts.cappedSize / (1024 * 1024));
+ int n = (int)(replSettings.oplogSize / (1024 * 1024));
+ if (n != o) {
+ stringstream ss;
+ ss << "cmdline oplogsize (" << n << ") different than existing (" << o
+ << ") see: http://dochub.mongodb.org/core/increase-oplog";
+ log() << ss.str() << endl;
+ throw UserException(13257, ss.str());
}
-
- if ( !rs )
- initOpTimeFromOplog(txn, ns);
- return;
}
- /* create an oplog collection, if it doesn't yet exist. */
- long long sz = 0;
- if ( replSettings.oplogSize != 0 ) {
- sz = replSettings.oplogSize;
- }
- else {
- /* not specified. pick a default size */
- sz = 50LL * 1024LL * 1024LL;
- if ( sizeof(int *) >= 8 ) {
+ if (!rs)
+ initOpTimeFromOplog(txn, ns);
+ return;
+ }
+
+ /* create an oplog collection, if it doesn't yet exist. */
+ long long sz = 0;
+ if (replSettings.oplogSize != 0) {
+ sz = replSettings.oplogSize;
+ } else {
+ /* not specified. pick a default size */
+ sz = 50LL * 1024LL * 1024LL;
+ if (sizeof(int*) >= 8) {
#if defined(__APPLE__)
- // typically these are desktops (dev machines), so keep it smallish
- sz = (256-64) * 1024 * 1024;
+ // typically these are desktops (dev machines), so keep it smallish
+ sz = (256 - 64) * 1024 * 1024;
#else
- sz = 990LL * 1024 * 1024;
- double free =
- File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
- long long fivePct = static_cast<long long>( free * 0.05 );
- if ( fivePct > sz )
- sz = fivePct;
- // we use 5% of free space up to 50GB (1TB free)
- static long long upperBound = 50LL * 1024 * 1024 * 1024;
- if (fivePct > upperBound)
- sz = upperBound;
+ sz = 990LL * 1024 * 1024;
+ double free = File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
+ long long fivePct = static_cast<long long>(free * 0.05);
+ if (fivePct > sz)
+ sz = fivePct;
+ // we use 5% of free space up to 50GB (1TB free)
+ static long long upperBound = 50LL * 1024 * 1024 * 1024;
+ if (fivePct > upperBound)
+ sz = upperBound;
#endif
- }
}
-
- log() << "******" << endl;
- log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." << endl;
-
- CollectionOptions options;
- options.capped = true;
- options.cappedSize = sz;
- options.autoIndexId = CollectionOptions::NO;
-
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- WriteUnitOfWork uow( txn );
- invariant(ctx.db()->createCollection(txn, ns, options));
- if( !rs )
- logOp(txn, "n", "", BSONObj() );
- uow.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createCollection", ns);
-
- /* sync here so we don't get any surprising lag later when we try to sync */
- StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
- storageEngine->flushAllFiles(true);
- log() << "******" << endl;
}
- // -------------------------------------
-
- /** @param fromRepl false if from ApplyOpsCmd
-        @return true if it was an update that should have happened and the document DNE. see replset initial sync code.
- */
- bool applyOperation_inlock(OperationContext* txn,
- Database* db,
- const BSONObj& op,
- bool fromRepl,
- bool convertUpdateToUpsert) {
- LOG(3) << "applying op: " << op << endl;
- bool failedUpdate = false;
-
- OpCounters * opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
-
- const char *names[] = { "o", "ns", "op", "b", "o2" };
- BSONElement fields[5];
- op.getFields(5, names, fields);
- BSONElement& fieldO = fields[0];
- BSONElement& fieldNs = fields[1];
- BSONElement& fieldOp = fields[2];
- BSONElement& fieldB = fields[3];
- BSONElement& fieldO2 = fields[4];
-
- BSONObj o;
- if( fieldO.isABSONObj() )
- o = fieldO.embeddedObject();
-
- const char *ns = fieldNs.valuestrsafe();
-
- BSONObj o2;
- if (fieldO2.isABSONObj())
- o2 = fieldO2.Obj();
-
- bool valueB = fieldB.booleanSafe();
-
- if (nsIsFull(ns)) {
- if (supportsDocLocking()) {
-                // WiredTiger and others require MODE_IX, since the applier threads driving
-                // this allow writes to the same collection on any thread.
- invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_IX));
+ log() << "******" << endl;
+ log() << "creating replication oplog of size: " << (int)(sz / (1024 * 1024)) << "MB..." << endl;
+
+ CollectionOptions options;
+ options.capped = true;
+ options.cappedSize = sz;
+ options.autoIndexId = CollectionOptions::NO;
+
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork uow(txn);
+ invariant(ctx.db()->createCollection(txn, ns, options));
+ if (!rs)
+ logOp(txn, "n", "", BSONObj());
+ uow.commit();
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "createCollection", ns);
+
+ /* sync here so we don't get any surprising lag later when we try to sync */
+ StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
+ storageEngine->flushAllFiles(true);
+ log() << "******" << endl;
+}
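
On 64-bit non-Apple builds the default above works out to max(990MB, 5% of free disk), capped at 50GB. The same arithmetic as a standalone sketch:

    long long defaultOplogSizeBytes(double freeBytes) {
        long long sz = 990LL * 1024 * 1024;  // 990MB floor
        long long fivePct = static_cast<long long>(freeBytes * 0.05);
        if (fivePct > sz)
            sz = fivePct;
        static const long long upperBound = 50LL * 1024 * 1024 * 1024;  // 50GB cap
        if (fivePct > upperBound)
            sz = upperBound;
        return sz;  // e.g. 10GB free -> 990MB, 100GB free -> 5GB, 2TB free -> 50GB
    }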
+
+// -------------------------------------
+
+/** @param fromRepl false if from ApplyOpsCmd
+    @return true if it was an update that should have happened and the document DNE. see replset initial sync code.
+ */
+bool applyOperation_inlock(OperationContext* txn,
+ Database* db,
+ const BSONObj& op,
+ bool fromRepl,
+ bool convertUpdateToUpsert) {
+ LOG(3) << "applying op: " << op << endl;
+ bool failedUpdate = false;
+
+ OpCounters* opCounters = fromRepl ? &replOpCounters : &globalOpCounters;
+
+ const char* names[] = {"o", "ns", "op", "b", "o2"};
+ BSONElement fields[5];
+ op.getFields(5, names, fields);
+ BSONElement& fieldO = fields[0];
+ BSONElement& fieldNs = fields[1];
+ BSONElement& fieldOp = fields[2];
+ BSONElement& fieldB = fields[3];
+ BSONElement& fieldO2 = fields[4];
+
+ BSONObj o;
+ if (fieldO.isABSONObj())
+ o = fieldO.embeddedObject();
+
+ const char* ns = fieldNs.valuestrsafe();
+
+ BSONObj o2;
+ if (fieldO2.isABSONObj())
+ o2 = fieldO2.Obj();
+
+ bool valueB = fieldB.booleanSafe();
+
+ if (nsIsFull(ns)) {
+ if (supportsDocLocking()) {
+            // WiredTiger and others require MODE_IX, since the applier threads driving
+            // this allow writes to the same collection on any thread.
+ invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_IX));
+ } else {
+ // mmapV1 ensures that all operations to the same collection are executed from
+ // the same worker thread, so it takes an exclusive lock (MODE_X)
+ invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_X));
+ }
+ }
+ Collection* collection = db->getCollection(ns);
+ IndexCatalog* indexCatalog = collection == NULL ? NULL : collection->getIndexCatalog();
+
+ // operation type -- see logOp() comments for types
+ const char* opType = fieldOp.valuestrsafe();
+
+ if (*opType == 'i') {
+ opCounters->gotInsert();
+
+ const char* p = strchr(ns, '.');
+ if (p && nsToCollectionSubstring(p) == "system.indexes") {
+ if (o["background"].trueValue()) {
+ IndexBuilder* builder = new IndexBuilder(o);
+ // This spawns a new thread and returns immediately.
+ builder->go();
+ // Wait for thread to start and register itself
+ Lock::TempRelease release(txn->lockState());
+ IndexBuilder::waitForBgIndexStarting();
} else {
- // mmapV1 ensures that all operations to the same collection are executed from
- // the same worker thread, so it takes an exclusive lock (MODE_X)
- invariant(txn->lockState()->isCollectionLockedForMode(ns, MODE_X));
+ IndexBuilder builder(o);
+ Status status = builder.buildInForeground(txn, db);
+ uassertStatusOK(status);
}
- }
- Collection* collection = db->getCollection( ns );
- IndexCatalog* indexCatalog = collection == NULL ? NULL : collection->getIndexCatalog();
-
- // operation type -- see logOp() comments for types
- const char *opType = fieldOp.valuestrsafe();
-
- if ( *opType == 'i' ) {
- opCounters->gotInsert();
-
- const char *p = strchr(ns, '.');
- if ( p && nsToCollectionSubstring( p ) == "system.indexes" ) {
- if (o["background"].trueValue()) {
- IndexBuilder* builder = new IndexBuilder(o);
- // This spawns a new thread and returns immediately.
- builder->go();
- // Wait for thread to start and register itself
- Lock::TempRelease release(txn->lockState());
- IndexBuilder::waitForBgIndexStarting();
- }
- else {
- IndexBuilder builder(o);
- Status status = builder.buildInForeground(txn, db);
- uassertStatusOK(status);
+ } else {
+ // do upserts for inserts as we might get replayed more than once
+ OpDebug debug;
+ BSONElement _id;
+ if (!o.getObjectID(_id)) {
+ /* No _id. This will be very slow. */
+ Timer t;
+
+ const NamespaceString requestNs(ns);
+ UpdateRequest request(requestNs);
+
+ request.setQuery(o);
+ request.setUpdates(o);
+ request.setUpsert();
+ request.setFromReplication();
+ UpdateLifecycleImpl updateLifecycle(true, requestNs);
+ request.setLifecycle(&updateLifecycle);
+
+ update(txn, db, request, &debug);
+
+ if (t.millis() >= 2) {
+ RARELY OCCASIONALLY log()
+ << "warning, repl doing slow updates (no _id field) for " << ns << endl;
}
+ } else {
+            /* todo : it may be better to do an insert here and then, on a duplicate
+               key exception, do an update. very few upserts will not be inserts...
+            */
+ BSONObjBuilder b;
+ b.append(_id);
+
+ const NamespaceString requestNs(ns);
+ UpdateRequest request(requestNs);
+
+ request.setQuery(b.done());
+ request.setUpdates(o);
+ request.setUpsert();
+ request.setFromReplication();
+ UpdateLifecycleImpl updateLifecycle(true, requestNs);
+ request.setLifecycle(&updateLifecycle);
+
+ update(txn, db, request, &debug);
}
- else {
- // do upserts for inserts as we might get replayed more than once
- OpDebug debug;
- BSONElement _id;
- if( !o.getObjectID(_id) ) {
- /* No _id. This will be very slow. */
- Timer t;
-
- const NamespaceString requestNs(ns);
- UpdateRequest request(requestNs);
-
- request.setQuery(o);
- request.setUpdates(o);
- request.setUpsert();
- request.setFromReplication();
- UpdateLifecycleImpl updateLifecycle(true, requestNs);
- request.setLifecycle(&updateLifecycle);
-
- update(txn, db, request, &debug);
-
- if( t.millis() >= 2 ) {
- RARELY OCCASIONALLY log() << "warning, repl doing slow updates (no _id field) for " << ns << endl;
- }
+ }
+ } else if (*opType == 'u') {
+ opCounters->gotUpdate();
+
+ OpDebug debug;
+ BSONObj updateCriteria = o2;
+ const bool upsert = valueB || convertUpdateToUpsert;
+
+ const NamespaceString requestNs(ns);
+ UpdateRequest request(requestNs);
+
+ request.setQuery(updateCriteria);
+ request.setUpdates(o);
+ request.setUpsert(upsert);
+ request.setFromReplication();
+ UpdateLifecycleImpl updateLifecycle(true, requestNs);
+ request.setLifecycle(&updateLifecycle);
+
+ UpdateResult ur = update(txn, db, request, &debug);
+
+ if (ur.numMatched == 0) {
+ if (ur.modifiers) {
+ if (updateCriteria.nFields() == 1) {
+ // was a simple { _id : ... } update criteria
+ failedUpdate = true;
+ log() << "replication failed to apply update: " << op.toString() << endl;
}
+                // need to check whether the document is present so we can set
+                // failedUpdate correctly. note that this adds some overhead for the
+                // extra check in some cases, such as an updateCriteria of the form
+                //   { _id: ..., x: { $size: ... } }
+                // thus this is not ideal.
else {
- /* todo : it may be better to do an insert here, and then catch the dup key exception and do update
- then. very few upserts will not be inserts...
- */
- BSONObjBuilder b;
- b.append(_id);
-
- const NamespaceString requestNs(ns);
- UpdateRequest request(requestNs);
-
- request.setQuery(b.done());
- request.setUpdates(o);
- request.setUpsert();
- request.setFromReplication();
- UpdateLifecycleImpl updateLifecycle(true, requestNs);
- request.setLifecycle(&updateLifecycle);
-
- update(txn, db, request, &debug);
- }
- }
- }
- else if ( *opType == 'u' ) {
- opCounters->gotUpdate();
-
- OpDebug debug;
- BSONObj updateCriteria = o2;
- const bool upsert = valueB || convertUpdateToUpsert;
-
- const NamespaceString requestNs(ns);
- UpdateRequest request(requestNs);
-
- request.setQuery(updateCriteria);
- request.setUpdates(o);
- request.setUpsert(upsert);
- request.setFromReplication();
- UpdateLifecycleImpl updateLifecycle(true, requestNs);
- request.setLifecycle(&updateLifecycle);
-
- UpdateResult ur = update(txn, db, request, &debug);
-
- if( ur.numMatched == 0 ) {
- if( ur.modifiers ) {
- if( updateCriteria.nFields() == 1 ) {
- // was a simple { _id : ... } update criteria
+ if (collection == NULL ||
+ (indexCatalog->haveIdIndex(txn) &&
+ Helpers::findById(txn, collection, updateCriteria).isNull()) ||
+ // capped collections won't have an _id index
+ (!indexCatalog->haveIdIndex(txn) &&
+ Helpers::findOne(txn, collection, updateCriteria, false).isNull())) {
failedUpdate = true;
- log() << "replication failed to apply update: " << op.toString() << endl;
- }
-                    // need to check whether the document is present so we can set failedUpdate
-                    // correctly. note that this adds some overhead for the extra check in some
-                    // cases, such as an updateCriteria of the form
-                    //   { _id: ..., x: { $size: ... } }
-                    // thus this is not ideal.
- else {
- if (collection == NULL ||
- (indexCatalog->haveIdIndex(txn) && Helpers::findById(txn, collection, updateCriteria).isNull()) ||
- // capped collections won't have an _id index
- (!indexCatalog->haveIdIndex(txn) && Helpers::findOne(txn, collection, updateCriteria, false).isNull())) {
- failedUpdate = true;
- log() << "replication couldn't find doc: " << op.toString() << endl;
- }
-
- // Otherwise, it's present; zero objects were updated because of additional specifiers
- // in the query for idempotence
+ log() << "replication couldn't find doc: " << op.toString() << endl;
}
+
+ // Otherwise, it's present; zero objects were updated because of additional specifiers
+ // in the query for idempotence
}
- else {
- // this could happen benignly on an oplog duplicate replay of an upsert
- // (because we are idempotent),
-                // if a regular non-mod update fails, the item is (presumably) missing.
- if( !upsert ) {
- failedUpdate = true;
- log() << "replication update of non-mod failed: " << op.toString() << endl;
- }
+ } else {
+        // this could happen benignly on an oplog duplicate replay of an upsert
+        // (because we are idempotent);
+        // if a regular non-mod update fails, the item is (presumably) missing.
+ if (!upsert) {
+ failedUpdate = true;
+ log() << "replication update of non-mod failed: " << op.toString() << endl;
}
}
}
- else if ( *opType == 'd' ) {
- opCounters->gotDelete();
- if ( opType[1] == 0 )
- deleteObjects(txn, db, ns, o, PlanExecutor::YIELD_MANUAL, /*justOne*/ valueB);
- else
- verify( opType[1] == 'b' ); // "db" advertisement
- }
- else if ( *opType == 'c' ) {
- bool done = false;
- while (!done) {
- BufBuilder bb;
- BSONObjBuilder runCommandResult;
-
- // Applying commands in repl is done under Global W-lock, so it is safe to not
- // perform the current DB checks after reacquiring the lock.
- invariant(txn->lockState()->isW());
-
- _runCommands(txn, ns, o, bb, runCommandResult, true, 0);
- // _runCommands takes care of adjusting opcounters for command counting.
- Status status = Command::getStatusFromCommandResult(runCommandResult.done());
- switch (status.code()) {
+ } else if (*opType == 'd') {
+ opCounters->gotDelete();
+ if (opType[1] == 0)
+ deleteObjects(txn, db, ns, o, PlanExecutor::YIELD_MANUAL, /*justOne*/ valueB);
+ else
+ verify(opType[1] == 'b'); // "db" advertisement
+ } else if (*opType == 'c') {
+ bool done = false;
+ while (!done) {
+ BufBuilder bb;
+ BSONObjBuilder runCommandResult;
+
+ // Applying commands in repl is done under Global W-lock, so it is safe to not
+ // perform the current DB checks after reacquiring the lock.
+ invariant(txn->lockState()->isW());
+
+ _runCommands(txn, ns, o, bb, runCommandResult, true, 0);
+ // _runCommands takes care of adjusting opcounters for command counting.
+ Status status = Command::getStatusFromCommandResult(runCommandResult.done());
+ switch (status.code()) {
case ErrorCodes::WriteConflict: {
// Need to throw this up to a higher level where it will be caught and the
// operation retried.
@@ -734,74 +718,63 @@ namespace {
break;
}
default:
- warning() << "repl Failed command " << o << " on " <<
- nsToDatabaseSubstring(ns) << " with status " << status <<
- " during oplog application";
- // fallthrough
+ warning() << "repl Failed command " << o << " on " << nsToDatabaseSubstring(ns)
+ << " with status " << status << " during oplog application";
+ // fallthrough
case ErrorCodes::OK:
done = true;
break;
- }
}
}
- else if ( *opType == 'n' ) {
- // no op
- }
- else {
- throw MsgAssertionException( 14825 , ErrorMsg("error in applyOperation : unknown opType ", *opType) );
- }
-
- // AuthorizationManager's logOp method registers a RecoveryUnit::Change
- // and to do so we need to have begun a UnitOfWork
- WriteUnitOfWork wuow(txn);
- getGlobalAuthorizationManager()->logOp(
- txn,
- opType,
- ns,
- o,
- fieldO2.isABSONObj() ? &o2 : NULL,
- !fieldB.eoo() ? &valueB : NULL );
- wuow.commit();
-
- return failedUpdate;
+ } else if (*opType == 'n') {
+ // no op
+ } else {
+ throw MsgAssertionException(14825,
+ ErrorMsg("error in applyOperation : unknown opType ", *opType));
}
- void waitUpToOneSecondForOptimeChange(const OpTime& referenceTime) {
- mutex::scoped_lock lk(newOpMutex);
+ // AuthorizationManager's logOp method registers a RecoveryUnit::Change
+ // and to do so we need to have begun a UnitOfWork
+ WriteUnitOfWork wuow(txn);
+ getGlobalAuthorizationManager()->logOp(
+ txn, opType, ns, o, fieldO2.isABSONObj() ? &o2 : NULL, !fieldB.eoo() ? &valueB : NULL);
+ wuow.commit();
- while (referenceTime == getLastSetOptime()) {
- if (!newOptimeNotifier.timed_wait(lk.boost(),
- boost::posix_time::seconds(1)))
- return;
- }
- }
+ return failedUpdate;
+}
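
An illustrative applier-side call (the wrapper is ours; locking and the Database lookup are elided). Passing convertUpdateToUpsert=true is how batch re-application keeps replayed updates idempotent:

    bool applyOne(OperationContext* txn, Database* db, const BSONObj& op) {
        // fromRepl=true counts the op against replOpCounters; with
        // convertUpdateToUpsert=true, a replayed 'u' op that no longer
        // matches is upserted instead of reported as a failed update.
        return applyOperation_inlock(
            txn, db, op, /*fromRepl=*/true, /*convertUpdateToUpsert=*/true);
    }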
- void setNewOptime(const OpTime& newTime) {
- mutex::scoped_lock lk(newOpMutex);
- setGlobalOptime(newTime);
- newOptimeNotifier.notify_all();
+void waitUpToOneSecondForOptimeChange(const OpTime& referenceTime) {
+ mutex::scoped_lock lk(newOpMutex);
+
+ while (referenceTime == getLastSetOptime()) {
+ if (!newOptimeNotifier.timed_wait(lk.boost(), boost::posix_time::seconds(1)))
+ return;
}
+}
- void initOpTimeFromOplog(OperationContext* txn, const std::string& oplogNS) {
- DBDirectClient c(txn);
- BSONObj lastOp = c.findOne(oplogNS,
- Query().sort(reverseNaturalObj),
- NULL,
- QueryOption_SlaveOk);
+void setNewOptime(const OpTime& newTime) {
+ mutex::scoped_lock lk(newOpMutex);
+ setGlobalOptime(newTime);
+ newOptimeNotifier.notify_all();
+}
- if (!lastOp.isEmpty()) {
- LOG(1) << "replSet setting last OpTime";
- setNewOptime(lastOp[ "ts" ].date());
- }
+void initOpTimeFromOplog(OperationContext* txn, const std::string& oplogNS) {
+ DBDirectClient c(txn);
+ BSONObj lastOp = c.findOne(oplogNS, Query().sort(reverseNaturalObj), NULL, QueryOption_SlaveOk);
+
+ if (!lastOp.isEmpty()) {
+ LOG(1) << "replSet setting last OpTime";
+ setNewOptime(lastOp["ts"].date());
}
+}
- void oplogCheckCloseDatabase(OperationContext* txn, Database* db) {
- invariant(txn->lockState()->isW());
+void oplogCheckCloseDatabase(OperationContext* txn, Database* db) {
+ invariant(txn->lockState()->isW());
- localDB = NULL;
- localOplogMainCollection = NULL;
- localOplogRSCollection = NULL;
- }
+ localDB = NULL;
+ localOplogMainCollection = NULL;
+ localOplogRSCollection = NULL;
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h
index 07b0723417a..f6f7bc3c82a 100644
--- a/src/mongo/db/repl/oplog.h
+++ b/src/mongo/db/repl/oplog.h
@@ -33,96 +33,96 @@
#include <string>
namespace mongo {
- class BSONObj;
- class Database;
- class OperationContext;
- class OpTime;
+class BSONObj;
+class Database;
+class OperationContext;
+class OpTime;
namespace repl {
-    // Redefines the logOp() function pointer to the master/slave implementation.
- void oldRepl(); // master-slave
-
- // Create a new capped collection for the oplog if it doesn't yet exist.
- // This will be either local.oplog.rs (replica sets) or local.oplog.$main (master/slave)
- // If the collection already exists, set the 'last' OpTime if master/slave (side effect!)
- void createOplog(OperationContext* txn);
-
- // This function writes ops into the replica-set oplog;
- // used internally by replication secondaries after they have applied ops. Updates the global
- // optime.
- // Returns the optime for the last op inserted.
- OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops);
-
- const char rsoplog[] = "local.oplog.rs";
- static const int OPLOG_VERSION = 2;
-
- /** Log an operation to the local oplog
-
- @param opstr
- "i" insert
- "u" update
- "d" delete
- "c" db cmd
- "n" no-op
- "db" declares presence of a database (ns is set to the db name + '.')
-
- For 'u' records, 'obj' captures the mutation made to the object but not
-       the object itself. In that case, we also provide 'fullObj', which is the
- image of the object _after_ the mutation logged here was applied.
-
- See _logOp() in oplog.cpp for more details.
- */
- void logOp( OperationContext* txn,
- const char *opstr,
- const char *ns,
- const BSONObj& obj,
- BSONObj *patt = NULL,
- bool *b = NULL,
- bool fromMigrate = false);
-
- // Log an empty no-op operation to the local oplog
- void logKeepalive(OperationContext* txn);
-
- /** puts obj in the oplog as a comment (a no-op). Just for diags.
- convention is
- { msg : "text", ... }
- */
- void logOpComment(OperationContext* txn, const BSONObj& obj);
-
- // Same as logOpComment, except only works for replsets
- void logOpInitiate(OperationContext* txn, const BSONObj& obj);
-
- // Flush out the cached pointers to the local database and oplog.
- // Used by the closeDatabase command to ensure we don't cache closed things.
- void oplogCheckCloseDatabase(OperationContext* txn, Database * db);
-
- /**
-     * Takes an op and applies it locally.
-     * Used for applying ops from an oplog.
-     * @param fromRepl really from replication or for testing/internal/command/etc...
-     * @param convertUpdateToUpsert convert some updates to upserts for idempotency reasons
-     * Returns true if the op was an update that could not be applied (failure)
- */
- bool applyOperation_inlock(OperationContext* txn,
- Database* db,
- const BSONObj& op,
- bool fromRepl = true,
- bool convertUpdateToUpsert = false);
-
- /**
-     * Waits up to one second for the OpTime from the oplog to change.
- */
- void waitUpToOneSecondForOptimeChange(const OpTime& referenceTime);
-
- /**
- * Initializes the global OpTime with the value from the timestamp of the last oplog entry.
- */
- void initOpTimeFromOplog(OperationContext* txn, const std::string& oplogNS);
-
- /**
- * Sets the global OpTime to be 'newTime'.
- */
- void setNewOptime(const OpTime& newTime);
-} // namespace repl
-} // namespace mongo
+// Redefines the logOp() function pointer to the master/slave implementation.
+void oldRepl(); // master-slave
+
+// Create a new capped collection for the oplog if it doesn't yet exist.
+// This will be either local.oplog.rs (replica sets) or local.oplog.$main (master/slave)
+// If the collection already exists, set the 'last' OpTime if master/slave (side effect!)
+void createOplog(OperationContext* txn);
+
+// This function writes ops into the replica-set oplog;
+// used internally by replication secondaries after they have applied ops. Updates the global
+// optime.
+// Returns the optime for the last op inserted.
+OpTime writeOpsToOplog(OperationContext* txn, const std::deque<BSONObj>& ops);
+
+const char rsoplog[] = "local.oplog.rs";
+static const int OPLOG_VERSION = 2;
+
+/** Log an operation to the local oplog
+
+ @param opstr
+ "i" insert
+ "u" update
+ "d" delete
+ "c" db cmd
+ "n" no-op
+ "db" declares presence of a database (ns is set to the db name + '.')
+
+ For 'u' records, 'obj' captures the mutation made to the object but not
+   the object itself. In that case, we also provide 'fullObj', which is the
+ image of the object _after_ the mutation logged here was applied.
+
+ See _logOp() in oplog.cpp for more details.
+*/
+void logOp(OperationContext* txn,
+ const char* opstr,
+ const char* ns,
+ const BSONObj& obj,
+ BSONObj* patt = NULL,
+ bool* b = NULL,
+ bool fromMigrate = false);
+
+// Log an empty no-op operation to the local oplog
+void logKeepalive(OperationContext* txn);
+
+/** puts obj in the oplog as a comment (a no-op). Just for diags.
+ convention is
+ { msg : "text", ... }
+*/
+void logOpComment(OperationContext* txn, const BSONObj& obj);
+
+// Same as logOpComment, except only works for replsets
+void logOpInitiate(OperationContext* txn, const BSONObj& obj);
+
+// Flush out the cached pointers to the local database and oplog.
+// Used by the closeDatabase command to ensure we don't cache closed things.
+void oplogCheckCloseDatabase(OperationContext* txn, Database* db);
+
+/**
+ * Takes an op and applies it locally.
+ * Used for applying ops from an oplog.
+ * @param fromRepl really from replication or for testing/internal/command/etc...
+ * @param convertUpdateToUpsert convert some updates to upserts for idempotency reasons
+ * Returns true if the op was an update that could not be applied (failure)
+ */
+bool applyOperation_inlock(OperationContext* txn,
+ Database* db,
+ const BSONObj& op,
+ bool fromRepl = true,
+ bool convertUpdateToUpsert = false);
+
+/**
+ * Waits up to one second for the OpTime from the oplog to change.
+ */
+void waitUpToOneSecondForOptimeChange(const OpTime& referenceTime);
+
+/**
+ * Initializes the global OpTime with the value from the timestamp of the last oplog entry.
+ */
+void initOpTimeFromOplog(OperationContext* txn, const std::string& oplogNS);
+
+/**
+ * Sets the global OpTime to be 'newTime'.
+ */
+void setNewOptime(const OpTime& newTime);
+} // namespace repl
+} // namespace mongo
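
A small example of the diagnostic no-op entry point declared above (the message text is arbitrary):

    // Ends up in the oplog as { ..., op: "n", ns: "", o: { msg: "checkpoint" } }.
    logOpComment(txn, BSON("msg" << "checkpoint"));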
diff --git a/src/mongo/db/repl/oplogreader.cpp b/src/mongo/db/repl/oplogreader.cpp
index 0370d2ae098..ef10b40b2fe 100644
--- a/src/mongo/db/repl/oplogreader.cpp
+++ b/src/mongo/db/repl/oplogreader.cpp
@@ -52,171 +52,159 @@
namespace mongo {
- using boost::shared_ptr;
- using std::endl;
- using std::string;
+using boost::shared_ptr;
+using std::endl;
+using std::string;
namespace repl {
-    // number of readers created;
-    // this happens when the sync source changes, on a reconfig/network error, or when the cursor dies
- static Counter64 readersCreatedStats;
- static ServerStatusMetricField<Counter64> displayReadersCreated(
- "repl.network.readersCreated",
- &readersCreatedStats );
+// number of readers created;
+// this happens when the sync source changes, on a reconfig/network error, or when the cursor dies
+static Counter64 readersCreatedStats;
+static ServerStatusMetricField<Counter64> displayReadersCreated("repl.network.readersCreated",
+ &readersCreatedStats);
- static const BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}");
+static const BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}");
- bool replAuthenticate(DBClientBase *conn) {
- if (!getGlobalAuthorizationManager()->isAuthEnabled())
- return true;
+bool replAuthenticate(DBClientBase* conn) {
+ if (!getGlobalAuthorizationManager()->isAuthEnabled())
+ return true;
- if (!isInternalAuthSet())
- return false;
- return authenticateInternalUser(conn);
- }
+ if (!isInternalAuthSet())
+ return false;
+ return authenticateInternalUser(conn);
+}
- OplogReader::OplogReader() {
- _tailingQueryOptions = QueryOption_SlaveOk;
- _tailingQueryOptions |= QueryOption_CursorTailable | QueryOption_OplogReplay;
-
-        /* TODO: maybe we shouldn't use slaveOk? */
- _tailingQueryOptions |= QueryOption_AwaitData;
+OplogReader::OplogReader() {
+ _tailingQueryOptions = QueryOption_SlaveOk;
+ _tailingQueryOptions |= QueryOption_CursorTailable | QueryOption_OplogReplay;
- readersCreatedStats.increment();
- }
+    /* TODO: maybe we shouldn't use slaveOk? */
+ _tailingQueryOptions |= QueryOption_AwaitData;
- bool OplogReader::connect(const HostAndPort& host) {
- if (conn() == NULL || _host != host) {
- resetConnection();
- _conn = shared_ptr<DBClientConnection>(new DBClientConnection(false,
- tcp_timeout));
- string errmsg;
- if ( !_conn->connect(host, errmsg) ||
- (getGlobalAuthorizationManager()->isAuthEnabled() &&
- !replAuthenticate(_conn.get())) ) {
-
- resetConnection();
- log() << "repl: " << errmsg << endl;
- return false;
- }
- _host = host;
- }
- return true;
- }
+ readersCreatedStats.increment();
+}
- void OplogReader::tailCheck() {
- if( cursor.get() && cursor->isDead() ) {
- log() << "repl: old cursor isDead, will initiate a new one" << std::endl;
- resetCursor();
+bool OplogReader::connect(const HostAndPort& host) {
+ if (conn() == NULL || _host != host) {
+ resetConnection();
+ _conn = shared_ptr<DBClientConnection>(new DBClientConnection(false, tcp_timeout));
+ string errmsg;
+ if (!_conn->connect(host, errmsg) ||
+ (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate(_conn.get()))) {
+ resetConnection();
+ log() << "repl: " << errmsg << endl;
+ return false;
}
+ _host = host;
}
+ return true;
+}
- void OplogReader::query(const char *ns,
- Query query,
- int nToReturn,
- int nToSkip,
- const BSONObj* fields) {
- cursor.reset(
- _conn->query(ns, query, nToReturn, nToSkip, fields, QueryOption_SlaveOk).release()
- );
- }
-
- void OplogReader::tailingQuery(const char *ns, const BSONObj& query, const BSONObj* fields ) {
- verify( !haveCursor() );
- LOG(2) << "repl: " << ns << ".find(" << query.toString() << ')' << endl;
- cursor.reset( _conn->query( ns, query, 0, 0, fields, _tailingQueryOptions ).release() );
- }
-
- void OplogReader::tailingQueryGTE(const char *ns, OpTime optime, const BSONObj* fields ) {
- BSONObjBuilder gte;
- gte.appendTimestamp("$gte", optime.asDate());
- BSONObjBuilder query;
- query.append("ts", gte.done());
- tailingQuery(ns, query.done(), fields);
+void OplogReader::tailCheck() {
+ if (cursor.get() && cursor->isDead()) {
+ log() << "repl: old cursor isDead, will initiate a new one" << std::endl;
+ resetCursor();
}
-
- HostAndPort OplogReader::getHost() const {
- return _host;
- }
-
- void OplogReader::connectToSyncSource(OperationContext* txn,
- OpTime lastOpTimeFetched,
- ReplicationCoordinator* replCoord) {
- const OpTime sentinel(Milliseconds(curTimeMillis64()).total_seconds(), 0);
- OpTime oldestOpTimeSeen = sentinel;
-
- invariant(conn() == NULL);
-
- while (true) {
- HostAndPort candidate = replCoord->chooseNewSyncSource(lastOpTimeFetched);
-
- if (candidate.empty()) {
- if (oldestOpTimeSeen == sentinel) {
- // If, in this invocation of connectToSyncSource(), we did not successfully
- // connect to any node ahead of us,
- // we apparently have no sync sources to connect to.
- // This situation is common; e.g. if there are no writes to the primary at
- // the moment.
- return;
- }
-
- // Connected to at least one member, but in all cases we were too stale to use them
- // as a sync source.
- log() << "replSet error RS102 too stale to catch up";
- log() << "replSet our last optime : " << lastOpTimeFetched.toStringLong();
- log() << "replSet oldest available is " << oldestOpTimeSeen.toStringLong();
- log() << "replSet "
- "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
- setMinValid(txn, oldestOpTimeSeen);
- bool worked = replCoord->setFollowerMode(MemberState::RS_RECOVERING);
- if (!worked) {
- warning() << "Failed to transition into "
- << MemberState(MemberState::RS_RECOVERING)
- << ". Current state: " << replCoord->getMemberState();
- }
+}
+
+void OplogReader::query(
+ const char* ns, Query query, int nToReturn, int nToSkip, const BSONObj* fields) {
+ cursor.reset(
+ _conn->query(ns, query, nToReturn, nToSkip, fields, QueryOption_SlaveOk).release());
+}
+
+void OplogReader::tailingQuery(const char* ns, const BSONObj& query, const BSONObj* fields) {
+ verify(!haveCursor());
+ LOG(2) << "repl: " << ns << ".find(" << query.toString() << ')' << endl;
+ cursor.reset(_conn->query(ns, query, 0, 0, fields, _tailingQueryOptions).release());
+}
+
+void OplogReader::tailingQueryGTE(const char* ns, OpTime optime, const BSONObj* fields) {
+ BSONObjBuilder gte;
+ gte.appendTimestamp("$gte", optime.asDate());
+ BSONObjBuilder query;
+ query.append("ts", gte.done());
+ tailingQuery(ns, query.done(), fields);
+}
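
Putting the pieces together, resuming a tail from the last fetched optime amounts to the following sketch (candidateHost stands in for a source chosen by the caller, e.g. by connectToSyncSource() below):

    OplogReader reader;
    if (reader.connect(candidateHost)) {
        // Issues local.oplog.rs.find({ ts: { $gte: lastOpTimeFetched } }) with
        // the tailable / await-data / oplog-replay options from the constructor.
        reader.tailingQueryGTE(rsoplog, lastOpTimeFetched);
    }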
+
+HostAndPort OplogReader::getHost() const {
+ return _host;
+}
+
+void OplogReader::connectToSyncSource(OperationContext* txn,
+ OpTime lastOpTimeFetched,
+ ReplicationCoordinator* replCoord) {
+ const OpTime sentinel(Milliseconds(curTimeMillis64()).total_seconds(), 0);
+ OpTime oldestOpTimeSeen = sentinel;
+
+ invariant(conn() == NULL);
+
+ while (true) {
+ HostAndPort candidate = replCoord->chooseNewSyncSource(lastOpTimeFetched);
+
+ if (candidate.empty()) {
+ if (oldestOpTimeSeen == sentinel) {
+ // If, in this invocation of connectToSyncSource(), we did not successfully
+ // connect to any node ahead of us,
+ // we apparently have no sync sources to connect to.
+ // This situation is common; e.g. if there are no writes to the primary at
+ // the moment.
return;
}
- if (!connect(candidate)) {
- LOG(2) << "replSet can't connect to " << candidate.toString() <<
- " to read operations";
- resetConnection();
- replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 10*1000));
- continue;
+ // Connected to at least one member, but in all cases we were too stale to use them
+ // as a sync source.
+ log() << "replSet error RS102 too stale to catch up";
+ log() << "replSet our last optime : " << lastOpTimeFetched.toStringLong();
+ log() << "replSet oldest available is " << oldestOpTimeSeen.toStringLong();
+ log() << "replSet "
+ "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
+ setMinValid(txn, oldestOpTimeSeen);
+ bool worked = replCoord->setFollowerMode(MemberState::RS_RECOVERING);
+ if (!worked) {
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
+ << ". Current state: " << replCoord->getMemberState();
}
- // Read the first (oldest) op and confirm that it's not newer than our last
- // fetched op. Otherwise, we have fallen off the back of that source's oplog.
- BSONObj remoteOldestOp(findOne(rsoplog, Query()));
- BSONElement tsElem(remoteOldestOp["ts"]);
- if (tsElem.type() != Timestamp) {
- // This member's got a bad op in its oplog.
- warning() << "oplog invalid format on node " << candidate.toString();
- resetConnection();
- replCoord->blacklistSyncSource(candidate,
- Date_t(curTimeMillis64() + 600*1000));
- continue;
- }
- OpTime remoteOldOpTime = tsElem._opTime();
-
- if (!lastOpTimeFetched.isNull() && lastOpTimeFetched < remoteOldOpTime) {
- // We're too stale to use this sync source.
- resetConnection();
- replCoord->blacklistSyncSource(candidate,
- Date_t(curTimeMillis64() + 600*1000));
- if (oldestOpTimeSeen > remoteOldOpTime) {
- warning() << "we are too stale to use " << candidate.toString() <<
- " as a sync source";
- oldestOpTimeSeen = remoteOldOpTime;
- }
- continue;
+ return;
+ }
+
+ if (!connect(candidate)) {
+ LOG(2) << "replSet can't connect to " << candidate.toString() << " to read operations";
+ resetConnection();
+ replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 10 * 1000));
+ continue;
+ }
+ // Read the first (oldest) op and confirm that it's not newer than our last
+ // fetched op. Otherwise, we have fallen off the back of that source's oplog.
+ BSONObj remoteOldestOp(findOne(rsoplog, Query()));
+ BSONElement tsElem(remoteOldestOp["ts"]);
+ if (tsElem.type() != Timestamp) {
+        // This member has a bad op in its oplog.
+ warning() << "oplog invalid format on node " << candidate.toString();
+ resetConnection();
+ replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 600 * 1000));
+ continue;
+ }
+ OpTime remoteOldOpTime = tsElem._opTime();
+
+ if (!lastOpTimeFetched.isNull() && lastOpTimeFetched < remoteOldOpTime) {
+ // We're too stale to use this sync source.
+ resetConnection();
+ replCoord->blacklistSyncSource(candidate, Date_t(curTimeMillis64() + 600 * 1000));
+ if (oldestOpTimeSeen > remoteOldOpTime) {
+ warning() << "we are too stale to use " << candidate.toString()
+ << " as a sync source";
+ oldestOpTimeSeen = remoteOldOpTime;
}
+ continue;
+ }
- // Got a valid sync source.
- return;
- } // while (true)
- }
+ // Got a valid sync source.
+ return;
+ } // while (true)
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
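
Aside from whitespace, the reformatted connectToSyncSource() now reads as a single selection loop: ask the coordinator for a candidate, connect, read the candidate's oldest oplog entry, and either accept the source or blacklist it (10 seconds for connection failures, 10 minutes for a bad or too-new oldest op). A minimal caller sketch, under the assumptions that the internal repl headers are available and that rsoplog names the conventional "local.oplog.rs" collection (fetchOneBatch itself is hypothetical, not part of this diff):

    #include "mongo/db/repl/oplogreader.h"

    using namespace mongo;
    using namespace mongo::repl;

    // Hypothetical helper: fetch one batch from whatever sync source is viable.
    void fetchOneBatch(OperationContext* txn,
                       ReplicationCoordinator* replCoord,
                       OpTime lastFetched) {
        OplogReader reader;
        // May add entries to the coordinator's sync-source blacklist.
        reader.connectToSyncSource(txn, lastFetched, replCoord);
        if (reader.conn() == NULL) {
            return;  // no viable source right now; common on an idle set
        }
        reader.tailingQueryGTE("local.oplog.rs", lastFetched);
        while (reader.more()) {
            BSONObj op = reader.nextSafe();
            // ... buffer or apply "op" ...
        }
    }
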
diff --git a/src/mongo/db/repl/oplogreader.h b/src/mongo/db/repl/oplogreader.h
index 322559dba87..435ce408012 100644
--- a/src/mongo/db/repl/oplogreader.h
+++ b/src/mongo/db/repl/oplogreader.h
@@ -39,118 +39,131 @@
namespace mongo {
- extern const BSONObj reverseNaturalObj; // { $natural : -1 }
+extern const BSONObj reverseNaturalObj; // { $natural : -1 }
namespace repl {
- class ReplicationCoordinator;
+class ReplicationCoordinator;
+
+/**
+ * Authenticates conn using the server's cluster-membership credentials.
+ *
+ * Returns true on successful authentication.
+ */
+bool replAuthenticate(DBClientBase* conn);
+
+/* Abstracts the querying of the primary/master's oplog.
+   Still fairly awkward, but a start.
+*/
+
+class OplogReader {
+private:
+ boost::shared_ptr<DBClientConnection> _conn;
+ boost::shared_ptr<DBClientCursor> cursor;
+ int _tailingQueryOptions;
+
+    // If _conn is actively connected, _host represents the HostAndPort of that
+    // connection.
+ HostAndPort _host;
+
+public:
+ OplogReader();
+ ~OplogReader() {}
+ void resetCursor() {
+ cursor.reset();
+ }
+ void resetConnection() {
+ cursor.reset();
+ _conn.reset();
+ _host = HostAndPort();
+ }
+ DBClientConnection* conn() {
+ return _conn.get();
+ }
+ BSONObj findOne(const char* ns, const Query& q) {
+ return conn()->findOne(ns, q, 0, QueryOption_SlaveOk);
+ }
+ BSONObj getLastOp(const char* ns) {
+ return findOne(ns, Query().sort(reverseNaturalObj));
+ }
+
+ /* SO_TIMEOUT (send/recv time out) for our DBClientConnections */
+ static const int tcp_timeout = 30;
+
+ /* ok to call if already connected */
+ bool connect(const HostAndPort& host);
+
+ void tailCheck();
+
+ bool haveCursor() {
+ return cursor.get() != 0;
+ }
+
+ void query(const char* ns, Query query, int nToReturn, int nToSkip, const BSONObj* fields = 0);
+
+ void tailingQuery(const char* ns, const BSONObj& query, const BSONObj* fields = 0);
+
+ void tailingQueryGTE(const char* ns, OpTime t, const BSONObj* fields = 0);
+
+ /* Do a tailing query, but only send the ts field back. */
+ void ghostQueryGTE(const char* ns, OpTime t) {
+ const BSONObj fields = BSON("ts" << 1 << "_id" << 0);
+ return tailingQueryGTE(ns, t, &fields);
+ }
+
+ bool more() {
+ uassert(15910, "Doesn't have cursor for reading oplog", cursor.get());
+ return cursor->more();
+ }
+
+ bool moreInCurrentBatch() {
+ uassert(15911, "Doesn't have cursor for reading oplog", cursor.get());
+ return cursor->moreInCurrentBatch();
+ }
+
+ int currentBatchMessageSize() {
+ if (NULL == cursor->getMessage())
+ return 0;
+ return cursor->getMessage()->size();
+ }
+
+ int getTailingQueryOptions() const {
+ return _tailingQueryOptions;
+ }
+ void setTailingQueryOptions(int tailingQueryOptions) {
+ _tailingQueryOptions = tailingQueryOptions;
+ }
+
+ void peek(std::vector<BSONObj>& v, int n) {
+ if (cursor.get())
+ cursor->peek(v, n);
+ }
+ BSONObj nextSafe() {
+ return cursor->nextSafe();
+ }
+ BSONObj next() {
+ return cursor->next();
+ }
+ void putBack(BSONObj op) {
+ cursor->putBack(op);
+ }
+
+ HostAndPort getHost() const;
/**
- * Authenticates conn using the server's cluster-membership credentials.
- *
- * Returns true on successful authentication.
+ * Connects this OplogReader to a valid sync source, using the provided lastOpTimeFetched
+ * and ReplicationCoordinator objects.
+ * If this function fails to connect to a sync source that is viable, this OplogReader
+ * is left unconnected, where this->conn() equals NULL.
+ * In the process of connecting, this function may add items to the repl coordinator's
+ * sync source blacklist.
+ * This function may throw DB exceptions.
+ * If "lastOpTimeFetched" is (0, 0), we do not check staleness as this indicates an initial
+ * sync.
*/
- bool replAuthenticate(DBClientBase* conn);
-
- /* started abstracting out the querying of the primary/master's oplog
- still fairly awkward but a start.
- */
-
- class OplogReader {
- private:
- boost::shared_ptr<DBClientConnection> _conn;
- boost::shared_ptr<DBClientCursor> cursor;
- int _tailingQueryOptions;
-
- // If _conn was actively connected, _host represents the current HostAndPort of the
- // connection.
- HostAndPort _host;
- public:
- OplogReader();
- ~OplogReader() { }
- void resetCursor() { cursor.reset(); }
- void resetConnection() {
- cursor.reset();
- _conn.reset();
- _host = HostAndPort();
- }
- DBClientConnection* conn() { return _conn.get(); }
- BSONObj findOne(const char *ns, const Query& q) {
- return conn()->findOne(ns, q, 0, QueryOption_SlaveOk);
- }
- BSONObj getLastOp(const char *ns) {
- return findOne(ns, Query().sort(reverseNaturalObj));
- }
-
- /* SO_TIMEOUT (send/recv time out) for our DBClientConnections */
- static const int tcp_timeout = 30;
-
- /* ok to call if already connected */
- bool connect(const HostAndPort& host);
-
- void tailCheck();
-
- bool haveCursor() { return cursor.get() != 0; }
-
- void query(const char *ns,
- Query query,
- int nToReturn,
- int nToSkip,
- const BSONObj* fields=0);
-
- void tailingQuery(const char *ns, const BSONObj& query, const BSONObj* fields=0);
-
- void tailingQueryGTE(const char *ns, OpTime t, const BSONObj* fields=0);
-
- /* Do a tailing query, but only send the ts field back. */
- void ghostQueryGTE(const char *ns, OpTime t) {
- const BSONObj fields = BSON("ts" << 1 << "_id" << 0);
- return tailingQueryGTE(ns, t, &fields);
- }
-
- bool more() {
- uassert( 15910, "Doesn't have cursor for reading oplog", cursor.get() );
- return cursor->more();
- }
-
- bool moreInCurrentBatch() {
- uassert( 15911, "Doesn't have cursor for reading oplog", cursor.get() );
- return cursor->moreInCurrentBatch();
- }
-
- int currentBatchMessageSize() {
- if( NULL == cursor->getMessage() )
- return 0;
- return cursor->getMessage()->size();
- }
-
- int getTailingQueryOptions() const { return _tailingQueryOptions; }
- void setTailingQueryOptions( int tailingQueryOptions ) { _tailingQueryOptions = tailingQueryOptions; }
-
- void peek(std::vector<BSONObj>& v, int n) {
- if( cursor.get() )
- cursor->peek(v,n);
- }
- BSONObj nextSafe() { return cursor->nextSafe(); }
- BSONObj next() { return cursor->next(); }
- void putBack(BSONObj op) { cursor->putBack(op); }
-
- HostAndPort getHost() const;
-
- /**
- * Connects this OplogReader to a valid sync source, using the provided lastOpTimeFetched
- * and ReplicationCoordinator objects.
- * If this function fails to connect to a sync source that is viable, this OplogReader
- * is left unconnected, where this->conn() equals NULL.
- * In the process of connecting, this function may add items to the repl coordinator's
- * sync source blacklist.
- * This function may throw DB exceptions.
- * If "lastOpTimeFetched" is (0, 0), we do not check staleness as this indicates an initial
- * sync.
- */
- void connectToSyncSource(OperationContext* txn,
- OpTime lastOpTimeFetched,
- ReplicationCoordinator* replCoord);
- };
-
-} // namespace repl
-} // namespace mongo
+ void connectToSyncSource(OperationContext* txn,
+ OpTime lastOpTimeFetched,
+ ReplicationCoordinator* replCoord);
+};
+
+} // namespace repl
+} // namespace mongo
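
The header keeps OplogReader a thin wrapper over a DBClientConnection plus a DBClientCursor, with the tailing flags pluggable through setTailingQueryOptions(). A hedged sketch of how the cursor helpers compose; shouldApply() is a hypothetical predicate, the oplog namespace is assumed, and the QueryOption_* constants come from the client query API:

    // Sketch only: shouldApply() is not part of this diff.
    OplogReader reader;
    reader.setTailingQueryOptions(QueryOption_SlaveOk | QueryOption_CursorTailable |
                                  QueryOption_AwaitData);
    if (reader.connect(HostAndPort("node1.example.net:27017"))) {
        reader.tailingQuery("local.oplog.rs", BSONObj());  // oplog namespace assumed
        while (reader.more()) {
            BSONObj op = reader.next();
            if (!shouldApply(op)) {
                reader.putBack(op);  // hand the op back to the cursor for a later pass
                break;
            }
        }
    }
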
diff --git a/src/mongo/db/repl/repl_set_heartbeat_args.cpp b/src/mongo/db/repl/repl_set_heartbeat_args.cpp
index 75eee68348f..babca5a0dfa 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_args.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_args.cpp
@@ -39,142 +39,133 @@ namespace repl {
namespace {
- const std::string kCheckEmptyFieldName = "checkEmpty";
- const std::string kProtocolVersionFieldName = "pv";
- const std::string kConfigVersionFieldName = "v";
- const std::string kSenderIdFieldName = "fromId";
- const std::string kSetNameFieldName = "replSetHeartbeat";
- const std::string kSenderHostFieldName = "from";
-
- const std::string kLegalHeartbeatFieldNames[] = {
- kCheckEmptyFieldName,
- kProtocolVersionFieldName,
- kConfigVersionFieldName,
- kSenderIdFieldName,
- kSetNameFieldName,
- kSenderHostFieldName
- };
-
-} // namespace
-
- ReplSetHeartbeatArgs::ReplSetHeartbeatArgs() :
- _hasCheckEmpty(false),
- _hasProtocolVersion(false),
- _hasConfigVersion(false),
- _hasSenderId(false),
- _hasSetName(false),
- _hasSenderHost(false),
- _checkEmpty(false),
- _protocolVersion(-1),
- _configVersion(-1),
- _senderId(-1),
- _setName(""),
- _senderHost(HostAndPort()) {}
-
- Status ReplSetHeartbeatArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("ReplSetHeartbeatArgs",
- argsObj,
- kLegalHeartbeatFieldNames);
+const std::string kCheckEmptyFieldName = "checkEmpty";
+const std::string kProtocolVersionFieldName = "pv";
+const std::string kConfigVersionFieldName = "v";
+const std::string kSenderIdFieldName = "fromId";
+const std::string kSetNameFieldName = "replSetHeartbeat";
+const std::string kSenderHostFieldName = "from";
+
+const std::string kLegalHeartbeatFieldNames[] = {kCheckEmptyFieldName,
+ kProtocolVersionFieldName,
+ kConfigVersionFieldName,
+ kSenderIdFieldName,
+ kSetNameFieldName,
+ kSenderHostFieldName};
+
+} // namespace
+
+ReplSetHeartbeatArgs::ReplSetHeartbeatArgs()
+ : _hasCheckEmpty(false),
+ _hasProtocolVersion(false),
+ _hasConfigVersion(false),
+ _hasSenderId(false),
+ _hasSetName(false),
+ _hasSenderHost(false),
+ _checkEmpty(false),
+ _protocolVersion(-1),
+ _configVersion(-1),
+ _senderId(-1),
+ _setName(""),
+ _senderHost(HostAndPort()) {}
+
+Status ReplSetHeartbeatArgs::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("ReplSetHeartbeatArgs", argsObj, kLegalHeartbeatFieldNames);
+ if (!status.isOK())
+ return status;
+
+ status = bsonExtractBooleanFieldWithDefault(argsObj, kCheckEmptyFieldName, false, &_checkEmpty);
+ if (!status.isOK())
+ return status;
+ _hasCheckEmpty = true;
+
+ status = bsonExtractIntegerField(argsObj, kProtocolVersionFieldName, &_protocolVersion);
+ if (!status.isOK())
+ return status;
+ _hasProtocolVersion = true;
+
+ status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_configVersion);
+ if (!status.isOK())
+ return status;
+ _hasConfigVersion = true;
+
+ status = bsonExtractIntegerFieldWithDefault(argsObj, kSenderIdFieldName, -1, &_senderId);
+ if (!status.isOK())
+ return status;
+ _hasSenderId = true;
+
+ status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
+ if (!status.isOK())
+ return status;
+ _hasSetName = true;
+
+ std::string hostAndPortString;
+ status =
+ bsonExtractStringFieldWithDefault(argsObj, kSenderHostFieldName, "", &hostAndPortString);
+ if (!status.isOK())
+ return status;
+
+ if (!hostAndPortString.empty()) {
+ status = _senderHost.initialize(hostAndPortString);
if (!status.isOK())
return status;
-
- status = bsonExtractBooleanFieldWithDefault(argsObj,
- kCheckEmptyFieldName,
- false,
- &_checkEmpty);
- if (!status.isOK())
- return status;
- _hasCheckEmpty = true;
-
- status = bsonExtractIntegerField(argsObj, kProtocolVersionFieldName, &_protocolVersion);
- if (!status.isOK())
- return status;
- _hasProtocolVersion = true;
-
- status = bsonExtractIntegerField(argsObj, kConfigVersionFieldName, &_configVersion);
- if (!status.isOK())
- return status;
- _hasConfigVersion = true;
-
- status = bsonExtractIntegerFieldWithDefault(argsObj, kSenderIdFieldName, -1, &_senderId);
- if (!status.isOK())
- return status;
- _hasSenderId = true;
-
- status = bsonExtractStringField(argsObj, kSetNameFieldName, &_setName);
- if (!status.isOK())
- return status;
- _hasSetName = true;
-
- std::string hostAndPortString;
- status = bsonExtractStringFieldWithDefault(
- argsObj,
- kSenderHostFieldName,
- "",
- &hostAndPortString);
- if (!status.isOK())
- return status;
-
- if (!hostAndPortString.empty()) {
- status = _senderHost.initialize(hostAndPortString);
- if (!status.isOK())
- return status;
- _hasSenderHost = true;
- }
-
- return Status::OK();
- }
-
- bool ReplSetHeartbeatArgs::isInitialized() const {
- return _hasProtocolVersion && _hasConfigVersion && _hasSetName;
- }
-
- BSONObj ReplSetHeartbeatArgs::toBSON() const {
- invariant(isInitialized());
- BSONObjBuilder builder;
- builder.append("replSetHeartbeat", _setName);
- builder.appendIntOrLL("pv", _protocolVersion);
- builder.appendIntOrLL("v", _configVersion);
- builder.append("from", _hasSenderHost ? _senderHost.toString() : "");
-
- if (_hasSenderId) {
- builder.appendIntOrLL("fromId", _senderId);
- }
- if (_hasCheckEmpty) {
- builder.append("checkEmpty", _checkEmpty);
- }
- return builder.obj();
- }
-
- void ReplSetHeartbeatArgs::setCheckEmpty(bool newVal) {
- _checkEmpty = newVal;
- _hasCheckEmpty = true;
+ _hasSenderHost = true;
}
- void ReplSetHeartbeatArgs::setProtocolVersion(long long newVal) {
- _protocolVersion = newVal;
- _hasProtocolVersion = true;
- }
+ return Status::OK();
+}
- void ReplSetHeartbeatArgs::setConfigVersion(long long newVal) {
- _configVersion = newVal;
- _hasConfigVersion = true;
- }
+bool ReplSetHeartbeatArgs::isInitialized() const {
+ return _hasProtocolVersion && _hasConfigVersion && _hasSetName;
+}
- void ReplSetHeartbeatArgs::setSenderId(long long newVal) {
- _senderId = newVal;
- _hasSenderId = true;
- }
+BSONObj ReplSetHeartbeatArgs::toBSON() const {
+ invariant(isInitialized());
+ BSONObjBuilder builder;
+ builder.append("replSetHeartbeat", _setName);
+ builder.appendIntOrLL("pv", _protocolVersion);
+ builder.appendIntOrLL("v", _configVersion);
+ builder.append("from", _hasSenderHost ? _senderHost.toString() : "");
- void ReplSetHeartbeatArgs::setSetName(std::string newVal) {
- _setName = newVal;
- _hasSetName = true;
+ if (_hasSenderId) {
+ builder.appendIntOrLL("fromId", _senderId);
}
-
- void ReplSetHeartbeatArgs::setSenderHost(HostAndPort newVal) {
- _senderHost = newVal;
- _hasSenderHost = true;
+ if (_hasCheckEmpty) {
+ builder.append("checkEmpty", _checkEmpty);
}
+ return builder.obj();
+}
+
+void ReplSetHeartbeatArgs::setCheckEmpty(bool newVal) {
+ _checkEmpty = newVal;
+ _hasCheckEmpty = true;
+}
+
+void ReplSetHeartbeatArgs::setProtocolVersion(long long newVal) {
+ _protocolVersion = newVal;
+ _hasProtocolVersion = true;
+}
+
+void ReplSetHeartbeatArgs::setConfigVersion(long long newVal) {
+ _configVersion = newVal;
+ _hasConfigVersion = true;
+}
+
+void ReplSetHeartbeatArgs::setSenderId(long long newVal) {
+ _senderId = newVal;
+ _hasSenderId = true;
+}
+
+void ReplSetHeartbeatArgs::setSetName(std::string newVal) {
+ _setName = newVal;
+ _hasSetName = true;
+}
+
+void ReplSetHeartbeatArgs::setSenderHost(HostAndPort newVal) {
+ _senderHost = newVal;
+ _hasSenderHost = true;
+}
} // namespace repl
} // namespace mongo
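
This file follows the usual args-object pattern: each setter flips a _hasX flag, toBSON() asserts isInitialized(), and initialize() threads a Status through every field extraction. A round-trip sketch with placeholder values, using only the API shown above:

    ReplSetHeartbeatArgs args;
    args.setSetName("rs0");
    args.setProtocolVersion(1);
    args.setConfigVersion(5);
    args.setSenderId(0);
    args.setSenderHost(HostAndPort("node1:27017"));
    invariant(args.isInitialized());

    // {replSetHeartbeat: "rs0", pv: 1, v: 5, from: "node1:27017", fromId: 0}
    BSONObj cmd = args.toBSON();

    ReplSetHeartbeatArgs parsed;
    Status status = parsed.initialize(cmd);
    invariant(status.isOK());
    invariant(parsed.getSetName() == "rs0");
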
diff --git a/src/mongo/db/repl/repl_set_heartbeat_args.h b/src/mongo/db/repl/repl_set_heartbeat_args.h
index 487be758524..f03e3260a04 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_args.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_args.h
@@ -34,101 +34,125 @@
namespace mongo {
- class BSONObj;
- class Status;
+class BSONObj;
+class Status;
namespace repl {
+/**
+ * Arguments to the replSetHeartbeat command.
+ */
+class ReplSetHeartbeatArgs {
+public:
+ ReplSetHeartbeatArgs();
+
+ /**
+ * Initializes this ReplSetHeartbeatArgs from the contents of args.
+ */
+ Status initialize(const BSONObj& argsObj);
+
+ /**
+ * Returns true if all required fields have been initialized.
+ */
+ bool isInitialized() const;
+
+ /**
+     * Returns true if the sender wants to know whether this node is empty.
+ */
+ bool getCheckEmpty() const {
+ return _checkEmpty;
+ }
+
+ /**
+ * Gets the version of the Heartbeat protocol being used by the sender.
+ */
+ long long getProtocolVersion() const {
+ return _protocolVersion;
+ }
+
+ /**
+ * Gets the ReplSetConfig version number of the sender.
+ */
+ long long getConfigVersion() const {
+ return _configVersion;
+ }
+
+ /**
+ * Gets the _id of the sender in their ReplSetConfig.
+ */
+ long long getSenderId() const {
+ return _senderId;
+ }
+
+ /**
+ * Gets the replSet name of the sender's replica set.
+ */
+ std::string getSetName() const {
+ return _setName;
+ }
+
+ /**
+ * Gets the HostAndPort of the sender.
+ */
+ HostAndPort getSenderHost() const {
+ return _senderHost;
+ }
+
+ /**
+     * The below methods check whether the value named in the method has been set.
+ */
+ bool hasCheckEmpty() {
+ return _hasCheckEmpty;
+ }
+ bool hasProtocolVersion() {
+ return _hasProtocolVersion;
+ }
+ bool hasConfigVersion() {
+ return _hasConfigVersion;
+ }
+ bool hasSenderId() {
+ return _hasSenderId;
+ }
+ bool hasSetName() {
+ return _hasSetName;
+ }
+ bool hasSenderHost() {
+ return _hasSenderHost;
+ }
+
+ /**
+ * The below methods set the value in the method name to 'newVal'.
+ */
+ void setCheckEmpty(bool newVal);
+ void setProtocolVersion(long long newVal);
+ void setConfigVersion(long long newVal);
+ void setSenderId(long long newVal);
+ void setSetName(std::string newVal);
+ void setSenderHost(HostAndPort newVal);
+
/**
- * Arguments to the replSetHeartbeat command.
+ * Returns a BSONified version of the object.
+ * Should only be called if the mandatory fields have been set.
+ * Optional fields are only included if they have been set.
*/
- class ReplSetHeartbeatArgs {
- public:
- ReplSetHeartbeatArgs();
-
- /**
- * Initializes this ReplSetHeartbeatArgs from the contents of args.
- */
- Status initialize(const BSONObj& argsObj);
-
- /**
- * Returns true if all required fields have been initialized.
- */
- bool isInitialized() const;
-
- /**
- * Returns whether the sender would like to know whether the node is empty or not.
- */
- bool getCheckEmpty() const { return _checkEmpty; }
-
- /**
- * Gets the version of the Heartbeat protocol being used by the sender.
- */
- long long getProtocolVersion() const { return _protocolVersion; }
-
- /**
- * Gets the ReplSetConfig version number of the sender.
- */
- long long getConfigVersion() const { return _configVersion; }
-
- /**
- * Gets the _id of the sender in their ReplSetConfig.
- */
- long long getSenderId() const { return _senderId; }
-
- /**
- * Gets the replSet name of the sender's replica set.
- */
- std::string getSetName() const { return _setName; }
-
- /**
- * Gets the HostAndPort of the sender.
- */
- HostAndPort getSenderHost() const { return _senderHost; }
-
- /**
- * The below methods check whether or not value in the method name has been set.
- */
- bool hasCheckEmpty() { return _hasCheckEmpty; }
- bool hasProtocolVersion() { return _hasProtocolVersion; }
- bool hasConfigVersion() { return _hasConfigVersion; }
- bool hasSenderId() { return _hasSenderId; }
- bool hasSetName() { return _hasSetName; }
- bool hasSenderHost() { return _hasSenderHost; }
-
- /**
- * The below methods set the value in the method name to 'newVal'.
- */
- void setCheckEmpty(bool newVal);
- void setProtocolVersion(long long newVal);
- void setConfigVersion(long long newVal);
- void setSenderId(long long newVal);
- void setSetName(std::string newVal);
- void setSenderHost(HostAndPort newVal);
-
- /**
- * Returns a BSONified version of the object.
- * Should only be called if the mandatory fields have been set.
- * Optional fields are only included if they have been set.
- */
- BSONObj toBSON() const;
-
- private:
- bool _hasCheckEmpty;
- bool _hasProtocolVersion;
- bool _hasConfigVersion;
- bool _hasSenderId;
- bool _hasSetName;
- bool _hasSenderHost;
-
- // look at the body of the isInitialized() function to see which fields are mandatory
- bool _checkEmpty;
- long long _protocolVersion;
- long long _configVersion;
- long long _senderId;
- std::string _setName;
- HostAndPort _senderHost;
- };
-
-} // namespace repl
-} // namespace mongo
+ BSONObj toBSON() const;
+
+private:
+ bool _hasCheckEmpty;
+ bool _hasProtocolVersion;
+ bool _hasConfigVersion;
+ bool _hasSenderId;
+ bool _hasSetName;
+ bool _hasSenderHost;
+
+    // Look at the body of isInitialized() to see which fields are mandatory.
+ bool _checkEmpty;
+ long long _protocolVersion;
+ long long _configVersion;
+ long long _senderId;
+ std::string _setName;
+ HostAndPort _senderHost;
+};
+
+} // namespace repl
+} // namespace mongo
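
Note that isInitialized() requires only the protocol version, config version, and set name; "from", "fromId", and "checkEmpty" are optional on the wire. A minimal valid command document, built from the field names defined in the .cpp above:

    BSONObj minimal = BSON("replSetHeartbeat" << "rs0" << "pv" << 1 << "v" << 5);

    ReplSetHeartbeatArgs args;
    invariant(args.initialize(minimal).isOK());
    invariant(args.isInitialized());
    invariant(args.getSenderId() == -1);  // default used when "fromId" is absent
    invariant(!args.hasSenderHost());     // "from" defaulted to the empty string
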
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.cpp b/src/mongo/db/repl/repl_set_heartbeat_response.cpp
index 4e968ef0175..5d27f8b222d 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.cpp
@@ -44,315 +44,308 @@ namespace mongo {
namespace repl {
namespace {
- const std::string kOkFieldName = "ok";
- const std::string kErrMsgFieldName = "errmsg";
- const std::string kErrorCodeFieldName = "code";
- const std::string kOpTimeFieldName = "opTime";
- const std::string kTimeFieldName = "time";
- const std::string kElectionTimeFieldName = "electionTime";
- const std::string kConfigFieldName = "config";
- const std::string kIsElectableFieldName = "e";
- const std::string kMismatchFieldName = "mismatch";
- const std::string kIsReplSetFieldName = "rs";
- const std::string kHasStateDisagreementFieldName = "stateDisagreement";
- const std::string kMemberStateFieldName = "state";
- const std::string kConfigVersionFieldName = "v";
- const std::string kHbMessageFieldName = "hbmsg";
- const std::string kReplSetFieldName = "set";
- const std::string kSyncSourceFieldName = "syncingTo";
- const std::string kHasDataFieldName = "hasData";
+const std::string kOkFieldName = "ok";
+const std::string kErrMsgFieldName = "errmsg";
+const std::string kErrorCodeFieldName = "code";
+const std::string kOpTimeFieldName = "opTime";
+const std::string kTimeFieldName = "time";
+const std::string kElectionTimeFieldName = "electionTime";
+const std::string kConfigFieldName = "config";
+const std::string kIsElectableFieldName = "e";
+const std::string kMismatchFieldName = "mismatch";
+const std::string kIsReplSetFieldName = "rs";
+const std::string kHasStateDisagreementFieldName = "stateDisagreement";
+const std::string kMemberStateFieldName = "state";
+const std::string kConfigVersionFieldName = "v";
+const std::string kHbMessageFieldName = "hbmsg";
+const std::string kReplSetFieldName = "set";
+const std::string kSyncSourceFieldName = "syncingTo";
+const std::string kHasDataFieldName = "hasData";
} // namespace
- ReplSetHeartbeatResponse::ReplSetHeartbeatResponse() :
- _electionTimeSet(false),
- _timeSet(false),
- _time(0),
- _opTimeSet(false),
- _electableSet(false),
- _electable(false),
- _hasDataSet(false),
- _hasData(false),
- _mismatch(false),
- _isReplSet(false),
- _stateDisagreement(false),
- _stateSet(false),
- _version(-1),
- _configSet(false)
- {}
-
- void ReplSetHeartbeatResponse::addToBSON(BSONObjBuilder* builder) const {
- if (_mismatch) {
- *builder << kOkFieldName << 0.0;
- *builder << kMismatchFieldName << _mismatch;
- return;
- }
-
- builder->append(kOkFieldName, 1.0);
- if (_opTimeSet) {
- builder->appendDate(kOpTimeFieldName, _opTime.asDate());
- }
- if (_timeSet) {
- *builder << kTimeFieldName << _time.total_seconds();
- }
- if (_electionTimeSet) {
- builder->appendDate(kElectionTimeFieldName, _electionTime.asDate());
- }
- if (_configSet) {
- *builder << kConfigFieldName << _config.toBSON();
- }
- if (_electableSet) {
- *builder << kIsElectableFieldName << _electable;
- }
- if (_isReplSet) {
- *builder << "rs" << _isReplSet;
- }
- if (_stateDisagreement) {
- *builder << kHasStateDisagreementFieldName << _stateDisagreement;
- }
- if (_stateSet) {
- builder->appendIntOrLL(kMemberStateFieldName, _state.s);
- }
- if (_version != -1) {
- *builder << kConfigVersionFieldName << _version;
- }
- *builder << kHbMessageFieldName << _hbmsg;
- if (!_setName.empty()) {
- *builder << kReplSetFieldName << _setName;
- }
- if (!_syncingTo.empty()) {
- *builder << kSyncSourceFieldName << _syncingTo;
- }
- if (_hasDataSet) {
- builder->append(kHasDataFieldName, _hasData);
- }
+ReplSetHeartbeatResponse::ReplSetHeartbeatResponse()
+ : _electionTimeSet(false),
+ _timeSet(false),
+ _time(0),
+ _opTimeSet(false),
+ _electableSet(false),
+ _electable(false),
+ _hasDataSet(false),
+ _hasData(false),
+ _mismatch(false),
+ _isReplSet(false),
+ _stateDisagreement(false),
+ _stateSet(false),
+ _version(-1),
+ _configSet(false) {}
+
+void ReplSetHeartbeatResponse::addToBSON(BSONObjBuilder* builder) const {
+ if (_mismatch) {
+ *builder << kOkFieldName << 0.0;
+ *builder << kMismatchFieldName << _mismatch;
+ return;
}
- BSONObj ReplSetHeartbeatResponse::toBSON() const {
- BSONObjBuilder builder;
- addToBSON(&builder);
- return builder.obj();
+ builder->append(kOkFieldName, 1.0);
+ if (_opTimeSet) {
+ builder->appendDate(kOpTimeFieldName, _opTime.asDate());
+ }
+ if (_timeSet) {
+ *builder << kTimeFieldName << _time.total_seconds();
+ }
+ if (_electionTimeSet) {
+ builder->appendDate(kElectionTimeFieldName, _electionTime.asDate());
+ }
+ if (_configSet) {
+ *builder << kConfigFieldName << _config.toBSON();
+ }
+ if (_electableSet) {
+ *builder << kIsElectableFieldName << _electable;
+ }
+ if (_isReplSet) {
+ *builder << "rs" << _isReplSet;
+ }
+ if (_stateDisagreement) {
+ *builder << kHasStateDisagreementFieldName << _stateDisagreement;
+ }
+ if (_stateSet) {
+ builder->appendIntOrLL(kMemberStateFieldName, _state.s);
+ }
+ if (_version != -1) {
+ *builder << kConfigVersionFieldName << _version;
+ }
+ *builder << kHbMessageFieldName << _hbmsg;
+ if (!_setName.empty()) {
+ *builder << kReplSetFieldName << _setName;
+ }
+ if (!_syncingTo.empty()) {
+ *builder << kSyncSourceFieldName << _syncingTo;
+ }
+ if (_hasDataSet) {
+ builder->append(kHasDataFieldName, _hasData);
+ }
+}
+
+BSONObj ReplSetHeartbeatResponse::toBSON() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.obj();
+}
+
+Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc) {
+ // Old versions set this even though they returned not "ok"
+ _mismatch = doc[kMismatchFieldName].trueValue();
+ if (_mismatch)
+ return Status(ErrorCodes::InconsistentReplicaSetNames, "replica set name doesn't match.");
+
+ // Old versions sometimes set the replica set name ("set") but ok:0
+ const BSONElement replSetNameElement = doc[kReplSetFieldName];
+ if (replSetNameElement.eoo()) {
+ _setName.clear();
+ } else if (replSetNameElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kReplSetFieldName
+ << "\" field in response to replSetHeartbeat to have "
+ "type String, but found "
+ << typeName(replSetNameElement.type()));
+ } else {
+ _setName = replSetNameElement.String();
}
- Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc) {
-
- // Old versions set this even though they returned not "ok"
- _mismatch = doc[kMismatchFieldName].trueValue();
- if (_mismatch)
- return Status(ErrorCodes::InconsistentReplicaSetNames,
- "replica set name doesn't match.");
-
- // Old versions sometimes set the replica set name ("set") but ok:0
- const BSONElement replSetNameElement = doc[kReplSetFieldName];
- if (replSetNameElement.eoo()) {
- _setName.clear();
- }
- else if (replSetNameElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kReplSetFieldName << "\" field in response to replSetHeartbeat to have "
- "type String, but found " << typeName(replSetNameElement.type()));
- }
- else {
- _setName = replSetNameElement.String();
- }
-
- if (_setName.empty() && !doc[kOkFieldName].trueValue()) {
- std::string errMsg = doc[kErrMsgFieldName].str();
-
- BSONElement errCodeElem = doc[kErrorCodeFieldName];
- if (errCodeElem.ok()) {
- if (!errCodeElem.isNumber())
- return Status(ErrorCodes::BadValue, "Error code is not a number!");
-
- int errorCode = errCodeElem.numberInt();
- return Status(ErrorCodes::Error(errorCode), errMsg);
- }
- return Status(ErrorCodes::UnknownError, errMsg);
- }
-
- const BSONElement hasDataElement = doc[kHasDataFieldName];
- _hasDataSet = !hasDataElement.eoo();
- _hasData = hasDataElement.trueValue();
-
- const BSONElement electionTimeElement = doc[kElectionTimeFieldName];
- if (electionTimeElement.eoo()) {
- _electionTimeSet = false;
- }
- else if (electionTimeElement.type() == Timestamp) {
- _electionTimeSet = true;
- _electionTime = electionTimeElement._opTime();
- }
- else if (electionTimeElement.type() == Date) {
- _electionTimeSet = true;
- _electionTime = OpTime(electionTimeElement.date());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kElectionTimeFieldName << "\" field in response to replSetHeartbeat "
- "command to have type Date or Timestamp, but found type " <<
- typeName(electionTimeElement.type()));
- }
-
- const BSONElement timeElement = doc[kTimeFieldName];
- if (timeElement.eoo()) {
- _timeSet = false;
- }
- else if (timeElement.isNumber()) {
- _timeSet = true;
- _time = Seconds(timeElement.numberLong());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kTimeFieldName << "\" field in response to replSetHeartbeat "
- "command to have a numeric type, but found type " <<
- typeName(timeElement.type()));
- }
-
- const BSONElement opTimeElement = doc[kOpTimeFieldName];
- if (opTimeElement.eoo()) {
- _opTimeSet = false;
- }
- else if (opTimeElement.type() == Timestamp) {
- _opTimeSet = true;
- _opTime = opTimeElement._opTime();
- }
- else if (opTimeElement.type() == Date) {
- _opTimeSet = true;
- _opTime = OpTime(opTimeElement.date());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kOpTimeFieldName << "\" field in response to replSetHeartbeat "
- "command to have type Date or Timestamp, but found type " <<
- typeName(opTimeElement.type()));
- }
-
- const BSONElement electableElement = doc[kIsElectableFieldName];
- if (electableElement.eoo()) {
- _electableSet = false;
- }
- else {
- _electableSet = true;
- _electable = electableElement.trueValue();
- }
+ if (_setName.empty() && !doc[kOkFieldName].trueValue()) {
+ std::string errMsg = doc[kErrMsgFieldName].str();
- _isReplSet = doc[kIsReplSetFieldName].trueValue();
+ BSONElement errCodeElem = doc[kErrorCodeFieldName];
+ if (errCodeElem.ok()) {
+ if (!errCodeElem.isNumber())
+ return Status(ErrorCodes::BadValue, "Error code is not a number!");
- const BSONElement memberStateElement = doc[kMemberStateFieldName];
- if (memberStateElement.eoo()) {
- _stateSet = false;
+ int errorCode = errCodeElem.numberInt();
+ return Status(ErrorCodes::Error(errorCode), errMsg);
}
- else if (memberStateElement.type() != NumberInt &&
- memberStateElement.type() != NumberLong) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kMemberStateFieldName << "\" field in response to replSetHeartbeat "
- "command to have type NumberInt or NumberLong, but found type " <<
- typeName(memberStateElement.type()));
- }
- else {
- long long stateInt = memberStateElement.numberLong();
- if (stateInt < 0 || stateInt > MemberState::RS_MAX) {
- return Status(ErrorCodes::BadValue, str::stream() << "Value for \"" <<
- kMemberStateFieldName << "\" in response to replSetHeartbeat is "
- "out of range; legal values are non-negative and no more than " <<
- MemberState::RS_MAX);
- }
- _stateSet = true;
- _state = MemberState(static_cast<int>(stateInt));
- }
-
- _stateDisagreement = doc[kHasStateDisagreementFieldName].trueValue();
+ return Status(ErrorCodes::UnknownError, errMsg);
+ }
+ const BSONElement hasDataElement = doc[kHasDataFieldName];
+ _hasDataSet = !hasDataElement.eoo();
+ _hasData = hasDataElement.trueValue();
+
+ const BSONElement electionTimeElement = doc[kElectionTimeFieldName];
+ if (electionTimeElement.eoo()) {
+ _electionTimeSet = false;
+ } else if (electionTimeElement.type() == Timestamp) {
+ _electionTimeSet = true;
+ _electionTime = electionTimeElement._opTime();
+ } else if (electionTimeElement.type() == Date) {
+ _electionTimeSet = true;
+ _electionTime = OpTime(electionTimeElement.date());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kElectionTimeFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have type Date or Timestamp, but found type "
+ << typeName(electionTimeElement.type()));
+ }
- // Not required for the case of uninitialized members -- they have no config
- const BSONElement versionElement = doc[kConfigVersionFieldName];
+ const BSONElement timeElement = doc[kTimeFieldName];
+ if (timeElement.eoo()) {
+ _timeSet = false;
+ } else if (timeElement.isNumber()) {
+ _timeSet = true;
+ _time = Seconds(timeElement.numberLong());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kTimeFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have a numeric type, but found type "
+ << typeName(timeElement.type()));
+ }
- // If we have an optime then we must have a version
- if (_opTimeSet && versionElement.eoo()) {
- return Status(ErrorCodes::NoSuchKey, str::stream() <<
- "Response to replSetHeartbeat missing required \"" <<
- kConfigVersionFieldName << "\" field even though initialized");
- }
+ const BSONElement opTimeElement = doc[kOpTimeFieldName];
+ if (opTimeElement.eoo()) {
+ _opTimeSet = false;
+ } else if (opTimeElement.type() == Timestamp) {
+ _opTimeSet = true;
+ _opTime = opTimeElement._opTime();
+ } else if (opTimeElement.type() == Date) {
+ _opTimeSet = true;
+ _opTime = OpTime(opTimeElement.date());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kOpTimeFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have type Date or Timestamp, but found type "
+ << typeName(opTimeElement.type()));
+ }
- // If there is a "v" (config version) then it must be an int.
- if (!versionElement.eoo() && versionElement.type() != NumberInt) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kConfigVersionFieldName <<
- "\" field in response to replSetHeartbeat to have "
- "type NumberInt, but found " << typeName(versionElement.type()));
- }
- _version = versionElement.numberInt();
+ const BSONElement electableElement = doc[kIsElectableFieldName];
+ if (electableElement.eoo()) {
+ _electableSet = false;
+ } else {
+ _electableSet = true;
+ _electable = electableElement.trueValue();
+ }
- const BSONElement hbMsgElement = doc[kHbMessageFieldName];
- if (hbMsgElement.eoo()) {
- _hbmsg.clear();
- }
- else if (hbMsgElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kHbMessageFieldName << "\" field in response to replSetHeartbeat to have "
- "type String, but found " << typeName(hbMsgElement.type()));
- }
- else {
- _hbmsg = hbMsgElement.String();
- }
+ _isReplSet = doc[kIsReplSetFieldName].trueValue();
+
+ const BSONElement memberStateElement = doc[kMemberStateFieldName];
+ if (memberStateElement.eoo()) {
+ _stateSet = false;
+ } else if (memberStateElement.type() != NumberInt && memberStateElement.type() != NumberLong) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream()
+ << "Expected \"" << kMemberStateFieldName
+ << "\" field in response to replSetHeartbeat "
+ "command to have type NumberInt or NumberLong, but found type "
+ << typeName(memberStateElement.type()));
+ } else {
+ long long stateInt = memberStateElement.numberLong();
+ if (stateInt < 0 || stateInt > MemberState::RS_MAX) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "Value for \"" << kMemberStateFieldName
+ << "\" in response to replSetHeartbeat is "
+ "out of range; legal values are non-negative and no more than "
+ << MemberState::RS_MAX);
+ }
+ _stateSet = true;
+ _state = MemberState(static_cast<int>(stateInt));
+ }
- const BSONElement syncingToElement = doc[kSyncSourceFieldName];
- if (syncingToElement.eoo()) {
- _syncingTo.clear();
- }
- else if (syncingToElement.type() != String) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kSyncSourceFieldName << "\" field in response to replSetHeartbeat to "
- "have type String, but found " << typeName(syncingToElement.type()));
- }
- else {
- _syncingTo = syncingToElement.String();
- }
+ _stateDisagreement = doc[kHasStateDisagreementFieldName].trueValue();
- const BSONElement rsConfigElement = doc[kConfigFieldName];
- if (rsConfigElement.eoo()) {
- _configSet = false;
- _config = ReplicaSetConfig();
- return Status::OK();
- }
- else if (rsConfigElement.type() != Object) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected \"" <<
- kConfigFieldName << "\" in response to replSetHeartbeat to have type "
- "Object, but found " << typeName(rsConfigElement.type()));
- }
- _configSet = true;
- return _config.initialize(rsConfigElement.Obj());
- }
- MemberState ReplSetHeartbeatResponse::getState() const {
- invariant(_stateSet);
- return _state;
- }
+ // Not required for the case of uninitialized members -- they have no config
+ const BSONElement versionElement = doc[kConfigVersionFieldName];
- OpTime ReplSetHeartbeatResponse::getElectionTime() const {
- invariant(_electionTimeSet);
- return _electionTime;
+ // If we have an optime then we must have a version
+ if (_opTimeSet && versionElement.eoo()) {
+ return Status(ErrorCodes::NoSuchKey,
+ str::stream() << "Response to replSetHeartbeat missing required \""
+ << kConfigVersionFieldName
+ << "\" field even though initialized");
}
- bool ReplSetHeartbeatResponse::isElectable() const {
- invariant(_electableSet);
- return _electable;
+ // If there is a "v" (config version) then it must be an int.
+ if (!versionElement.eoo() && versionElement.type() != NumberInt) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kConfigVersionFieldName
+ << "\" field in response to replSetHeartbeat to have "
+ "type NumberInt, but found "
+ << typeName(versionElement.type()));
}
-
- Seconds ReplSetHeartbeatResponse::getTime() const {
- invariant(_timeSet);
- return _time;
+ _version = versionElement.numberInt();
+
+ const BSONElement hbMsgElement = doc[kHbMessageFieldName];
+ if (hbMsgElement.eoo()) {
+ _hbmsg.clear();
+ } else if (hbMsgElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kHbMessageFieldName
+ << "\" field in response to replSetHeartbeat to have "
+ "type String, but found " << typeName(hbMsgElement.type()));
+ } else {
+ _hbmsg = hbMsgElement.String();
}
- OpTime ReplSetHeartbeatResponse::getOpTime() const {
- invariant(_opTimeSet);
- return _opTime;
+ const BSONElement syncingToElement = doc[kSyncSourceFieldName];
+ if (syncingToElement.eoo()) {
+ _syncingTo.clear();
+ } else if (syncingToElement.type() != String) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kSyncSourceFieldName
+ << "\" field in response to replSetHeartbeat to "
+ "have type String, but found "
+ << typeName(syncingToElement.type()));
+ } else {
+ _syncingTo = syncingToElement.String();
}
- const ReplicaSetConfig& ReplSetHeartbeatResponse::getConfig() const {
- invariant(_configSet);
- return _config;
+ const BSONElement rsConfigElement = doc[kConfigFieldName];
+ if (rsConfigElement.eoo()) {
+ _configSet = false;
+ _config = ReplicaSetConfig();
+ return Status::OK();
+ } else if (rsConfigElement.type() != Object) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected \"" << kConfigFieldName
+ << "\" in response to replSetHeartbeat to have type "
+ "Object, but found " << typeName(rsConfigElement.type()));
}
-
-} // namespace repl
-} // namespace mongo
+ _configSet = true;
+ return _config.initialize(rsConfigElement.Obj());
+}
+
+MemberState ReplSetHeartbeatResponse::getState() const {
+ invariant(_stateSet);
+ return _state;
+}
+
+OpTime ReplSetHeartbeatResponse::getElectionTime() const {
+ invariant(_electionTimeSet);
+ return _electionTime;
+}
+
+bool ReplSetHeartbeatResponse::isElectable() const {
+ invariant(_electableSet);
+ return _electable;
+}
+
+Seconds ReplSetHeartbeatResponse::getTime() const {
+ invariant(_timeSet);
+ return _time;
+}
+
+OpTime ReplSetHeartbeatResponse::getOpTime() const {
+ invariant(_opTimeSet);
+ return _opTime;
+}
+
+const ReplicaSetConfig& ReplSetHeartbeatResponse::getConfig() const {
+ invariant(_configSet);
+ return _config;
+}
+
+} // namespace repl
+} // namespace mongo
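
initialize() is deliberately tolerant of older senders: a set-name mismatch short-circuits with InconsistentReplicaSetNames, "electionTime" and "opTime" accept either Date or Timestamp, and a missing config is not an error. Consumers are expected to consult the hasX() flags before calling the invariant-guarded getters, roughly as in this sketch (handleHeartbeatReply and responseDoc are assumed, not part of this diff):

    Status handleHeartbeatReply(const BSONObj& responseDoc) {
        ReplSetHeartbeatResponse resp;
        Status status = resp.initialize(responseDoc);
        if (!status.isOK()) {
            // e.g. ErrorCodes::InconsistentReplicaSetNames on a "mismatch" reply
            return status;
        }
        if (resp.hasState() && resp.getState().primary()) {
            // the remote node believes it is primary
        }
        if (resp.hasOpTime()) {
            OpTime remoteOpTime = resp.getOpTime();  // invariant()s unless hasOpTime()
            // ... compare against our own last applied optime ...
        }
        return Status::OK();
    }
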
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.h b/src/mongo/db/repl/repl_set_heartbeat_response.h
index a5629fbc3bf..cb555cca825 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.h
@@ -36,165 +36,231 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class Status;
+class BSONObj;
+class BSONObjBuilder;
+class Status;
namespace repl {
+/**
+ * Response structure for the replSetHeartbeat command.
+ */
+class ReplSetHeartbeatResponse {
+public:
+ ReplSetHeartbeatResponse();
+
+ /**
+ * Initializes this ReplSetHeartbeatResponse from the contents of "doc".
+ */
+ Status initialize(const BSONObj& doc);
+
+ /**
+ * Appends all non-default values to "builder".
+ */
+ void addToBSON(BSONObjBuilder* builder) const;
+
+ /**
+     * Returns a BSONObj containing all non-default values.
+ */
+ BSONObj toBSON() const;
+
+ /**
+ * Returns toBSON().toString()
+ */
+ const std::string toString() const {
+ return toBSON().toString();
+ }
+
+ bool hasDataSet() const {
+ return _hasDataSet;
+ }
+ bool hasData() const {
+ return _hasData;
+ }
+ bool isMismatched() const {
+ return _mismatch;
+ }
+ bool isReplSet() const {
+ return _isReplSet;
+ }
+ bool isStateDisagreement() const {
+ return _stateDisagreement;
+ }
+ const std::string& getReplicaSetName() const {
+ return _setName;
+ }
+ bool hasState() const {
+ return _stateSet;
+ }
+ MemberState getState() const;
+ bool hasElectionTime() const {
+ return _electionTimeSet;
+ }
+ OpTime getElectionTime() const;
+ bool hasIsElectable() const {
+ return _electableSet;
+ }
+ bool isElectable() const;
+ const std::string& getHbMsg() const {
+ return _hbmsg;
+ }
+ bool hasTime() const {
+ return _timeSet;
+ }
+ Seconds getTime() const;
+ bool hasOpTime() const {
+ return _opTimeSet;
+ }
+ OpTime getOpTime() const;
+ const std::string& getSyncingTo() const {
+ return _syncingTo;
+ }
+ int getVersion() const {
+ return _version;
+ }
+ bool hasConfig() const {
+ return _configSet;
+ }
+ const ReplicaSetConfig& getConfig() const;
+
+ /**
+ * Sets _mismatch to true.
+ */
+ void noteMismatched() {
+ _mismatch = true;
+ }
+
+ /**
+ * Sets _isReplSet to true.
+ */
+ void noteReplSet() {
+ _isReplSet = true;
+ }
+
+ /**
+ * Sets _stateDisagreement to true.
+ */
+ void noteStateDisagreement() {
+ _stateDisagreement = true;
+ }
+
+ /**
+     * Sets _hasData to true, and _hasDataSet to true to indicate that _hasData has been modified.
+ */
+ void noteHasData() {
+ _hasDataSet = _hasData = true;
+ }
+
+ /**
+ * Sets _setName to "name".
+ */
+ void setSetName(std::string name) {
+ _setName = name;
+ }
+
+ /**
+ * Sets _state to "state".
+ */
+ void setState(MemberState state) {
+ _stateSet = true;
+ _state = state;
+ }
+
/**
- * Response structure for the replSetHeartbeat command.
- */
- class ReplSetHeartbeatResponse {
- public:
- ReplSetHeartbeatResponse();
-
- /**
- * Initializes this ReplSetHeartbeatResponse from the contents of "doc".
- */
- Status initialize(const BSONObj& doc);
-
- /**
- * Appends all non-default values to "builder".
- */
- void addToBSON(BSONObjBuilder* builder) const;
-
- /**
- * Returns a BSONObj consisting of all non-default values to "builder".
- */
- BSONObj toBSON() const;
-
- /**
- * Returns toBSON().toString()
- */
- const std::string toString() const { return toBSON().toString(); }
-
- bool hasDataSet() const { return _hasDataSet; }
- bool hasData() const { return _hasData; }
- bool isMismatched() const { return _mismatch; }
- bool isReplSet() const { return _isReplSet; }
- bool isStateDisagreement() const { return _stateDisagreement; }
- const std::string& getReplicaSetName() const { return _setName; }
- bool hasState() const { return _stateSet; }
- MemberState getState() const;
- bool hasElectionTime() const { return _electionTimeSet; }
- OpTime getElectionTime() const;
- bool hasIsElectable() const { return _electableSet; }
- bool isElectable() const;
- const std::string& getHbMsg() const { return _hbmsg; }
- bool hasTime() const { return _timeSet; }
- Seconds getTime() const;
- bool hasOpTime() const { return _opTimeSet; }
- OpTime getOpTime() const;
- const std::string& getSyncingTo() const { return _syncingTo; }
- int getVersion() const { return _version; }
- bool hasConfig() const { return _configSet; }
- const ReplicaSetConfig& getConfig() const;
-
- /**
- * Sets _mismatch to true.
- */
- void noteMismatched() { _mismatch = true; }
-
- /**
- * Sets _isReplSet to true.
- */
- void noteReplSet() { _isReplSet = true; }
-
- /**
- * Sets _stateDisagreement to true.
- */
- void noteStateDisagreement() { _stateDisagreement = true; }
-
- /**
- * Sets _hasData to true, and _hasDataSet to true to indicate _hasData has been modified
- */
- void noteHasData() { _hasDataSet = _hasData = true;}
-
- /**
- * Sets _setName to "name".
- */
- void setSetName(std::string name) { _setName = name; }
-
- /**
- * Sets _state to "state".
- */
- void setState(MemberState state) { _stateSet = true; _state = state; }
-
- /**
- * Sets the optional "electionTime" field to the given OpTime.
- */
- void setElectionTime(OpTime time) { _electionTimeSet = true; _electionTime = time; }
-
- /**
- * Sets _electable to "electable" and sets _electableSet to true to indicate
- * that the value of _electable has been modified.
- */
- void setElectable(bool electable) { _electableSet = true; _electable = electable; }
-
- /**
- * Sets _hbmsg to "hbmsg".
- */
- void setHbMsg(std::string hbmsg) { _hbmsg = hbmsg; }
-
- /**
- * Sets the optional "time" field of the response to "theTime", which is
- * a count of seconds since the UNIX epoch.
- */
- void setTime(Seconds theTime) { _timeSet = true; _time = theTime; }
-
- /**
- * Sets _opTime to "time" and sets _opTimeSet to true to indicate that the value
- * of _opTime has been modified.
- */
- void setOpTime(OpTime time) { _opTimeSet = true; _opTime = time; }
-
- /**
- * Sets _syncingTo to "syncingTo".
- */
- void setSyncingTo(std::string syncingTo) { _syncingTo = syncingTo; }
-
- /**
- * Sets _version to "version".
- */
- void setVersion(int version) { _version = version; }
-
- /**
- * Initializes _config with "config".
- */
- void setConfig(const ReplicaSetConfig& config) { _configSet = true; _config = config; }
-
- private:
- bool _electionTimeSet;
- OpTime _electionTime;
-
- bool _timeSet;
- Seconds _time; // Seconds since UNIX epoch.
-
- bool _opTimeSet;
- OpTime _opTime;
-
- bool _electableSet;
- bool _electable;
-
- bool _hasDataSet;
- bool _hasData;
-
- bool _mismatch;
- bool _isReplSet;
- bool _stateDisagreement;
-
- bool _stateSet;
- MemberState _state;
-
- int _version;
- std::string _setName;
- std::string _hbmsg;
- std::string _syncingTo;
-
- bool _configSet;
- ReplicaSetConfig _config;
- };
-
-} // namespace repl
-} // namespace mongo
+ * Sets the optional "electionTime" field to the given OpTime.
+ */
+ void setElectionTime(OpTime time) {
+ _electionTimeSet = true;
+ _electionTime = time;
+ }
+
+ /**
+ * Sets _electable to "electable" and sets _electableSet to true to indicate
+ * that the value of _electable has been modified.
+ */
+ void setElectable(bool electable) {
+ _electableSet = true;
+ _electable = electable;
+ }
+
+ /**
+ * Sets _hbmsg to "hbmsg".
+ */
+ void setHbMsg(std::string hbmsg) {
+ _hbmsg = hbmsg;
+ }
+
+ /**
+ * Sets the optional "time" field of the response to "theTime", which is
+ * a count of seconds since the UNIX epoch.
+ */
+ void setTime(Seconds theTime) {
+ _timeSet = true;
+ _time = theTime;
+ }
+
+ /**
+ * Sets _opTime to "time" and sets _opTimeSet to true to indicate that the value
+ * of _opTime has been modified.
+ */
+ void setOpTime(OpTime time) {
+ _opTimeSet = true;
+ _opTime = time;
+ }
+
+ /**
+ * Sets _syncingTo to "syncingTo".
+ */
+ void setSyncingTo(std::string syncingTo) {
+ _syncingTo = syncingTo;
+ }
+
+ /**
+ * Sets _version to "version".
+ */
+ void setVersion(int version) {
+ _version = version;
+ }
+
+ /**
+ * Initializes _config with "config".
+ */
+ void setConfig(const ReplicaSetConfig& config) {
+ _configSet = true;
+ _config = config;
+ }
+
+private:
+ bool _electionTimeSet;
+ OpTime _electionTime;
+
+ bool _timeSet;
+ Seconds _time; // Seconds since UNIX epoch.
+
+ bool _opTimeSet;
+ OpTime _opTime;
+
+ bool _electableSet;
+ bool _electable;
+
+ bool _hasDataSet;
+ bool _hasData;
+
+ bool _mismatch;
+ bool _isReplSet;
+ bool _stateDisagreement;
+
+ bool _stateSet;
+ MemberState _state;
+
+ int _version;
+ std::string _setName;
+ std::string _hbmsg;
+ std::string _syncingTo;
+
+ bool _configSet;
+ ReplicaSetConfig _config;
+};
+
+} // namespace repl
+} // namespace mongo
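
On the responding side the same flags drive serialization: addToBSON() emits only the fields whose setters ran, plus "ok" and the always-present "hbmsg". A build-side sketch with placeholder values:

    ReplSetHeartbeatResponse resp;
    resp.noteReplSet();
    resp.setSetName("rs0");
    resp.setState(MemberState(MemberState::RS_SECONDARY));
    resp.setElectionTime(OpTime(100, 1));
    resp.setOpTime(OpTime(100, 2));
    resp.setVersion(5);
    resp.setHbMsg("");

    // { ok: 1.0, opTime: ..., electionTime: ..., rs: true,
    //   state: 2, v: 5, hbmsg: "", set: "rs0" }
    BSONObj wire = resp.toBSON();
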
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
index 71003ab4fec..a2791084d00 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
@@ -38,722 +38,733 @@ namespace mongo {
namespace repl {
namespace {
- using boost::scoped_ptr;
- using std::auto_ptr;
-
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
- }
-
- TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
- int fieldsSet = 2;
- ReplSetHeartbeatResponse hbResponse;
- ReplSetHeartbeatResponse hbResponseObjRoundTripChecker;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(false, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(-1, hbResponse.getVersion());
-
- BSONObj hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
-
- Status initializeResult = Status::OK();
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set version
- hbResponse.setVersion(1);
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(false, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set setname
- hbResponse.setSetName("rs0");
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(false, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set electionTime
- hbResponse.setElectionTime(OpTime(10,0));
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set opTime
- hbResponse.setOpTime(Date_t(10));
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set time
- hbResponse.setTime(Seconds(10));
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(false, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set electable
- hbResponse.setElectable(true);
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(false, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set config
- ReplicaSetConfig config;
- hbResponse.setConfig(config);
- ++fieldsSet;
- ASSERT_EQUALS(false, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set state
- hbResponse.setState(MemberState(MemberState::RS_SECONDARY));
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set stateDisagreement
- hbResponse.noteStateDisagreement();
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(false, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set replSet
- hbResponse.noteReplSet();
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set syncingTo
- hbResponse.setSyncingTo("syncTarget");
- ++fieldsSet;
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("", hbResponse.getHbMsg());
- ASSERT_EQUALS("syncTarget", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
- ASSERT_EQUALS("syncTarget", hbResponseObj["syncingTo"].String());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set hbmsg
- hbResponse.setHbMsg("lub dub");
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(false, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
- ASSERT_EQUALS("syncTarget", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
- ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
- ASSERT_EQUALS("lub dub", hbResponseObj["hbmsg"].String());
- ASSERT_EQUALS(1, hbResponseObj["v"].Number());
- ASSERT_EQUALS(OpTime(10,0), hbResponseObj["electionTime"]._opTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponseObj["opTime"]._opTime());
- ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
- ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
- ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
- ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
- ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
- ASSERT_EQUALS("syncTarget", hbResponseObj["syncingTo"].String());
-
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
-
- // set mismatched
- hbResponse.noteMismatched();
- ASSERT_EQUALS(true, hbResponse.hasState());
- ASSERT_EQUALS(true, hbResponse.hasElectionTime());
- ASSERT_EQUALS(true, hbResponse.hasIsElectable());
- ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
- ASSERT_EQUALS(true, hbResponse.hasConfig());
- ASSERT_EQUALS(true, hbResponse.isMismatched());
- ASSERT_EQUALS(true, hbResponse.isReplSet());
- ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
- ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- hbResponse.getState().toString());
- ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
- ASSERT_EQUALS("syncTarget", hbResponse.getSyncingTo());
- ASSERT_EQUALS(1, hbResponse.getVersion());
- ASSERT_EQUALS(OpTime(10,0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(0,10), hbResponse.getOpTime());
- ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
- ASSERT_EQUALS(true, hbResponse.isElectable());
- ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
-
- hbResponseObj = hbResponse.toBSON();
- ASSERT_EQUALS(2, hbResponseObj.nFields());
- ASSERT_EQUALS(true, hbResponseObj["mismatch"].trueValue());
-
- // NOTE: Does not check round-trip. Once noteMismatched is set, the resulting BSON
- // fails to parse in initialize().
- initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
- ASSERT_NOT_EQUALS(Status::OK(), initializeResult);
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, initializeResult.code());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeWrongElectionTimeType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "electionTime" << "hello");
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"electionTime\" field in response to replSetHeartbeat command to "
- "have type Date or Timestamp, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeWrongTimeType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "time" << "hello");
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"time\" field in response to replSetHeartbeat command to "
- "have a numeric type, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeWrongOpTimeType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "opTime" << "hello");
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"opTime\" field in response to replSetHeartbeat command to "
- "have type Date or Timestamp, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeMemberStateWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "state" << "hello");
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"state\" field in response to replSetHeartbeat command to "
- "have type NumberInt or NumberLong, but found type String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooLow) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "state" << -1);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("Value for \"state\" in response to replSetHeartbeat is out of range; "
- "legal values are non-negative and no more than 10",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooHigh) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 << "state" << 11);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("Value for \"state\" in response to replSetHeartbeat is out of range; "
- "legal values are non-negative and no more than 10",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeVersionWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << "hello");
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"v\" field in response to replSetHeartbeat to "
- "have type NumberInt, but found String",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeReplSetNameWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "set" << 4);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"set\" field in response to replSetHeartbeat to "
- "have type String, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeHeartbeatMessageWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "hbmsg" << 4);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"hbmsg\" field in response to replSetHeartbeat to "
- "have type String, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeSyncingToWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "syncingTo" << 4);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"syncingTo\" field in response to replSetHeartbeat to "
- "have type String, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeConfigWrongType) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "config" << 4);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
- ASSERT_EQUALS("Expected \"config\" in response to replSetHeartbeat to "
- "have type Object, but found NumberInt32",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeBadConfig) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 1.0 <<
- "v" << 2 << // needs a version to get this far in initialize()
- "config" << BSON("illegalFieldName" << 2));
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("Unexpected field illegalFieldName in replica set configuration",
- result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeBothElectionTimeTypesSameResult) {
- ReplSetHeartbeatResponse hbResponseDate;
- ReplSetHeartbeatResponse hbResponseTimestamp;
- BSONObjBuilder initializerDate;
- BSONObjBuilder initializerTimestamp;
- Date_t electionTime = Date_t(974132);
-
- initializerDate.append("ok", 1.0);
- initializerDate.append("v", 1);
- initializerDate.appendDate("electionTime", electionTime);
- Status result = hbResponseDate.initialize(initializerDate.obj());
- ASSERT_EQUALS(Status::OK(), result);
-
- initializerTimestamp.append("ok", 1.0);
- initializerTimestamp.append("v", 1);
- initializerTimestamp.appendTimestamp("electionTime", electionTime);
- result = hbResponseTimestamp.initialize(initializerTimestamp.obj());
- ASSERT_EQUALS(Status::OK(), result);
-
- ASSERT_EQUALS(hbResponseDate.getElectionTime(), hbResponseTimestamp.getElectionTime());
- }
-
- TEST(ReplSetHeartbeatResponse, InitializeBothOpTimeTypesSameResult) {
- ReplSetHeartbeatResponse hbResponseDate;
- ReplSetHeartbeatResponse hbResponseTimestamp;
- BSONObjBuilder initializerDate;
- BSONObjBuilder initializerTimestamp;
- Date_t opTime = Date_t(974132);
-
- initializerDate.append("ok", 1.0);
- initializerDate.append("v", 1);
- initializerDate.appendDate("opTime", opTime);
- Status result = hbResponseDate.initialize(initializerDate.obj());
- ASSERT_EQUALS(Status::OK(), result);
-
- initializerTimestamp.append("ok", 1.0);
- initializerTimestamp.append("v", 1);
- initializerTimestamp.appendTimestamp("opTime", opTime);
- result = hbResponseTimestamp.initialize(initializerTimestamp.obj());
- ASSERT_EQUALS(Status::OK(), result);
-
- ASSERT_EQUALS(hbResponseDate.getOpTime(), hbResponseTimestamp.getOpTime());
- }
-
- TEST(ReplSetHeartbeatResponse, NoConfigStillInitializing) {
- ReplSetHeartbeatResponse hbResp;
- std::string msg = "still initializing";
- Status result = hbResp.initialize(BSON("ok" << 1.0 <<
- "rs" << true <<
- "hbmsg" << msg));
- ASSERT_EQUALS(Status::OK(), result);
- ASSERT_EQUALS(true, hbResp.isReplSet());
- ASSERT_EQUALS(msg, hbResp.getHbMsg());
- }
-
- TEST(ReplSetHeartbeatResponse, InvalidResponseOpTimeMissesConfigVersion) {
- ReplSetHeartbeatResponse hbResp;
- Status result = hbResp.initialize(BSON("ok" << 1.0 <<
- "opTime" << OpTime()));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.code());
- ASSERT_TRUE(stringContains(result.reason(), "\"v\""))
- << result.reason() << " does not contain the expected error about the required \"v\" field";
- }
-
- TEST(ReplSetHeartbeatResponse, MismatchedReplicaSetNames) {
- ReplSetHeartbeatResponse hbResponse;
- BSONObj initializerObj = BSON("ok" << 0.0 << "mismatch" << true);
- Status result = hbResponse.initialize(initializerObj);
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result.code());
- }
-
- TEST(ReplSetHeartbeatResponse, AuthFailure) {
- ReplSetHeartbeatResponse hbResp;
- std::string errMsg = "Unauthorized";
- Status result = hbResp.initialize(BSON("ok" << 0.0 <<
- "errmsg" << errMsg <<
- "code" << ErrorCodes::Unauthorized));
- ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
- ASSERT_EQUALS(errMsg, result.reason());
- }
-
- TEST(ReplSetHeartbeatResponse, ServerError) {
- ReplSetHeartbeatResponse hbResp;
- std::string errMsg = "Random Error";
- Status result = hbResp.initialize(BSON("ok" << 0.0 << "errmsg" << errMsg ));
- ASSERT_EQUALS(ErrorCodes::UnknownError, result.code());
- ASSERT_EQUALS(errMsg, result.reason());
- }
+using boost::scoped_ptr;
+using std::auto_ptr;
+
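+// Returns true if needle occurs anywhere in haystack; used by the assertions
+// below to check fragments of error messages.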
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
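+// Builds up a heartbeat response one setter at a time. After each step the
+// test re-checks every accessor, the number of fields toBSON() emits
+// (tracked by fieldsSet), and that parsing the BSON back through
+// initialize() round-trips to the same serialized form.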
+TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
+ int fieldsSet = 2;
+ ReplSetHeartbeatResponse hbResponse;
+ ReplSetHeartbeatResponse hbResponseObjRoundTripChecker;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(false, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(-1, hbResponse.getVersion());
+
+ BSONObj hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+
+ Status initializeResult = Status::OK();
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set version
+ hbResponse.setVersion(1);
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(false, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set setname
+ hbResponse.setSetName("rs0");
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(false, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set electionTime
+ hbResponse.setElectionTime(OpTime(10, 0));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set opTime
+ hbResponse.setOpTime(Date_t(10));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set time
+ hbResponse.setTime(Seconds(10));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set electable
+ hbResponse.setElectable(true);
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set config
+ ReplicaSetConfig config;
+ hbResponse.setConfig(config);
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set state
+ hbResponse.setState(MemberState(MemberState::RS_SECONDARY));
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set stateDisagreement
+ hbResponse.noteStateDisagreement();
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set replSet
+ hbResponse.noteReplSet();
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set syncingTo
+ hbResponse.setSyncingTo("syncTarget");
+ ++fieldsSet;
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS("syncTarget", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
+ ASSERT_EQUALS("syncTarget", hbResponseObj["syncingTo"].String());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set hbmsg
+ hbResponse.setHbMsg("lub dub");
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
+ ASSERT_EQUALS("syncTarget", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("lub dub", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponseObj["electionTime"]._opTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponseObj["opTime"]._opTime());
+ ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
+ ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
+ ASSERT_EQUALS(2, hbResponseObj["state"].numberLong());
+ ASSERT_EQUALS(false, hbResponseObj["mismatch"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["stateDisagreement"].trueValue());
+ ASSERT_EQUALS(true, hbResponseObj["rs"].trueValue());
+ ASSERT_EQUALS("syncTarget", hbResponseObj["syncingTo"].String());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON().toString());
+
+ // set mismatched
+ hbResponse.noteMismatched();
+ ASSERT_EQUALS(true, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(true, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(true, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasConfig());
+ ASSERT_EQUALS(true, hbResponse.isMismatched());
+ ASSERT_EQUALS(true, hbResponse.isReplSet());
+ ASSERT_EQUALS(true, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ hbResponse.getState().toString());
+ ASSERT_EQUALS("lub dub", hbResponse.getHbMsg());
+ ASSERT_EQUALS("syncTarget", hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getVersion());
+ ASSERT_EQUALS(OpTime(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(0, 10), hbResponse.getOpTime());
+ ASSERT_EQUALS(10, hbResponse.getTime().total_seconds());
+ ASSERT_EQUALS(true, hbResponse.isElectable());
+ ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
+
+ hbResponseObj = hbResponse.toBSON();
+ ASSERT_EQUALS(2, hbResponseObj.nFields());
+ ASSERT_EQUALS(true, hbResponseObj["mismatch"].trueValue());
+
+ // NOTE: Does not check round-trip. Once noteMismatched is set, the resulting BSON
+ // fails to parse in initialize().
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj);
+ ASSERT_NOT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, initializeResult.code());
+}
+
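+// The tests below exercise initialize()'s field validation. Field names match
+// the wire format built up above: "v" is the config version, "e" electability,
+// "rs" replica-set membership, "hbmsg" the heartbeat message, and "syncingTo"
+// the sync source. A fully populated response serializes roughly as:
+//   { set: "rs0", hbmsg: "...", v: 1, electionTime: Timestamp(10, 0),
+//     opTime: Timestamp(0, 10), time: 10, e: true, config: {...}, state: 2,
+//     mismatch: false, stateDisagreement: true, rs: true, syncingTo: "..." }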
+TEST(ReplSetHeartbeatResponse, InitializeWrongElectionTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "electionTime"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"electionTime\" field in response to replSetHeartbeat command to "
+ "have type Date or Timestamp, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeWrongTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "time"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"time\" field in response to replSetHeartbeat command to "
+ "have a numeric type, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeWrongOpTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "opTime"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"opTime\" field in response to replSetHeartbeat command to "
+ "have type Date or Timestamp, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeMemberStateWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "state"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"state\" field in response to replSetHeartbeat command to "
+ "have type NumberInt or NumberLong, but found type String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooLow) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "state" << -1);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS(
+ "Value for \"state\" in response to replSetHeartbeat is out of range; "
+ "legal values are non-negative and no more than 10",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeMemberStateTooHigh) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "state" << 11);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS(
+ "Value for \"state\" in response to replSetHeartbeat is out of range; "
+ "legal values are non-negative and no more than 10",
+ result.reason());
+}
+
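+// The remaining type-validation tests supply a valid "v" field because
+// initialize() parses the config version early and fails without it (see the
+// inline comments below).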
+TEST(ReplSetHeartbeatResponse, InitializeVersionWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "v"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"v\" field in response to replSetHeartbeat to "
+ "have type NumberInt, but found String",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeReplSetNameWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "set" << 4);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"set\" field in response to replSetHeartbeat to "
+ "have type String, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeHeartbeatMessageWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "hbmsg" << 4);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"hbmsg\" field in response to replSetHeartbeat to "
+ "have type String, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeSyncingToWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "syncingTo" << 4);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"syncingTo\" field in response to replSetHeartbeat to "
+ "have type String, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeConfigWrongType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "config" << 4);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS(
+ "Expected \"config\" in response to replSetHeartbeat to "
+ "have type Object, but found NumberInt32",
+ result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeBadConfig) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj =
+ BSON("ok" << 1.0 << "v" << 2 << // needs a version to get this far in initialize()
+ "config" << BSON("illegalFieldName" << 2));
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS("Unexpected field illegalFieldName in replica set configuration",
+ result.reason());
+}
+
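+// Election and op times may arrive encoded as either Date or Timestamp (see
+// the TypeMismatch messages above); the next two tests verify that both
+// encodings decode to the same value.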
+TEST(ReplSetHeartbeatResponse, InitializeBothElectionTimeTypesSameResult) {
+ ReplSetHeartbeatResponse hbResponseDate;
+ ReplSetHeartbeatResponse hbResponseTimestamp;
+ BSONObjBuilder initializerDate;
+ BSONObjBuilder initializerTimestamp;
+ Date_t electionTime = Date_t(974132);
+
+ initializerDate.append("ok", 1.0);
+ initializerDate.append("v", 1);
+ initializerDate.appendDate("electionTime", electionTime);
+ Status result = hbResponseDate.initialize(initializerDate.obj());
+ ASSERT_EQUALS(Status::OK(), result);
+
+ initializerTimestamp.append("ok", 1.0);
+ initializerTimestamp.append("v", 1);
+ initializerTimestamp.appendTimestamp("electionTime", electionTime);
+ result = hbResponseTimestamp.initialize(initializerTimestamp.obj());
+ ASSERT_EQUALS(Status::OK(), result);
+
+    ASSERT_EQUALS(hbResponseDate.getElectionTime(), hbResponseTimestamp.getElectionTime());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeBothOpTimeTypesSameResult) {
+ ReplSetHeartbeatResponse hbResponseDate;
+ ReplSetHeartbeatResponse hbResponseTimestamp;
+ BSONObjBuilder initializerDate;
+ BSONObjBuilder initializerTimestamp;
+ Date_t opTime = Date_t(974132);
+
+ initializerDate.append("ok", 1.0);
+ initializerDate.append("v", 1);
+ initializerDate.appendDate("opTime", opTime);
+ Status result = hbResponseDate.initialize(initializerDate.obj());
+ ASSERT_EQUALS(Status::OK(), result);
+
+ initializerTimestamp.append("ok", 1.0);
+ initializerTimestamp.append("v", 1);
+ initializerTimestamp.appendTimestamp("opTime", opTime);
+ result = hbResponseTimestamp.initialize(initializerTimestamp.obj());
+ ASSERT_EQUALS(Status::OK(), result);
+
+    ASSERT_EQUALS(hbResponseDate.getOpTime(), hbResponseTimestamp.getOpTime());
+}
+
+TEST(ReplSetHeartbeatResponse, NoConfigStillInitializing) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string msg = "still initializing";
+ Status result = hbResp.initialize(BSON("ok" << 1.0 << "rs" << true << "hbmsg" << msg));
+ ASSERT_EQUALS(Status::OK(), result);
+ ASSERT_EQUALS(true, hbResp.isReplSet());
+ ASSERT_EQUALS(msg, hbResp.getHbMsg());
+}
+
+TEST(ReplSetHeartbeatResponse, InvalidResponseOpTimeMissesConfigVersion) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string msg = "still initializing";
+ Status result = hbResp.initialize(BSON("ok" << 1.0 << "opTime" << OpTime()));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, result.code());
+ ASSERT_TRUE(stringContains(result.reason(), "\"v\""))
+ << result.reason() << " doesn't contain 'v' field required error msg";
+}
+
+TEST(ReplSetHeartbeatResponse, MismatchedReplSetNames) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 0.0 << "mismatch" << true);
+ Status result = hbResponse.initialize(initializerObj);
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result.code());
+}
+
+TEST(ReplSetHeartbeatResponse, AuthFailure) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string errMsg = "Unauthorized";
+ Status result = hbResp.initialize(
+ BSON("ok" << 0.0 << "errmsg" << errMsg << "code" << ErrorCodes::Unauthorized));
+ ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
+ ASSERT_EQUALS(errMsg, result.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, ServerError) {
+ ReplSetHeartbeatResponse hbResp;
+ std::string errMsg = "Random Error";
+ Status result = hbResp.initialize(BSON("ok" << 0.0 << "errmsg" << errMsg));
+ ASSERT_EQUALS(ErrorCodes::UnknownError, result.code());
+ ASSERT_EQUALS(errMsg, result.reason());
+}
} // namespace
} // namespace repl
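For reference, a minimal caller-side sketch of the contract these tests pin down; the invariant check and the "not-a-number" value are illustrative only:

    ReplSetHeartbeatResponse hbResp;
    Status s = hbResp.initialize(BSON("ok" << 1.0 << "v"
                                           << "not-a-number"));
    invariant(s == ErrorCodes::TypeMismatch);  // mis-typed "v" field
    // s.reason() carries the exact message the tests above assert verbatim.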
diff --git a/src/mongo/db/repl/repl_set_html_summary.cpp b/src/mongo/db/repl/repl_set_html_summary.cpp
index ec7d54e9748..227f96228a8 100644
--- a/src/mongo/db/repl/repl_set_html_summary.cpp
+++ b/src/mongo/db/repl/repl_set_html_summary.cpp
@@ -42,187 +42,180 @@
namespace mongo {
namespace repl {
- ReplSetHtmlSummary::ReplSetHtmlSummary() : _selfIndex(-1), _primaryIndex(-1), _selfUptime(0) {}
+ReplSetHtmlSummary::ReplSetHtmlSummary() : _selfIndex(-1), _primaryIndex(-1), _selfUptime(0) {}
namespace {
- template<class T>
- std::string ToString(const T& t) {
- str::stream s;
- s << t;
- return s;
- }
+template <class T>
+std::string ToString(const T& t) {
+ str::stream s;
+ s << t;
+ return s;
+}
- /**
- * Turns an unsigned int representing a duration of time in milliseconds and turns it into
- * a human readable time string representation.
- */
- std::string ago(unsigned int duration) {
- std::stringstream s;
- if( duration < 180 ) {
- s << duration << " sec";
- if( duration != 1 ) s << 's';
- }
- else if( duration < 3600 ) {
- s.precision(2);
- s << duration / 60.0 << " mins";
- }
- else {
- s.precision(2);
- s << duration / 3600.0 << " hrs";
- }
- return s.str();
+/**
+ * Takes an unsigned int representing a duration of time in seconds and turns it
+ * into a human-readable time string.
+ */
+std::string ago(unsigned int duration) {
+ std::stringstream s;
+ if (duration < 180) {
+ s << duration << " sec";
+ if (duration != 1)
+ s << 's';
+ } else if (duration < 3600) {
+ s.precision(2);
+ s << duration / 60.0 << " mins";
+ } else {
+ s.precision(2);
+ s << duration / 3600.0 << " hrs";
}
+ return s.str();
+}
- unsigned int timeDifference(Date_t now, Date_t past) {
- return static_cast<unsigned int> ((past ?
- (now - past) / 1000 /* convert millis to secs */ : 0));
- }
+unsigned int timeDifference(Date_t now, Date_t past) {
+ return static_cast<unsigned int>((past ? (now - past) / 1000 /* convert millis to secs */ : 0));
+}
- std::string stateAsHtml(const MemberState& s) {
- using namespace html;
-
- if( s.s == MemberState::RS_STARTUP )
- return a("",
- "server still starting up, or still trying to initiate the set",
- "STARTUP");
- if( s.s == MemberState::RS_PRIMARY )
- return a("", "this server thinks it is primary", "PRIMARY");
- if( s.s == MemberState::RS_SECONDARY )
- return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY");
- if( s.s == MemberState::RS_RECOVERING )
- return a("",
- "recovering/resyncing; after recovery usually auto-transitions to secondary",
- "RECOVERING");
- if( s.s == MemberState::RS_STARTUP2 )
- return a("", "loaded config, still determining who is primary", "STARTUP2");
- if( s.s == MemberState::RS_ARBITER )
- return a("", "this server is an arbiter only", "ARBITER");
- if( s.s == MemberState::RS_DOWN )
- return a("", "member is down, slow, or unreachable", "DOWN");
- if( s.s == MemberState::RS_ROLLBACK )
- return a("", "rolling back operations to get in sync", "ROLLBACK");
- if( s.s == MemberState::RS_UNKNOWN)
- return a("", "we do not know what state this node is in", "UNKNOWN");
- if( s.s == MemberState::RS_REMOVED)
- return a("", "this server has been removed from the replica set config", "ROLLBACK");
- return "";
- }
+std::string stateAsHtml(const MemberState& s) {
+ using namespace html;
+
+ if (s.s == MemberState::RS_STARTUP)
+ return a("", "server still starting up, or still trying to initiate the set", "STARTUP");
+ if (s.s == MemberState::RS_PRIMARY)
+ return a("", "this server thinks it is primary", "PRIMARY");
+ if (s.s == MemberState::RS_SECONDARY)
+ return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY");
+ if (s.s == MemberState::RS_RECOVERING)
+ return a("",
+ "recovering/resyncing; after recovery usually auto-transitions to secondary",
+ "RECOVERING");
+ if (s.s == MemberState::RS_STARTUP2)
+ return a("", "loaded config, still determining who is primary", "STARTUP2");
+ if (s.s == MemberState::RS_ARBITER)
+ return a("", "this server is an arbiter only", "ARBITER");
+ if (s.s == MemberState::RS_DOWN)
+ return a("", "member is down, slow, or unreachable", "DOWN");
+ if (s.s == MemberState::RS_ROLLBACK)
+ return a("", "rolling back operations to get in sync", "ROLLBACK");
+ if (s.s == MemberState::RS_UNKNOWN)
+ return a("", "we do not know what state this node is in", "UNKNOWN");
+ if (s.s == MemberState::RS_REMOVED)
+ return a("", "this server has been removed from the replica set config", "ROLLBACK");
+ return "";
+}
}
- const std::string ReplSetHtmlSummary::toHtmlString() const {
- using namespace html;
+const std::string ReplSetHtmlSummary::toHtmlString() const {
+ using namespace html;
- std::stringstream s;
+ std::stringstream s;
- if (!_config.isInitialized()) {
- s << p("Still starting up, or else replset is not yet initiated.");
- return s.str();
- }
- if (_selfIndex < 0) {
- s << p("This node is not a member of its replica set configuration, it most likely was"
- " removed recently");
- return s.str();
- }
+ if (!_config.isInitialized()) {
+ s << p("Still starting up, or else replset is not yet initiated.");
+ return s.str();
+ }
+ if (_selfIndex < 0) {
+ s << p(
+ "This node is not a member of its replica set configuration, it most likely was"
+ " removed recently");
+ return s.str();
+ }
- int votesUp = 0;
- int totalVotes = 0;
- // Build table of node information.
- std::stringstream memberTable;
- const char *h[] =
- {"Member",
- "<a title=\"member id in the replset config\">id</a>",
- "Up",
- "<a title=\"length of time we have been continuously connected to the other member "
- "with no reconnects (for self, shows uptime)\">cctime</a>",
- "<a title=\"when this server last received a heartbeat response - includes error code "
- "responses\">Last heartbeat</a>",
- "Votes",
- "Priority",
- "State",
- "Messages",
- "<a title=\"how up to date this server is. this value polled every few seconds so "
- "actually lag is typically lower than value shown here.\">optime</a>",
- 0
- };
- memberTable << table(h);
-
- for (int i = 0; i < _config.getNumMembers(); ++i) {
- const MemberConfig& memberConfig = _config.getMemberAt(i);
- const MemberHeartbeatData& memberHB = _hbData[i];
- bool isSelf = _selfIndex == i;
- bool up = memberHB.getHealth() > 0;
-
- totalVotes += memberConfig.getNumVotes();
- if (up || isSelf) {
- votesUp += memberConfig.getNumVotes();
- }
+ int votesUp = 0;
+ int totalVotes = 0;
+ // Build table of node information.
+ std::stringstream memberTable;
+ const char* h[] = {
+ "Member",
+ "<a title=\"member id in the replset config\">id</a>",
+ "Up",
+ "<a title=\"length of time we have been continuously connected to the other member "
+ "with no reconnects (for self, shows uptime)\">cctime</a>",
+ "<a title=\"when this server last received a heartbeat response - includes error code "
+ "responses\">Last heartbeat</a>",
+ "Votes",
+ "Priority",
+ "State",
+ "Messages",
+ "<a title=\"how up to date this server is. this value polled every few seconds so "
+ "actually lag is typically lower than value shown here.\">optime</a>",
+ 0};
+ memberTable << table(h);
+
+ for (int i = 0; i < _config.getNumMembers(); ++i) {
+ const MemberConfig& memberConfig = _config.getMemberAt(i);
+ const MemberHeartbeatData& memberHB = _hbData[i];
+ bool isSelf = _selfIndex == i;
+ bool up = memberHB.getHealth() > 0;
+
+ totalVotes += memberConfig.getNumVotes();
+ if (up || isSelf) {
+ votesUp += memberConfig.getNumVotes();
+ }
- memberTable << tr();
- if (isSelf) {
- memberTable << td(memberConfig.getHostAndPort().toString() + " (me)");
- memberTable << td(memberConfig.getId());
- memberTable << td("1"); // up
- memberTable << td(ago(_selfUptime));
- memberTable << td(""); // last heartbeat
- memberTable << td(ToString(memberConfig.getNumVotes()));
- memberTable << td(ToString(memberConfig.getPriority()));
- memberTable << td(stateAsHtml(_selfState) +
- (memberConfig.isHidden() ? " (hidden)" : ""));
- memberTable << td(_selfHeartbeatMessage);
- memberTable << td(_selfOptime.toString());
+ memberTable << tr();
+ if (isSelf) {
+ memberTable << td(memberConfig.getHostAndPort().toString() + " (me)");
+ memberTable << td(memberConfig.getId());
+ memberTable << td("1"); // up
+ memberTable << td(ago(_selfUptime));
+ memberTable << td(""); // last heartbeat
+ memberTable << td(ToString(memberConfig.getNumVotes()));
+ memberTable << td(ToString(memberConfig.getPriority()));
+ memberTable << td(stateAsHtml(_selfState) +
+ (memberConfig.isHidden() ? " (hidden)" : ""));
+ memberTable << td(_selfHeartbeatMessage);
+ memberTable << td(_selfOptime.toString());
+ } else {
+ std::stringstream link;
+ link << "http://" << memberConfig.getHostAndPort().host() << ':'
+ << (memberConfig.getHostAndPort().port() + 1000) << "/_replSet";
+ memberTable << td(a(link.str(), "", memberConfig.getHostAndPort().toString()));
+ memberTable << td(memberConfig.getId());
+ memberTable << td(red(str::stream() << memberHB.getHealth(), !up));
+ const unsigned int uptime = timeDifference(_now, memberHB.getUpSince());
+ memberTable << td(ago(uptime));
+ if (memberHB.getLastHeartbeat() == 0) {
+ memberTable << td("never");
+ } else {
+ memberTable << td(ago(timeDifference(_now, memberHB.getLastHeartbeat())));
}
- else {
- std::stringstream link;
- link << "http://" << memberConfig.getHostAndPort().host() << ':' <<
- (memberConfig.getHostAndPort().port() + 1000) << "/_replSet";
- memberTable << td( a(link.str(), "", memberConfig.getHostAndPort().toString()) );
- memberTable << td(memberConfig.getId());
- memberTable << td(red(str::stream() << memberHB.getHealth(), !up));
- const unsigned int uptime = timeDifference(_now, memberHB.getUpSince());
- memberTable << td(ago(uptime));
- if (memberHB.getLastHeartbeat() == 0) {
- memberTable << td("never");
- }
- else {
- memberTable << td(ago(timeDifference(_now, memberHB.getLastHeartbeat())));
- }
- memberTable << td(ToString(memberConfig.getNumVotes()));
- memberTable << td(ToString(memberConfig.getPriority()));
- std::string state = memberHB.getState().toString() +
- (memberConfig.isHidden() ? " (hidden)" : "");
- if (up) {
- memberTable << td(state);
- }
- else {
- memberTable << td( grey(str::stream() << "(was " << state << ')', true) );
- }
- memberTable << td(grey(memberHB.getLastHeartbeatMsg(), !up));
- memberTable << td(memberHB.getLastHeartbeat() == 0 ?
- "?" : memberHB.getOpTime().toString());
+ memberTable << td(ToString(memberConfig.getNumVotes()));
+ memberTable << td(ToString(memberConfig.getPriority()));
+ std::string state =
+ memberHB.getState().toString() + (memberConfig.isHidden() ? " (hidden)" : "");
+ if (up) {
+ memberTable << td(state);
+ } else {
+ memberTable << td(grey(str::stream() << "(was " << state << ')', true));
}
- memberTable << _tr();
+ memberTable << td(grey(memberHB.getLastHeartbeatMsg(), !up));
+ memberTable << td(memberHB.getLastHeartbeat() == 0 ? "?"
+ : memberHB.getOpTime().toString());
}
- memberTable << _table();
+ memberTable << _tr();
+ }
+ memberTable << _table();
- s << table(0, false);
- s << tr("Set name:", _config.getReplSetName());
- bool majorityUp = votesUp * 2 > totalVotes;
- s << tr("Majority up:", majorityUp ? "yes" : "no" );
+ s << table(0, false);
+ s << tr("Set name:", _config.getReplSetName());
+ bool majorityUp = votesUp * 2 > totalVotes;
+ s << tr("Majority up:", majorityUp ? "yes" : "no");
- const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex);
+ const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex);
- if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) {
- int lag = _hbData[_primaryIndex].getOpTime().getSecs() - _selfOptime.getSecs();
- s << tr("Lag: ", str::stream() << lag << " secs");
- }
+ if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) {
+ int lag = _hbData[_primaryIndex].getOpTime().getSecs() - _selfOptime.getSecs();
+ s << tr("Lag: ", str::stream() << lag << " secs");
+ }
- s << _table();
+ s << _table();
- s << memberTable.str();
+ s << memberTable.str();
- return s.str();
- }
+ return s.str();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
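A quick worked example of ago() as defined above; inputs are seconds, and precision(2) yields two significant digits:

    ago(1);     // "1 sec"
    ago(45);    // "45 secs"
    ago(600);   // "10 mins"  (600 / 60.0)
    ago(7200);  // "2 hrs"    (7200 / 3600.0)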
diff --git a/src/mongo/db/repl/repl_set_html_summary.h b/src/mongo/db/repl/repl_set_html_summary.h
index 278f1ca35b6..98709ae4379 100644
--- a/src/mongo/db/repl/repl_set_html_summary.h
+++ b/src/mongo/db/repl/repl_set_html_summary.h
@@ -38,64 +38,63 @@ namespace mongo {
namespace repl {
- /**
- * Class containing all the information needed to build the replSet page on http interface,
- * and the logic to generate that page.
- */
- class ReplSetHtmlSummary {
- public:
- ReplSetHtmlSummary();
-
- const std::string toHtmlString() const;
-
- void setConfig(const ReplicaSetConfig& config) {
- _config = config;
- }
-
- void setHBData(const std::vector<MemberHeartbeatData>& hbData) {
- _hbData = hbData;
- }
-
- void setSelfIndex(int index) {
- _selfIndex = index;
- }
-
- void setPrimaryIndex(int index) {
- _primaryIndex = index;
- }
-
- void setSelfOptime(const OpTime& ts) {
- _selfOptime = ts;
- }
-
- void setSelfUptime(unsigned int time) {
- _selfUptime = time;
- }
-
- void setNow(Date_t now) {
- _now = now;
- }
-
- void setSelfState(const MemberState& state) {
- _selfState = state;
- }
-
- void setSelfHeartbeatMessage(StringData msg) {
- _selfHeartbeatMessage = msg.toString();
- }
-
- private:
-
- ReplicaSetConfig _config;
- std::vector<MemberHeartbeatData> _hbData;
- Date_t _now;
- int _selfIndex;
- int _primaryIndex;
- OpTime _selfOptime;
- unsigned int _selfUptime;
- MemberState _selfState;
- std::string _selfHeartbeatMessage;
- };
-
-} // namespace repl
-} // namespace mongo
+/**
+ * Class containing all the information needed to build the replSet page on the HTTP
+ * interface, and the logic to generate that page.
+ */
+class ReplSetHtmlSummary {
+public:
+ ReplSetHtmlSummary();
+
+ const std::string toHtmlString() const;
+
+ void setConfig(const ReplicaSetConfig& config) {
+ _config = config;
+ }
+
+ void setHBData(const std::vector<MemberHeartbeatData>& hbData) {
+ _hbData = hbData;
+ }
+
+ void setSelfIndex(int index) {
+ _selfIndex = index;
+ }
+
+ void setPrimaryIndex(int index) {
+ _primaryIndex = index;
+ }
+
+ void setSelfOptime(const OpTime& ts) {
+ _selfOptime = ts;
+ }
+
+ void setSelfUptime(unsigned int time) {
+ _selfUptime = time;
+ }
+
+ void setNow(Date_t now) {
+ _now = now;
+ }
+
+ void setSelfState(const MemberState& state) {
+ _selfState = state;
+ }
+
+ void setSelfHeartbeatMessage(StringData msg) {
+ _selfHeartbeatMessage = msg.toString();
+ }
+
+private:
+ ReplicaSetConfig _config;
+ std::vector<MemberHeartbeatData> _hbData;
+ Date_t _now;
+ int _selfIndex;
+ int _primaryIndex;
+ OpTime _selfOptime;
+ unsigned int _selfUptime;
+ MemberState _selfState;
+ std::string _selfHeartbeatMessage;
+};
+
+} // namespace repl
+} // namespace mongo
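A minimal usage sketch for the class above; config, hbData, myOptime, and now are placeholders for state the replication coordinator already holds:

    ReplSetHtmlSummary summary;
    summary.setConfig(config);        // ReplicaSetConfig for the set
    summary.setHBData(hbData);        // one MemberHeartbeatData per member
    summary.setSelfIndex(0);
    summary.setPrimaryIndex(1);
    summary.setSelfOptime(myOptime);  // this node's last applied optime
    summary.setSelfUptime(3600);      // seconds
    summary.setNow(now);              // current Date_t
    summary.setSelfState(MemberState::RS_SECONDARY);
    summary.setSelfHeartbeatMessage("");
    const std::string html = summary.toHtmlString();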
diff --git a/src/mongo/db/repl/repl_set_seed_list.cpp b/src/mongo/db/repl/repl_set_seed_list.cpp
index 861a2398711..53db5cf555d 100644
--- a/src/mongo/db/repl/repl_set_seed_list.cpp
+++ b/src/mongo/db/repl/repl_set_seed_list.cpp
@@ -40,56 +40,55 @@ namespace mongo {
namespace repl {
- using std::string;
+using std::string;
- /** @param cfgString <setname>/<seedhost1>,<seedhost2> */
- void parseReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
- const std::string& cfgString,
- std::string& setname,
- std::vector<HostAndPort>& seeds,
- std::set<HostAndPort>& seedSet) {
- const char *p = cfgString.c_str();
- const char *slash = strchr(p, '/');
- if( slash )
- setname = string(p, slash-p);
- else
- setname = p;
- uassert(13093,
- "bad --replSet config string format is: <setname>[/<seedhost1>,<seedhost2>,...]",
- !setname.empty());
+/** @param cfgString <setname>/<seedhost1>,<seedhost2> */
+void parseReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
+ const std::string& cfgString,
+ std::string& setname,
+ std::vector<HostAndPort>& seeds,
+ std::set<HostAndPort>& seedSet) {
+ const char* p = cfgString.c_str();
+ const char* slash = strchr(p, '/');
+ if (slash)
+ setname = string(p, slash - p);
+ else
+ setname = p;
+ uassert(13093,
+ "bad --replSet config string format is: <setname>[/<seedhost1>,<seedhost2>,...]",
+ !setname.empty());
- if( slash == 0 )
- return;
+ if (slash == 0)
+ return;
- p = slash + 1;
- while( 1 ) {
- const char *comma = strchr(p, ',');
- if( comma == 0 ) comma = strchr(p,0);
- if( p == comma )
- break;
- {
- HostAndPort m;
- try {
- m = HostAndPort( string(p, comma-p) );
- }
- catch(...) {
- uassert(13114, "bad --replSet seed hostname", false);
- }
- uassert(13096, "bad --replSet command line config string - dups?",
- seedSet.count(m) == 0);
- seedSet.insert(m);
- //uassert(13101, "can't use localhost in replset host list", !m.isLocalHost());
- if (externalState->isSelf(m)) {
- LOG(1) << "replSet ignoring seed " << m.toString() << " (=self)";
- }
- else
- seeds.push_back(m);
- if( *comma == 0 )
- break;
- p = comma + 1;
+ p = slash + 1;
+ while (1) {
+ const char* comma = strchr(p, ',');
+ if (comma == 0)
+ comma = strchr(p, 0);
+ if (p == comma)
+ break;
+ {
+ HostAndPort m;
+ try {
+ m = HostAndPort(string(p, comma - p));
+ } catch (...) {
+ uassert(13114, "bad --replSet seed hostname", false);
}
+ uassert(
+ 13096, "bad --replSet command line config string - dups?", seedSet.count(m) == 0);
+ seedSet.insert(m);
+ // uassert(13101, "can't use localhost in replset host list", !m.isLocalHost());
+ if (externalState->isSelf(m)) {
+ LOG(1) << "replSet ignoring seed " << m.toString() << " (=self)";
+ } else
+ seeds.push_back(m);
+ if (*comma == 0)
+ break;
+ p = comma + 1;
}
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
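A worked example of the parsing above, assuming neither seed host resolves to this node:

    // cfgString = "rs0/alpha:27017,beta:27017"
    // after the call: setname == "rs0"
    //                 seeds   == [alpha:27017, beta:27017]  (input order preserved)
    //                 seedSet == {alpha:27017, beta:27017}  (duplicate guard)
    // a bare "rs0" (no '/') sets the name and leaves both containers empty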
diff --git a/src/mongo/db/repl/repl_set_seed_list.h b/src/mongo/db/repl/repl_set_seed_list.h
index 761928d2a7a..ca48c6604de 100644
--- a/src/mongo/db/repl/repl_set_seed_list.h
+++ b/src/mongo/db/repl/repl_set_seed_list.h
@@ -37,27 +37,27 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalState;
-
- void parseReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
- const std::string& cfgString,
- std::string& setname,
- std::vector<HostAndPort>& seeds,
- std::set<HostAndPort>& seedSet);
-
- /** Parameter given to the --replSet command line option (parsed).
- Syntax is "<setname>/<seedhost1>,<seedhost2>"
- where setname is a name and seedhost is "<host>[:<port>]" */
- class ReplSetSeedList {
- public:
- ReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
- const std::string& cfgString) {
- parseReplSetSeedList(externalState, cfgString, setname, seeds, seedSet);
- }
- std::string setname;
- std::vector<HostAndPort> seeds;
- std::set<HostAndPort> seedSet;
- };
-
-} // namespace repl
-} // namespace mongo
+class ReplicationCoordinatorExternalState;
+
+void parseReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
+ const std::string& cfgString,
+ std::string& setname,
+ std::vector<HostAndPort>& seeds,
+ std::set<HostAndPort>& seedSet);
+
+/** Parameter given to the --replSet command line option (parsed).
+ Syntax is "<setname>/<seedhost1>,<seedhost2>"
+ where setname is a name and seedhost is "<host>[:<port>]" */
+class ReplSetSeedList {
+public:
+ ReplSetSeedList(ReplicationCoordinatorExternalState* externalState,
+ const std::string& cfgString) {
+ parseReplSetSeedList(externalState, cfgString, setname, seeds, seedSet);
+ }
+ std::string setname;
+ std::vector<HostAndPort> seeds;
+ std::set<HostAndPort> seedSet;
+};
+
+} // namespace repl
+} // namespace mongo
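A minimal construction sketch for the wrapper above; externalState stands in for whatever ReplicationCoordinatorExternalState the caller already holds:

    ReplSetSeedList seedList(externalState, "rs0/alpha:27017,beta:27017");
    // seedList.setname == "rs0"; seedList.seeds holds the non-self hosts.
    // Malformed input or duplicate hosts trip uassert 13093/13114/13096
    // in parseReplSetSeedList rather than returning a Status.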
diff --git a/src/mongo/db/repl/repl_settings.cpp b/src/mongo/db/repl/repl_settings.cpp
index 3b22a3203eb..a385d89c55b 100644
--- a/src/mongo/db/repl/repl_settings.cpp
+++ b/src/mongo/db/repl/repl_settings.cpp
@@ -35,13 +35,12 @@
namespace mongo {
namespace repl {
- MONGO_EXPORT_STARTUP_SERVER_PARAMETER(maxSyncSourceLagSecs, int, 30);
- MONGO_INITIALIZER(maxSyncSourceLagSecsCheck) (InitializerContext*) {
- if (maxSyncSourceLagSecs < 1) {
- return Status(ErrorCodes::BadValue, "maxSyncSourceLagSecs must be > 0");
- }
- return Status::OK();
+MONGO_EXPORT_STARTUP_SERVER_PARAMETER(maxSyncSourceLagSecs, int, 30);
+MONGO_INITIALIZER(maxSyncSourceLagSecsCheck)(InitializerContext*) {
+ if (maxSyncSourceLagSecs < 1) {
+ return Status(ErrorCodes::BadValue, "maxSyncSourceLagSecs must be > 0");
}
-
+ return Status::OK();
+}
}
}
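The code above is the standard validate-at-startup pattern for an exported server parameter; a sketch of the same shape with a hypothetical parameter name:

    MONGO_EXPORT_STARTUP_SERVER_PARAMETER(exampleLagSecs, int, 30);  // hypothetical
    MONGO_INITIALIZER(exampleLagSecsCheck)(InitializerContext*) {
        if (exampleLagSecs < 1) {
            return Status(ErrorCodes::BadValue, "exampleLagSecs must be > 0");
        }
        return Status::OK();
    }
    // Startup parameters like this are supplied via --setParameter, e.g.
    // mongod --setParameter maxSyncSourceLagSecs=45; the initializer runs
    // after option parsing and aborts startup on a bad value.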
diff --git a/src/mongo/db/repl/repl_settings.h b/src/mongo/db/repl/repl_settings.h
index cec0b90040f..5c1e6032acc 100644
--- a/src/mongo/db/repl/repl_settings.h
+++ b/src/mongo/db/repl/repl_settings.h
@@ -38,90 +38,91 @@
namespace mongo {
namespace repl {
- extern int maxSyncSourceLagSecs;
-
- bool anyReplEnabled();
-
- /* replication slave? (possibly with slave)
- --slave cmd line setting -> SimpleSlave
- */
- typedef enum { NotSlave=0, SimpleSlave } SlaveTypes;
-
- class ReplSettings {
- public:
- SlaveTypes slave;
-
- /** true means we are master and doing replication. if we are not writing to oplog, this won't be true. */
- bool master;
-
- bool fastsync;
-
- bool autoresync;
-
- int slavedelay;
-
- long long oplogSize; // --oplogSize
-
- // for master/slave replication
- std::string source; // --source
- std::string only; // --only
- int pretouch; // --pretouch for replication application (experimental)
-
- std::string replSet; // --replSet[/<seedlist>]
- std::string ourSetName() const {
- std::string setname;
- size_t sl = replSet.find('/');
- if( sl == std::string::npos )
- return replSet;
- return replSet.substr(0, sl);
- }
- bool usingReplSets() const { return !replSet.empty(); }
-
- std::string rsIndexPrefetch;// --indexPrefetch
-
- ReplSettings()
- : slave(NotSlave),
- master(false),
- fastsync(),
- autoresync(false),
- slavedelay(),
- oplogSize(0),
- pretouch(0) {
- }
-
- // TODO(spencer): Remove explicit copy constructor after we no longer have mutable state
- // in ReplSettings.
- ReplSettings(const ReplSettings& other) :
- slave(other.slave),
- master(other.master),
- fastsync(other.fastsync),
- autoresync(other.autoresync),
- slavedelay(other.slavedelay),
- oplogSize(other.oplogSize),
- source(other.source),
- only(other.only),
- pretouch(other.pretouch),
- replSet(other.replSet),
- rsIndexPrefetch(other.rsIndexPrefetch) {}
-
- ReplSettings& operator=(const ReplSettings& other) {
- if (this == &other) return *this;
-
- slave = other.slave;
- master = other.master;
- fastsync = other.fastsync;
- autoresync = other.autoresync;
- slavedelay = other.slavedelay;
- oplogSize = other.oplogSize;
- source = other.source;
- only = other.only;
- pretouch = other.pretouch;
- replSet = other.replSet;
- rsIndexPrefetch = other.rsIndexPrefetch;
- return *this;
- }
+extern int maxSyncSourceLagSecs;
+
+bool anyReplEnabled();
- };
+/* Is this node a replication slave?
+   --slave cmd line setting -> SimpleSlave
+*/
+typedef enum { NotSlave = 0, SimpleSlave } SlaveTypes;
+
+class ReplSettings {
+public:
+ SlaveTypes slave;
+
+    /** True means we are a master doing replication; if we are not writing to the oplog, this is false. */
+ bool master;
+
+ bool fastsync;
+
+ bool autoresync;
+
+ int slavedelay;
+
+ long long oplogSize; // --oplogSize
+
+ // for master/slave replication
+ std::string source; // --source
+ std::string only; // --only
+ int pretouch; // --pretouch for replication application (experimental)
+
+ std::string replSet; // --replSet[/<seedlist>]
+ std::string ourSetName() const {
+ std::string setname;
+ size_t sl = replSet.find('/');
+ if (sl == std::string::npos)
+ return replSet;
+ return replSet.substr(0, sl);
+ }
+ bool usingReplSets() const {
+ return !replSet.empty();
+ }
+
+ std::string rsIndexPrefetch; // --indexPrefetch
+
+ ReplSettings()
+ : slave(NotSlave),
+ master(false),
+ fastsync(),
+ autoresync(false),
+ slavedelay(),
+ oplogSize(0),
+ pretouch(0) {}
+
+ // TODO(spencer): Remove explicit copy constructor after we no longer have mutable state
+ // in ReplSettings.
+ ReplSettings(const ReplSettings& other)
+ : slave(other.slave),
+ master(other.master),
+ fastsync(other.fastsync),
+ autoresync(other.autoresync),
+ slavedelay(other.slavedelay),
+ oplogSize(other.oplogSize),
+ source(other.source),
+ only(other.only),
+ pretouch(other.pretouch),
+ replSet(other.replSet),
+ rsIndexPrefetch(other.rsIndexPrefetch) {}
+
+ ReplSettings& operator=(const ReplSettings& other) {
+ if (this == &other)
+ return *this;
-} // namespace repl
-} // namespace mongo
+ slave = other.slave;
+ master = other.master;
+ fastsync = other.fastsync;
+ autoresync = other.autoresync;
+ slavedelay = other.slavedelay;
+ oplogSize = other.oplogSize;
+ source = other.source;
+ only = other.only;
+ pretouch = other.pretouch;
+ replSet = other.replSet;
+ rsIndexPrefetch = other.rsIndexPrefetch;
+ return *this;
+ }
+};
+
+} // namespace repl
+} // namespace mongo
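A quick example of ourSetName() and usingReplSets() from the class above:

    ReplSettings settings;
    settings.replSet = "rs0/alpha:27017,beta:27017";
    settings.ourSetName();     // "rs0" (text before the '/')
    settings.usingReplSets();  // true
    settings.replSet.clear();
    settings.usingReplSets();  // false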
diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp
index 6b4aedd1601..3d7f6d1268e 100644
--- a/src/mongo/db/repl/replica_set_config.cpp
+++ b/src/mongo/db/repl/replica_set_config.cpp
@@ -41,510 +41,490 @@ namespace mongo {
namespace repl {
#ifndef _MSC_VER
- const size_t ReplicaSetConfig::kMaxMembers;
- const size_t ReplicaSetConfig::kMaxVotingMembers;
+const size_t ReplicaSetConfig::kMaxMembers;
+const size_t ReplicaSetConfig::kMaxVotingMembers;
#endif
- const Seconds ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod(10);
- const std::string ReplicaSetConfig::kIdFieldName = "_id";
- const std::string ReplicaSetConfig::kVersionFieldName = "version";
- const std::string ReplicaSetConfig::kMembersFieldName = "members";
- const std::string ReplicaSetConfig::kSettingsFieldName = "settings";
- const std::string ReplicaSetConfig::kMajorityWriteConcernModeName = "$majority";
- const std::string ReplicaSetConfig::kStepDownCheckWriteConcernModeName = "$stepDownCheck";
+const Seconds ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod(10);
+const std::string ReplicaSetConfig::kIdFieldName = "_id";
+const std::string ReplicaSetConfig::kVersionFieldName = "version";
+const std::string ReplicaSetConfig::kMembersFieldName = "members";
+const std::string ReplicaSetConfig::kSettingsFieldName = "settings";
+const std::string ReplicaSetConfig::kMajorityWriteConcernModeName = "$majority";
+const std::string ReplicaSetConfig::kStepDownCheckWriteConcernModeName = "$stepDownCheck";
namespace {
- const std::string kLegalConfigTopFieldNames[] = {
- ReplicaSetConfig::kIdFieldName,
- ReplicaSetConfig::kVersionFieldName,
- ReplicaSetConfig::kMembersFieldName,
- ReplicaSetConfig::kSettingsFieldName
- };
+const std::string kLegalConfigTopFieldNames[] = {ReplicaSetConfig::kIdFieldName,
+ ReplicaSetConfig::kVersionFieldName,
+ ReplicaSetConfig::kMembersFieldName,
+ ReplicaSetConfig::kSettingsFieldName};
- const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs";
- const std::string kChainingAllowedFieldName = "chainingAllowed";
- const std::string kGetLastErrorDefaultsFieldName = "getLastErrorDefaults";
- const std::string kGetLastErrorModesFieldName = "getLastErrorModes";
+const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs";
+const std::string kChainingAllowedFieldName = "chainingAllowed";
+const std::string kGetLastErrorDefaultsFieldName = "getLastErrorDefaults";
+const std::string kGetLastErrorModesFieldName = "getLastErrorModes";
} // namespace
- ReplicaSetConfig::ReplicaSetConfig() : _isInitialized(false), _heartbeatTimeoutPeriod(0) {}
-
- Status ReplicaSetConfig::initialize(const BSONObj& cfg) {
- _isInitialized = false;
- _members.clear();
- Status status = bsonCheckOnlyHasFields(
- "replica set configuration", cfg, kLegalConfigTopFieldNames);
- if (!status.isOK())
- return status;
-
- //
- // Parse replSetName
- //
- status = bsonExtractStringField(cfg, kIdFieldName, &_replSetName);
- if (!status.isOK())
- return status;
-
- //
- // Parse version
- //
- status = bsonExtractIntegerField(cfg, kVersionFieldName, &_version);
+ReplicaSetConfig::ReplicaSetConfig() : _isInitialized(false), _heartbeatTimeoutPeriod(0) {}
+
+Status ReplicaSetConfig::initialize(const BSONObj& cfg) {
+ _isInitialized = false;
+ _members.clear();
+ Status status =
+ bsonCheckOnlyHasFields("replica set configuration", cfg, kLegalConfigTopFieldNames);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse replSetName
+ //
+ status = bsonExtractStringField(cfg, kIdFieldName, &_replSetName);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse version
+ //
+ status = bsonExtractIntegerField(cfg, kVersionFieldName, &_version);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse members
+ //
+ BSONElement membersElement;
+ status = bsonExtractTypedField(cfg, kMembersFieldName, Array, &membersElement);
+ if (!status.isOK())
+ return status;
+
+ for (BSONObj::iterator membersIterator(membersElement.Obj()); membersIterator.more();) {
+ BSONElement memberElement = membersIterator.next();
+ if (memberElement.type() != Object) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected type of " << kMembersFieldName << "."
+ << memberElement.fieldName() << " to be Object, but found "
+ << typeName(memberElement.type()));
+ }
+ _members.resize(_members.size() + 1);
+ status = _members.back().initialize(memberElement.Obj(), &_tagConfig);
if (!status.isOK())
return status;
-
- //
- // Parse members
- //
- BSONElement membersElement;
- status = bsonExtractTypedField(cfg, kMembersFieldName, Array, &membersElement);
- if (!status.isOK())
- return status;
-
- for (BSONObj::iterator membersIterator(membersElement.Obj()); membersIterator.more();) {
- BSONElement memberElement = membersIterator.next();
- if (memberElement.type() != Object) {
- return Status(ErrorCodes::TypeMismatch, str::stream() <<
- "Expected type of " << kMembersFieldName << "." <<
- memberElement.fieldName() << " to be Object, but found " <<
- typeName(memberElement.type()));
- }
- _members.resize(_members.size() + 1);
- status = _members.back().initialize(memberElement.Obj(), &_tagConfig);
- if (!status.isOK())
- return status;
- }
-
- //
- // Parse settings
- //
- BSONElement settingsElement;
- status = bsonExtractTypedField(cfg, kSettingsFieldName, Object, &settingsElement);
- BSONObj settings;
- if (status.isOK()) {
- settings = settingsElement.Obj();
- }
- else if (status != ErrorCodes::NoSuchKey) {
- return status;
- }
- status = _parseSettingsSubdocument(settings);
- if (!status.isOK())
- return status;
-
- _calculateMajorities();
- _addInternalWriteConcernModes();
- _isInitialized = true;
- return Status::OK();
}
- Status ReplicaSetConfig::_parseSettingsSubdocument(const BSONObj& settings) {
- //
- // Parse heartbeatTimeoutSecs
- //
- BSONElement hbTimeoutSecsElement = settings[kHeartbeatTimeoutFieldName];
- if (hbTimeoutSecsElement.eoo()) {
- _heartbeatTimeoutPeriod = Seconds(kDefaultHeartbeatTimeoutPeriod);
- }
- else if (hbTimeoutSecsElement.isNumber()) {
- _heartbeatTimeoutPeriod = Seconds(hbTimeoutSecsElement.numberInt());
- }
- else {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected type of " <<
- kSettingsFieldName << "." << kHeartbeatTimeoutFieldName <<
- " to be a number, but found a value of type " <<
- typeName(hbTimeoutSecsElement.type()));
- }
+ //
+ // Parse settings
+ //
+ BSONElement settingsElement;
+ status = bsonExtractTypedField(cfg, kSettingsFieldName, Object, &settingsElement);
+ BSONObj settings;
+ if (status.isOK()) {
+ settings = settingsElement.Obj();
+ } else if (status != ErrorCodes::NoSuchKey) {
+ return status;
+ }
+ status = _parseSettingsSubdocument(settings);
+ if (!status.isOK())
+ return status;
+
+ _calculateMajorities();
+ _addInternalWriteConcernModes();
+ _isInitialized = true;
+ return Status::OK();
+}
+
+Status ReplicaSetConfig::_parseSettingsSubdocument(const BSONObj& settings) {
+ //
+ // Parse heartbeatTimeoutSecs
+ //
+ BSONElement hbTimeoutSecsElement = settings[kHeartbeatTimeoutFieldName];
+ if (hbTimeoutSecsElement.eoo()) {
+ _heartbeatTimeoutPeriod = Seconds(kDefaultHeartbeatTimeoutPeriod);
+ } else if (hbTimeoutSecsElement.isNumber()) {
+ _heartbeatTimeoutPeriod = Seconds(hbTimeoutSecsElement.numberInt());
+ } else {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected type of " << kSettingsFieldName << "."
+ << kHeartbeatTimeoutFieldName
+ << " to be a number, but found a value of type "
+ << typeName(hbTimeoutSecsElement.type()));
+ }
- //
- // Parse chainingAllowed
- //
- Status status = bsonExtractBooleanFieldWithDefault(settings,
- kChainingAllowedFieldName,
- true,
- &_chainingAllowed);
+ //
+ // Parse chainingAllowed
+ //
+ Status status = bsonExtractBooleanFieldWithDefault(
+ settings, kChainingAllowedFieldName, true, &_chainingAllowed);
+ if (!status.isOK())
+ return status;
+
+ //
+ // Parse getLastErrorDefaults
+ //
+ BSONElement gleDefaultsElement;
+ status = bsonExtractTypedField(
+ settings, kGetLastErrorDefaultsFieldName, Object, &gleDefaultsElement);
+ if (status.isOK()) {
+ status = _defaultWriteConcern.parse(gleDefaultsElement.Obj());
if (!status.isOK())
return status;
+ } else if (status == ErrorCodes::NoSuchKey) {
+ // Default write concern is w: 1.
+ _defaultWriteConcern.reset();
+ _defaultWriteConcern.wNumNodes = 1;
+ } else {
+ return status;
+ }
- //
- // Parse getLastErrorDefaults
- //
- BSONElement gleDefaultsElement;
- status = bsonExtractTypedField(settings,
- kGetLastErrorDefaultsFieldName,
- Object,
- &gleDefaultsElement);
- if (status.isOK()) {
- status = _defaultWriteConcern.parse(gleDefaultsElement.Obj());
- if (!status.isOK())
- return status;
- }
- else if (status == ErrorCodes::NoSuchKey) {
- // Default write concern is w: 1.
- _defaultWriteConcern.reset();
- _defaultWriteConcern.wNumNodes = 1;
- }
- else {
- return status;
- }
+ //
+ // Parse getLastErrorModes
+ //
+ BSONElement gleModesElement;
+ status = bsonExtractTypedField(settings, kGetLastErrorModesFieldName, Object, &gleModesElement);
+ BSONObj gleModes;
+ if (status.isOK()) {
+ gleModes = gleModesElement.Obj();
+ } else if (status != ErrorCodes::NoSuchKey) {
+ return status;
+ }
- //
- // Parse getLastErrorModes
- //
- BSONElement gleModesElement;
- status = bsonExtractTypedField(settings,
- kGetLastErrorModesFieldName,
- Object,
- &gleModesElement);
- BSONObj gleModes;
- if (status.isOK()) {
- gleModes = gleModesElement.Obj();
- }
- else if (status != ErrorCodes::NoSuchKey) {
- return status;
+ for (BSONObj::iterator gleModeIter(gleModes); gleModeIter.more();) {
+ const BSONElement modeElement = gleModeIter.next();
+ if (_customWriteConcernModes.find(modeElement.fieldNameStringData()) !=
+ _customWriteConcernModes.end()) {
+ return Status(ErrorCodes::DuplicateKey,
+ str::stream() << kSettingsFieldName << '.' << kGetLastErrorModesFieldName
+ << " contains multiple fields named "
+ << modeElement.fieldName());
+ }
+ if (modeElement.type() != Object) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected " << kSettingsFieldName << '.'
+ << kGetLastErrorModesFieldName << '.'
+ << modeElement.fieldName() << " to be an Object, not "
+ << typeName(modeElement.type()));
}
-
- for (BSONObj::iterator gleModeIter(gleModes); gleModeIter.more();) {
- const BSONElement modeElement = gleModeIter.next();
- if (_customWriteConcernModes.find(modeElement.fieldNameStringData()) !=
- _customWriteConcernModes.end()) {
-
- return Status(ErrorCodes::DuplicateKey, str::stream() << kSettingsFieldName <<
- '.' << kGetLastErrorModesFieldName <<
- " contains multiple fields named " << modeElement.fieldName());
+ ReplicaSetTagPattern pattern = _tagConfig.makePattern();
+ for (BSONObj::iterator constraintIter(modeElement.Obj()); constraintIter.more();) {
+ const BSONElement constraintElement = constraintIter.next();
+ if (!constraintElement.isNumber()) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream()
+ << "Expected " << kSettingsFieldName << '.'
+ << kGetLastErrorModesFieldName << '.' << modeElement.fieldName()
+ << '.' << constraintElement.fieldName() << " to be a number, not "
+ << typeName(constraintElement.type()));
}
- if (modeElement.type() != Object) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected " <<
- kSettingsFieldName << '.' << kGetLastErrorModesFieldName << '.' <<
- modeElement.fieldName() << " to be an Object, not " <<
- typeName(modeElement.type()));
+ const int minCount = constraintElement.numberInt();
+ if (minCount <= 0) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Value of " << kSettingsFieldName << '.'
+ << kGetLastErrorModesFieldName << '.'
+ << modeElement.fieldName() << '.'
+ << constraintElement.fieldName()
+ << " must be positive, but found " << minCount);
}
- ReplicaSetTagPattern pattern = _tagConfig.makePattern();
- for (BSONObj::iterator constraintIter(modeElement.Obj()); constraintIter.more();) {
- const BSONElement constraintElement = constraintIter.next();
- if (!constraintElement.isNumber()) {
- return Status(ErrorCodes::TypeMismatch, str::stream() << "Expected " <<
- kSettingsFieldName << '.' << kGetLastErrorModesFieldName << '.' <<
- modeElement.fieldName() << '.' << constraintElement.fieldName() <<
- " to be a number, not " << typeName(constraintElement.type()));
- }
- const int minCount = constraintElement.numberInt();
- if (minCount <= 0) {
- return Status(ErrorCodes::BadValue, str::stream() << "Value of " <<
- kSettingsFieldName << '.' << kGetLastErrorModesFieldName << '.' <<
- modeElement.fieldName() << '.' << constraintElement.fieldName() <<
- " must be positive, but found " << minCount);
- }
- status = _tagConfig.addTagCountConstraintToPattern(
- &pattern,
- constraintElement.fieldNameStringData(),
- minCount);
- if (!status.isOK()) {
- return status;
- }
+ status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, constraintElement.fieldNameStringData(), minCount);
+ if (!status.isOK()) {
+ return status;
}
- _customWriteConcernModes[modeElement.fieldNameStringData()] = pattern;
}
- return Status::OK();
+ _customWriteConcernModes[modeElement.fieldNameStringData()] = pattern;
+ }
+ return Status::OK();
+}
+
+Status ReplicaSetConfig::validate() const {
+ if (_version <= 0 || _version > std::numeric_limits<int>::max()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kVersionFieldName << " field value of " << _version
+ << " is out of range");
+ }
+ if (_replSetName.empty()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Replica set configuration must have non-empty "
+ << kIdFieldName << " field");
+ }
+ if (_heartbeatTimeoutPeriod < Seconds(0)) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kSettingsFieldName << '.' << kHeartbeatTimeoutFieldName
+ << " field value must be non-negative, "
+ "but found " << _heartbeatTimeoutPeriod.total_seconds());
+ }
+ if (_members.size() > kMaxMembers || _members.empty()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Replica set configuration contains " << _members.size()
+ << " members, but must have at least 1 and no more than "
+ << kMaxMembers);
}
- Status ReplicaSetConfig::validate() const {
- if (_version <= 0 || _version > std::numeric_limits<int>::max()) {
- return Status(ErrorCodes::BadValue, str::stream() << kVersionFieldName <<
- " field value of " << _version << " is out of range");
- }
- if (_replSetName.empty()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Replica set configuration must have non-empty " << kIdFieldName <<
- " field");
+ size_t localhostCount = 0;
+ size_t voterCount = 0;
+ size_t arbiterCount = 0;
+ size_t electableCount = 0;
+ for (size_t i = 0; i < _members.size(); ++i) {
+ const MemberConfig& memberI = _members[i];
+ Status status = memberI.validate();
+ if (!status.isOK())
+ return status;
+ if (memberI.getHostAndPort().isLocalHost()) {
+ ++localhostCount;
}
- if (_heartbeatTimeoutPeriod < Seconds(0)) {
- return Status(ErrorCodes::BadValue, str::stream() << kSettingsFieldName << '.' <<
- kHeartbeatTimeoutFieldName << " field value must be non-negative, "
- "but found " << _heartbeatTimeoutPeriod.total_seconds());
+ if (memberI.isVoter()) {
+ ++voterCount;
}
- if (_members.size() > kMaxMembers || _members.empty()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Replica set configuration contains " << _members.size() <<
- " members, but must have at least 1 and no more than " << kMaxMembers);
+ // Nodes may be arbiters or electable, or neither, but never both.
+ if (memberI.isArbiter()) {
+ ++arbiterCount;
+ } else if (memberI.getPriority() > 0) {
+ ++electableCount;
}
-
- size_t localhostCount = 0;
- size_t voterCount = 0;
- size_t arbiterCount = 0;
- size_t electableCount = 0;
- for (size_t i = 0; i < _members.size(); ++i) {
- const MemberConfig& memberI = _members[i];
- Status status = memberI.validate();
- if (!status.isOK())
- return status;
- if (memberI.getHostAndPort().isLocalHost()) {
- ++localhostCount;
- }
- if (memberI.isVoter()) {
- ++voterCount;
- }
- // Nodes may be arbiters or electable, or neither, but never both.
- if (memberI.isArbiter()) {
- ++arbiterCount;
- }
- else if (memberI.getPriority() > 0) {
- ++electableCount;
+ for (size_t j = 0; j < _members.size(); ++j) {
+ if (i == j)
+ continue;
+ const MemberConfig& memberJ = _members[j];
+ if (memberI.getId() == memberJ.getId()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "Found two member configurations with same "
+ << MemberConfig::kIdFieldName << " field, " << kMembersFieldName
+ << "." << i << "." << MemberConfig::kIdFieldName
+ << " == " << kMembersFieldName << "." << j << "."
+ << MemberConfig::kIdFieldName << " == " << memberI.getId());
}
- for (size_t j = 0; j < _members.size(); ++j) {
- if (i == j)
- continue;
- const MemberConfig& memberJ = _members[j];
- if (memberI.getId() == memberJ.getId()) {
- return Status(
- ErrorCodes::BadValue, str::stream() <<
- "Found two member configurations with same " <<
- MemberConfig::kIdFieldName << " field, " <<
- kMembersFieldName << "." << i << "." << MemberConfig::kIdFieldName <<
- " == " <<
- kMembersFieldName << "." << j << "." << MemberConfig::kIdFieldName <<
- " == " << memberI.getId());
- }
- if (memberI.getHostAndPort() == memberJ.getHostAndPort()) {
- return Status(
- ErrorCodes::BadValue, str::stream() <<
- "Found two member configurations with same " <<
- MemberConfig::kHostFieldName << " field, " <<
- kMembersFieldName << "." << i << "." << MemberConfig::kHostFieldName <<
- " == " <<
- kMembersFieldName << "." << j << "." << MemberConfig::kHostFieldName <<
- " == " << memberI.getHostAndPort().toString());
- }
+ if (memberI.getHostAndPort() == memberJ.getHostAndPort()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Found two member configurations with same "
+ << MemberConfig::kHostFieldName << " field, "
+ << kMembersFieldName << "." << i << "."
+ << MemberConfig::kHostFieldName
+ << " == " << kMembersFieldName << "." << j << "."
+ << MemberConfig::kHostFieldName
+ << " == " << memberI.getHostAndPort().toString());
}
}
+ }
- if (localhostCount != 0 && localhostCount != _members.size()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Either all host names in a replica set configuration must be localhost "
- "references, or none must be; found " << localhostCount << " out of " <<
- _members.size());
- }
+ if (localhostCount != 0 && localhostCount != _members.size()) {
+ return Status(
+ ErrorCodes::BadValue,
+ str::stream()
+ << "Either all host names in a replica set configuration must be localhost "
+ "references, or none must be; found " << localhostCount << " out of "
+ << _members.size());
+ }
- if (voterCount > kMaxVotingMembers || voterCount == 0) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Replica set configuration contains " << voterCount <<
- " voting members, but must be at least 1 and no more than " <<
- kMaxVotingMembers);
- }
+ if (voterCount > kMaxVotingMembers || voterCount == 0) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Replica set configuration contains " << voterCount
+ << " voting members, but must be at least 1 and no more than "
+ << kMaxVotingMembers);
+ }
- if (electableCount == 0) {
- return Status(ErrorCodes::BadValue, "Replica set configuration must contain at least "
- "one non-arbiter member with priority > 0");
- }
+ if (electableCount == 0) {
+ return Status(ErrorCodes::BadValue,
+ "Replica set configuration must contain at least "
+ "one non-arbiter member with priority > 0");
+ }
- // TODO(schwerin): Validate satisfiability of write modes? Omitting for backwards
- // compatibility.
- if (_defaultWriteConcern.wMode.empty()) {
- if (_defaultWriteConcern.wNumNodes == 0) {
- return Status(ErrorCodes::BadValue,
- "Default write concern mode must wait for at least 1 member");
- }
+ // TODO(schwerin): Validate satisfiability of write modes? Omitting for backwards
+ // compatibility.
+ if (_defaultWriteConcern.wMode.empty()) {
+ if (_defaultWriteConcern.wNumNodes == 0) {
+ return Status(ErrorCodes::BadValue,
+ "Default write concern mode must wait for at least 1 member");
}
- else {
- if ("majority" != _defaultWriteConcern.wMode &&
- !findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) {
- return Status(ErrorCodes::BadValue, str::stream() <<
- "Default write concern requires undefined write mode " <<
- _defaultWriteConcern.wMode);
- }
+ } else {
+ if ("majority" != _defaultWriteConcern.wMode &&
+ !findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "Default write concern requires undefined write mode "
+ << _defaultWriteConcern.wMode);
}
-
- return Status::OK();
}
- Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const {
- if (!writeConcern.wMode.empty() && writeConcern.wMode != "majority") {
- StatusWith<ReplicaSetTagPattern> tagPatternStatus =
- findCustomWriteMode(writeConcern.wMode);
- if (!tagPatternStatus.isOK()) {
- return tagPatternStatus.getStatus();
- }
-
- ReplicaSetTagMatch matcher(tagPatternStatus.getValue());
- for (size_t j = 0; j < _members.size(); ++j) {
- const MemberConfig& memberConfig = _members[j];
- for (MemberConfig::TagIterator it = memberConfig.tagsBegin();
- it != memberConfig.tagsEnd(); ++it) {
- if (matcher.update(*it)) {
- return Status::OK();
- }
+ return Status::OK();
+}
+
+Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const {
+ if (!writeConcern.wMode.empty() && writeConcern.wMode != "majority") {
+ StatusWith<ReplicaSetTagPattern> tagPatternStatus = findCustomWriteMode(writeConcern.wMode);
+ if (!tagPatternStatus.isOK()) {
+ return tagPatternStatus.getStatus();
+ }
+
+ ReplicaSetTagMatch matcher(tagPatternStatus.getValue());
+ for (size_t j = 0; j < _members.size(); ++j) {
+ const MemberConfig& memberConfig = _members[j];
+ for (MemberConfig::TagIterator it = memberConfig.tagsBegin();
+ it != memberConfig.tagsEnd();
+ ++it) {
+ if (matcher.update(*it)) {
+ return Status::OK();
}
}
- // Even if all the nodes in the set had a given write it still would not satisfy this
- // write concern mode.
- return Status(ErrorCodes::CannotSatisfyWriteConcern,
- str::stream() << "Not enough nodes match write concern mode \""
- << writeConcern.wMode << "\"");
}
- else {
- int nodesRemaining = writeConcern.wNumNodes;
- for (size_t j = 0; j < _members.size(); ++j) {
- if (!_members[j].isArbiter()) { // Only count data-bearing nodes
- --nodesRemaining;
- if (nodesRemaining <= 0) {
- return Status::OK();
- }
+ // Even if all the nodes in the set had a given write it still would not satisfy this
+ // write concern mode.
+ return Status(ErrorCodes::CannotSatisfyWriteConcern,
+ str::stream() << "Not enough nodes match write concern mode \""
+ << writeConcern.wMode << "\"");
+ } else {
+ int nodesRemaining = writeConcern.wNumNodes;
+ for (size_t j = 0; j < _members.size(); ++j) {
+ if (!_members[j].isArbiter()) { // Only count data-bearing nodes
+ --nodesRemaining;
+ if (nodesRemaining <= 0) {
+ return Status::OK();
}
}
- return Status(ErrorCodes::CannotSatisfyWriteConcern, "Not enough data-bearing nodes");
}
+ return Status(ErrorCodes::CannotSatisfyWriteConcern, "Not enough data-bearing nodes");
}
+}
- const MemberConfig& ReplicaSetConfig::getMemberAt(size_t i) const {
- invariant(i < _members.size());
- return _members[i];
- }
+const MemberConfig& ReplicaSetConfig::getMemberAt(size_t i) const {
+ invariant(i < _members.size());
+ return _members[i];
+}
- const MemberConfig* ReplicaSetConfig::findMemberByID(int id) const {
- for (std::vector<MemberConfig>::const_iterator it = _members.begin();
- it != _members.end(); ++it) {
- if (it->getId() == id) {
- return &(*it);
- }
+const MemberConfig* ReplicaSetConfig::findMemberByID(int id) const {
+ for (std::vector<MemberConfig>::const_iterator it = _members.begin(); it != _members.end();
+ ++it) {
+ if (it->getId() == id) {
+ return &(*it);
}
- return NULL;
}
+ return NULL;
+}
- const int ReplicaSetConfig::findMemberIndexByHostAndPort(const HostAndPort& hap) const {
- int x = 0;
- for (std::vector<MemberConfig>::const_iterator it = _members.begin();
- it != _members.end(); ++it) {
-
- if (it->getHostAndPort() == hap) {
- return x;
- }
- ++x;
+const int ReplicaSetConfig::findMemberIndexByHostAndPort(const HostAndPort& hap) const {
+ int x = 0;
+ for (std::vector<MemberConfig>::const_iterator it = _members.begin(); it != _members.end();
+ ++it) {
+ if (it->getHostAndPort() == hap) {
+ return x;
}
- return -1;
+ ++x;
}
-
- const MemberConfig* ReplicaSetConfig::findMemberByHostAndPort(const HostAndPort& hap) const {
- int idx = findMemberIndexByHostAndPort(hap);
- return idx != -1 ? &getMemberAt(idx) : NULL;
+ return -1;
+}
+
+const MemberConfig* ReplicaSetConfig::findMemberByHostAndPort(const HostAndPort& hap) const {
+ int idx = findMemberIndexByHostAndPort(hap);
+ return idx != -1 ? &getMemberAt(idx) : NULL;
+}
+
+ReplicaSetTag ReplicaSetConfig::findTag(const StringData& key, const StringData& value) const {
+ return _tagConfig.findTag(key, value);
+}
+
+StatusWith<ReplicaSetTagPattern> ReplicaSetConfig::findCustomWriteMode(
+ const StringData& patternName) const {
+ const StringMap<ReplicaSetTagPattern>::const_iterator iter =
+ _customWriteConcernModes.find(patternName);
+ if (iter == _customWriteConcernModes.end()) {
+ return StatusWith<ReplicaSetTagPattern>(
+ ErrorCodes::UnknownReplWriteConcern,
+ str::stream() << "No write concern mode named '" << escape(patternName.toString())
+ << "' found in replica set configuration");
}
-
- ReplicaSetTag ReplicaSetConfig::findTag(const StringData& key, const StringData& value) const {
- return _tagConfig.findTag(key, value);
+ return StatusWith<ReplicaSetTagPattern>(iter->second);
+}
+
+void ReplicaSetConfig::_calculateMajorities() {
+ const int voters = std::count_if(_members.begin(),
+ _members.end(),
+ stdx::bind(&MemberConfig::isVoter, stdx::placeholders::_1));
+ const int arbiters =
+ std::count_if(_members.begin(),
+ _members.end(),
+ stdx::bind(&MemberConfig::isArbiter, stdx::placeholders::_1));
+ _totalVotingMembers = voters;
+ _majorityVoteCount = voters / 2 + 1;
+ _writeMajority = std::min(_majorityVoteCount, voters - arbiters);
+}
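+// Illustrative worked example: with 5 voters of which 2 are arbiters,
+// _majorityVoteCount = 5 / 2 + 1 = 3 and _writeMajority = min(3, 5 - 2) = 3;
+// with 3 arbiters instead, _writeMajority = min(3, 2) = 2, since arbiters
+// vote but cannot acknowledge writes.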
+
+void ReplicaSetConfig::_addInternalWriteConcernModes() {
+ // $majority: the majority of voting nodes or all non-arbiter voting nodes if
+ // the majority of voting nodes are arbiters.
+ ReplicaSetTagPattern pattern = _tagConfig.makePattern();
+
+ Status status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, MemberConfig::kInternalVoterTagName, _writeMajority);
+
+ if (status.isOK()) {
+ _customWriteConcernModes[kMajorityWriteConcernModeName] = pattern;
+ } else if (status != ErrorCodes::NoSuchKey) {
+ // NoSuchKey means we have no $voter-tagged nodes in this config;
+ // other errors are unexpected.
+ fassert(28693, status);
}
- StatusWith<ReplicaSetTagPattern> ReplicaSetConfig::findCustomWriteMode(
- const StringData& patternName) const {
-
- const StringMap<ReplicaSetTagPattern>::const_iterator iter = _customWriteConcernModes.find(
- patternName);
- if (iter == _customWriteConcernModes.end()) {
- return StatusWith<ReplicaSetTagPattern>(
- ErrorCodes::UnknownReplWriteConcern,
- str::stream() <<
- "No write concern mode named '" << escape(patternName.toString()) <<
- "' found in replica set configuration");
- }
- return StatusWith<ReplicaSetTagPattern>(iter->second);
+ // $stepDownCheck: one electable node plus ourselves
+ pattern = _tagConfig.makePattern();
+ status = _tagConfig.addTagCountConstraintToPattern(
+ &pattern, MemberConfig::kInternalElectableTagName, 2);
+ if (status.isOK()) {
+ _customWriteConcernModes[kStepDownCheckWriteConcernModeName] = pattern;
+ } else if (status != ErrorCodes::NoSuchKey) {
+ // NoSuchKey means we have no $electable-tagged nodes in this config;
+        // other errors are unexpected.
+ fassert(28694, status);
}
+}
- void ReplicaSetConfig::_calculateMajorities() {
- const int voters = std::count_if(
- _members.begin(),
- _members.end(),
- stdx::bind(&MemberConfig::isVoter, stdx::placeholders::_1));
- const int arbiters = std::count_if(
- _members.begin(),
- _members.end(),
- stdx::bind(&MemberConfig::isArbiter, stdx::placeholders::_1));
- _totalVotingMembers = voters;
- _majorityVoteCount = voters / 2 + 1;
- _writeMajority = std::min(_majorityVoteCount, voters - arbiters);
- }
+BSONObj ReplicaSetConfig::toBSON() const {
+ BSONObjBuilder configBuilder;
+ configBuilder.append("_id", _replSetName);
+ configBuilder.appendIntOrLL("version", _version);
- void ReplicaSetConfig::_addInternalWriteConcernModes() {
- // $majority: the majority of voting nodes or all non-arbiter voting nodes if
- // the majority of voting nodes are arbiters.
- ReplicaSetTagPattern pattern = _tagConfig.makePattern();
-
- Status status = _tagConfig.addTagCountConstraintToPattern(
- &pattern,
- MemberConfig::kInternalVoterTagName,
- _writeMajority);
-
- if (status.isOK()) {
- _customWriteConcernModes[kMajorityWriteConcernModeName] = pattern;
- }
- else if (status != ErrorCodes::NoSuchKey) {
- // NoSuchKey means we have no $voter-tagged nodes in this config;
- // other errors are unexpected.
- fassert(28693, status);
- }
-
- // $stepDownCheck: one electable node plus ourselves
- pattern = _tagConfig.makePattern();
- status = _tagConfig.addTagCountConstraintToPattern(&pattern,
- MemberConfig::kInternalElectableTagName,
- 2);
- if (status.isOK()) {
- _customWriteConcernModes[kStepDownCheckWriteConcernModeName] = pattern;
- }
- else if (status != ErrorCodes::NoSuchKey) {
- // NoSuchKey means we have no $electable-tagged nodes in this config;
- // other errors are unexpected
- fassert(28694, status);
- }
+ BSONArrayBuilder members(configBuilder.subarrayStart("members"));
+ for (MemberIterator mem = membersBegin(); mem != membersEnd(); mem++) {
+ members.append(mem->toBSON(getTagConfig()));
}
-
- BSONObj ReplicaSetConfig::toBSON() const {
- BSONObjBuilder configBuilder;
- configBuilder.append("_id", _replSetName);
- configBuilder.appendIntOrLL("version", _version);
-
- BSONArrayBuilder members(configBuilder.subarrayStart("members"));
- for (MemberIterator mem = membersBegin(); mem != membersEnd(); mem++) {
- members.append(mem->toBSON(getTagConfig()));
- }
- members.done();
-
- BSONObjBuilder settingsBuilder(configBuilder.subobjStart("settings"));
- settingsBuilder.append("chainingAllowed", _chainingAllowed);
- settingsBuilder.append("heartbeatTimeoutSecs", _heartbeatTimeoutPeriod.total_seconds());
-
- BSONObjBuilder gleModes(settingsBuilder.subobjStart("getLastErrorModes"));
- for (StringMap<ReplicaSetTagPattern>::const_iterator mode =
- _customWriteConcernModes.begin();
- mode != _customWriteConcernModes.end();
- ++mode) {
- if (mode->first[0] == '$') {
- // Filter out internal modes
- continue;
- }
- BSONObjBuilder modeBuilder(gleModes.subobjStart(mode->first));
- for (ReplicaSetTagPattern::ConstraintIterator itr = mode->second.constraintsBegin();
- itr != mode->second.constraintsEnd();
- itr++) {
- modeBuilder.append(_tagConfig.getTagKey(ReplicaSetTag(itr->getKeyIndex(), 0)),
- itr->getMinCount());
- }
- modeBuilder.done();
- }
- gleModes.done();
-
- settingsBuilder.append("getLastErrorDefaults", _defaultWriteConcern.toBSON());
- settingsBuilder.done();
- return configBuilder.obj();
+ members.done();
+
+ BSONObjBuilder settingsBuilder(configBuilder.subobjStart("settings"));
+ settingsBuilder.append("chainingAllowed", _chainingAllowed);
+ settingsBuilder.append("heartbeatTimeoutSecs", _heartbeatTimeoutPeriod.total_seconds());
+
+ BSONObjBuilder gleModes(settingsBuilder.subobjStart("getLastErrorModes"));
+ for (StringMap<ReplicaSetTagPattern>::const_iterator mode = _customWriteConcernModes.begin();
+ mode != _customWriteConcernModes.end();
+ ++mode) {
+ if (mode->first[0] == '$') {
+ // Filter out internal modes
+ continue;
+ }
+ BSONObjBuilder modeBuilder(gleModes.subobjStart(mode->first));
+ for (ReplicaSetTagPattern::ConstraintIterator itr = mode->second.constraintsBegin();
+ itr != mode->second.constraintsEnd();
+ itr++) {
+ modeBuilder.append(_tagConfig.getTagKey(ReplicaSetTag(itr->getKeyIndex(), 0)),
+ itr->getMinCount());
+ }
+ modeBuilder.done();
}
-
- std::vector<std::string> ReplicaSetConfig::getWriteConcernNames() const {
- std::vector<std::string> names;
- for (StringMap<ReplicaSetTagPattern>::const_iterator mode =
- _customWriteConcernModes.begin();
- mode != _customWriteConcernModes.end();
- ++mode) {
- names.push_back(mode->first);
- }
- return names;
- }
+ gleModes.done();
+
+ settingsBuilder.append("getLastErrorDefaults", _defaultWriteConcern.toBSON());
+ settingsBuilder.done();
+ return configBuilder.obj();
+}
+
+std::vector<std::string> ReplicaSetConfig::getWriteConcernNames() const {
+ std::vector<std::string> names;
+ for (StringMap<ReplicaSetTagPattern>::const_iterator mode = _customWriteConcernModes.begin();
+ mode != _customWriteConcernModes.end();
+ ++mode) {
+ names.push_back(mode->first);
+ }
+ return names;
+}
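+// Editorial note, grounded in the functions above: unlike toBSON(), which
+// filters out the internal '$'-prefixed modes, getWriteConcernNames()
+// returns every mode name, including kMajorityWriteConcernModeName and
+// kStepDownCheckWriteConcernModeName.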
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replica_set_config.h b/src/mongo/db/repl/replica_set_config.h
index 4baa96adbcf..2ff35b971db 100644
--- a/src/mongo/db/repl/replica_set_config.h
+++ b/src/mongo/db/repl/replica_set_config.h
@@ -41,208 +41,236 @@
namespace mongo {
- class BSONObj;
+class BSONObj;
namespace repl {
+/**
+ * Representation of the configuration information about a particular replica set.
+ */
+class ReplicaSetConfig {
+public:
+ typedef std::vector<MemberConfig>::const_iterator MemberIterator;
+
+ static const std::string kIdFieldName;
+ static const std::string kVersionFieldName;
+ static const std::string kMembersFieldName;
+ static const std::string kSettingsFieldName;
+ static const std::string kMajorityWriteConcernModeName;
+ static const std::string kStepDownCheckWriteConcernModeName;
+
+ static const size_t kMaxMembers = 50;
+ static const size_t kMaxVotingMembers = 7;
+ static const Seconds kDefaultHeartbeatTimeoutPeriod;
+
+ ReplicaSetConfig();
+ std::string asBson() {
+ return "";
+ }
+ /**
+ * Initializes this ReplicaSetConfig from the contents of "cfg".
+ */
+ Status initialize(const BSONObj& cfg);
+
+ /**
+ * Returns true if this object has been successfully initialized or copied from
+ * an initialized object.
+ */
+ bool isInitialized() const {
+ return _isInitialized;
+ }
+
+ /**
+ * Performs basic consistency checks on the replica set configuration.
+ */
+ Status validate() const;
+
+ /**
+ * Checks if this configuration can satisfy the given write concern.
+ *
+ * Things that are taken into consideration include:
+ * 1. If the set has enough data-bearing members.
+ * 2. If the write concern mode exists.
+ * 3. If there are enough members for the write concern mode specified.
+ */
+ Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
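+    //
+    // Illustrative sketch (editorial; "config" is a hypothetical initialized
+    // instance, and WriteConcernOptions is assumed to expose a writable
+    // "wMode" field as it does elsewhere in this tree):
+    //
+    //     WriteConcernOptions wc;
+    //     wc.wMode = "majority";
+    //     Status canSatisfy = config.checkIfWriteConcernCanBeSatisfied(wc);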
+
+ /**
+ * Gets the version of this configuration.
+ *
+ * The version number sequences configurations of the replica set, so that
+ * nodes may distinguish between "older" and "newer" configurations.
+ */
+ long long getConfigVersion() const {
+ return _version;
+ }
+
+ /**
+ * Gets the name (_id field value) of the replica set described by this configuration.
+ */
+ const std::string& getReplSetName() const {
+ return _replSetName;
+ }
+
+ /**
+ * Gets the number of members in this configuration.
+ */
+ int getNumMembers() const {
+ return _members.size();
+ }
+
+ /**
+ * Gets a begin iterator over the MemberConfigs stored in this ReplicaSetConfig.
+ */
+ MemberIterator membersBegin() const {
+ return _members.begin();
+ }
+
+ /**
+ * Gets an end iterator over the MemberConfigs stored in this ReplicaSetConfig.
+ */
+ MemberIterator membersEnd() const {
+ return _members.end();
+ }
+
+ /**
+ * Access a MemberConfig element by index.
+ */
+ const MemberConfig& getMemberAt(size_t i) const;
+
+ /**
+ * Returns a pointer to the MemberConfig corresponding to the member with the given _id in
+ * the config, or NULL if there is no member with that ID.
+ */
+ const MemberConfig* findMemberByID(int id) const;
+
+ /**
+ * Returns a pointer to the MemberConfig corresponding to the member with the given
+ * HostAndPort in the config, or NULL if there is no member with that address.
+ */
+ const MemberConfig* findMemberByHostAndPort(const HostAndPort& hap) const;
+
+ /**
+ * Returns a MemberConfig index position corresponding to the member with the given
+ * HostAndPort in the config, or -1 if there is no member with that address.
+ */
+ const int findMemberIndexByHostAndPort(const HostAndPort& hap) const;
+
+ /**
+ * Gets the default write concern for the replica set described by this configuration.
+ */
+ const WriteConcernOptions& getDefaultWriteConcern() const {
+ return _defaultWriteConcern;
+ }
+
+ /**
+     * Gets the amount of time to wait for a response to heartbeats sent to other
+ * nodes in the replica set.
+ */
+ Seconds getHeartbeatTimeoutPeriod() const {
+ return _heartbeatTimeoutPeriod;
+ }
+
+ /**
+     * Gets the amount of time to wait for a response to heartbeats sent to other
+     * nodes in the replica set, as above, but returns a Milliseconds object
+     * instead of a Seconds object.
+ */
+ Milliseconds getHeartbeatTimeoutPeriodMillis() const {
+ return Milliseconds(_heartbeatTimeoutPeriod.total_milliseconds());
+ }
+
+ /**
+ * Gets the number of votes required to win an election.
+ */
+ int getMajorityVoteCount() const {
+ return _majorityVoteCount;
+ }
+
+ /**
+ * Gets the number of voters.
+ */
+ int getTotalVotingMembers() const {
+ return _totalVotingMembers;
+ }
+
+ /**
+ * Returns true if automatic (not explicitly set) chaining is allowed.
+ */
+ bool isChainingAllowed() const {
+ return _chainingAllowed;
+ }
+
+ /**
+ * Returns a ReplicaSetTag with the given "key" and "value", or an invalid
+ * tag if the configuration describes no such tag.
+ */
+ ReplicaSetTag findTag(const StringData& key, const StringData& value) const;
+
+ /**
+ * Returns the pattern corresponding to "patternName" in this configuration.
+ * If "patternName" is not a valid pattern in this configuration, returns
+     * ErrorCodes::UnknownReplWriteConcern.
+ */
+ StatusWith<ReplicaSetTagPattern> findCustomWriteMode(const StringData& patternName) const;
+
+ /**
+ * Returns the "tags configuration" for this replicaset.
+ *
+ * NOTE(schwerin): Not clear if this should be used other than for reporting/debugging.
+ */
+ const ReplicaSetTagConfig& getTagConfig() const {
+ return _tagConfig;
+ }
+
+ /**
+ * Returns the config as a BSONObj.
+ */
+ BSONObj toBSON() const;
+
+ /**
+ * Returns a vector of strings which are the names of the WriteConcernModes.
+ * Currently used in unit tests to compare two configs.
+ */
+ std::vector<std::string> getWriteConcernNames() const;
+
/**
- * Representation of the configuration information about a particular replica set.
- */
- class ReplicaSetConfig {
- public:
- typedef std::vector<MemberConfig>::const_iterator MemberIterator;
-
- static const std::string kIdFieldName;
- static const std::string kVersionFieldName;
- static const std::string kMembersFieldName;
- static const std::string kSettingsFieldName;
- static const std::string kMajorityWriteConcernModeName;
- static const std::string kStepDownCheckWriteConcernModeName;
-
- static const size_t kMaxMembers = 50;
- static const size_t kMaxVotingMembers = 7;
- static const Seconds kDefaultHeartbeatTimeoutPeriod;
-
- ReplicaSetConfig();
- std::string asBson() { return ""; }
- /**
- * Initializes this ReplicaSetConfig from the contents of "cfg".
- */
- Status initialize(const BSONObj& cfg);
-
- /**
- * Returns true if this object has been successfully initialized or copied from
- * an initialized object.
- */
- bool isInitialized() const { return _isInitialized; }
-
- /**
- * Performs basic consistency checks on the replica set configuration.
- */
- Status validate() const;
-
- /**
- * Checks if this configuration can satisfy the given write concern.
- *
- * Things that are taken into consideration include:
- * 1. If the set has enough data-bearing members.
- * 2. If the write concern mode exists.
- * 3. If there are enough members for the write concern mode specified.
- */
- Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
-
- /**
- * Gets the version of this configuration.
- *
- * The version number sequences configurations of the replica set, so that
- * nodes may distinguish between "older" and "newer" configurations.
- */
- long long getConfigVersion() const { return _version; }
-
- /**
- * Gets the name (_id field value) of the replica set described by this configuration.
- */
- const std::string& getReplSetName() const { return _replSetName; }
-
- /**
- * Gets the number of members in this configuration.
- */
- int getNumMembers() const { return _members.size(); }
-
- /**
- * Gets a begin iterator over the MemberConfigs stored in this ReplicaSetConfig.
- */
- MemberIterator membersBegin() const { return _members.begin(); }
-
- /**
- * Gets an end iterator over the MemberConfigs stored in this ReplicaSetConfig.
- */
- MemberIterator membersEnd() const { return _members.end(); }
-
- /**
- * Access a MemberConfig element by index.
- */
- const MemberConfig& getMemberAt(size_t i) const;
-
- /**
- * Returns a pointer to the MemberConfig corresponding to the member with the given _id in
- * the config, or NULL if there is no member with that ID.
- */
- const MemberConfig* findMemberByID(int id) const;
-
- /**
- * Returns a pointer to the MemberConfig corresponding to the member with the given
- * HostAndPort in the config, or NULL if there is no member with that address.
- */
- const MemberConfig* findMemberByHostAndPort(const HostAndPort& hap) const;
-
- /**
- * Returns a MemberConfig index position corresponding to the member with the given
- * HostAndPort in the config, or -1 if there is no member with that address.
- */
- const int findMemberIndexByHostAndPort(const HostAndPort& hap) const;
-
- /**
- * Gets the default write concern for the replica set described by this configuration.
- */
- const WriteConcernOptions& getDefaultWriteConcern() const { return _defaultWriteConcern; }
-
- /**
- * Gets the amount of time to wait for a response to hearbeats sent to other
- * nodes in the replica set.
- */
- Seconds getHeartbeatTimeoutPeriod() const { return _heartbeatTimeoutPeriod; }
-
- /**
- * Gets the amount of time to wait for a response to hearbeats sent to other
- * nodes in the replica set, as above, but returns a Milliseconds instead of
- * Seconds object.
- */
- Milliseconds getHeartbeatTimeoutPeriodMillis() const {
- return Milliseconds(_heartbeatTimeoutPeriod.total_milliseconds());
- }
-
- /**
- * Gets the number of votes required to win an election.
- */
- int getMajorityVoteCount() const { return _majorityVoteCount; }
-
- /**
- * Gets the number of voters.
- */
- int getTotalVotingMembers() const { return _totalVotingMembers; }
-
- /**
- * Returns true if automatic (not explicitly set) chaining is allowed.
- */
- bool isChainingAllowed() const { return _chainingAllowed; }
-
- /**
- * Returns a ReplicaSetTag with the given "key" and "value", or an invalid
- * tag if the configuration describes no such tag.
- */
- ReplicaSetTag findTag(const StringData& key, const StringData& value) const;
-
- /**
- * Returns the pattern corresponding to "patternName" in this configuration.
- * If "patternName" is not a valid pattern in this configuration, returns
- * ErrorCodes::NoSuchKey.
- */
- StatusWith<ReplicaSetTagPattern> findCustomWriteMode(const StringData& patternName) const;
-
- /**
- * Returns the "tags configuration" for this replicaset.
- *
- * NOTE(schwerin): Not clear if this should be used other than for reporting/debugging.
- */
- const ReplicaSetTagConfig& getTagConfig() const { return _tagConfig; }
-
- /**
- * Returns the config as a BSONObj.
- */
- BSONObj toBSON() const;
-
- /**
- * Returns a vector of strings which are the names of the WriteConcernModes.
- * Currently used in unit tests to compare two configs.
- */
- std::vector<std::string> getWriteConcernNames() const;
-
- /**
- * Returns the number of voting data-bearing members that must acknowledge a write
- * in order to satisfy a write concern of {w: "majority"}.
- */
- int getWriteMajority() const { return _writeMajority; }
-
- private:
- /**
- * Parses the "settings" subdocument of a replica set configuration.
- */
- Status _parseSettingsSubdocument(const BSONObj& settings);
-
- /**
- * Calculates and stores the majority for electing a primary (_majorityVoteCount).
- */
- void _calculateMajorities();
-
- /**
- * Adds internal write concern modes to the getLastErrorModes list.
- */
- void _addInternalWriteConcernModes();
-
- bool _isInitialized;
- long long _version;
- std::string _replSetName;
- std::vector<MemberConfig> _members;
- WriteConcernOptions _defaultWriteConcern;
- Seconds _heartbeatTimeoutPeriod;
- bool _chainingAllowed;
- int _majorityVoteCount;
- int _writeMajority;
- int _totalVotingMembers;
- ReplicaSetTagConfig _tagConfig;
- StringMap<ReplicaSetTagPattern> _customWriteConcernModes;
- };
+ * Returns the number of voting data-bearing members that must acknowledge a write
+ * in order to satisfy a write concern of {w: "majority"}.
+ */
+ int getWriteMajority() const {
+ return _writeMajority;
+ }
+
+private:
+ /**
+ * Parses the "settings" subdocument of a replica set configuration.
+ */
+ Status _parseSettingsSubdocument(const BSONObj& settings);
+
+ /**
+ * Calculates and stores the majority for electing a primary (_majorityVoteCount).
+ */
+ void _calculateMajorities();
+
+ /**
+ * Adds internal write concern modes to the getLastErrorModes list.
+ */
+ void _addInternalWriteConcernModes();
+
+ bool _isInitialized;
+ long long _version;
+ std::string _replSetName;
+ std::vector<MemberConfig> _members;
+ WriteConcernOptions _defaultWriteConcern;
+ Seconds _heartbeatTimeoutPeriod;
+ bool _chainingAllowed;
+ int _majorityVoteCount;
+ int _writeMajority;
+ int _totalVotingMembers;
+ ReplicaSetTagConfig _tagConfig;
+ StringMap<ReplicaSetTagPattern> _customWriteConcernModes;
+};
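+// Minimal usage sketch (editorial, hypothetical caller; "cfgObj" is assumed
+// to hold a replica set configuration document):
+//
+//     ReplicaSetConfig config;
+//     Status status = config.initialize(cfgObj);
+//     if (status.isOK()) {
+//         status = config.validate();
+//     }
+//     if (status.isOK()) {
+//         const int votesNeeded = config.getMajorityVoteCount();
+//     }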
} // namespace repl
diff --git a/src/mongo/db/repl/replica_set_config_checks.cpp b/src/mongo/db/repl/replica_set_config_checks.cpp
index 7b97d3679a3..6b972063c6a 100644
--- a/src/mongo/db/repl/replica_set_config_checks.cpp
+++ b/src/mongo/db/repl/replica_set_config_checks.cpp
@@ -40,247 +40,234 @@ namespace mongo {
namespace repl {
namespace {
- /**
- * Finds the index of the one member configuration in "newConfig" that corresponds
- * to the current node (as identified by "externalState").
- *
- * Returns an error if the current node does not appear or appears multiple times in
- * "newConfig".
- */
- StatusWith<int> findSelfInConfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
-
- std::vector<ReplicaSetConfig::MemberIterator> meConfigs;
- for (ReplicaSetConfig::MemberIterator iter = newConfig.membersBegin();
- iter != newConfig.membersEnd();
- ++iter) {
- if (externalState->isSelf(iter->getHostAndPort())) {
- meConfigs.push_back(iter);
- }
- }
- if (meConfigs.empty()) {
- return StatusWith<int>(ErrorCodes::NodeNotFound, str::stream() <<
- "No host described in new configuration " <<
- newConfig.getConfigVersion() << " for replica set " <<
- newConfig.getReplSetName() << " maps to this node");
- }
- if (meConfigs.size() > 1) {
- str::stream message;
- message << "The hosts " << meConfigs.front()->getHostAndPort().toString();
- for (size_t i = 1; i < meConfigs.size() - 1; ++i) {
- message << ", " << meConfigs[i]->getHostAndPort().toString();
- }
- message << " and " << meConfigs.back()->getHostAndPort().toString() <<
- " all map to this node in new configuration version " <<
- newConfig.getConfigVersion() << " for replica set " << newConfig.getReplSetName();
- return StatusWith<int>(ErrorCodes::DuplicateKey, message);
+/**
+ * Finds the index of the one member configuration in "newConfig" that corresponds
+ * to the current node (as identified by "externalState").
+ *
+ * Returns an error if the current node does not appear or appears multiple times in
+ * "newConfig".
+ */
+StatusWith<int> findSelfInConfig(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig) {
+ std::vector<ReplicaSetConfig::MemberIterator> meConfigs;
+ for (ReplicaSetConfig::MemberIterator iter = newConfig.membersBegin();
+ iter != newConfig.membersEnd();
+ ++iter) {
+ if (externalState->isSelf(iter->getHostAndPort())) {
+ meConfigs.push_back(iter);
}
-
- int myIndex = std::distance(newConfig.membersBegin(), meConfigs.front());
- invariant(myIndex >= 0 && myIndex < newConfig.getNumMembers());
- return StatusWith<int>(myIndex);
}
-
- /**
- * Checks if the node with the given config index is electable, returning a useful
- * status message if not.
- */
- Status checkElectable(const ReplicaSetConfig& newConfig, int configIndex) {
- const MemberConfig& myConfig = newConfig.getMemberAt(configIndex);
- if (!myConfig.isElectable()) {
- return Status(
- ErrorCodes::NodeNotElectable, str::stream() <<
- "This node, " << myConfig.getHostAndPort().toString() << ", with _id " <<
- myConfig.getId() << " is not electable under the new configuration version " <<
- newConfig.getConfigVersion() << " for replica set " <<
- newConfig.getReplSetName());
- }
- return Status::OK();
+ if (meConfigs.empty()) {
+ return StatusWith<int>(ErrorCodes::NodeNotFound,
+ str::stream() << "No host described in new configuration "
+ << newConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName() << " maps to this node");
}
-
- /**
- * Like findSelfInConfig, above, but also returns an error if the member configuration
- * for this node is not electable, as this is a requirement for nodes accepting
- * reconfig or initiate commands.
- */
- StatusWith<int> findSelfInConfigIfElectable(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
- StatusWith<int> result = findSelfInConfig(externalState, newConfig);
- if (result.isOK()) {
- Status status = checkElectable(newConfig, result.getValue());
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
+ if (meConfigs.size() > 1) {
+ str::stream message;
+ message << "The hosts " << meConfigs.front()->getHostAndPort().toString();
+ for (size_t i = 1; i < meConfigs.size() - 1; ++i) {
+ message << ", " << meConfigs[i]->getHostAndPort().toString();
}
- return result;
+ message << " and " << meConfigs.back()->getHostAndPort().toString()
+ << " all map to this node in new configuration version "
+ << newConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName();
+ return StatusWith<int>(ErrorCodes::DuplicateKey, message);
}
- /**
- * Compares two initialized and validated replica set configurations, and checks to
- * see if "newConfig" is a legal successor configuration to "oldConfig".
- *
- * Returns Status::OK() if "newConfig" may replace "oldConfig", or an indicative error
- * otherwise.
- *
- * The checks performed by this test are necessary, but may not be sufficient for
- * ensuring that "newConfig" is a legal successor to "oldConfig". For example,
- * a legal reconfiguration must typically be executed on a node that is currently
- * primary under "oldConfig" and is electable under "newConfig". Such checks that
- * require knowledge of which node is executing the configuration are out of scope
- * for this function.
- */
- Status validateOldAndNewConfigsCompatible(
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig) {
- invariant(newConfig.isInitialized());
- invariant(oldConfig.isInitialized());
-
- if (oldConfig.getConfigVersion() >= newConfig.getConfigVersion()) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New replica set configuration version must be greater than old, but " <<
- newConfig.getConfigVersion() << " is not greater than " <<
- oldConfig.getConfigVersion() << " for replica set " <<
- newConfig.getReplSetName());
- }
+ int myIndex = std::distance(newConfig.membersBegin(), meConfigs.front());
+ invariant(myIndex >= 0 && myIndex < newConfig.getNumMembers());
+ return StatusWith<int>(myIndex);
+}
- if (oldConfig.getReplSetName() != newConfig.getReplSetName()) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations differ in replica set name; "
- "old was " << oldConfig.getReplSetName() << ", and new is " <<
- newConfig.getReplSetName());
- }
-
- //
- // For every member config mNew in newConfig, if there exists member config mOld
- // in oldConfig such that mNew.getHostAndPort() == mOld.getHostAndPort(), it is required
- // that mNew.getId() == mOld.getId().
- //
- // Also, one may not use reconfig to change the value of the buildIndexes or
- // arbiterOnly flags.
- //
- for (ReplicaSetConfig::MemberIterator mNew = newConfig.membersBegin();
- mNew != newConfig.membersEnd();
- ++mNew) {
- for (ReplicaSetConfig::MemberIterator mOld = oldConfig.membersBegin();
- mOld != oldConfig.membersEnd();
- ++mOld) {
-
- const bool idsEqual = mOld->getId() == mNew->getId();
- const bool hostsEqual = mOld->getHostAndPort() == mNew->getHostAndPort();
- if (!idsEqual && !hostsEqual) {
- continue;
- }
- if (hostsEqual && !idsEqual) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations both have members with " <<
- MemberConfig::kHostFieldName << " of " <<
- mOld->getHostAndPort().toString() <<
- " but in the new configuration the " <<
- MemberConfig::kIdFieldName << " field is " <<
- mNew->getId() << " and in the old configuration it is " <<
- mOld->getId() <<
- " for replica set " << newConfig.getReplSetName());
- }
- // At this point, the _id and host fields are equal, so we're looking at the old and
- // new configurations for the same member node.
- const bool buildIndexesFlagsEqual =
- mOld->shouldBuildIndexes() == mNew->shouldBuildIndexes();
- if (!buildIndexesFlagsEqual) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations differ in the setting of the "
- "buildIndexes field for member " <<
- mOld->getHostAndPort().toString() <<
- "; to make this change, remove then re-add the member");
- }
- const bool arbiterFlagsEqual = mOld->isArbiter() == mNew->isArbiter();
- if (!arbiterFlagsEqual) {
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() <<
- "New and old configurations differ in the setting of the "
- "arbiterOnly field for member " <<
- mOld->getHostAndPort().toString() <<
- "; to make this change, remove then re-add the member");
-
- }
- }
- }
- return Status::OK();
+/**
+ * Checks if the node with the given config index is electable, returning a useful
+ * status message if not.
+ */
+Status checkElectable(const ReplicaSetConfig& newConfig, int configIndex) {
+ const MemberConfig& myConfig = newConfig.getMemberAt(configIndex);
+ if (!myConfig.isElectable()) {
+ return Status(ErrorCodes::NodeNotElectable,
+ str::stream() << "This node, " << myConfig.getHostAndPort().toString()
+ << ", with _id " << myConfig.getId()
+ << " is not electable under the new configuration version "
+ << newConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName());
}
-} // namespace
+ return Status::OK();
+}
- StatusWith<int> validateConfigForStartUp(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig) {
- Status status = newConfig.validate();
+/**
+ * Like findSelfInConfig, above, but also returns an error if the member configuration
+ * for this node is not electable, as this is a requirement for nodes accepting
+ * reconfig or initiate commands.
+ */
+StatusWith<int> findSelfInConfigIfElectable(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig) {
+ StatusWith<int> result = findSelfInConfig(externalState, newConfig);
+ if (result.isOK()) {
+ Status status = checkElectable(newConfig, result.getValue());
if (!status.isOK()) {
return StatusWith<int>(status);
}
- if (oldConfig.isInitialized()) {
- status = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
- }
- return findSelfInConfig(externalState, newConfig);
}
+ return result;
+}
- StatusWith<int> validateConfigForInitiate(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
- Status status = newConfig.validate();
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
- if (newConfig.getConfigVersion() != 1) {
- return StatusWith<int>(
- ErrorCodes::NewReplicaSetConfigurationIncompatible,
- str::stream() << "Configuration used to initiate a replica set must " <<
- " have version 1, but found " << newConfig.getConfigVersion());
- }
- return findSelfInConfigIfElectable(externalState, newConfig);
+/**
+ * Compares two initialized and validated replica set configurations, and checks to
+ * see if "newConfig" is a legal successor configuration to "oldConfig".
+ *
+ * Returns Status::OK() if "newConfig" may replace "oldConfig", or an indicative error
+ * otherwise.
+ *
+ * The checks performed by this test are necessary, but may not be sufficient for
+ * ensuring that "newConfig" is a legal successor to "oldConfig". For example,
+ * a legal reconfiguration must typically be executed on a node that is currently
+ * primary under "oldConfig" and is electable under "newConfig". Such checks that
+ * require knowledge of which node is executing the configuration are out of scope
+ * for this function.
+ */
+Status validateOldAndNewConfigsCompatible(const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig) {
+ invariant(newConfig.isInitialized());
+ invariant(oldConfig.isInitialized());
+
+ if (oldConfig.getConfigVersion() >= newConfig.getConfigVersion()) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New replica set configuration version must be greater than old, but "
+ << newConfig.getConfigVersion() << " is not greater than "
+ << oldConfig.getConfigVersion() << " for replica set "
+ << newConfig.getReplSetName());
}
- StatusWith<int> validateConfigForReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig,
- bool force) {
+ if (oldConfig.getReplSetName() != newConfig.getReplSetName()) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream() << "New and old configurations differ in replica set name; "
+ "old was " << oldConfig.getReplSetName() << ", and new is "
+ << newConfig.getReplSetName());
+ }
- Status status = newConfig.validate();
- if (!status.isOK()) {
- return StatusWith<int>(status);
+ //
+ // For every member config mNew in newConfig, if there exists member config mOld
+ // in oldConfig such that mNew.getHostAndPort() == mOld.getHostAndPort(), it is required
+ // that mNew.getId() == mOld.getId().
+ //
+ // Also, one may not use reconfig to change the value of the buildIndexes or
+ // arbiterOnly flags.
+ //
+ for (ReplicaSetConfig::MemberIterator mNew = newConfig.membersBegin();
+ mNew != newConfig.membersEnd();
+ ++mNew) {
+ for (ReplicaSetConfig::MemberIterator mOld = oldConfig.membersBegin();
+ mOld != oldConfig.membersEnd();
+ ++mOld) {
+ const bool idsEqual = mOld->getId() == mNew->getId();
+ const bool hostsEqual = mOld->getHostAndPort() == mNew->getHostAndPort();
+ if (!idsEqual && !hostsEqual) {
+ continue;
+ }
+ if (hostsEqual && !idsEqual) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New and old configurations both have members with "
+ << MemberConfig::kHostFieldName << " of "
+ << mOld->getHostAndPort().toString()
+ << " but in the new configuration the "
+ << MemberConfig::kIdFieldName << " field is " << mNew->getId()
+ << " and in the old configuration it is " << mOld->getId()
+ << " for replica set " << newConfig.getReplSetName());
+ }
+ // At this point, the _id and host fields are equal, so we're looking at the old and
+ // new configurations for the same member node.
+ const bool buildIndexesFlagsEqual =
+ mOld->shouldBuildIndexes() == mNew->shouldBuildIndexes();
+ if (!buildIndexesFlagsEqual) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New and old configurations differ in the setting of the "
+ "buildIndexes field for member "
+ << mOld->getHostAndPort().toString()
+ << "; to make this change, remove then re-add the member");
+ }
+ const bool arbiterFlagsEqual = mOld->isArbiter() == mNew->isArbiter();
+ if (!arbiterFlagsEqual) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream()
+ << "New and old configurations differ in the setting of the "
+ "arbiterOnly field for member "
+ << mOld->getHostAndPort().toString()
+ << "; to make this change, remove then re-add the member");
+ }
}
+ }
+ return Status::OK();
+}
+} // namespace
+StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+ if (oldConfig.isInitialized()) {
status = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
if (!status.isOK()) {
return StatusWith<int>(status);
}
+ }
+ return findSelfInConfig(externalState, newConfig);
+}
- if (force) {
- return findSelfInConfig(externalState, newConfig);
- }
-
- return findSelfInConfigIfElectable(externalState, newConfig);
+StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+ if (newConfig.getConfigVersion() != 1) {
+ return StatusWith<int>(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream() << "Configuration used to initiate a replica set must "
+                                             << "have version 1, but found "
+ << newConfig.getConfigVersion());
}
+ return findSelfInConfigIfElectable(externalState, newConfig);
+}
- StatusWith<int> validateConfigForHeartbeatReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig) {
+StatusWith<int> validateConfigForReconfig(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig,
+ bool force) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
- Status status = newConfig.validate();
- if (!status.isOK()) {
- return StatusWith<int>(status);
- }
+ status = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+ if (force) {
return findSelfInConfig(externalState, newConfig);
}
+ return findSelfInConfigIfElectable(externalState, newConfig);
+}
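+// Editorial note: even under "force", the compatibility check above runs
+// unconditionally; "force" only replaces the electability requirement
+// (findSelfInConfigIfElectable) with a plain self-lookup (findSelfInConfig).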
+
+StatusWith<int> validateConfigForHeartbeatReconfig(
+ ReplicationCoordinatorExternalState* externalState, const ReplicaSetConfig& newConfig) {
+ Status status = newConfig.validate();
+ if (!status.isOK()) {
+ return StatusWith<int>(status);
+ }
+
+ return findSelfInConfig(externalState, newConfig);
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replica_set_config_checks.h b/src/mongo/db/repl/replica_set_config_checks.h
index ba7ad90f3fc..adeb4758093 100644
--- a/src/mongo/db/repl/replica_set_config_checks.h
+++ b/src/mongo/db/repl/replica_set_config_checks.h
@@ -33,61 +33,57 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalState;
- class ReplicaSetConfig;
+class ReplicationCoordinatorExternalState;
+class ReplicaSetConfig;
- /**
- * Validates that "newConfig" is a legal configuration that the current
- * node can accept from its local storage during startup.
- *
- * Returns the index of the current node's member configuration in "newConfig",
- * on success, and an indicative error on failure.
- *
- * If "oldConfig" is valid, this method only succeds if "newConfig" is a legal
- * successor configuration.
- */
- StatusWith<int> validateConfigForStartUp(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig);
+/**
+ * Validates that "newConfig" is a legal configuration that the current
+ * node can accept from its local storage during startup.
+ *
+ * Returns the index of the current node's member configuration in "newConfig",
+ * on success, and an indicative error on failure.
+ *
+ * If "oldConfig" is valid, this method only succeds if "newConfig" is a legal
+ * successor configuration.
+ */
+StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig);
- /**
- * Validates that "newConfig" is a legal initial configuration that can be
- * initiated by the current node (identified via "externalState").
- *
- * Returns the index of the current node's member configuration in "newConfig",
- * on success, and an indicative error on failure.
- */
- StatusWith<int> validateConfigForInitiate(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig);
+/**
+ * Validates that "newConfig" is a legal initial configuration that can be
+ * initiated by the current node (identified via "externalState").
+ *
+ * Returns the index of the current node's member configuration in "newConfig",
+ * on success, and an indicative error on failure.
+ */
+StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& newConfig);
- /**
- * Validates that "newConfig" is a legal successor configuration to "oldConfig" that can be
- * initiated by the current node (identified via "externalState").
- *
- * If "force" is set to true, then compatibility with the old configuration and electability of
- * the current node in "newConfig" are not considered when determining if the reconfig is valid.
- *
- * Returns the index of the current node's member configuration in "newConfig",
- * on success, and an indicative error on failure.
- */
- StatusWith<int> validateConfigForReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& oldConfig,
- const ReplicaSetConfig& newConfig,
- bool force);
+/**
+ * Validates that "newConfig" is a legal successor configuration to "oldConfig" that can be
+ * initiated by the current node (identified via "externalState").
+ *
+ * If "force" is set to true, then compatibility with the old configuration and electability of
+ * the current node in "newConfig" are not considered when determining if the reconfig is valid.
+ *
+ * Returns the index of the current node's member configuration in "newConfig",
+ * on success, and an indicative error on failure.
+ */
+StatusWith<int> validateConfigForReconfig(ReplicationCoordinatorExternalState* externalState,
+ const ReplicaSetConfig& oldConfig,
+ const ReplicaSetConfig& newConfig,
+ bool force);
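+// Minimal usage sketch (editorial, hypothetical; "externalState", "oldConfig"
+// and "newConfig" are assumed to be initialized elsewhere):
+//
+//     StatusWith<int> myIndex =
+//         validateConfigForReconfig(externalState, oldConfig, newConfig, false);
+//     if (!myIndex.isOK()) {
+//         return myIndex.getStatus();  // e.g. NodeNotFound or NodeNotElectable
+//     }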
- /**
- * Validates that "newConfig" is an acceptable configuration when received in a heartbeat
- * reasponse.
- *
- * If the new configuration omits the current node, but is otherwise valid, returns
- * ErrorCodes::NodeNotFound. If the configuration is wholly valid, returns Status::OK().
- * Otherwise, returns some other error status.
- */
- StatusWith<int> validateConfigForHeartbeatReconfig(
- ReplicationCoordinatorExternalState* externalState,
- const ReplicaSetConfig& newConfig);
+/**
+ * Validates that "newConfig" is an acceptable configuration when received in a heartbeat
+ * response.
+ *
+ * If the new configuration omits the current node, but is otherwise valid, returns
+ * ErrorCodes::NodeNotFound. If the configuration is wholly valid, returns Status::OK().
+ * Otherwise, returns some other error status.
+ */
+StatusWith<int> validateConfigForHeartbeatReconfig(
+ ReplicationCoordinatorExternalState* externalState, const ReplicaSetConfig& newConfig);
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replica_set_config_checks_test.cpp b/src/mongo/db/repl/replica_set_config_checks_test.cpp
index efb39f5e0fa..d495421689d 100644
--- a/src/mongo/db/repl/replica_set_config_checks_test.cpp
+++ b/src/mongo/db/repl/replica_set_config_checks_test.cpp
@@ -40,660 +40,679 @@ namespace mongo {
namespace repl {
namespace {
- TEST(ValidateConfigForInitiate, VersionMustBe1) {
- ReplicationCoordinatorExternalStateMock rses;
- rses.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1")))));
- ASSERT_EQUALS(
- ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForInitiate(&rses, config).getStatus());
- }
-
- TEST(ValidateConfigForInitiate, MustFindSelf) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock notPresentExternalState;
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ReplicationCoordinatorExternalStateMock presentTwiceExternalState;
- presentTwiceExternalState.addSelf(HostAndPort("h3"));
- presentTwiceExternalState.addSelf(HostAndPort("h1"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForInitiate(&notPresentExternalState, config).getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey,
- validateConfigForInitiate(&presentTwiceExternalState, config).getStatus());
- ASSERT_EQUALS(1, unittest::assertGet(validateConfigForInitiate(&presentOnceExternalState,
- config)));
- }
-
- TEST(ValidateConfigForInitiate, SelfMustBeElectable) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
- validateConfigForInitiate(&presentOnceExternalState, config).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigVersionNumberMustBeHigherThanOld) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
-
- // Two configurations, identical except for version.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
-
- // Can reconfig from old to new.
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
-
-
- // Cannot reconfig from old to old (versions must be different).
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfig,
- true).getStatus());
-
- // Cannot reconfig from new to old (versions must increase).
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- newConfig,
- oldConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- newConfig,
- oldConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigMustNotChangeSetName) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
-
- // Two configurations, compatible except for set name.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs1" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- newConfig,
- oldConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigMustNotFlipBuildIndexesFlag) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
- ReplicaSetConfig oldConfigRefresh;
-
- // Three configurations, two compatible except that h2 flips the buildIndex flag.
- // The third, compatible with the first.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "buildIndexes" << false <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "buildIndexes" << true <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfigRefresh.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "buildIndexes" << false <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
- ASSERT_OK(oldConfigRefresh.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfigRefresh,
- false).getStatus());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
-
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigMustNotFlipArbiterFlag) {
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig newConfig;
- ReplicaSetConfig oldConfigRefresh;
-
- // Three configurations, two compatible except that h2 flips the arbiterOnly flag.
- // The third, compatible with the first.
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "arbiterOnly" << false) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfigRefresh.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "arbiterOnly" << false) <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ASSERT_OK(oldConfig.validate());
- ASSERT_OK(newConfig.validate());
- ASSERT_OK(oldConfigRefresh.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- oldConfigRefresh,
- false).getStatus());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, HostAndIdRemappingRestricted) {
- // When reconfiguring a replica set, it is allowed to introduce (host, id) pairs
- // absent from the old config only when the hosts and ids were both individually
- // absent in the old config.
-
- ReplicationCoordinatorExternalStateMock externalState;
- externalState.addSelf(HostAndPort("h1"));
-
- ReplicaSetConfig oldConfig;
- ReplicaSetConfig legalNewConfigWithNewHostAndId;
- ReplicaSetConfig illegalNewConfigReusingHost;
- ReplicaSetConfig illegalNewConfigReusingId;
-
- ASSERT_OK(oldConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(oldConfig.validate());
-
- //
- // Here, the new config is valid because we've replaced (2, "h2") with
- // (4, "h4"), so neither the member _id or host name were reused.
- //
- ASSERT_OK(legalNewConfigWithNewHostAndId.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 4 << "host" << "h4") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(legalNewConfigWithNewHostAndId.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- legalNewConfigWithNewHostAndId,
- false).getStatus());
-
- //
- // Here, the new config is invalid because we've reused host name "h2" with
- // new _id 4.
- //
- ASSERT_OK(illegalNewConfigReusingHost.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 4 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(illegalNewConfigReusingHost.validate());
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- illegalNewConfigReusingHost,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForReconfig(&externalState,
- oldConfig,
- illegalNewConfigReusingHost,
- true).getStatus());
- //
- // Here, the new config is valid, because all we've changed is the name of
- // the host representing _id 2.
- //
- ASSERT_OK(illegalNewConfigReusingId.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h4") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ASSERT_OK(illegalNewConfigReusingId.validate());
- ASSERT_OK(validateConfigForReconfig(&externalState,
- oldConfig,
- illegalNewConfigReusingId,
- false).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, MustFindSelf) {
- // Old and new config are same except for version change; this is just testing that we can
- // find ourself in the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock notPresentExternalState;
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ReplicationCoordinatorExternalStateMock presentThriceExternalState;
- presentThriceExternalState.addSelf(HostAndPort("h3"));
- presentThriceExternalState.addSelf(HostAndPort("h2"));
- presentThriceExternalState.addSelf(HostAndPort("h1"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForReconfig(&notPresentExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey,
- validateConfigForReconfig(&presentThriceExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- ASSERT_EQUALS(1, unittest::assertGet(validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- false)));
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForReconfig(&notPresentExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- ASSERT_EQUALS(ErrorCodes::DuplicateKey,
- validateConfigForReconfig(&presentThriceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- ASSERT_EQUALS(1, unittest::assertGet(validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true)));
- }
-
- TEST(ValidateConfigForReconfig, SelfMustEndElectable) {
- // Old and new config are same except for version change and the electability of one node;
- // this is just testing that we must be electable in the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 3 << "host" << "h3")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 << "host" << "h2" <<
- "priority" << 0) <<
- BSON("_id" << 3 << "host" << "h3")))));
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
- validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfig does not require electability.
- ASSERT_OK(validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForInitiate, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForInitiate will return a status indicating what is
- // wrong with the new config.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForInitiate(&presentOnceExternalState,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForReconfig, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForReconfig will return a status indicating what is
- // wrong with the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- false).getStatus());
- // Forced reconfigs also do not allow this.
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateConfigForStartUp, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForStartUp will return a status indicating what is wrong
- // with the new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2")))));
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue, validateConfigForStartUp(&presentOnceExternalState,
- oldConfig,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForStartUp, OldAndNewConfigIncompatible) {
- // The new config is not compatible with the old config due to a member changing _ids. This
- // tests that validateConfigForStartUp will return a status indicating the incompatiblilty
- // between the old and new config.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 2 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- validateConfigForStartUp(&presentOnceExternalState,
- oldConfig,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForStartUp, OldAndNewConfigCompatible) {
- // The new config is compatible with the old config. This tests that
- // validateConfigForStartUp will return a Status::OK() indicating the validity of this
- // config change.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2" <<
- "priority" << 3) <<
- BSON("_id" << 1 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_OK(validateConfigForStartUp(&presentOnceExternalState,
- oldConfig,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForHeartbeatReconfig, NewConfigInvalid) {
- // The new config is not valid due to a duplicate _id value. This tests that if the new
- // config is invalid, validateConfigForHeartbeatReconfig will return a status indicating
- // what is wrong with the new config.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForHeartbeatReconfig(&presentOnceExternalState,
- newConfig).getStatus());
- }
-
- TEST(ValidateConfigForHeartbeatReconfig, NewConfigValid) {
- // The new config is valid. This tests that validateConfigForHeartbeatReconfig will return
- // a Status::OK() indicating the validity of this config change.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_OK(validateConfigForHeartbeatReconfig(&presentOnceExternalState,
- newConfig).getStatus());
- }
-
- TEST(ValidateForReconfig, ForceStillNeedsValidConfig) {
- // The new config is invalid due to two nodes with the same _id value. This tests that
- // ValidateForReconfig fails with an invalid config, even if force is true.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 0 << "host" << "h3")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::BadValue,
- validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
-
- TEST(ValidateForReconfig, ForceStillNeedsSelfPresent) {
- // The new config does not contain self. This tests that ValidateForReconfig fails
- // if the member receiving it is absent from the config, even if force is true.
- ReplicaSetConfig oldConfig;
- ASSERT_OK(oldConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "h2") <<
- BSON("_id" << 1 << "host" << "h3")))));
-
-
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h3") <<
- BSON("_id" << 2 << "host" << "h4")))));
-
- ReplicationCoordinatorExternalStateMock presentOnceExternalState;
- presentOnceExternalState.addSelf(HostAndPort("h2"));
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- validateConfigForReconfig(&presentOnceExternalState,
- oldConfig,
- newConfig,
- true).getStatus());
- }
+TEST(ValidateConfigForInitiate, VersionMustBe1) {
+ ReplicationCoordinatorExternalStateMock rses;
+ rses.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")))));
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForInitiate(&rses, config).getStatus());
+}
+
+TEST(ValidateConfigForInitiate, MustFindSelf) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock notPresentExternalState;
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ReplicationCoordinatorExternalStateMock presentTwiceExternalState;
+ presentTwiceExternalState.addSelf(HostAndPort("h3"));
+ presentTwiceExternalState.addSelf(HostAndPort("h1"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForInitiate(&notPresentExternalState, config).getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey,
+ validateConfigForInitiate(&presentTwiceExternalState, config).getStatus());
+ ASSERT_EQUALS(
+ 1, unittest::assertGet(validateConfigForInitiate(&presentOnceExternalState, config)));
+}
+
+TEST(ValidateConfigForInitiate, SelfMustBeElectable) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
+ validateConfigForInitiate(&presentOnceExternalState, config).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigVersionNumberMustBeHigherThanOld) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+
+ // Two configurations, identical except for version.
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+
+ // Can reconfig from old to new.
+ ASSERT_OK(validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+
+
+ // Cannot reconfig from old to old (versions must be different).
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, oldConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, oldConfig, true).getStatus());
+
+ // Cannot reconfig from new to old (versions must increase).
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, newConfig, oldConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, newConfig, oldConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigMustNotChangeSetName) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+
+ // Two configurations, compatible except for set name.
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs1"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+        validateConfigForReconfig(&externalState, oldConfig, newConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigMustNotFlipBuildIndexesFlag) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+ ReplicaSetConfig oldConfigRefresh;
+
+    // Three configurations: the first two identical except that h2 flips the buildIndexes
+    // flag, making them incompatible; the third is compatible with the first.
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "buildIndexes" << false
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "buildIndexes" << true
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ oldConfigRefresh.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "buildIndexes" << false
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ ASSERT_OK(oldConfigRefresh.validate());
+ ASSERT_OK(
+ validateConfigForReconfig(&externalState, oldConfig, oldConfigRefresh, false).getStatus());
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigMustNotFlipArbiterFlag) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+ ReplicaSetConfig oldConfigRefresh;
+
+    // Three configurations: the first two identical except that h2 flips the arbiterOnly
+    // flag, making them incompatible; the third is compatible with the first.
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "arbiterOnly" << false)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(
+ oldConfigRefresh.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "arbiterOnly" << false)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ ASSERT_OK(oldConfigRefresh.validate());
+ ASSERT_OK(
+ validateConfigForReconfig(&externalState, oldConfig, oldConfigRefresh, false).getStatus());
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, true).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, HostAndIdRemappingRestricted) {
+    // When reconfiguring a replica set, a (host, _id) pair absent from the old config may
+    // be introduced only if both the host and the _id were individually absent from the old
+    // config.
+
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig legalNewConfigWithNewHostAndId;
+ ReplicaSetConfig illegalNewConfigReusingHost;
+    ReplicaSetConfig legalNewConfigReusingId;
+
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(oldConfig.validate());
+
+ //
+ // Here, the new config is valid because we've replaced (2, "h2") with
+    // (4, "h4"), so neither the member _id nor the host name was reused.
+ //
+ ASSERT_OK(
+ legalNewConfigWithNewHostAndId.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 4 << "host"
+ << "h4")
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(legalNewConfigWithNewHostAndId.validate());
+ ASSERT_OK(validateConfigForReconfig(
+ &externalState, oldConfig, legalNewConfigWithNewHostAndId, false).getStatus());
+
+ //
+ // Here, the new config is invalid because we've reused host name "h2" with
+ // new _id 4.
+ //
+ ASSERT_OK(illegalNewConfigReusingHost.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 4 << "host"
+ << "h2")
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ASSERT_OK(illegalNewConfigReusingHost.validate());
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(
+ &externalState, oldConfig, illegalNewConfigReusingHost, false).getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(
+ &externalState, oldConfig, illegalNewConfigReusingHost, true).getStatus());
+ //
+ // Here, the new config is valid, because all we've changed is the name of
+ // the host representing _id 2.
+ //
+    ASSERT_OK(legalNewConfigReusingId.initialize(BSON("_id"
+                                                      << "rs0"
+                                                      << "version" << 2 << "members"
+                                                      << BSON_ARRAY(BSON("_id" << 1 << "host"
+                                                                               << "h1")
+                                                                    << BSON("_id" << 2 << "host"
+                                                                                  << "h4")
+                                                                    << BSON("_id" << 3 << "host"
+                                                                                  << "h3")))));
+    ASSERT_OK(legalNewConfigReusingId.validate());
+    ASSERT_OK(validateConfigForReconfig(&externalState, oldConfig, legalNewConfigReusingId, false)
+                  .getStatus());
+}
+
+TEST(ValidateConfigForReconfig, MustFindSelf) {
+    // Old and new configs are the same except for the version change; this just tests that
+    // we can find ourselves in the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock notPresentExternalState;
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ReplicationCoordinatorExternalStateMock presentThriceExternalState;
+ presentThriceExternalState.addSelf(HostAndPort("h3"));
+ presentThriceExternalState.addSelf(HostAndPort("h2"));
+ presentThriceExternalState.addSelf(HostAndPort("h1"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForReconfig(&notPresentExternalState, oldConfig, newConfig, false)
+ .getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey,
+ validateConfigForReconfig(
+ &presentThriceExternalState, oldConfig, newConfig, false).getStatus());
+ ASSERT_EQUALS(1,
+ unittest::assertGet(validateConfigForReconfig(
+ &presentOnceExternalState, oldConfig, newConfig, false)));
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForReconfig(&notPresentExternalState, oldConfig, newConfig, true)
+ .getStatus());
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey,
+ validateConfigForReconfig(&presentThriceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+ ASSERT_EQUALS(1,
+ unittest::assertGet(validateConfigForReconfig(
+ &presentOnceExternalState, oldConfig, newConfig, true)));
+}
+
+TEST(ValidateConfigForReconfig, SelfMustEndElectable) {
+    // Old and new configs are the same except for the version change and the electability
+    // of one node; this just tests that we must be electable in the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "h3")))));
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotElectable,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, false)
+ .getStatus());
+ // Forced reconfig does not require electability.
+ ASSERT_OK(validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
+
+TEST(ValidateConfigForInitiate, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForInitiate will return a status indicating what is
+ // wrong with the new config.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForInitiate(&presentOnceExternalState, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForReconfig, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForReconfig will return a status indicating what is
+ // wrong with the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, false)
+ .getStatus());
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
+
+TEST(ValidateConfigForStartUp, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForStartUp will return a status indicating what is wrong
+ // with the new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")))));
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(
+ ErrorCodes::BadValue,
+ validateConfigForStartUp(&presentOnceExternalState, oldConfig, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForStartUp, OldAndNewConfigIncompatible) {
+ // The new config is not compatible with the old config due to a member changing _ids. This
+    // tests that validateConfigForStartUp will return a status indicating the incompatibility
+ // between the old and new config.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 2 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForStartUp(&presentOnceExternalState, oldConfig, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForStartUp, OldAndNewConfigCompatible) {
+ // The new config is compatible with the old config. This tests that
+ // validateConfigForStartUp will return a Status::OK() indicating the validity of this
+ // config change.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2"
+ << "priority" << 3)
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_OK(
+ validateConfigForStartUp(&presentOnceExternalState, oldConfig, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForHeartbeatReconfig, NewConfigInvalid) {
+ // The new config is not valid due to a duplicate _id value. This tests that if the new
+ // config is invalid, validateConfigForHeartbeatReconfig will return a status indicating
+ // what is wrong with the new config.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(
+ ErrorCodes::BadValue,
+ validateConfigForHeartbeatReconfig(&presentOnceExternalState, newConfig).getStatus());
+}
+
+TEST(ValidateConfigForHeartbeatReconfig, NewConfigValid) {
+ // The new config is valid. This tests that validateConfigForHeartbeatReconfig will return
+ // a Status::OK() indicating the validity of this config change.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_OK(validateConfigForHeartbeatReconfig(&presentOnceExternalState, newConfig).getStatus());
+}
+
+TEST(ValidateForReconfig, ForceStillNeedsValidConfig) {
+ // The new config is invalid due to two nodes with the same _id value. This tests that
+ // ValidateForReconfig fails with an invalid config, even if force is true.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 0 << "host"
+ << "h3")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::BadValue,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
+
+TEST(ValidateForReconfig, ForceStillNeedsSelfPresent) {
+ // The new config does not contain self. This tests that ValidateForReconfig fails
+ // if the member receiving it is absent from the config, even if force is true.
+ ReplicaSetConfig oldConfig;
+ ASSERT_OK(oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h2")
+ << BSON("_id" << 1 << "host"
+ << "h3")))));
+
+
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h3")
+ << BSON("_id" << 2 << "host"
+ << "h4")))));
+
+ ReplicationCoordinatorExternalStateMock presentOnceExternalState;
+ presentOnceExternalState.addSelf(HostAndPort("h2"));
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound,
+ validateConfigForReconfig(&presentOnceExternalState, oldConfig, newConfig, true)
+ .getStatus());
+}
} // namespace
} // namespace repl
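Taken together, the reconfig tests above pin down the contract of validateConfigForReconfig(): the force flag relaxes only the electability check, while config validity, set-name and version monotonicity, host/_id identity rules, and self-presence are enforced regardless. A minimal caller sketch of that contract, assuming only the StatusWith<int> signature exercised by the tests (tryReconfig itself is a hypothetical helper for illustration, not part of this change):

    // Hypothetical helper for illustration; assumes only the
    // validateConfigForReconfig() signature used in the tests above.
    Status tryReconfig(ReplicationCoordinatorExternalState* externalState,
                       const ReplicaSetConfig& oldConfig,
                       const ReplicaSetConfig& newConfig,
                       bool force) {
        StatusWith<int> result =
            validateConfigForReconfig(externalState, oldConfig, newConfig, force);
        if (!result.isOK()) {
            // E.g. BadValue, NodeNotFound, NewReplicaSetConfigurationIncompatible,
            // or (non-forced only) NodeNotElectable, as asserted above.
            return result.getStatus();
        }
        // On success the value is this node's index in the new config
        // (e.g. 1 for "h2" in the three-member configs above).
        return Status::OK();
    }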
diff --git a/src/mongo/db/repl/replica_set_config_test.cpp b/src/mongo/db/repl/replica_set_config_test.cpp
index b77a8844079..73e1b42d305 100644
--- a/src/mongo/db/repl/replica_set_config_test.cpp
+++ b/src/mongo/db/repl/replica_set_config_test.cpp
@@ -36,654 +36,704 @@ namespace mongo {
namespace repl {
namespace {
- TEST(ReplicaSetConfig, ParseMinimalConfigAndCheckDefaults) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("rs0", config.getReplSetName());
- ASSERT_EQUALS(1, config.getConfigVersion());
- ASSERT_EQUALS(1, config.getNumMembers());
- ASSERT_EQUALS(0, config.membersBegin()->getId());
- ASSERT_EQUALS(1, config.getDefaultWriteConcern().wNumNodes);
- ASSERT_EQUALS("", config.getDefaultWriteConcern().wMode);
- ASSERT_EQUALS(10, config.getHeartbeatTimeoutPeriod().total_seconds());
- ASSERT_TRUE(config.isChainingAllowed());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationThreeVotersNoArbiters) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1") <<
- BSON("_id" << 4 << "host" << "h4:1" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "h5:1" << "votes" << 0)))));
- ASSERT_OK(config.validate());
-
- ASSERT_EQUALS(2, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbiters) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "arbiterOnly" << true) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(3, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbitersOthersNoVote) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" <<
- "_id" << 0 <<
- "votes" << 0) <<
- BSON("host" << "node2:12345" <<
- "_id" << 1 <<
- "votes" << 0) <<
- BSON("host" << "node3:12345" <<
- "_id" << 2 <<
- "votes" << 0) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "arbiterOnly" << true) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(0, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationEvenNumberOfMembers) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(3, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfSecondariesNoVotes) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" <<
- "_id" << 1 <<
- "votes" << 0) <<
- BSON("host" << "node3:12345" <<
- "_id" << 2 <<
- "votes" << 0) <<
- BSON("host" << "node4:12345" << "_id" << 3) <<
- BSON("host" << "node5:12345" << "_id" << 4)))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(2, config.getWriteMajority());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingIdField) {
- ReplicaSetConfig config;
- // Replica set name must be a string.
- ASSERT_EQUALS(
- ErrorCodes::TypeMismatch,
- config.initialize(
- BSON("_id" << 1 <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- // Replica set name must be present.
- ASSERT_EQUALS(
- ErrorCodes::NoSuchKey,
- config.initialize(
- BSON("version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- // Empty repl set name parses, but does not validate.
- ASSERT_OK(config.initialize(
- BSON("_id" << "" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingVersionField) {
- ReplicaSetConfig config;
- // Config version field must be present.
- ASSERT_EQUALS(
- ErrorCodes::NoSuchKey,
- config.initialize(
- BSON("_id" << "rs0" <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_EQUALS(
- ErrorCodes::TypeMismatch,
- config.initialize(
- BSON("_id" << "rs0" <<
- "version" << "1" <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1.0 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_OK(config.validate());
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 0.0 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" <<
- static_cast<long long>(std::numeric_limits<int>::max()) + 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithBadMembers) {
- ReplicaSetConfig config;
- ASSERT_EQUALS(ErrorCodes::TypeMismatch,
- config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345") <<
- "localhost:23456"))));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey,
- config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("host" << "localhost:12345")))));
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithLocalNonLocalHostMix) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost") <<
- BSON("_id" << 1 <<
- "host" << "otherhost")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNoElectableNodes) {
- ReplicaSetConfig config;
- const BSONObj configBsonNoElectableNodes = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "priority" << 0)));
-
- ASSERT_OK(config.initialize(configBsonNoElectableNodes));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonNoElectableNodesOneArbiter = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "arbiterOnly" << 1) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "priority" << 0)));
-
- ASSERT_OK(config.initialize(configBsonNoElectableNodesOneArbiter));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonNoElectableNodesTwoArbiters = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "arbiterOnly" << 1) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "arbiterOnly" << 1)));
-
- ASSERT_OK(config.initialize(configBsonNoElectableNodesOneArbiter));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonOneElectableNode = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "priority" << 1)));
- ASSERT_OK(config.initialize(configBsonOneElectableNode));
- ASSERT_OK(config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithTooFewVoters) {
- ReplicaSetConfig config;
- const BSONObj configBsonNoVoters = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "votes" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "votes" << 0)));
-
- ASSERT_OK(config.initialize(configBsonNoVoters));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- const BSONObj configBsonOneVoter = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1" << "votes" << 0) <<
- BSON("_id" << 1 << "host" << "localhost:2" << "votes" << 1)));
- ASSERT_OK(config.initialize(configBsonOneVoter));
- ASSERT_OK(config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithTooManyVoters) {
- ReplicaSetConfig config;
- namespace mmb = mutablebson;
- mmb::Document configDoc;
- mmb::Element configDocRoot = configDoc.root();
- ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
- ASSERT_OK(configDocRoot.appendInt("version", 1));
- mmb::Element membersArray = configDoc.makeElementArray("members");
- ASSERT_OK(configDocRoot.pushBack(membersArray));
- for (size_t i = 0; i < ReplicaSetConfig::kMaxVotingMembers + 1; ++i) {
- mmb::Element memberElement = configDoc.makeElementObject("");
- ASSERT_OK(membersArray.pushBack(memberElement));
- ASSERT_OK(memberElement.appendInt("_id", i));
- ASSERT_OK(memberElement.appendString(
- "host", std::string(str::stream() << "localhost" << i + 1)));
- ASSERT_OK(memberElement.appendInt("votes", 1));
- }
-
- const BSONObj configBsonTooManyVoters = configDoc.getObject();
-
- membersArray.leftChild().findFirstChildNamed("votes").setValueInt(0);
- const BSONObj configBsonMaxVoters = configDoc.getObject();
-
-
- ASSERT_OK(config.initialize(configBsonMaxVoters));
- ASSERT_OK(config.validate());
- ASSERT_OK(config.initialize(configBsonTooManyVoters));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithDuplicateHost) {
- ReplicaSetConfig config;
- const BSONObj configBson = BSON(
- "_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "localhost:1") <<
- BSON("_id" << 1 << "host" << "localhost:1")));
- ASSERT_OK(config.initialize(configBson));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithTooManyNodes) {
- ReplicaSetConfig config;
- namespace mmb = mutablebson;
- mmb::Document configDoc;
- mmb::Element configDocRoot = configDoc.root();
- ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
- ASSERT_OK(configDocRoot.appendInt("version", 1));
- mmb::Element membersArray = configDoc.makeElementArray("members");
- ASSERT_OK(configDocRoot.pushBack(membersArray));
- for (size_t i = 0; i < ReplicaSetConfig::kMaxMembers; ++i) {
- mmb::Element memberElement = configDoc.makeElementObject("");
- ASSERT_OK(membersArray.pushBack(memberElement));
- ASSERT_OK(memberElement.appendInt("_id", i));
- ASSERT_OK(memberElement.appendString(
- "host", std::string(str::stream() << "localhost" << i + 1)));
- if (i >= ReplicaSetConfig::kMaxVotingMembers) {
- ASSERT_OK(memberElement.appendInt("votes", 0));
- }
- }
- const BSONObj configBsonMaxNodes = configDoc.getObject();
-
+TEST(ReplicaSetConfig, ParseMinimalConfigAndCheckDefaults) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("rs0", config.getReplSetName());
+ ASSERT_EQUALS(1, config.getConfigVersion());
+ ASSERT_EQUALS(1, config.getNumMembers());
+ ASSERT_EQUALS(0, config.membersBegin()->getId());
+ ASSERT_EQUALS(1, config.getDefaultWriteConcern().wNumNodes);
+ ASSERT_EQUALS("", config.getDefaultWriteConcern().wMode);
+ ASSERT_EQUALS(10, config.getHeartbeatTimeoutPeriod().total_seconds());
+ ASSERT_TRUE(config.isChainingAllowed());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationThreeVotersNoArbiters) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "h5:1"
+ << "votes" << 0)))));
+ ASSERT_OK(config.validate());
+
+ ASSERT_EQUALS(2, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbiters) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "arbiterOnly" << true)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(3, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfArbitersOthersNoVote) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0 << "votes" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1 << "votes" << 0)
+ << BSON("host"
+ << "node3:12345"
+ << "_id" << 2 << "votes" << 0)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "arbiterOnly" << true)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(0, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationEvenNumberOfMembers) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(3, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, MajorityCalculationNearlyHalfSecondariesNoVotes) {
+ ReplicaSetConfig config;
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1 << "votes" << 0)
+ << BSON("host"
+ << "node3:12345"
+ << "_id" << 2 << "votes" << 0)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3) << BSON("host"
+ << "node5:12345"
+ << "_id" << 4)))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(2, config.getWriteMajority());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingIdField) {
+ ReplicaSetConfig config;
+ // Replica set name must be a string.
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ config.initialize(BSON("_id" << 1 << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ // Replica set name must be present.
+ ASSERT_EQUALS(
+ ErrorCodes::NoSuchKey,
+ config.initialize(
+ BSON("version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ // Empty repl set name parses, but does not validate.
+ ASSERT_OK(config.initialize(BSON("_id"
+ << ""
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithBadOrMissingVersionField) {
+ ReplicaSetConfig config;
+ // Config version field must be present.
+ ASSERT_EQUALS(
+ ErrorCodes::NoSuchKey,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_EQUALS(
+ ErrorCodes::TypeMismatch,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version"
+ << "1"
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1.0 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_OK(config.validate());
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 0.0 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+ ASSERT_OK(
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version"
+ << static_cast<long long>(std::numeric_limits<int>::max()) + 1
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithBadMembers) {
+ ReplicaSetConfig config;
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")
+ << "localhost:23456"))));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey,
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "localhost:12345")))));
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithLocalNonLocalHostMix) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost")
+ << BSON("_id" << 1 << "host"
+ << "otherhost")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNoElectableNodes) {
+ ReplicaSetConfig config;
+ const BSONObj configBsonNoElectableNodes = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "priority"
+ << 0)));
+
+ ASSERT_OK(config.initialize(configBsonNoElectableNodes));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonNoElectableNodesOneArbiter =
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "arbiterOnly" << 1)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "priority" << 0)));
+
+ ASSERT_OK(config.initialize(configBsonNoElectableNodesOneArbiter));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonNoElectableNodesTwoArbiters =
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "arbiterOnly" << 1)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "arbiterOnly" << 1)));
+
+    ASSERT_OK(config.initialize(configBsonNoElectableNodesTwoArbiters));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonOneElectableNode = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "priority"
+ << 1)));
+ ASSERT_OK(config.initialize(configBsonOneElectableNode));
+ ASSERT_OK(config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithTooFewVoters) {
+ ReplicaSetConfig config;
+ const BSONObj configBsonNoVoters = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "votes" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "votes" << 0)));
+
+ ASSERT_OK(config.initialize(configBsonNoVoters));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ const BSONObj configBsonOneVoter = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1"
+ << "votes" << 0)
+ << BSON("_id" << 1 << "host"
+ << "localhost:2"
+ << "votes" << 1)));
+ ASSERT_OK(config.initialize(configBsonOneVoter));
+ ASSERT_OK(config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithTooManyVoters) {
+ ReplicaSetConfig config;
+ namespace mmb = mutablebson;
+ mmb::Document configDoc;
+ mmb::Element configDocRoot = configDoc.root();
+ ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
+ ASSERT_OK(configDocRoot.appendInt("version", 1));
+ mmb::Element membersArray = configDoc.makeElementArray("members");
+ ASSERT_OK(configDocRoot.pushBack(membersArray));
+ for (size_t i = 0; i < ReplicaSetConfig::kMaxVotingMembers + 1; ++i) {
mmb::Element memberElement = configDoc.makeElementObject("");
ASSERT_OK(membersArray.pushBack(memberElement));
- ASSERT_OK(memberElement.appendInt("_id", ReplicaSetConfig::kMaxMembers));
- ASSERT_OK(memberElement.appendString(
- "host", std::string(str::stream() <<
- "localhost" << ReplicaSetConfig::kMaxMembers + 1)));
- ASSERT_OK(memberElement.appendInt("votes", 0));
- const BSONObj configBsonTooManyNodes = configDoc.getObject();
-
-
- ASSERT_OK(config.initialize(configBsonMaxNodes));
- ASSERT_OK(config.validate());
- ASSERT_OK(config.initialize(configBsonTooManyNodes));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithUnexpectedField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "unexpectedfield" << "value"));
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonArrayMembersField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << "value"));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonNumericHeartbeatTimeoutSecsField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonBoolChainingAllowedField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("chainingAllowed" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectSettingsField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << "none"));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithGetLastErrorDefaultsFieldUnparseable) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("getLastErrorDefaults" << BSON(
- "fsync" << "seven"))));
- ASSERT_EQUALS(ErrorCodes::FailedToParse, status);
+ ASSERT_OK(memberElement.appendInt("_id", i));
+ ASSERT_OK(
+ memberElement.appendString("host", std::string(str::stream() << "localhost" << i + 1)));
+ ASSERT_OK(memberElement.appendInt("votes", 1));
}
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorDefaultsField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("getLastErrorDefaults" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("getLastErrorModes" << "no")));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithDuplicateGetLastErrorModesField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag" << 1) <<
- "one" << BSON("tag" << 1)))));
- ASSERT_EQUALS(ErrorCodes::DuplicateKey, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesEntryField) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << 1))));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonNumericGetLastErrorModesConstraintValue) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag" << "no")))));
- ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNegativeGetLastErrorModesConstraintValue) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag" << -1)))));
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ParseFailsWithNonExistentGetLastErrorModesConstraintTag) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("tag" << "yes"))) <<
- "settings" << BSON("getLastErrorModes" << BSON(
- "one" << BSON("tag2" << 1)))));
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, status);
- }
-
- TEST(ReplicaSetConfig, ValidateFailsWithDuplicateMemberId) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345") <<
- BSON("_id" << 0 <<
- "host" << "someoneelse:12345"))));
- ASSERT_OK(status);
-
- status = config.validate();
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ValidateFailsWithInvalidMember) {
- ReplicaSetConfig config;
- Status status = config.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "hidden" << true))));
- ASSERT_OK(status);
-
- status = config.validate();
- ASSERT_EQUALS(ErrorCodes::BadValue, status);
- }
-
- TEST(ReplicaSetConfig, ChainingAllowedField) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("chainingAllowed" << true))));
- ASSERT_OK(config.validate());
- ASSERT_TRUE(config.isChainingAllowed());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("chainingAllowed" << false))));
- ASSERT_OK(config.validate());
- ASSERT_FALSE(config.isChainingAllowed());
- }
-
- TEST(ReplicaSetConfig, HeartbeatTimeoutField) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 20))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS(20, config.getHeartbeatTimeoutPeriod().total_seconds());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << -20))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
- }
-
- TEST(ReplicaSetConfig, GleDefaultField) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << "majority")))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << "frim")))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << 0)))));
- ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
-
- ASSERT_OK(config.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345" <<
- "tags" << BSON("a" << "v"))) <<
- "settings" << BSON(
- "getLastErrorDefaults" << BSON("w" << "frim") <<
- "getLastErrorModes" << BSON("frim" << BSON("a" << 1))))));
- ASSERT_OK(config.validate());
- ASSERT_EQUALS("frim", config.getDefaultWriteConcern().wMode);
- ASSERT_OK(config.findCustomWriteMode("frim").getStatus());
- }
-
- bool operator==(const MemberConfig& a, const MemberConfig& b) {
- // do tag comparisons
- for (MemberConfig::TagIterator itrA = a.tagsBegin(); itrA != a.tagsEnd(); ++itrA) {
- if (std::find(b.tagsBegin(), b.tagsEnd(), *itrA) == b.tagsEnd()) {
- return false;
- }
+ const BSONObj configBsonTooManyVoters = configDoc.getObject();
+
+ membersArray.leftChild().findFirstChildNamed("votes").setValueInt(0);
+ const BSONObj configBsonMaxVoters = configDoc.getObject();
+
+
+ ASSERT_OK(config.initialize(configBsonMaxVoters));
+ ASSERT_OK(config.validate());
+ ASSERT_OK(config.initialize(configBsonTooManyVoters));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithDuplicateHost) {
+ ReplicaSetConfig config;
+ const BSONObj configBson = BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:1")
+ << BSON("_id" << 1 << "host"
+ << "localhost:1")));
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithTooManyNodes) {
+ ReplicaSetConfig config;
+ namespace mmb = mutablebson;
+ mmb::Document configDoc;
+ mmb::Element configDocRoot = configDoc.root();
+ ASSERT_OK(configDocRoot.appendString("_id", "rs0"));
+ ASSERT_OK(configDocRoot.appendInt("version", 1));
+ mmb::Element membersArray = configDoc.makeElementArray("members");
+ ASSERT_OK(configDocRoot.pushBack(membersArray));
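+    // Build exactly kMaxMembers members; members beyond kMaxVotingMembers get
+    // "votes" set to 0 so the voter limit does not trip before the node limit.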
+ for (size_t i = 0; i < ReplicaSetConfig::kMaxMembers; ++i) {
+ mmb::Element memberElement = configDoc.makeElementObject("");
+ ASSERT_OK(membersArray.pushBack(memberElement));
+ ASSERT_OK(memberElement.appendInt("_id", i));
+ ASSERT_OK(
+ memberElement.appendString("host", std::string(str::stream() << "localhost" << i + 1)));
+ if (i >= ReplicaSetConfig::kMaxVotingMembers) {
+ ASSERT_OK(memberElement.appendInt("votes", 0));
}
- return a.getId() == b.getId() &&
- a.getHostAndPort() == b.getHostAndPort() &&
- a.getPriority() == b.getPriority() &&
- a.getSlaveDelay() == b.getSlaveDelay() &&
- a.isVoter() == b.isVoter() &&
- a.isArbiter() == b.isArbiter() &&
- a.isHidden() == b.isHidden() &&
- a.shouldBuildIndexes() == b.shouldBuildIndexes() &&
- a.getNumTags() == b.getNumTags();
}
-
- bool operator==(const ReplicaSetConfig& a, const ReplicaSetConfig& b) {
- // compare WriteConcernModes
- std::vector<std::string> modeNames = a.getWriteConcernNames();
- for (std::vector<std::string>::iterator it = modeNames.begin();
- it != modeNames.end();
- it++) {
- ReplicaSetTagPattern patternA = a.findCustomWriteMode(*it).getValue();
- ReplicaSetTagPattern patternB = b.findCustomWriteMode(*it).getValue();
- for (ReplicaSetTagPattern::ConstraintIterator itrA = patternA.constraintsBegin();
- itrA != patternA.constraintsEnd();
- itrA++) {
- bool same = false;
- for (ReplicaSetTagPattern::ConstraintIterator itrB = patternB.constraintsBegin();
- itrB != patternB.constraintsEnd();
- itrB++) {
- if (itrA->getKeyIndex() == itrB->getKeyIndex() &&
- itrA->getMinCount() == itrB->getMinCount()) {
- same = true;
- break;
- }
- }
- if (!same) {
- return false;
- }
- }
+ const BSONObj configBsonMaxNodes = configDoc.getObject();
+
+ mmb::Element memberElement = configDoc.makeElementObject("");
+ ASSERT_OK(membersArray.pushBack(memberElement));
+ ASSERT_OK(memberElement.appendInt("_id", ReplicaSetConfig::kMaxMembers));
+ ASSERT_OK(memberElement.appendString(
+ "host", std::string(str::stream() << "localhost" << ReplicaSetConfig::kMaxMembers + 1)));
+ ASSERT_OK(memberElement.appendInt("votes", 0));
+ const BSONObj configBsonTooManyNodes = configDoc.getObject();
+
+
+ ASSERT_OK(config.initialize(configBsonMaxNodes));
+ ASSERT_OK(config.validate());
+ ASSERT_OK(config.initialize(configBsonTooManyNodes));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithUnexpectedField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "unexpectedfield"
+ << "value"));
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonArrayMembersField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << "value"));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonNumericHeartbeatTimeoutSecsField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("heartbeatTimeoutSecs"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonBoolChainingAllowedField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("chainingAllowed"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectSettingsField) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << "none"));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithGetLastErrorDefaultsFieldUnparseable) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("fsync"
+ << "seven"))));
+ ASSERT_EQUALS(ErrorCodes::FailedToParse, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorDefaultsField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("getLastErrorDefaults"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesField) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "settings" << BSON("getLastErrorModes"
+ << "no")));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithDuplicateGetLastErrorModesField) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes"
+ << BSON("one" << BSON("tag" << 1) << "one"
+ << BSON("tag" << 1)))));
+ ASSERT_EQUALS(ErrorCodes::DuplicateKey, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonObjectGetLastErrorModesEntryField) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << 1))));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonNumericGetLastErrorModesConstraintValue) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << BSON("tag"
+ << "no")))));
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNegativeGetLastErrorModesConstraintValue) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << BSON("tag" << -1)))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ParseFailsWithNonExistentGetLastErrorModesConstraintTag) {
+ ReplicaSetConfig config;
+ Status status =
+ config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("tag"
+ << "yes"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("one" << BSON("tag2" << 1)))));
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, status);
+}
+
+TEST(ReplicaSetConfig, ValidateFailsWithDuplicateMemberId) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")
+ << BSON("_id" << 0 << "host"
+ << "someoneelse:12345"))));
+ ASSERT_OK(status);
+
+ status = config.validate();
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ValidateFailsWithInvalidMember) {
+ ReplicaSetConfig config;
+ Status status = config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "hidden" << true))));
+ ASSERT_OK(status);
+
+ status = config.validate();
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+}
+
+TEST(ReplicaSetConfig, ChainingAllowedField) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("chainingAllowed" << true))));
+ ASSERT_OK(config.validate());
+ ASSERT_TRUE(config.isChainingAllowed());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("chainingAllowed" << false))));
+ ASSERT_OK(config.validate());
+ ASSERT_FALSE(config.isChainingAllowed());
+}
+
+TEST(ReplicaSetConfig, HeartbeatTimeoutField) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 20))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS(20, config.getHeartbeatTimeoutPeriod().total_seconds());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << -20))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+}
+
+TEST(ReplicaSetConfig, GleDefaultField) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("w"
+ << "majority")))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("w"
+ << "frim")))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("getLastErrorDefaults" << BSON("w" << 0)))));
+ ASSERT_EQUALS(ErrorCodes::BadValue, config.validate());
+
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "tags" << BSON("a"
+ << "v")))
+ << "settings" << BSON("getLastErrorDefaults"
+ << BSON("w"
+ << "frim") << "getLastErrorModes"
+ << BSON("frim" << BSON("a" << 1))))));
+ ASSERT_OK(config.validate());
+ ASSERT_EQUALS("frim", config.getDefaultWriteConcern().wMode);
+ ASSERT_OK(config.findCustomWriteMode("frim").getStatus());
+}
+
+bool operator==(const MemberConfig& a, const MemberConfig& b) {
+    // Tag comparison: every tag of a must also be a tag of b; combined with the
+    // equal tag counts checked below, this makes the two tag sets equal.
+ for (MemberConfig::TagIterator itrA = a.tagsBegin(); itrA != a.tagsEnd(); ++itrA) {
+ if (std::find(b.tagsBegin(), b.tagsEnd(), *itrA) == b.tagsEnd()) {
+ return false;
}
-
- // compare the members
- for (ReplicaSetConfig::MemberIterator memA = a.membersBegin();
- memA != a.membersEnd();
- memA++) {
+ }
+ return a.getId() == b.getId() && a.getHostAndPort() == b.getHostAndPort() &&
+ a.getPriority() == b.getPriority() && a.getSlaveDelay() == b.getSlaveDelay() &&
+ a.isVoter() == b.isVoter() && a.isArbiter() == b.isArbiter() &&
+ a.isHidden() == b.isHidden() && a.shouldBuildIndexes() == b.shouldBuildIndexes() &&
+ a.getNumTags() == b.getNumTags();
+}
+
+bool operator==(const ReplicaSetConfig& a, const ReplicaSetConfig& b) {
+    // Compare write concern modes: every custom write mode of a must appear in b
+    // with an equivalent set of tag-count constraints.
+ std::vector<std::string> modeNames = a.getWriteConcernNames();
+ for (std::vector<std::string>::iterator it = modeNames.begin(); it != modeNames.end(); it++) {
+ ReplicaSetTagPattern patternA = a.findCustomWriteMode(*it).getValue();
+ ReplicaSetTagPattern patternB = b.findCustomWriteMode(*it).getValue();
+ for (ReplicaSetTagPattern::ConstraintIterator itrA = patternA.constraintsBegin();
+ itrA != patternA.constraintsEnd();
+ itrA++) {
bool same = false;
- for (ReplicaSetConfig::MemberIterator memB = b.membersBegin();
- memB != b.membersEnd();
- memB++) {
- if (*memA == *memB) {
+ for (ReplicaSetTagPattern::ConstraintIterator itrB = patternB.constraintsBegin();
+ itrB != patternB.constraintsEnd();
+ itrB++) {
+ if (itrA->getKeyIndex() == itrB->getKeyIndex() &&
+ itrA->getMinCount() == itrB->getMinCount()) {
same = true;
break;
}
@@ -692,291 +742,445 @@ namespace {
return false;
}
}
-
- // simple comparisons
- return a.getReplSetName() == b.getReplSetName() &&
- a.getConfigVersion() == b.getConfigVersion() &&
- a.getNumMembers() == b.getNumMembers() &&
- a.getHeartbeatTimeoutPeriod() == b.getHeartbeatTimeoutPeriod() &&
- a.isChainingAllowed() == b.isChainingAllowed() &&
- a.getDefaultWriteConcern().wNumNodes == b.getDefaultWriteConcern().wNumNodes &&
- a.getDefaultWriteConcern().wMode == b.getDefaultWriteConcern().wMode;
}
- TEST(ReplicaSetConfig, toBSONRoundTripAbility) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "localhost:12345")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 20))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_TRUE(configA == configB);
- }
-
- TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "asdf"
- << "version" << 9
- << "members" << BSON_ARRAY(
- BSON("_id" << 0
- << "host" << "localhost:12345"
- << "arbiterOnly" << true
- << "votes" << 1
- ) <<
- BSON("_id" << 3
- << "host" << "localhost:3828"
- << "arbiterOnly" << false
- << "hidden" << true
- << "buildIndexes" << false
- << "priority" << 0
- << "slaveDelay" << 17
- << "votes" << 0
- << "tags" << BSON("coast" << "east" << "ssd" << "true")
- ) <<
- BSON("_id" << 2
- << "host" << "foo.com:3828"
- << "priority" << 9
- << "votes" << 0
- << "tags" << BSON("coast" << "west" << "hdd" << "true")
- ))
- << "settings" << BSON("heartbeatTimeoutSecs" << 20
-                                      << "chainingAllowed" << true
- << "getLastErrorDefaults" << BSON("w" << "majority")
- << "getLastErrorModes" << BSON(
- "disks" << BSON("ssd" << 1 << "hdd" << 1)
- << "coasts" << BSON("coast" << 2)))
- )));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_TRUE(configA == configB);
- }
-
- TEST(ReplicaSetConfig, toBSONRoundTripAbilityInvalid) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << ""
- << "version" << -3
- << "members" << BSON_ARRAY(
- BSON("_id" << 0
- << "host" << "localhost:12345"
- << "arbiterOnly" << true
- << "votes" << 0
- ) <<
- BSON("_id" << 0
- << "host" << "localhost:3828"
- << "arbiterOnly" << false
- << "buildIndexes" << false
- << "priority" << 2
- ) <<
- BSON("_id" << 2
- << "host" << "localhost:3828"
- << "priority" << 9
- << "votes" << 0
- ))
- << "settings" << BSON("heartbeatTimeoutSecs" << -20))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_NOT_OK(configA.validate());
- ASSERT_NOT_OK(configB.validate());
- ASSERT_TRUE(configA == configB);
- }
-
- TEST(ReplicaSetConfig, CheckIfWriteConcernCanBeSatisfied) {
- ReplicaSetConfig configA;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "node0" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA1")) <<
- BSON("_id" << 1 <<
- "host" << "node1" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA2")) <<
- BSON("_id" << 2 <<
- "host" << "node2" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA3")) <<
- BSON("_id" << 3 <<
- "host" << "node3" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU1")) <<
- BSON("_id" << 4 <<
- "host" << "node4" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU2")) <<
- BSON("_id" << 5 <<
- "host" << "node5" <<
- "arbiterOnly" << true)) <<
- "settings" << BSON("getLastErrorModes" <<
- BSON("valid" << BSON("dc" << 2 << "rack" << 3) <<
- "invalidNotEnoughValues" << BSON("dc" << 3) <<
- "invalidNotEnoughNodes" << BSON("rack" << 6))))));
-
- WriteConcernOptions validNumberWC;
- validNumberWC.wNumNodes = 5;
- ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validNumberWC));
-
- WriteConcernOptions invalidNumberWC;
- invalidNumberWC.wNumNodes = 6;
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(invalidNumberWC));
-
- WriteConcernOptions majorityWC;
- majorityWC.wMode = "majority";
- ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(majorityWC));
-
- WriteConcernOptions validModeWC;
- validModeWC.wMode = "valid";
- ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validModeWC));
-
- WriteConcernOptions fakeModeWC;
- fakeModeWC.wMode = "fake";
- ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(fakeModeWC));
-
- WriteConcernOptions invalidModeNotEnoughValuesWC;
- invalidModeNotEnoughValuesWC.wMode = "invalidNotEnoughValues";
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughValuesWC));
-
- WriteConcernOptions invalidModeNotEnoughNodesWC;
- invalidModeNotEnoughNodesWC.wMode = "invalidNotEnoughNodes";
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
- configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughNodesWC));
- }
-
- TEST(ReplicaSetConfig, CheckMaximumNodesOkay) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node0") <<
- BSON("_id" << 1 << "host" << "node1") <<
- BSON("_id" << 2 << "host" << "node2") <<
- BSON("_id" << 3 << "host" << "node3") <<
- BSON("_id" << 4 << "host" << "node4") <<
- BSON("_id" << 5 << "host" << "node5") <<
- BSON("_id" << 6 << "host" << "node6") <<
- BSON("_id" << 7 << "host" << "node7" << "votes" << 0) <<
- BSON("_id" << 8 << "host" << "node8" << "votes" << 0) <<
- BSON("_id" << 9 << "host" << "node9" << "votes" << 0) <<
- BSON("_id" << 10 << "host" << "node10" << "votes" << 0) <<
- BSON("_id" << 11 << "host" << "node11" << "votes" << 0) <<
- BSON("_id" << 12 << "host" << "node12" << "votes" << 0) <<
- BSON("_id" << 13 << "host" << "node13" << "votes" << 0) <<
- BSON("_id" << 14 << "host" << "node14" << "votes" << 0) <<
- BSON("_id" << 15 << "host" << "node15" << "votes" << 0) <<
- BSON("_id" << 16 << "host" << "node16" << "votes" << 0) <<
- BSON("_id" << 17 << "host" << "node17" << "votes" << 0) <<
- BSON("_id" << 18 << "host" << "node18" << "votes" << 0) <<
- BSON("_id" << 19 << "host" << "node19" << "votes" << 0) <<
- BSON("_id" << 20 << "host" << "node20" << "votes" << 0) <<
- BSON("_id" << 21 << "host" << "node21" << "votes" << 0) <<
- BSON("_id" << 22 << "host" << "node22" << "votes" << 0) <<
- BSON("_id" << 23 << "host" << "node23" << "votes" << 0) <<
- BSON("_id" << 24 << "host" << "node24" << "votes" << 0) <<
- BSON("_id" << 25 << "host" << "node25" << "votes" << 0) <<
- BSON("_id" << 26 << "host" << "node26" << "votes" << 0) <<
- BSON("_id" << 27 << "host" << "node27" << "votes" << 0) <<
- BSON("_id" << 28 << "host" << "node28" << "votes" << 0) <<
- BSON("_id" << 29 << "host" << "node29" << "votes" << 0) <<
- BSON("_id" << 30 << "host" << "node30" << "votes" << 0) <<
- BSON("_id" << 31 << "host" << "node31" << "votes" << 0) <<
- BSON("_id" << 32 << "host" << "node32" << "votes" << 0) <<
- BSON("_id" << 33 << "host" << "node33" << "votes" << 0) <<
- BSON("_id" << 34 << "host" << "node34" << "votes" << 0) <<
- BSON("_id" << 35 << "host" << "node35" << "votes" << 0) <<
- BSON("_id" << 36 << "host" << "node36" << "votes" << 0) <<
- BSON("_id" << 37 << "host" << "node37" << "votes" << 0) <<
- BSON("_id" << 38 << "host" << "node38" << "votes" << 0) <<
- BSON("_id" << 39 << "host" << "node39" << "votes" << 0) <<
- BSON("_id" << 40 << "host" << "node40" << "votes" << 0) <<
- BSON("_id" << 41 << "host" << "node41" << "votes" << 0) <<
- BSON("_id" << 42 << "host" << "node42" << "votes" << 0) <<
- BSON("_id" << 43 << "host" << "node43" << "votes" << 0) <<
- BSON("_id" << 44 << "host" << "node44" << "votes" << 0) <<
- BSON("_id" << 45 << "host" << "node45" << "votes" << 0) <<
- BSON("_id" << 46 << "host" << "node46" << "votes" << 0) <<
- BSON("_id" << 47 << "host" << "node47" << "votes" << 0) <<
- BSON("_id" << 48 << "host" << "node48" << "votes" << 0) <<
- BSON("_id" << 49 << "host" << "node49" << "votes" << 0)))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_OK(configA.validate());
- ASSERT_OK(configB.validate());
- ASSERT_TRUE(configA == configB);
+    // Compare members: every member of a must equal some member of b; the equal
+    // member counts checked below make this a set comparison.
+ for (ReplicaSetConfig::MemberIterator memA = a.membersBegin(); memA != a.membersEnd(); memA++) {
+ bool same = false;
+ for (ReplicaSetConfig::MemberIterator memB = b.membersBegin(); memB != b.membersEnd();
+ memB++) {
+ if (*memA == *memB) {
+ same = true;
+ break;
+ }
+ }
+ if (!same) {
+ return false;
+ }
}
- TEST(ReplicaSetConfig, CheckBeyondMaximumNodesFailsValidate) {
- ReplicaSetConfig configA;
- ReplicaSetConfig configB;
- ASSERT_OK(configA.initialize(
- BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node0") <<
- BSON("_id" << 1 << "host" << "node1") <<
- BSON("_id" << 2 << "host" << "node2") <<
- BSON("_id" << 3 << "host" << "node3") <<
- BSON("_id" << 4 << "host" << "node4") <<
- BSON("_id" << 5 << "host" << "node5") <<
- BSON("_id" << 6 << "host" << "node6") <<
- BSON("_id" << 7 << "host" << "node7" << "votes" << 0) <<
- BSON("_id" << 8 << "host" << "node8" << "votes" << 0) <<
- BSON("_id" << 9 << "host" << "node9" << "votes" << 0) <<
- BSON("_id" << 10 << "host" << "node10" << "votes" << 0) <<
- BSON("_id" << 11 << "host" << "node11" << "votes" << 0) <<
- BSON("_id" << 12 << "host" << "node12" << "votes" << 0) <<
- BSON("_id" << 13 << "host" << "node13" << "votes" << 0) <<
- BSON("_id" << 14 << "host" << "node14" << "votes" << 0) <<
- BSON("_id" << 15 << "host" << "node15" << "votes" << 0) <<
- BSON("_id" << 16 << "host" << "node16" << "votes" << 0) <<
- BSON("_id" << 17 << "host" << "node17" << "votes" << 0) <<
- BSON("_id" << 18 << "host" << "node18" << "votes" << 0) <<
- BSON("_id" << 19 << "host" << "node19" << "votes" << 0) <<
- BSON("_id" << 20 << "host" << "node20" << "votes" << 0) <<
- BSON("_id" << 21 << "host" << "node21" << "votes" << 0) <<
- BSON("_id" << 22 << "host" << "node22" << "votes" << 0) <<
- BSON("_id" << 23 << "host" << "node23" << "votes" << 0) <<
- BSON("_id" << 24 << "host" << "node24" << "votes" << 0) <<
- BSON("_id" << 25 << "host" << "node25" << "votes" << 0) <<
- BSON("_id" << 26 << "host" << "node26" << "votes" << 0) <<
- BSON("_id" << 27 << "host" << "node27" << "votes" << 0) <<
- BSON("_id" << 28 << "host" << "node28" << "votes" << 0) <<
- BSON("_id" << 29 << "host" << "node29" << "votes" << 0) <<
- BSON("_id" << 30 << "host" << "node30" << "votes" << 0) <<
- BSON("_id" << 31 << "host" << "node31" << "votes" << 0) <<
- BSON("_id" << 32 << "host" << "node32" << "votes" << 0) <<
- BSON("_id" << 33 << "host" << "node33" << "votes" << 0) <<
- BSON("_id" << 34 << "host" << "node34" << "votes" << 0) <<
- BSON("_id" << 35 << "host" << "node35" << "votes" << 0) <<
- BSON("_id" << 36 << "host" << "node36" << "votes" << 0) <<
- BSON("_id" << 37 << "host" << "node37" << "votes" << 0) <<
- BSON("_id" << 38 << "host" << "node38" << "votes" << 0) <<
- BSON("_id" << 39 << "host" << "node39" << "votes" << 0) <<
- BSON("_id" << 40 << "host" << "node40" << "votes" << 0) <<
- BSON("_id" << 41 << "host" << "node41" << "votes" << 0) <<
- BSON("_id" << 42 << "host" << "node42" << "votes" << 0) <<
- BSON("_id" << 43 << "host" << "node43" << "votes" << 0) <<
- BSON("_id" << 44 << "host" << "node44" << "votes" << 0) <<
- BSON("_id" << 45 << "host" << "node45" << "votes" << 0) <<
- BSON("_id" << 46 << "host" << "node46" << "votes" << 0) <<
- BSON("_id" << 47 << "host" << "node47" << "votes" << 0) <<
- BSON("_id" << 48 << "host" << "node48" << "votes" << 0) <<
- BSON("_id" << 49 << "host" << "node49" << "votes" << 0) <<
- BSON("_id" << 50 << "host" << "node50" << "votes" << 0)))));
- ASSERT_OK(configB.initialize(configA.toBSON()));
- ASSERT_NOT_OK(configA.validate());
- ASSERT_NOT_OK(configB.validate());
- ASSERT_TRUE(configA == configB);
- }
+ // simple comparisons
+ return a.getReplSetName() == b.getReplSetName() &&
+ a.getConfigVersion() == b.getConfigVersion() && a.getNumMembers() == b.getNumMembers() &&
+ a.getHeartbeatTimeoutPeriod() == b.getHeartbeatTimeoutPeriod() &&
+ a.isChainingAllowed() == b.isChainingAllowed() &&
+ a.getDefaultWriteConcern().wNumNodes == b.getDefaultWriteConcern().wNumNodes &&
+ a.getDefaultWriteConcern().wMode == b.getDefaultWriteConcern().wMode;
+}
+
+TEST(ReplicaSetConfig, toBSONRoundTripAbility) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 20))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(BSON(
+ "_id"
+ << "asdf"
+ << "version" << 9 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "arbiterOnly" << true << "votes" << 1)
+ << BSON("_id" << 3 << "host"
+ << "localhost:3828"
+ << "arbiterOnly" << false << "hidden" << true << "buildIndexes"
+ << false << "priority" << 0 << "slaveDelay" << 17 << "votes"
+ << 0 << "tags" << BSON("coast"
+ << "east"
+ << "ssd"
+ << "true"))
+ << BSON("_id" << 2 << "host"
+ << "foo.com:3828"
+ << "priority" << 9 << "votes" << 0 << "tags"
+ << BSON("coast"
+ << "west"
+ << "hdd"
+ << "true"))) << "settings"
+        << BSON("heartbeatTimeoutSecs" << 20 << "chainingAllowed" << true << "getLastErrorDefaults"
+ << BSON("w"
+ << "majority") << "getLastErrorModes"
+ << BSON("disks" << BSON("ssd" << 1 << "hdd" << 1) << "coasts"
+ << BSON("coast" << 2))))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, toBSONRoundTripAbilityInvalid) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(
+ configA.initialize(BSON("_id"
+ << ""
+ << "version" << -3 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"
+ << "arbiterOnly" << true << "votes" << 0)
+ << BSON("_id" << 0 << "host"
+ << "localhost:3828"
+ << "arbiterOnly" << false
+ << "buildIndexes" << false << "priority"
+ << 2)
+ << BSON("_id" << 2 << "host"
+ << "localhost:3828"
+ << "priority" << 9 << "votes" << 0))
+ << "settings" << BSON("heartbeatTimeoutSecs" << -20))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_NOT_OK(configA.validate());
+ ASSERT_NOT_OK(configB.validate());
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, CheckIfWriteConcernCanBeSatisfied) {
+ ReplicaSetConfig configA;
+ ASSERT_OK(configA.initialize(BSON(
+ "_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA1"))
+ << BSON("_id" << 1 << "host"
+ << "node1"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA2"))
+ << BSON("_id" << 2 << "host"
+ << "node2"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA3"))
+ << BSON("_id" << 3 << "host"
+ << "node3"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU1"))
+ << BSON("_id" << 4 << "host"
+ << "node4"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU2"))
+ << BSON("_id" << 5 << "host"
+ << "node5"
+ << "arbiterOnly" << true))
+ << "settings" << BSON("getLastErrorModes"
+ << BSON("valid" << BSON("dc" << 2 << "rack" << 3)
+ << "invalidNotEnoughValues" << BSON("dc" << 3)
+ << "invalidNotEnoughNodes" << BSON("rack" << 6))))));
+
+ WriteConcernOptions validNumberWC;
+ validNumberWC.wNumNodes = 5;
+ ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validNumberWC));
+
+ WriteConcernOptions invalidNumberWC;
+ invalidNumberWC.wNumNodes = 6;
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(invalidNumberWC));
+
+ WriteConcernOptions majorityWC;
+ majorityWC.wMode = "majority";
+ ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(majorityWC));
+
+ WriteConcernOptions validModeWC;
+ validModeWC.wMode = "valid";
+ ASSERT_OK(configA.checkIfWriteConcernCanBeSatisfied(validModeWC));
+
+ WriteConcernOptions fakeModeWC;
+ fakeModeWC.wMode = "fake";
+ ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(fakeModeWC));
+
+ WriteConcernOptions invalidModeNotEnoughValuesWC;
+ invalidModeNotEnoughValuesWC.wMode = "invalidNotEnoughValues";
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughValuesWC));
+
+ WriteConcernOptions invalidModeNotEnoughNodesWC;
+ invalidModeNotEnoughNodesWC.wMode = "invalidNotEnoughNodes";
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern,
+ configA.checkIfWriteConcernCanBeSatisfied(invalidModeNotEnoughNodesWC));
+}
+
+TEST(ReplicaSetConfig, CheckMaximumNodesOkay) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0")
+ << BSON("_id" << 1 << "host"
+ << "node1") << BSON("_id" << 2 << "host"
+ << "node2")
+ << BSON("_id" << 3 << "host"
+ << "node3") << BSON("_id" << 4 << "host"
+ << "node4")
+ << BSON("_id" << 5 << "host"
+ << "node5") << BSON("_id" << 6 << "host"
+ << "node6")
+ << BSON("_id" << 7 << "host"
+ << "node7"
+ << "votes" << 0) << BSON("_id" << 8 << "host"
+ << "node8"
+ << "votes" << 0)
+ << BSON("_id" << 9 << "host"
+ << "node9"
+ << "votes" << 0) << BSON("_id" << 10 << "host"
+ << "node10"
+ << "votes" << 0)
+ << BSON("_id" << 11 << "host"
+ << "node11"
+ << "votes" << 0) << BSON("_id" << 12 << "host"
+ << "node12"
+ << "votes" << 0)
+ << BSON("_id" << 13 << "host"
+ << "node13"
+ << "votes" << 0) << BSON("_id" << 14 << "host"
+ << "node14"
+ << "votes" << 0)
+ << BSON("_id" << 15 << "host"
+ << "node15"
+ << "votes" << 0) << BSON("_id" << 16 << "host"
+ << "node16"
+ << "votes" << 0)
+ << BSON("_id" << 17 << "host"
+ << "node17"
+ << "votes" << 0) << BSON("_id" << 18 << "host"
+ << "node18"
+ << "votes" << 0)
+ << BSON("_id" << 19 << "host"
+ << "node19"
+ << "votes" << 0) << BSON("_id" << 20 << "host"
+ << "node20"
+ << "votes" << 0)
+ << BSON("_id" << 21 << "host"
+ << "node21"
+ << "votes" << 0) << BSON("_id" << 22 << "host"
+ << "node22"
+ << "votes" << 0)
+ << BSON("_id" << 23 << "host"
+ << "node23"
+ << "votes" << 0) << BSON("_id" << 24 << "host"
+ << "node24"
+ << "votes" << 0)
+ << BSON("_id" << 25 << "host"
+ << "node25"
+ << "votes" << 0) << BSON("_id" << 26 << "host"
+ << "node26"
+ << "votes" << 0)
+ << BSON("_id" << 27 << "host"
+ << "node27"
+ << "votes" << 0) << BSON("_id" << 28 << "host"
+ << "node28"
+ << "votes" << 0)
+ << BSON("_id" << 29 << "host"
+ << "node29"
+ << "votes" << 0) << BSON("_id" << 30 << "host"
+ << "node30"
+ << "votes" << 0)
+ << BSON("_id" << 31 << "host"
+ << "node31"
+ << "votes" << 0) << BSON("_id" << 32 << "host"
+ << "node32"
+ << "votes" << 0)
+ << BSON("_id" << 33 << "host"
+ << "node33"
+ << "votes" << 0) << BSON("_id" << 34 << "host"
+ << "node34"
+ << "votes" << 0)
+ << BSON("_id" << 35 << "host"
+ << "node35"
+ << "votes" << 0) << BSON("_id" << 36 << "host"
+ << "node36"
+ << "votes" << 0)
+ << BSON("_id" << 37 << "host"
+ << "node37"
+ << "votes" << 0) << BSON("_id" << 38 << "host"
+ << "node38"
+ << "votes" << 0)
+ << BSON("_id" << 39 << "host"
+ << "node39"
+ << "votes" << 0) << BSON("_id" << 40 << "host"
+ << "node40"
+ << "votes" << 0)
+ << BSON("_id" << 41 << "host"
+ << "node41"
+ << "votes" << 0) << BSON("_id" << 42 << "host"
+ << "node42"
+ << "votes" << 0)
+ << BSON("_id" << 43 << "host"
+ << "node43"
+ << "votes" << 0) << BSON("_id" << 44 << "host"
+ << "node44"
+ << "votes" << 0)
+ << BSON("_id" << 45 << "host"
+ << "node45"
+ << "votes" << 0) << BSON("_id" << 46 << "host"
+ << "node46"
+ << "votes" << 0)
+ << BSON("_id" << 47 << "host"
+ << "node47"
+ << "votes" << 0) << BSON("_id" << 48 << "host"
+ << "node48"
+ << "votes" << 0)
+ << BSON("_id" << 49 << "host"
+ << "node49"
+ << "votes" << 0)))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_OK(configA.validate());
+ ASSERT_OK(configB.validate());
+ ASSERT_TRUE(configA == configB);
+}
+
+TEST(ReplicaSetConfig, CheckBeyondMaximumNodesFailsValidate) {
+ ReplicaSetConfig configA;
+ ReplicaSetConfig configB;
+ ASSERT_OK(configA.initialize(
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0")
+ << BSON("_id" << 1 << "host"
+ << "node1") << BSON("_id" << 2 << "host"
+ << "node2")
+ << BSON("_id" << 3 << "host"
+ << "node3") << BSON("_id" << 4 << "host"
+ << "node4")
+ << BSON("_id" << 5 << "host"
+ << "node5") << BSON("_id" << 6 << "host"
+ << "node6")
+ << BSON("_id" << 7 << "host"
+ << "node7"
+ << "votes" << 0) << BSON("_id" << 8 << "host"
+ << "node8"
+ << "votes" << 0)
+ << BSON("_id" << 9 << "host"
+ << "node9"
+ << "votes" << 0) << BSON("_id" << 10 << "host"
+ << "node10"
+ << "votes" << 0)
+ << BSON("_id" << 11 << "host"
+ << "node11"
+ << "votes" << 0) << BSON("_id" << 12 << "host"
+ << "node12"
+ << "votes" << 0)
+ << BSON("_id" << 13 << "host"
+ << "node13"
+ << "votes" << 0) << BSON("_id" << 14 << "host"
+ << "node14"
+ << "votes" << 0)
+ << BSON("_id" << 15 << "host"
+ << "node15"
+ << "votes" << 0) << BSON("_id" << 16 << "host"
+ << "node16"
+ << "votes" << 0)
+ << BSON("_id" << 17 << "host"
+ << "node17"
+ << "votes" << 0) << BSON("_id" << 18 << "host"
+ << "node18"
+ << "votes" << 0)
+ << BSON("_id" << 19 << "host"
+ << "node19"
+ << "votes" << 0) << BSON("_id" << 20 << "host"
+ << "node20"
+ << "votes" << 0)
+ << BSON("_id" << 21 << "host"
+ << "node21"
+ << "votes" << 0) << BSON("_id" << 22 << "host"
+ << "node22"
+ << "votes" << 0)
+ << BSON("_id" << 23 << "host"
+ << "node23"
+ << "votes" << 0) << BSON("_id" << 24 << "host"
+ << "node24"
+ << "votes" << 0)
+ << BSON("_id" << 25 << "host"
+ << "node25"
+ << "votes" << 0) << BSON("_id" << 26 << "host"
+ << "node26"
+ << "votes" << 0)
+ << BSON("_id" << 27 << "host"
+ << "node27"
+ << "votes" << 0) << BSON("_id" << 28 << "host"
+ << "node28"
+ << "votes" << 0)
+ << BSON("_id" << 29 << "host"
+ << "node29"
+ << "votes" << 0) << BSON("_id" << 30 << "host"
+ << "node30"
+ << "votes" << 0)
+ << BSON("_id" << 31 << "host"
+ << "node31"
+ << "votes" << 0) << BSON("_id" << 32 << "host"
+ << "node32"
+ << "votes" << 0)
+ << BSON("_id" << 33 << "host"
+ << "node33"
+ << "votes" << 0) << BSON("_id" << 34 << "host"
+ << "node34"
+ << "votes" << 0)
+ << BSON("_id" << 35 << "host"
+ << "node35"
+ << "votes" << 0) << BSON("_id" << 36 << "host"
+ << "node36"
+ << "votes" << 0)
+ << BSON("_id" << 37 << "host"
+ << "node37"
+ << "votes" << 0) << BSON("_id" << 38 << "host"
+ << "node38"
+ << "votes" << 0)
+ << BSON("_id" << 39 << "host"
+ << "node39"
+ << "votes" << 0) << BSON("_id" << 40 << "host"
+ << "node40"
+ << "votes" << 0)
+ << BSON("_id" << 41 << "host"
+ << "node41"
+ << "votes" << 0) << BSON("_id" << 42 << "host"
+ << "node42"
+ << "votes" << 0)
+ << BSON("_id" << 43 << "host"
+ << "node43"
+ << "votes" << 0) << BSON("_id" << 44 << "host"
+ << "node44"
+ << "votes" << 0)
+ << BSON("_id" << 45 << "host"
+ << "node45"
+ << "votes" << 0) << BSON("_id" << 46 << "host"
+ << "node46"
+ << "votes" << 0)
+ << BSON("_id" << 47 << "host"
+ << "node47"
+ << "votes" << 0) << BSON("_id" << 48 << "host"
+ << "node48"
+ << "votes" << 0)
+ << BSON("_id" << 49 << "host"
+ << "node49"
+ << "votes" << 0) << BSON("_id" << 50 << "host"
+ << "node50"
+ << "votes" << 0)))));
+ ASSERT_OK(configB.initialize(configA.toBSON()));
+ ASSERT_NOT_OK(configA.validate());
+ ASSERT_NOT_OK(configB.validate());
+ ASSERT_TRUE(configA == configB);
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replica_set_tag.cpp b/src/mongo/db/repl/replica_set_tag.cpp
index 05363498fcd..631b097abdd 100644
--- a/src/mongo/db/repl/replica_set_tag.cpp
+++ b/src/mongo/db/repl/replica_set_tag.cpp
@@ -41,206 +41,198 @@
namespace mongo {
namespace repl {
- bool ReplicaSetTag::operator==(const ReplicaSetTag& other) const {
- return _keyIndex == other._keyIndex && _valueIndex == other._valueIndex;
- }
-
- bool ReplicaSetTag::operator!=(const ReplicaSetTag& other) const {
- return !(*this == other);
- }
-
- void ReplicaSetTagPattern::addTagCountConstraint(int32_t keyIndex, int32_t minCount) {
- const std::vector<TagCountConstraint>::iterator iter = std::find_if(
- _constraints.begin(),
- _constraints.end(),
- stdx::bind(std::equal_to<int32_t>(),
- keyIndex,
- stdx::bind(&TagCountConstraint::getKeyIndex, stdx::placeholders::_1)));
- if (iter == _constraints.end()) {
- _constraints.push_back(TagCountConstraint(keyIndex, minCount));
- }
- else if (iter->getMinCount() < minCount) {
- *iter = TagCountConstraint(keyIndex, minCount);
- }
- }
-
- ReplicaSetTagPattern::TagCountConstraint::TagCountConstraint(int32_t keyIndex,
- int32_t minCount) :
- _keyIndex(keyIndex), _minCount(minCount) {}
-
- ReplicaSetTagMatch::ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern) {
- for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
- iter != pattern.constraintsEnd();
- ++iter) {
-
- _boundTagValues.push_back(BoundTagValue(*iter));
- }
- }
-
- bool ReplicaSetTagMatch::update(const ReplicaSetTag& tag) {
- const std::vector<BoundTagValue>::iterator iter = std::find_if(
- _boundTagValues.begin(),
- _boundTagValues.end(),
- stdx::bind(std::equal_to<int32_t>(), tag.getKeyIndex(), stdx::bind(
- &BoundTagValue::getKeyIndex, stdx::placeholders::_1)));
- if (iter != _boundTagValues.end()) {
- if (!sequenceContains(iter->boundValues, tag.getValueIndex())) {
- iter->boundValues.push_back(tag.getValueIndex());
- }
+bool ReplicaSetTag::operator==(const ReplicaSetTag& other) const {
+ return _keyIndex == other._keyIndex && _valueIndex == other._valueIndex;
+}
+
+bool ReplicaSetTag::operator!=(const ReplicaSetTag& other) const {
+ return !(*this == other);
+}
+
+void ReplicaSetTagPattern::addTagCountConstraint(int32_t keyIndex, int32_t minCount) {
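+    // Keep at most one constraint per key: add a new one if the key is unseen,
+    // otherwise retain the stricter (larger) minimum count.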
+ const std::vector<TagCountConstraint>::iterator iter = std::find_if(
+ _constraints.begin(),
+ _constraints.end(),
+ stdx::bind(std::equal_to<int32_t>(),
+ keyIndex,
+ stdx::bind(&TagCountConstraint::getKeyIndex, stdx::placeholders::_1)));
+ if (iter == _constraints.end()) {
+ _constraints.push_back(TagCountConstraint(keyIndex, minCount));
+ } else if (iter->getMinCount() < minCount) {
+ *iter = TagCountConstraint(keyIndex, minCount);
+ }
+}
+
+ReplicaSetTagPattern::TagCountConstraint::TagCountConstraint(int32_t keyIndex, int32_t minCount)
+ : _keyIndex(keyIndex), _minCount(minCount) {}
+
+ReplicaSetTagMatch::ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern) {
+ for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
+ iter != pattern.constraintsEnd();
+ ++iter) {
+ _boundTagValues.push_back(BoundTagValue(*iter));
+ }
+}
+
+bool ReplicaSetTagMatch::update(const ReplicaSetTag& tag) {
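+    // Record the tag's value under the binding for its key, if the pattern
+    // constrains that key, then report whether every constraint is now met.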
+ const std::vector<BoundTagValue>::iterator iter =
+ std::find_if(_boundTagValues.begin(),
+ _boundTagValues.end(),
+ stdx::bind(std::equal_to<int32_t>(),
+ tag.getKeyIndex(),
+ stdx::bind(&BoundTagValue::getKeyIndex, stdx::placeholders::_1)));
+ if (iter != _boundTagValues.end()) {
+ if (!sequenceContains(iter->boundValues, tag.getValueIndex())) {
+ iter->boundValues.push_back(tag.getValueIndex());
}
- return isSatisfied();
}
-
- bool ReplicaSetTagMatch::isSatisfied() const {
- const std::vector<BoundTagValue>::const_iterator iter = std::find_if(
- _boundTagValues.begin(),
- _boundTagValues.end(),
- stdx::bind(std::logical_not<bool>(),
- stdx::bind(&BoundTagValue::isSatisfied, stdx::placeholders::_1)));
- return iter == _boundTagValues.end();
- }
-
- bool ReplicaSetTagMatch::BoundTagValue::isSatisfied() const {
- return constraint.getMinCount() <= int32_t(boundValues.size());
- }
-
- ReplicaSetTag ReplicaSetTagConfig::makeTag(const StringData& key, const StringData& value) {
- int32_t keyIndex = _findKeyIndex(key);
- if (size_t(keyIndex) == _tagData.size()) {
- _tagData.push_back(make_pair(key.toString(), ValueVector()));
- }
- ValueVector& values = _tagData[keyIndex].second;
- for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
- if (values[valueIndex] != value)
- continue;
- return ReplicaSetTag(keyIndex, int32_t(valueIndex));
- }
- values.push_back(value.toString());
- return ReplicaSetTag(keyIndex, int32_t(values.size()) - 1);
- }
-
- ReplicaSetTag ReplicaSetTagConfig::findTag(const StringData& key,
- const StringData& value) const {
- int32_t keyIndex = _findKeyIndex(key);
- if (size_t(keyIndex) == _tagData.size())
- return ReplicaSetTag(-1, -1);
- const ValueVector& values = _tagData[keyIndex].second;
- for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
- if (values[valueIndex] == value) {
- return ReplicaSetTag(keyIndex, int32_t(valueIndex));
- }
- }
+ return isSatisfied();
+}
+
+bool ReplicaSetTagMatch::isSatisfied() const {
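+    // The match is satisfied only when no binding is still below its minimum count.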
+ const std::vector<BoundTagValue>::const_iterator iter =
+ std::find_if(_boundTagValues.begin(),
+ _boundTagValues.end(),
+ stdx::bind(std::logical_not<bool>(),
+ stdx::bind(&BoundTagValue::isSatisfied, stdx::placeholders::_1)));
+ return iter == _boundTagValues.end();
+}
+
+bool ReplicaSetTagMatch::BoundTagValue::isSatisfied() const {
+ return constraint.getMinCount() <= int32_t(boundValues.size());
+}
+
+ReplicaSetTag ReplicaSetTagConfig::makeTag(const StringData& key, const StringData& value) {
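+    // Intern the key, appending a new entry if it has not been seen, then intern
+    // the value under that key and return a tag holding the two indexes.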
+ int32_t keyIndex = _findKeyIndex(key);
+ if (size_t(keyIndex) == _tagData.size()) {
+ _tagData.push_back(make_pair(key.toString(), ValueVector()));
+ }
+ ValueVector& values = _tagData[keyIndex].second;
+ for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
+ if (values[valueIndex] != value)
+ continue;
+ return ReplicaSetTag(keyIndex, int32_t(valueIndex));
+ }
+ values.push_back(value.toString());
+ return ReplicaSetTag(keyIndex, int32_t(values.size()) - 1);
+}
+
+ReplicaSetTag ReplicaSetTagConfig::findTag(const StringData& key, const StringData& value) const {
+ int32_t keyIndex = _findKeyIndex(key);
+ if (size_t(keyIndex) == _tagData.size())
return ReplicaSetTag(-1, -1);
- }
-
- ReplicaSetTagPattern ReplicaSetTagConfig::makePattern() const {
- return ReplicaSetTagPattern();
- }
-
- Status ReplicaSetTagConfig::addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
- const StringData& tagKey,
- int32_t minCount) const {
- int32_t keyIndex = _findKeyIndex(tagKey);
- if (size_t(keyIndex) == _tagData.size()) {
- return Status(ErrorCodes::NoSuchKey,
- str::stream() << "No replica set tag key " << tagKey << " in config");
- }
- pattern->addTagCountConstraint(keyIndex, minCount);
- return Status::OK();
- }
-
- int32_t ReplicaSetTagConfig::_findKeyIndex(const StringData& key) const {
- size_t i;
- for (i = 0; i < _tagData.size(); ++i) {
- if (_tagData[i].first == key) {
- break;
- }
- }
- return int32_t(i);
- }
-
- std::string ReplicaSetTagConfig::getTagKey(const ReplicaSetTag& tag) const {
- invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
- return _tagData[tag.getKeyIndex()].first;
- }
-
- std::string ReplicaSetTagConfig::getTagValue(const ReplicaSetTag& tag) const {
- invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
- const ValueVector& values = _tagData[tag.getKeyIndex()].second;
- invariant(tag.getValueIndex() >= 0 && size_t(tag.getValueIndex()) < values.size());
- return values[tag.getValueIndex()];
- }
-
- void ReplicaSetTagConfig::put(const ReplicaSetTag& tag, std::ostream& os) const {
- BSONObjBuilder builder;
- _appendTagKey(tag.getKeyIndex(), &builder);
- _appendTagValue(tag.getKeyIndex(), tag.getValueIndex(), &builder);
- os << builder.done();
- }
-
- void ReplicaSetTagConfig::put(const ReplicaSetTagPattern& pattern, std::ostream& os) const {
- BSONObjBuilder builder;
- BSONArrayBuilder allConstraintsBuilder(builder.subarrayStart("constraints"));
- for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
- iter != pattern.constraintsEnd();
- ++iter) {
-
- BSONObjBuilder constraintBuilder(allConstraintsBuilder.subobjStart());
- _appendConstraint(*iter, &constraintBuilder);
+ const ValueVector& values = _tagData[keyIndex].second;
+ for (size_t valueIndex = 0; valueIndex < values.size(); ++valueIndex) {
+ if (values[valueIndex] == value) {
+ return ReplicaSetTag(keyIndex, int32_t(valueIndex));
}
- allConstraintsBuilder.doneFast();
- os << builder.done();
}
-
- void ReplicaSetTagConfig::put(const ReplicaSetTagMatch& matcher, std::ostream& os) const {
- BSONObjBuilder builder;
- BSONArrayBuilder allBindingsBuilder(builder.subarrayStart("bindings"));
- for (size_t i = 0; i < matcher._boundTagValues.size(); ++i) {
-
- BSONObjBuilder bindingBuilder(allBindingsBuilder.subobjStart());
- _appendConstraint(matcher._boundTagValues[i].constraint, &bindingBuilder);
- BSONArrayBuilder boundValues(bindingBuilder.subarrayStart("boundValues"));
- for (size_t j = 0; j < matcher._boundTagValues[i].boundValues.size(); ++j) {
- BSONObjBuilder bvb(boundValues.subobjStart());
- _appendTagValue(matcher._boundTagValues[i].constraint.getKeyIndex(),
- matcher._boundTagValues[i].boundValues[j],
- &bvb);
- }
+ return ReplicaSetTag(-1, -1);
+}
+
+ReplicaSetTagPattern ReplicaSetTagConfig::makePattern() const {
+ return ReplicaSetTagPattern();
+}
+
+Status ReplicaSetTagConfig::addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
+ const StringData& tagKey,
+ int32_t minCount) const {
+ int32_t keyIndex = _findKeyIndex(tagKey);
+ if (size_t(keyIndex) == _tagData.size()) {
+ return Status(ErrorCodes::NoSuchKey,
+ str::stream() << "No replica set tag key " << tagKey << " in config");
+ }
+ pattern->addTagCountConstraint(keyIndex, minCount);
+ return Status::OK();
+}
+
+int32_t ReplicaSetTagConfig::_findKeyIndex(const StringData& key) const {
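+    // Linear scan; a return value equal to _tagData.size() means the key is
+    // absent, which callers treat as "not found".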
+ size_t i;
+ for (i = 0; i < _tagData.size(); ++i) {
+ if (_tagData[i].first == key) {
+ break;
}
- allBindingsBuilder.doneFast();
- os << builder.done();
}
-
- void ReplicaSetTagConfig::_appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const {
- if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
- builder->append("tagKey", int(keyIndex));
- }
- else {
- builder->append("tagKey", _tagData[keyIndex].first);
+ return int32_t(i);
+}
+
+std::string ReplicaSetTagConfig::getTagKey(const ReplicaSetTag& tag) const {
+ invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
+ return _tagData[tag.getKeyIndex()].first;
+}
+
+std::string ReplicaSetTagConfig::getTagValue(const ReplicaSetTag& tag) const {
+ invariant(tag.isValid() && size_t(tag.getKeyIndex()) < _tagData.size());
+ const ValueVector& values = _tagData[tag.getKeyIndex()].second;
+ invariant(tag.getValueIndex() >= 0 && size_t(tag.getValueIndex()) < values.size());
+ return values[tag.getValueIndex()];
+}
+
+void ReplicaSetTagConfig::put(const ReplicaSetTag& tag, std::ostream& os) const {
+ BSONObjBuilder builder;
+ _appendTagKey(tag.getKeyIndex(), &builder);
+ _appendTagValue(tag.getKeyIndex(), tag.getValueIndex(), &builder);
+ os << builder.done();
+}
+
+void ReplicaSetTagConfig::put(const ReplicaSetTagPattern& pattern, std::ostream& os) const {
+ BSONObjBuilder builder;
+ BSONArrayBuilder allConstraintsBuilder(builder.subarrayStart("constraints"));
+ for (ReplicaSetTagPattern::ConstraintIterator iter = pattern.constraintsBegin();
+ iter != pattern.constraintsEnd();
+ ++iter) {
+ BSONObjBuilder constraintBuilder(allConstraintsBuilder.subobjStart());
+ _appendConstraint(*iter, &constraintBuilder);
+ }
+ allConstraintsBuilder.doneFast();
+ os << builder.done();
+}
+
+void ReplicaSetTagConfig::put(const ReplicaSetTagMatch& matcher, std::ostream& os) const {
+ BSONObjBuilder builder;
+ BSONArrayBuilder allBindingsBuilder(builder.subarrayStart("bindings"));
+ for (size_t i = 0; i < matcher._boundTagValues.size(); ++i) {
+ BSONObjBuilder bindingBuilder(allBindingsBuilder.subobjStart());
+ _appendConstraint(matcher._boundTagValues[i].constraint, &bindingBuilder);
+ BSONArrayBuilder boundValues(bindingBuilder.subarrayStart("boundValues"));
+ for (size_t j = 0; j < matcher._boundTagValues[i].boundValues.size(); ++j) {
+ BSONObjBuilder bvb(boundValues.subobjStart());
+ _appendTagValue(matcher._boundTagValues[i].constraint.getKeyIndex(),
+ matcher._boundTagValues[i].boundValues[j],
+ &bvb);
}
}
-
- void ReplicaSetTagConfig::_appendTagValue(int32_t keyIndex,
- int32_t valueIndex,
- BSONObjBuilder* builder) const {
- if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
- builder->append("tagValue", valueIndex);
- return;
- }
- KeyValueVector::const_reference keyEntry = _tagData[keyIndex];
-            if (valueIndex < 0 || size_t(valueIndex) >= keyEntry.second.size()) {
-                builder->append("tagValue", valueIndex);
-                return;
-            }
-            builder->append("tagValue", keyEntry.second[valueIndex]);
- }
-
- void ReplicaSetTagConfig::_appendConstraint(
- const ReplicaSetTagPattern::TagCountConstraint& constraint,
- BSONObjBuilder* builder) const {
-
- _appendTagKey(constraint.getKeyIndex(), builder);
- builder->append("minCount", int(constraint.getMinCount()));
- }
+ allBindingsBuilder.doneFast();
+ os << builder.done();
+}
+
+void ReplicaSetTagConfig::_appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const {
+ if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
+ builder->append("tagKey", int(keyIndex));
+ } else {
+ builder->append("tagKey", _tagData[keyIndex].first);
+ }
+}
+
+void ReplicaSetTagConfig::_appendTagValue(int32_t keyIndex,
+ int32_t valueIndex,
+ BSONObjBuilder* builder) const {
+ if (keyIndex < 0 || size_t(keyIndex) >= _tagData.size()) {
+ builder->append("tagValue", valueIndex);
+ return;
+ }
+ KeyValueVector::const_reference keyEntry = _tagData[keyIndex];
+    if (valueIndex < 0 || size_t(valueIndex) >= keyEntry.second.size()) {
+        // Out-of-range value indexes are rendered numerically.
+        builder->append("tagValue", valueIndex);
+        return;
+    }
+    builder->append("tagValue", keyEntry.second[valueIndex]);
+}
+
+void ReplicaSetTagConfig::_appendConstraint(
+ const ReplicaSetTagPattern::TagCountConstraint& constraint, BSONObjBuilder* builder) const {
+ _appendTagKey(constraint.getKeyIndex(), builder);
+ builder->append("minCount", int(constraint.getMinCount()));
+}
} // namespace repl
diff --git a/src/mongo/db/repl/replica_set_tag.h b/src/mongo/db/repl/replica_set_tag.h
index 3f4a2022baf..01c70ed0d2f 100644
--- a/src/mongo/db/repl/replica_set_tag.h
+++ b/src/mongo/db/repl/replica_set_tag.h
@@ -38,265 +38,282 @@
#include "mongo/platform/cstdint.h"
namespace mongo {
- class BSONObjBuilder;
+class BSONObjBuilder;
namespace repl {
+/**
+ * Representation of a tag on a replica set node.
+ *
+ * Tags are only meaningful when used with a copy of the ReplicaSetTagConfig that
+ * created them.
+ */
+class ReplicaSetTag {
+public:
/**
- * Representation of a tag on a replica set node.
- *
- * Tags are only meaningful when used with a copy of the ReplicaSetTagConfig that
- * created them.
+ * Default constructor, produces an uninitialized tag.
*/
- class ReplicaSetTag {
- public:
- /**
- * Default constructor, produces an uninitialized tag.
- */
- ReplicaSetTag() {}
-
- /**
- * Constructs a tag with the given key and value indexes.
- * Do not call directly; used by ReplicaSetTagConfig.
- */
- ReplicaSetTag(int32_t keyIndex, int32_t valueIndex) :
- _keyIndex(keyIndex),
- _valueIndex(valueIndex) {}
-
- /**
- * Returns true if the tag is not explicitly invalid.
- */
- bool isValid() const { return _keyIndex >= 0; }
-
- /**
- * Gets the key index of the tag.
- */
- int32_t getKeyIndex() const { return _keyIndex; }
-
- /**
- * Gets the value index of the tag.
- */
- int32_t getValueIndex() const { return _valueIndex; }
-
- /**
- * Compares two tags from the *same* ReplicaSetTagConfig for equality.
- */
- bool operator==(const ReplicaSetTag& other) const;
-
- /**
- * Compares two tags from the *same* ReplicaSetTagConfig for inequality.
- */
- bool operator!=(const ReplicaSetTag& other) const;
+ ReplicaSetTag() {}
- private:
- // The index of the key in the associated ReplicaSetTagConfig.
- int32_t _keyIndex;
+ /**
+ * Constructs a tag with the given key and value indexes.
+ * Do not call directly; used by ReplicaSetTagConfig.
+ */
+ ReplicaSetTag(int32_t keyIndex, int32_t valueIndex)
+ : _keyIndex(keyIndex), _valueIndex(valueIndex) {}
- // The index of the value in the entry for the key in the associated ReplicaSetTagConfig.
- int32_t _valueIndex;
- };
+ /**
+ * Returns true if the tag is not explicitly invalid.
+ */
+ bool isValid() const {
+ return _keyIndex >= 0;
+ }
+
+ /**
+ * Gets the key index of the tag.
+ */
+ int32_t getKeyIndex() const {
+ return _keyIndex;
+ }
+
+ /**
+ * Gets the value index of the tag.
+ */
+ int32_t getValueIndex() const {
+ return _valueIndex;
+ }
+
+ /**
+ * Compares two tags from the *same* ReplicaSetTagConfig for equality.
+ */
+ bool operator==(const ReplicaSetTag& other) const;
+
+ /**
+ * Compares two tags from the *same* ReplicaSetTagConfig for inequality.
+ */
+ bool operator!=(const ReplicaSetTag& other) const;
+
+private:
+ // The index of the key in the associated ReplicaSetTagConfig.
+ int32_t _keyIndex;
+
+ // The index of the value in the entry for the key in the associated ReplicaSetTagConfig.
+ int32_t _valueIndex;
+};
+/**
+ * Representation of a tag matching pattern, like { "dc": 2, "rack": 3 }, of the form
+ * used for tagged replica set writes.
+ */
+class ReplicaSetTagPattern {
+public:
/**
- * Representation of a tag matching pattern, like { "dc": 2, "rack": 3 }, of the form
- * used for tagged replica set writes.
+ * Representation of a single tag's minimum count constraint in a pattern.
*/
- class ReplicaSetTagPattern {
+ class TagCountConstraint {
public:
- /**
- * Representation of a single tag's minimum count constraint in a pattern.
- */
- class TagCountConstraint {
- public:
- TagCountConstraint() {}
- TagCountConstraint(int32_t keyIndex, int32_t minCount);
- int32_t getKeyIndex() const { return _keyIndex; }
- int32_t getMinCount() const { return _minCount; }
- private:
- int32_t _keyIndex;
- int32_t _minCount;
- };
-
- typedef std::vector<TagCountConstraint>::const_iterator ConstraintIterator;
-
- /**
- * Adds a count constraint for the given key index with the given count.
- *
- * Do not call directly, but use the addTagCountConstraintToPattern method
- * of ReplicaSetTagConfig.
- */
- void addTagCountConstraint(int32_t keyIndex, int32_t minCount);
-
- /**
- * Gets the begin iterator over the constraints in this pattern.
- */
- ConstraintIterator constraintsBegin() const { return _constraints.begin(); }
-
- /**
- * Gets the end iterator over the constraints in this pattern.
- */
- ConstraintIterator constraintsEnd() const { return _constraints.end(); }
+ TagCountConstraint() {}
+ TagCountConstraint(int32_t keyIndex, int32_t minCount);
+ int32_t getKeyIndex() const {
+ return _keyIndex;
+ }
+ int32_t getMinCount() const {
+ return _minCount;
+ }
private:
- std::vector<TagCountConstraint> _constraints;
+ int32_t _keyIndex;
+ int32_t _minCount;
};
+ typedef std::vector<TagCountConstraint>::const_iterator ConstraintIterator;
+
/**
- * State object for progressive detection of ReplicaSetTagPattern constraint satisfaction.
+ * Adds a count constraint for the given key index with the given count.
*
- * This is an abstraction of the replica set write tag satisfaction problem.
+ * Do not call directly, but use the addTagCountConstraintToPattern method
+ * of ReplicaSetTagConfig.
+ */
+ void addTagCountConstraint(int32_t keyIndex, int32_t minCount);
+
+ /**
+ * Gets the begin iterator over the constraints in this pattern.
+ */
+ ConstraintIterator constraintsBegin() const {
+ return _constraints.begin();
+ }
+
+ /**
+ * Gets the end iterator over the constraints in this pattern.
+ */
+ ConstraintIterator constraintsEnd() const {
+ return _constraints.end();
+ }
+
+private:
+ std::vector<TagCountConstraint> _constraints;
+};
+
+/**
+ * State object for progressive detection of ReplicaSetTagPattern constraint satisfaction.
+ *
+ * This is an abstraction of the replica set write tag satisfaction problem.
+ *
+ * Replica set tag matching is an event-driven constraint satisfaction process. This type
+ * represents the state of that process. It is initialized from a pattern object, then
+ * progressively updated with tags. After processing a sequence of tags sufficient to satisfy
+ * the pattern, isSatisfied() becomes true.
+ */
+class ReplicaSetTagMatch {
+ friend class ReplicaSetTagConfig;
+
+public:
+ /**
+ * Constructs an empty match object, equivalent to one that matches an
+ * empty pattern.
+ */
+ ReplicaSetTagMatch() {}
+
+ /**
+ * Constructs a clean match object for the given pattern.
+ */
+ explicit ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern);
+
+ /**
+ * Updates the match state based on the data for the given tag.
*
- * Replica set tag matching is an event-driven constraint satisfaction process. This type
- * represents the state of that process. It is initialized from a pattern object, then
- * progressively updated with tags. After processing a sequence of tags sufficient to satisfy
- * the pattern, isSatisfied() becomes true.
+ * Returns true if, after this update, isSatisfied() is true.
*/
- class ReplicaSetTagMatch {
- friend class ReplicaSetTagConfig;
- public:
- /**
- * Constructs an empty match object, equivalent to one that matches an
- * empty pattern.
- */
- ReplicaSetTagMatch() {}
-
- /**
- * Constructs a clean match object for the given pattern.
- */
- explicit ReplicaSetTagMatch(const ReplicaSetTagPattern& pattern);
-
- /**
- * Updates the match state based on the data for the given tag.
- *
- * Returns true if, after this update, isSatisfied() is true.
- */
- bool update(const ReplicaSetTag& tag);
-
- /**
- * Returns true if the match has received a sequence of tags sufficient to satisfy the
- * pattern.
- */
+ bool update(const ReplicaSetTag& tag);
+
+ /**
+ * Returns true if the match has received a sequence of tags sufficient to satisfy the
+ * pattern.
+ */
+ bool isSatisfied() const;
+
+private:
+ /**
+ * Representation of the state related to a single tag key in the match pattern.
+ * Consists of a constraint (key index and min count for satisfaction) and a list
+ * of already observed values.
+ *
+ * A BoundTagValue is satisfied when the size of boundValues is at least
+ * constraint.getMinCount().
+ */
+ struct BoundTagValue {
+ BoundTagValue() {}
+ explicit BoundTagValue(const ReplicaSetTagPattern::TagCountConstraint& aConstraint)
+ : constraint(aConstraint) {}
+
+ int32_t getKeyIndex() const {
+ return constraint.getKeyIndex();
+ }
bool isSatisfied() const;
- private:
- /**
- * Representation of the state related to a single tag key in the match pattern.
- * Consists of a constraint (key index and min count for satisfaction) and a list
- * of already observed values.
- *
- * A BoundTagValue is satisfied when the size of boundValues is at least
- * constraint.getMinCount().
- */
- struct BoundTagValue {
- BoundTagValue() {}
- explicit BoundTagValue(const ReplicaSetTagPattern::TagCountConstraint& aConstraint) :
- constraint(aConstraint) {}
-
- int32_t getKeyIndex() const { return constraint.getKeyIndex(); }
- bool isSatisfied() const;
-
- ReplicaSetTagPattern::TagCountConstraint constraint;
- std::vector<int32_t> boundValues;
- };
- std::vector<BoundTagValue> _boundTagValues;
+ ReplicaSetTagPattern::TagCountConstraint constraint;
+ std::vector<int32_t> boundValues;
};
+ std::vector<BoundTagValue> _boundTagValues;
+};
+
+/**
+ * Representation of the tag configuration information for a replica set.
+ *
+ * This type, like all in this file, is copyable. Tags and patterns from one instance of this
+ * class are compatible with other instances of this class that are *copies* of the original
+ * instance.
+ */
+class ReplicaSetTagConfig {
+public:
+ /**
+ * Finds or allocates a tag with the given "key" and "value" strings.
+ */
+ ReplicaSetTag makeTag(const StringData& key, const StringData& value);
+
+ /**
+ * Finds a tag with the given key and value strings, or returns a tag whose isValid() method
+ * returns false if the configuration has never allocated such a tag via makeTag().
+ */
+ ReplicaSetTag findTag(const StringData& key, const StringData& value) const;
+
+ /**
+ * Makes a new, empty pattern object.
+ */
+ ReplicaSetTagPattern makePattern() const;
/**
- * Representation of the tag configuration information for a replica set.
+ * Adds a constraint clause to the given "pattern". This particular
+ * constraint requires that at least "minCount" distinct tags with the given "tagKey"
+ * be observed. Two tags "t1" and "t2" are distinct if "t1 != t2", so this constraint
+     * means that we must see at least "minCount" distinct tags with the specified "tagKey".
+ */
+ Status addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
+ const StringData& tagKey,
+ int32_t minCount) const;
+
+ /**
+ * Gets the string key for the given "tag".
*
- * This type, like all in this file, is copyable. Tags and patterns from one instance of this
- * class are compatible with other instances of this class that are *copies* of the original
- * instance.
+ * Behavior is undefined if "tag" is not valid or was not from this
+ * config or one of its copies.
*/
- class ReplicaSetTagConfig {
- public:
- /**
- * Finds or allocates a tag with the given "key" and "value" strings.
- */
- ReplicaSetTag makeTag(const StringData& key, const StringData& value);
-
- /**
- * Finds a tag with the given key and value strings, or returns a tag whose isValid() method
- * returns false if the configuration has never allocated such a tag via makeTag().
- */
- ReplicaSetTag findTag(const StringData& key, const StringData& value) const;
-
- /**
- * Makes a new, empty pattern object.
- */
- ReplicaSetTagPattern makePattern() const;
-
- /**
- * Adds a constraint clause to the given "pattern". This particular
- * constraint requires that at least "minCount" distinct tags with the given "tagKey"
- * be observed. Two tags "t1" and "t2" are distinct if "t1 != t2", so this constraint
- * means that we must see at least "minCount" tags with the specified "tagKey".
- */
- Status addTagCountConstraintToPattern(ReplicaSetTagPattern* pattern,
- const StringData& tagKey,
- int32_t minCount) const;
-
- /**
- * Gets the string key for the given "tag".
- *
- * Behavior is undefined if "tag" is not valid or was not from this
- * config or one of its copies.
- */
- std::string getTagKey(const ReplicaSetTag& tag) const;
-
- /**
- * Gets the string value for the given "tag".
- *
- * Like getTagKey, above, behavior is undefined if "tag" is not valid or was not from this
- * config or one of its copies.
- */
- std::string getTagValue(const ReplicaSetTag& tag) const;
-
- /**
- * Helper that writes a string debugging representation of "tag" to "os".
- */
- void put(const ReplicaSetTag& tag, std::ostream& os) const;
-
- /**
- * Helper that writes a string debugging representation of "pattern" to "os".
- */
- void put(const ReplicaSetTagPattern& pattern, std::ostream& os) const;
-
- /**
- * Helper that writes a string debugging representation of "matcher" to "os".
- */
- void put(const ReplicaSetTagMatch& matcher, std::ostream& os) const;
+ std::string getTagKey(const ReplicaSetTag& tag) const;
- private:
- typedef std::vector<std::string> ValueVector;
- typedef std::vector<std::pair<std::string, ValueVector> > KeyValueVector;
-
- /**
- * Returns the index corresponding to "key", or _tagData.size() if there is no
- * such index.
- */
- int32_t _findKeyIndex(const StringData& key) const;
-
- /**
- * Helper that writes a "tagKey" field for the given "keyIndex" to "builder".
- */
- void _appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const;
-
- /**
- * Helper that writes a "tagValue" field for the given "keyIndex" and "valueIndex"
- * to "builder".
- */
- void _appendTagValue(int32_t keyIndex, int32_t valueIndex, BSONObjBuilder* builder) const;
-
- /**
- * Helper that writes a constraint object to "builder".
- */
- void _appendConstraint(const ReplicaSetTagPattern::TagCountConstraint& constraint,
- BSONObjBuilder* builder) const;
-
- // Data about known tags. Conceptually, it maps between keys and their indexes,
- // keys and their associated values, and (key, value) pairs and the values' indexes.
- KeyValueVector _tagData;
- };
+ /**
+ * Gets the string value for the given "tag".
+ *
+ * Like getTagKey, above, behavior is undefined if "tag" is not valid or was not from this
+ * config or one of its copies.
+ */
+ std::string getTagValue(const ReplicaSetTag& tag) const;
+
+ /**
+ * Helper that writes a string debugging representation of "tag" to "os".
+ */
+ void put(const ReplicaSetTag& tag, std::ostream& os) const;
+
+ /**
+ * Helper that writes a string debugging representation of "pattern" to "os".
+ */
+ void put(const ReplicaSetTagPattern& pattern, std::ostream& os) const;
+
+ /**
+ * Helper that writes a string debugging representation of "matcher" to "os".
+ */
+ void put(const ReplicaSetTagMatch& matcher, std::ostream& os) const;
+
+private:
+ typedef std::vector<std::string> ValueVector;
+ typedef std::vector<std::pair<std::string, ValueVector>> KeyValueVector;
+
+ /**
+ * Returns the index corresponding to "key", or _tagData.size() if there is no
+ * such index.
+ */
+ int32_t _findKeyIndex(const StringData& key) const;
+
+ /**
+ * Helper that writes a "tagKey" field for the given "keyIndex" to "builder".
+ */
+ void _appendTagKey(int32_t keyIndex, BSONObjBuilder* builder) const;
+
+ /**
+ * Helper that writes a "tagValue" field for the given "keyIndex" and "valueIndex"
+ * to "builder".
+ */
+ void _appendTagValue(int32_t keyIndex, int32_t valueIndex, BSONObjBuilder* builder) const;
+
+ /**
+ * Helper that writes a constraint object to "builder".
+ */
+ void _appendConstraint(const ReplicaSetTagPattern::TagCountConstraint& constraint,
+ BSONObjBuilder* builder) const;
+
+ // Data about known tags. Conceptually, it maps between keys and their indexes,
+ // keys and their associated values, and (key, value) pairs and the values' indexes.
+ KeyValueVector _tagData;
+};
} // namespace repl
} // namespace mongo
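
To see the reformatted tag API end to end, here is a minimal usage sketch. It is illustrative only and not part of this commit; the main() scaffolding and the include set are assumptions. It builds a pattern equivalent to { "dc": 2 } and drives a matcher to satisfaction, mirroring the SingleTagConstraint test in the file below.

    #include <iostream>

    #include "mongo/base/status.h"
    #include "mongo/db/repl/replica_set_tag.h"

    using namespace mongo;
    using namespace mongo::repl;

    int main() {
        ReplicaSetTagConfig tagConfig;

        // Tags are only meaningful with the config (or a copy of it) that made them.
        ReplicaSetTag dcNY = tagConfig.makeTag("dc", "ny");
        ReplicaSetTag dcRI = tagConfig.makeTag("dc", "ri");

        // One constraint: at least two distinct values must be seen for key "dc".
        ReplicaSetTagPattern pattern = tagConfig.makePattern();
        Status status = tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 2);
        if (!status.isOK()) {
            return 1;  // the constraint could not be added
        }

        // Progressively update the match state as node tags are observed.
        ReplicaSetTagMatch matcher(pattern);
        matcher.update(dcNY);                   // one "dc" value seen; not satisfied
        bool satisfied = matcher.update(dcRI);  // second distinct value; satisfied
        std::cout << std::boolalpha << satisfied << std::endl;  // prints "true"
        return 0;
    }
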
diff --git a/src/mongo/db/repl/replica_set_tag_test.cpp b/src/mongo/db/repl/replica_set_tag_test.cpp
index 1a2bdf9e120..1d70ee39bbe 100644
--- a/src/mongo/db/repl/replica_set_tag_test.cpp
+++ b/src/mongo/db/repl/replica_set_tag_test.cpp
@@ -33,129 +33,129 @@ namespace mongo {
namespace repl {
namespace {
- template <typename T>
- class StreamPutter {
- public:
- StreamPutter(const ReplicaSetTagConfig& tagConfig, const T& item) :
- _tagConfig(&tagConfig), _item(&item) {}
- void put(std::ostream& os) const {
- _tagConfig->put(*_item, os);
- }
-
- private:
- const ReplicaSetTagConfig* _tagConfig;
- const T* _item;
- };
-
- template <typename T>
- StreamPutter<T> streamput(const ReplicaSetTagConfig& tagConfig, const T& item) {
- return StreamPutter<T>(tagConfig, item);
+template <typename T>
+class StreamPutter {
+public:
+ StreamPutter(const ReplicaSetTagConfig& tagConfig, const T& item)
+ : _tagConfig(&tagConfig), _item(&item) {}
+ void put(std::ostream& os) const {
+ _tagConfig->put(*_item, os);
}
- template <typename T>
- std::ostream& operator<<(std::ostream& os, const StreamPutter<T>& putter) {
- putter.put(os);
- return os;
+private:
+ const ReplicaSetTagConfig* _tagConfig;
+ const T* _item;
+};
+
+template <typename T>
+StreamPutter<T> streamput(const ReplicaSetTagConfig& tagConfig, const T& item) {
+ return StreamPutter<T>(tagConfig, item);
+}
+
+template <typename T>
+std::ostream& operator<<(std::ostream& os, const StreamPutter<T>& putter) {
+ putter.put(os);
+ return os;
+}
+
+TEST(ReplicaSetTagConfigTest, MakeAndFindTags) {
+ ReplicaSetTagConfig tagConfig;
+ ReplicaSetTag dcNY = tagConfig.makeTag("dc", "ny");
+ ReplicaSetTag dcRI = tagConfig.makeTag("dc", "ri");
+ ReplicaSetTag rack1 = tagConfig.makeTag("rack", "1");
+ ReplicaSetTag rack2 = tagConfig.makeTag("rack", "2");
+ ASSERT_TRUE(dcNY.isValid());
+ ASSERT_EQUALS("dc", tagConfig.getTagKey(dcNY));
+ ASSERT_EQUALS("ny", tagConfig.getTagValue(dcNY));
+ ASSERT_EQUALS("dc", tagConfig.getTagKey(dcRI));
+ ASSERT_EQUALS("ri", tagConfig.getTagValue(dcRI));
+ ASSERT_EQUALS("rack", tagConfig.getTagKey(rack1));
+ ASSERT_EQUALS("1", tagConfig.getTagValue(rack1));
+ ASSERT_EQUALS("rack", tagConfig.getTagKey(rack2));
+ ASSERT_EQUALS("2", tagConfig.getTagValue(rack2));
+
+ ASSERT_EQUALS(rack1.getKeyIndex(), rack2.getKeyIndex());
+ ASSERT_NOT_EQUALS(rack1.getKeyIndex(), dcRI.getKeyIndex());
+ ASSERT_NOT_EQUALS(rack1.getValueIndex(), rack2.getValueIndex());
+
+ ASSERT_TRUE(rack1 == tagConfig.makeTag("rack", "1"));
+ ASSERT_TRUE(rack1 == tagConfig.findTag("rack", "1"));
+ ASSERT_FALSE(tagConfig.findTag("rack", "7").isValid());
+ ASSERT_FALSE(tagConfig.findTag("country", "us").isValid());
+}
+
+class ReplicaSetTagMatchTest : public unittest::Test {
+public:
+ void setUp() {
+ dcNY = tagConfig.makeTag("dc", "ny");
+ dcVA = tagConfig.makeTag("dc", "va");
+ dcRI = tagConfig.makeTag("dc", "ri");
+ rack1 = tagConfig.makeTag("rack", "1");
+ rack2 = tagConfig.makeTag("rack", "2");
+ rack3 = tagConfig.makeTag("rack", "3");
+ rack4 = tagConfig.makeTag("rack", "4");
}
- TEST(ReplicaSetTagConfigTest, MakeAndFindTags) {
- ReplicaSetTagConfig tagConfig;
- ReplicaSetTag dcNY = tagConfig.makeTag("dc", "ny");
- ReplicaSetTag dcRI = tagConfig.makeTag("dc", "ri");
- ReplicaSetTag rack1 = tagConfig.makeTag("rack", "1");
- ReplicaSetTag rack2 = tagConfig.makeTag("rack", "2");
- ASSERT_TRUE(dcNY.isValid());
- ASSERT_EQUALS("dc", tagConfig.getTagKey(dcNY));
- ASSERT_EQUALS("ny", tagConfig.getTagValue(dcNY));
- ASSERT_EQUALS("dc", tagConfig.getTagKey(dcRI));
- ASSERT_EQUALS("ri", tagConfig.getTagValue(dcRI));
- ASSERT_EQUALS("rack", tagConfig.getTagKey(rack1));
- ASSERT_EQUALS("1", tagConfig.getTagValue(rack1));
- ASSERT_EQUALS("rack", tagConfig.getTagKey(rack2));
- ASSERT_EQUALS("2", tagConfig.getTagValue(rack2));
-
- ASSERT_EQUALS(rack1.getKeyIndex(), rack2.getKeyIndex());
- ASSERT_NOT_EQUALS(rack1.getKeyIndex(), dcRI.getKeyIndex());
- ASSERT_NOT_EQUALS(rack1.getValueIndex(), rack2.getValueIndex());
-
- ASSERT_TRUE(rack1 == tagConfig.makeTag("rack", "1"));
- ASSERT_TRUE(rack1 == tagConfig.findTag("rack", "1"));
- ASSERT_FALSE(tagConfig.findTag("rack", "7").isValid());
- ASSERT_FALSE(tagConfig.findTag("country", "us").isValid());
- }
-
- class ReplicaSetTagMatchTest : public unittest::Test {
- public:
- void setUp() {
- dcNY = tagConfig.makeTag("dc", "ny");
- dcVA = tagConfig.makeTag("dc", "va");
- dcRI = tagConfig.makeTag("dc", "ri");
- rack1 = tagConfig.makeTag("rack", "1");
- rack2 = tagConfig.makeTag("rack", "2");
- rack3 = tagConfig.makeTag("rack", "3");
- rack4 = tagConfig.makeTag("rack", "4");
- }
-
- protected:
- ReplicaSetTagConfig tagConfig;
- ReplicaSetTag dcNY;
- ReplicaSetTag dcVA;
- ReplicaSetTag dcRI;
- ReplicaSetTag rack1;
- ReplicaSetTag rack2;
- ReplicaSetTag rack3;
- ReplicaSetTag rack4;
- };
-
- TEST_F(ReplicaSetTagMatchTest, EmptyPatternAlwaysSatisfied) {
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 0));
- ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
- }
-
- TEST_F(ReplicaSetTagMatchTest, SingleTagConstraint) {
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 2));
- ReplicaSetTagMatch matcher(pattern);
- ASSERT_FALSE(matcher.isSatisfied());
- ASSERT_FALSE(matcher.update(dcVA)); // One DC alone won't satisfy "dc: 2".
- ASSERT_FALSE(matcher.update(rack2)); // Adding one rack won't satisfy.
- ASSERT_FALSE(matcher.update(rack3)); // Two racks won't satisfy "dc: 2".
- ASSERT_FALSE(matcher.update(dcVA)); // Same tag twice won't satisfy.
- ASSERT_TRUE(matcher.update(dcRI)); // Two DCs satisfies.
- ASSERT_TRUE(matcher.isSatisfied());
- ASSERT_TRUE(matcher.update(dcNY)); // Three DCs satisfies.
- ASSERT_TRUE(matcher.update(rack1)); // Once matcher is satisfied, it stays satisfied.
- }
-
- TEST_F(ReplicaSetTagMatchTest, MaskingConstraints) {
- // The highest count constraint for a tag key is the only one that matters.
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 3));
- ReplicaSetTagMatch matcher(pattern);
- ASSERT_FALSE(matcher.isSatisfied());
- ASSERT_FALSE(matcher.update(rack2));
- ASSERT_FALSE(matcher.update(rack3));
- ASSERT_FALSE(matcher.update(rack2));
- ASSERT_TRUE(matcher.update(rack1));
- }
-
- TEST_F(ReplicaSetTagMatchTest, MultipleConstraints) {
- ReplicaSetTagPattern pattern = tagConfig.makePattern();
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 3));
- ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
- ReplicaSetTagMatch matcher(pattern);
- ASSERT_FALSE(matcher.isSatisfied());
- ASSERT_FALSE(matcher.update(dcVA));
- ASSERT_FALSE(matcher.update(rack2));
- ASSERT_FALSE(matcher.update(rack3));
- ASSERT_FALSE(matcher.update(dcVA));
- ASSERT_FALSE(matcher.update(dcRI));
- ASSERT_TRUE(matcher.update(dcNY));
- ASSERT_TRUE(matcher.isSatisfied());
- }
+protected:
+ ReplicaSetTagConfig tagConfig;
+ ReplicaSetTag dcNY;
+ ReplicaSetTag dcVA;
+ ReplicaSetTag dcRI;
+ ReplicaSetTag rack1;
+ ReplicaSetTag rack2;
+ ReplicaSetTag rack3;
+ ReplicaSetTag rack4;
+};
+
+TEST_F(ReplicaSetTagMatchTest, EmptyPatternAlwaysSatisfied) {
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 0));
+ ASSERT_TRUE(ReplicaSetTagMatch(pattern).isSatisfied());
+}
+
+TEST_F(ReplicaSetTagMatchTest, SingleTagConstraint) {
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 2));
+ ReplicaSetTagMatch matcher(pattern);
+ ASSERT_FALSE(matcher.isSatisfied());
+ ASSERT_FALSE(matcher.update(dcVA)); // One DC alone won't satisfy "dc: 2".
+ ASSERT_FALSE(matcher.update(rack2)); // Adding one rack won't satisfy.
+ ASSERT_FALSE(matcher.update(rack3)); // Two racks won't satisfy "dc: 2".
+ ASSERT_FALSE(matcher.update(dcVA)); // Same tag twice won't satisfy.
+ ASSERT_TRUE(matcher.update(dcRI)); // Two DCs satisfies.
+ ASSERT_TRUE(matcher.isSatisfied());
+ ASSERT_TRUE(matcher.update(dcNY)); // Three DCs satisfies.
+ ASSERT_TRUE(matcher.update(rack1)); // Once matcher is satisfied, it stays satisfied.
+}
+
+TEST_F(ReplicaSetTagMatchTest, MaskingConstraints) {
+ // The highest count constraint for a tag key is the only one that matters.
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 3));
+ ReplicaSetTagMatch matcher(pattern);
+ ASSERT_FALSE(matcher.isSatisfied());
+ ASSERT_FALSE(matcher.update(rack2));
+ ASSERT_FALSE(matcher.update(rack3));
+ ASSERT_FALSE(matcher.update(rack2));
+ ASSERT_TRUE(matcher.update(rack1));
+}
+
+TEST_F(ReplicaSetTagMatchTest, MultipleConstraints) {
+ ReplicaSetTagPattern pattern = tagConfig.makePattern();
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "dc", 3));
+ ASSERT_OK(tagConfig.addTagCountConstraintToPattern(&pattern, "rack", 2));
+ ReplicaSetTagMatch matcher(pattern);
+ ASSERT_FALSE(matcher.isSatisfied());
+ ASSERT_FALSE(matcher.update(dcVA));
+ ASSERT_FALSE(matcher.update(rack2));
+ ASSERT_FALSE(matcher.update(rack3));
+ ASSERT_FALSE(matcher.update(dcVA));
+ ASSERT_FALSE(matcher.update(dcRI));
+ ASSERT_TRUE(matcher.update(dcNY));
+ ASSERT_TRUE(matcher.isSatisfied());
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator.cpp b/src/mongo/db/repl/replication_coordinator.cpp
index a8b1eaa2fee..5b7d25f969a 100644
--- a/src/mongo/db/repl/replication_coordinator.cpp
+++ b/src/mongo/db/repl/replication_coordinator.cpp
@@ -33,11 +33,11 @@
namespace mongo {
namespace repl {
- ReplicationCoordinator::ReplicationCoordinator() {}
- ReplicationCoordinator::~ReplicationCoordinator() {}
+ReplicationCoordinator::ReplicationCoordinator() {}
+ReplicationCoordinator::~ReplicationCoordinator() {}
- // TODO(dannenberg) remove when master slave is removed
- const char *replAllDead = 0;
+// TODO(dannenberg) remove when master slave is removed
+const char* replAllDead = 0;
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index f9927f719dd..fe45fb2fd30 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -39,530 +39,518 @@
namespace mongo {
- class BSONObj;
- class BSONObjBuilder;
- class IndexDescriptor;
- class NamespaceString;
- class OperationContext;
- class OpTime;
- struct WriteConcernOptions;
+class BSONObj;
+class BSONObjBuilder;
+class IndexDescriptor;
+class NamespaceString;
+class OperationContext;
+class OpTime;
+struct WriteConcernOptions;
namespace repl {
- class BackgroundSync;
- class HandshakeArgs;
- class IsMasterResponse;
- class OplogReader;
- class ReplSetHeartbeatArgs;
- class ReplSetHeartbeatResponse;
- class ReplSetHtmlSummary;
- class UpdatePositionArgs;
+class BackgroundSync;
+class HandshakeArgs;
+class IsMasterResponse;
+class OplogReader;
+class ReplSetHeartbeatArgs;
+class ReplSetHeartbeatResponse;
+class ReplSetHtmlSummary;
+class UpdatePositionArgs;
+
+/**
+ * Global variable that contains a std::string telling why master/slave halted
+ *
+ * "dead" means something really bad happened like replication falling completely out of sync.
+ * When non-null, we are dead and the string is informational.
+ *
+ * TODO(dannenberg) remove when master slave goes
+ */
+extern const char* replAllDead;
+
+/**
+ * The ReplicationCoordinator is responsible for coordinating the interaction of replication
+ * with the rest of the system. The public methods on ReplicationCoordinator are the public
+ * API that the replication subsystem presents to the rest of the codebase.
+ */
+class ReplicationCoordinator {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinator);
+
+public:
+ typedef boost::posix_time::milliseconds Milliseconds;
+
+ struct StatusAndDuration {
+ public:
+ Status status;
+ Milliseconds duration;
+
+ StatusAndDuration(const Status& stat, Milliseconds ms) : status(stat), duration(ms) {}
+ };
+
+ virtual ~ReplicationCoordinator();
+
+ /**
+ * Does any initial bookkeeping needed to start replication, and instructs the other
+ * components of the replication system to start up whatever threads and do whatever
+ * initialization they need.
+ */
+ virtual void startReplication(OperationContext* txn) = 0;
+
+ /**
+ * Does whatever cleanup is required to stop replication, including instructing the other
+ * components of the replication system to shut down and stop any threads they are using,
+ * blocking until all replication-related shutdown tasks are complete.
+ */
+ virtual void shutdown() = 0;
/**
- * Global variable that contains a std::string telling why master/slave halted
+ * Returns a reference to the parsed command line arguments that are related to replication.
+ */
+ virtual const ReplSettings& getSettings() const = 0;
+
+ enum Mode { modeNone = 0, modeReplSet, modeMasterSlave };
+
+ /**
+ * Returns a value indicating whether this node was configured at start-up to run
+ * standalone, as part of a master-slave pair, or as a member of a replica set.
+ */
+ virtual Mode getReplicationMode() const = 0;
+
+ /**
+ * Returns true if this node is configured to be a member of a replica set or master/slave
+ * setup.
+ */
+ virtual bool isReplEnabled() const = 0;
+
+ /**
+ * Returns the current replica set state of this node (PRIMARY, SECONDARY, STARTUP, etc).
+ * It is invalid to call this unless getReplicationMode() == modeReplSet.
+ */
+ virtual MemberState getMemberState() const = 0;
+
+ /**
+ * Returns true if this node is in state PRIMARY or SECONDARY.
*
- * "dead" means something really bad happened like replication falling completely out of sync.
- * when non-null, we are dead and the string is informational
+ * It is invalid to call this unless getReplicationMode() == modeReplSet.
*
- * TODO(dannenberg) remove when master slave goes
+ * This method may be optimized to reduce synchronization overhead compared to
+ * reading the current member state with getMemberState().
*/
- extern const char *replAllDead;
-
+ virtual bool isInPrimaryOrSecondaryState() const = 0;
+
+
/**
- * The ReplicationCoordinator is responsible for coordinating the interaction of replication
- * with the rest of the system. The public methods on ReplicationCoordinator are the public
- * API that the replication subsystem presents to the rest of the codebase.
+     * Returns how slave-delayed this node is configured to be.
+ *
+ * Raises a DBException if this node is not a member of the current replica set
+ * configuration.
*/
- class ReplicationCoordinator {
- MONGO_DISALLOW_COPYING(ReplicationCoordinator);
+ virtual Seconds getSlaveDelaySecs() const = 0;
- public:
+ /**
+ * Clears the list of sync sources we have blacklisted.
+ */
+ virtual void clearSyncSourceBlacklist() = 0;
+
+ /**
+ * Blocks the calling thread for up to writeConcern.wTimeout millis, or until "ts" has been
+ * replicated to at least a set of nodes that satisfies the writeConcern, whichever comes
+ * first. A writeConcern.wTimeout of 0 indicates no timeout (block forever) and a
+ * writeConcern.wTimeout of -1 indicates return immediately after checking. Return codes:
+ * ErrorCodes::ExceededTimeLimit if the writeConcern.wTimeout is reached before
+ * the data has been sufficiently replicated
+ * ErrorCodes::NotMaster if the node is not Primary/Master
+ * ErrorCodes::UnknownReplWriteConcern if the writeConcern.wMode contains a write concern
+ * mode that is not known
+ * ErrorCodes::ShutdownInProgress if we are mid-shutdown
+ * ErrorCodes::Interrupted if the operation was killed with killop()
+ */
+ virtual StatusAndDuration awaitReplication(const OperationContext* txn,
+ const OpTime& ts,
+ const WriteConcernOptions& writeConcern) = 0;
+
+ /**
+ * Like awaitReplication(), above, but waits for the replication of the last operation
+ * performed on the client associated with "txn".
+ */
+ virtual StatusAndDuration awaitReplicationOfLastOpForClient(
+ const OperationContext* txn, const WriteConcernOptions& writeConcern) = 0;
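
A hedged caller-side sketch of the replication-wait contract described above; not part of this commit. Here replCoord, txn, and ts are assumed to be in scope (a ReplicationCoordinator*, an OperationContext*, and the OpTime to wait on), and the WriteConcernOptions field names are taken from this tree's write_concern_options.h.

    WriteConcernOptions writeConcern;
    writeConcern.wMode = "majority";  // a named write concern mode
    writeConcern.wTimeout = 5000;     // ms; 0 blocks forever, -1 checks once

    ReplicationCoordinator::StatusAndDuration result =
        replCoord->awaitReplication(txn, ts, writeConcern);
    if (result.status.isOK()) {
        // "ts" is sufficiently replicated; result.duration is how long we waited.
    } else if (result.status.code() == ErrorCodes::ExceededTimeLimit) {
        // writeConcern.wTimeout elapsed before enough nodes acknowledged "ts".
    }
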
+
+ /**
+ * Causes this node to relinquish being primary for at least 'stepdownTime'. If 'force' is
+ * false, before doing so it will wait for 'waitTime' for one other node to be within 10
+ * seconds of this node's optime before stepping down. Returns a Status with the code
+ * ErrorCodes::ExceededTimeLimit if no secondary catches up within waitTime,
+ * ErrorCodes::NotMaster if you are no longer primary when trying to step down,
+ * ErrorCodes::SecondaryAheadOfPrimary if we are primary but there is another node that
+ * seems to be ahead of us in replication, and Status::OK otherwise.
+ */
+ virtual Status stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime) = 0;
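
The stepDown() contract just described, as a similar hypothetical sketch with replCoord and txn assumed in scope:

    // Relinquish primaryship for 60 seconds, first allowing up to 10 seconds
    // for one secondary to come within 10 seconds of our optime (force == false).
    ReplicationCoordinator::Milliseconds waitTime(10 * 1000);
    ReplicationCoordinator::Milliseconds stepdownTime(60 * 1000);
    Status status = replCoord->stepDown(txn, false, waitTime, stepdownTime);
    if (status.code() == ErrorCodes::ExceededTimeLimit) {
        // No secondary caught up within waitTime; we remain primary.
    }
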
+
+ /**
+ * Returns true if the node can be considered master for the purpose of introspective
+ * commands such as isMaster() and rs.status().
+ */
+ virtual bool isMasterForReportingPurposes() = 0;
- typedef boost::posix_time::milliseconds Milliseconds;
-
- struct StatusAndDuration {
- public:
- Status status;
- Milliseconds duration;
-
- StatusAndDuration(const Status& stat, Milliseconds ms) : status(stat),
- duration(ms) {}
- };
-
- virtual ~ReplicationCoordinator();
-
- /**
- * Does any initial bookkeeping needed to start replication, and instructs the other
- * components of the replication system to start up whatever threads and do whatever
- * initialization they need.
- */
- virtual void startReplication(OperationContext* txn) = 0;
-
- /**
- * Does whatever cleanup is required to stop replication, including instructing the other
- * components of the replication system to shut down and stop any threads they are using,
- * blocking until all replication-related shutdown tasks are complete.
- */
- virtual void shutdown() = 0;
-
- /**
- * Returns a reference to the parsed command line arguments that are related to replication.
- */
- virtual const ReplSettings& getSettings() const = 0;
-
- enum Mode {
- modeNone = 0,
- modeReplSet,
- modeMasterSlave
- };
-
- /**
- * Returns a value indicating whether this node was configured at start-up to run
- * standalone, as part of a master-slave pair, or as a member of a replica set.
- */
- virtual Mode getReplicationMode() const = 0;
-
- /**
- * Returns true if this node is configured to be a member of a replica set or master/slave
- * setup.
- */
- virtual bool isReplEnabled() const = 0;
-
- /**
- * Returns the current replica set state of this node (PRIMARY, SECONDARY, STARTUP, etc).
- * It is invalid to call this unless getReplicationMode() == modeReplSet.
- */
- virtual MemberState getMemberState() const = 0;
-
- /**
- * Returns true if this node is in state PRIMARY or SECONDARY.
- *
- * It is invalid to call this unless getReplicationMode() == modeReplSet.
- *
- * This method may be optimized to reduce synchronization overhead compared to
- * reading the current member state with getMemberState().
- */
- virtual bool isInPrimaryOrSecondaryState() const = 0;
-
-
- /**
- * Returns how slave delayed this node is configured to be.
- *
- * Raises a DBException if this node is not a member of the current replica set
- * configuration.
- */
- virtual Seconds getSlaveDelaySecs() const = 0;
-
- /**
- * Clears the list of sync sources we have blacklisted.
- */
- virtual void clearSyncSourceBlacklist() = 0;
-
- /**
- * Blocks the calling thread for up to writeConcern.wTimeout millis, or until "ts" has been
- * replicated to at least a set of nodes that satisfies the writeConcern, whichever comes
- * first. A writeConcern.wTimeout of 0 indicates no timeout (block forever) and a
- * writeConcern.wTimeout of -1 indicates return immediately after checking. Return codes:
- * ErrorCodes::ExceededTimeLimit if the writeConcern.wTimeout is reached before
- * the data has been sufficiently replicated
- * ErrorCodes::NotMaster if the node is not Primary/Master
- * ErrorCodes::UnknownReplWriteConcern if the writeConcern.wMode contains a write concern
- * mode that is not known
- * ErrorCodes::ShutdownInProgress if we are mid-shutdown
- * ErrorCodes::Interrupted if the operation was killed with killop()
- */
- virtual StatusAndDuration awaitReplication(const OperationContext* txn,
- const OpTime& ts,
- const WriteConcernOptions& writeConcern) = 0;
-
- /**
- * Like awaitReplication(), above, but waits for the replication of the last operation
- * performed on the client associated with "txn".
- */
- virtual StatusAndDuration awaitReplicationOfLastOpForClient(
- const OperationContext* txn,
- const WriteConcernOptions& writeConcern) = 0;
-
- /**
- * Causes this node to relinquish being primary for at least 'stepdownTime'. If 'force' is
- * false, before doing so it will wait for 'waitTime' for one other node to be within 10
- * seconds of this node's optime before stepping down. Returns a Status with the code
- * ErrorCodes::ExceededTimeLimit if no secondary catches up within waitTime,
- * ErrorCodes::NotMaster if you are no longer primary when trying to step down,
- * ErrorCodes::SecondaryAheadOfPrimary if we are primary but there is another node that
- * seems to be ahead of us in replication, and Status::OK otherwise.
- */
- virtual Status stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime) = 0;
-
- /**
- * Returns true if the node can be considered master for the purpose of introspective
- * commands such as isMaster() and rs.status().
- */
- virtual bool isMasterForReportingPurposes() = 0;
-
- /**
- * Returns true if it is valid for this node to accept writes on the given database.
- * Currently this is true only if this node is Primary, master in master/slave,
- * a standalone, or is writing to the local database.
- *
- * If a node was started with the replSet argument, but has not yet received a config, it
- * will not be able to receive writes to a database other than local (it will not be treated
- * as standalone node).
- *
- * NOTE: This function can only be meaningfully called while the caller holds the global
- * lock in some mode other than MODE_NONE.
- */
- virtual bool canAcceptWritesForDatabase(const StringData& dbName) = 0;
-
- /**
- * Checks if the current replica set configuration can satisfy the given write concern.
- *
- * Things that are taken into consideration include:
- * 1. If the set has enough data-bearing members.
- * 2. If the write concern mode exists.
- * 3. If there are enough members for the write concern mode specified.
- */
- virtual Status checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const = 0;
-
- /**
- * Returns Status::OK() if it is valid for this node to serve reads on the given collection
- * and an errorcode indicating why the node cannot if it cannot.
- */
- virtual Status checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk) = 0;
-
- /**
- * Returns true if this node should ignore unique index constraints on new documents.
- * Currently this is needed for nodes in STARTUP2, RECOVERING, and ROLLBACK states.
- */
- virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx) = 0;
-
- /**
- * Updates our internal tracking of the last OpTime applied for the given slave
- * identified by "rid". Only valid to call in master/slave mode
- */
- virtual Status setLastOptimeForSlave(const OID& rid, const OpTime& ts) = 0;
-
- /**
- * Updates our internal tracking of the last OpTime applied to this node.
- *
- * The new value of "ts" must be no less than any prior value passed to this method, and it
- * is the caller's job to properly synchronize this behavior. The exception to this rule is
- * that after calls to resetLastOpTimeFromOplog(), the minimum acceptable value for "ts" is
- * reset based on the contents of the oplog, and may go backwards due to rollback.
- */
- virtual void setMyLastOptime(const OpTime& ts) = 0;
-
- /**
- * Same as above, but used during places we need to zero our last optime.
- */
- virtual void resetMyLastOptime() = 0;
-
- /**
- * Updates our the message we include in heartbeat responses.
- */
- virtual void setMyHeartbeatMessage(const std::string& msg) = 0;
-
- /**
- * Returns the last optime recorded by setMyLastOptime.
- */
- virtual OpTime getMyLastOptime() const = 0;
-
- /**
- * Retrieves and returns the current election id, which is a unique id that is local to
- * this node and changes every time we become primary.
- * TODO(spencer): Use term instead.
- */
- virtual OID getElectionId() = 0;
-
- /**
- * Returns the RID for this node. The RID is used to identify this node to our sync source
- * when sending updates about our replication progress.
- */
- virtual OID getMyRID() const = 0;
-
- /**
- * Returns the id for this node as specified in the current replica set configuration.
- */
- virtual int getMyId() const = 0;
-
- /**
- * Sets this node into a specific follower mode.
- *
- * Returns true if the follower mode was successfully set. Returns false if the
- * node is or becomes a leader before setFollowerMode completes.
- *
- * Follower modes are RS_STARTUP2 (initial sync), RS_SECONDARY, RS_ROLLBACK and
- * RS_RECOVERING. They are the valid states of a node whose topology coordinator has the
- * follower role.
- *
- * This is essentially an interface that allows the applier to prevent the node from
- * becoming a candidate or accepting reads, depending on circumstances in the oplog
- * application process.
- */
- virtual bool setFollowerMode(const MemberState& newState) = 0;
-
- /**
- * Returns true if the coordinator wants the applier to pause application.
- *
- * If this returns true, the applier should call signalDrainComplete() when it has
- * completed draining its operation buffer and no further ops are being applied.
- */
- virtual bool isWaitingForApplierToDrain() = 0;
-
- /**
- * Signals that a previously requested pause and drain of the applier buffer
- * has completed.
- *
- * This is an interface that allows the applier to reenable writes after
- * a successful election triggers the draining of the applier buffer.
- */
- virtual void signalDrainComplete(OperationContext* txn) = 0;
-
- /**
- * Signals the sync source feedback thread to wake up and send a handshake and
- * replSetUpdatePosition command to our sync source.
- */
- virtual void signalUpstreamUpdater() = 0;
-
- /**
- * Prepares a BSONObj describing an invocation of the replSetUpdatePosition command that can
- * be sent to this node's sync source to update it about our progress in replication.
- *
- * The returned bool indicates whether or not the command was created.
- */
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
-
- /**
- * For ourself and each secondary chaining off of us, adds a BSONObj to "handshakes"
- * describing an invocation of the replSetUpdateCommand that can be sent to this node's
- * sync source to handshake us and our chained secondaries, informing the sync source that
- * we are replicating off of it.
- */
- virtual void prepareReplSetUpdatePositionCommandHandshakes(
- std::vector<BSONObj>* handshakes) = 0;
-
- /**
- * Handles an incoming replSetGetStatus command. Adds BSON to 'result'.
- */
- virtual Status processReplSetGetStatus(BSONObjBuilder* result) = 0;
-
- /**
- * Handles an incoming isMaster command for a replica set node. Should not be
- * called on a master-slave or standalone node.
- */
- virtual void fillIsMasterForReplSet(IsMasterResponse* result) = 0;
-
- /**
- * Adds to "result" a description of the slaveInfo data structure used to map RIDs to their
- * last known optimes.
- */
- virtual void appendSlaveInfoData(BSONObjBuilder* result) = 0;
-
- /**
- * Handles an incoming replSetGetConfig command. Adds BSON to 'result'.
- */
- virtual void processReplSetGetConfig(BSONObjBuilder* result) = 0;
-
- /**
- * Toggles maintenanceMode to the value expressed by 'activate'
- * return Status::OK if the change worked, NotSecondary if it failed because we are
- * PRIMARY, and OperationFailed if we are not currently in maintenance mode
- */
- virtual Status setMaintenanceMode(bool activate) = 0;
-
- /**
- * Retrieves the current count of maintenanceMode and returns 'true' if greater than 0.
- */
- virtual bool getMaintenanceMode() = 0;
-
- /**
- * Handles an incoming replSetSyncFrom command. Adds BSON to 'result'
- * returns Status::OK if the sync target could be set and an ErrorCode indicating why it
- * couldn't otherwise.
- */
- virtual Status processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) = 0;
-
- /**
- * Handles an incoming replSetFreeze command. Adds BSON to 'resultObj'
- * returns Status::OK() if the node is a member of a replica set with a config and an
- * error Status otherwise
- */
- virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj) = 0;
-
- /**
- * Handles an incoming heartbeat command with arguments 'args'. Populates 'response';
- * returns a Status with either OK or an error message.
- */
- virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) = 0;
-
- /**
- * Arguments for the replSetReconfig command.
- */
- struct ReplSetReconfigArgs {
- BSONObj newConfigObj;
- bool force;
- };
-
- /**
- * Handles an incoming replSetReconfig command. Adds BSON to 'resultObj';
- * returns a Status with either OK or an error message.
- */
- virtual Status processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) = 0;
-
- /*
- * Handles an incoming replSetInitiate command. If "configObj" is empty, generates a default
- * configuration to use.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) = 0;
-
- /*
- * Handles an incoming replSetGetRBID command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj) = 0;
-
- /**
- * Increments this process's rollback id. Called every time a rollback occurs.
- */
- virtual void incrementRollbackID() = 0;
-
- /**
- * Arguments to the replSetFresh command.
- */
- struct ReplSetFreshArgs {
- StringData setName; // Name of the replset
- HostAndPort who; // host and port of the member that sent the replSetFresh command
- unsigned id; // replSet id of the member that sent the replSetFresh command
- int cfgver; // replSet config version that the member who sent the command thinks it has
- OpTime opTime; // last optime seen by the member who sent the replSetFresh command
- };
-
- /*
- * Handles an incoming replSetFresh command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) = 0;
-
- /**
- * Arguments to the replSetElect command.
- */
- struct ReplSetElectArgs {
- StringData set; // Name of the replset
- int whoid; // replSet id of the member that sent the replSetFresh command
- int cfgver; // replSet config version that the member who sent the command thinks it has
- OID round; // unique ID for this election
- };
-
- /*
- * Handles an incoming replSetElect command.
- * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
- */
- virtual Status processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* resultObj) = 0;
-
- /**
- * Handles an incoming replSetUpdatePosition command, updating each node's oplog progress.
- * Returns Status::OK() if all updates are processed correctly, NodeNotFound
- * if any updating node cannot be found in the config, InvalidReplicaSetConfig if the
- * "cfgver" sent in any of the updates doesn't match our config version, or
- * NotMasterOrSecondaryCode if we are in state REMOVED or otherwise don't have a valid
- * replica set config.
- * If a non-OK status is returned, it is unspecified whether none or some of the updates
- * were applied.
- */
- virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates) = 0;
-
- /**
- * Handles an incoming Handshake command (or a handshake from replSetUpdatePosition).
- * Associates the node's 'remoteID' with its 'handshake' object. This association is used
- * to update local.slaves and to forward the node's replication progress upstream when this
- * node is being chained through.
- *
- * Returns ErrorCodes::NodeNotFound if no replica set member exists with the given member ID
- * and ErrorCodes::NotMasterOrSecondaryCode if we're in state REMOVED or otherwise don't
- * have a valid config.
- */
- virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake) = 0;
-
- /**
- * Returns a bool indicating whether or not this node builds indexes.
- */
- virtual bool buildsIndexes() = 0;
-
- /**
- * Returns a vector of members that have applied the operation with OpTime 'op'.
- */
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) = 0;
-
- /**
- * Returns a vector of the members other than ourself in the replica set, as specified in
- * the replica set config. Invalid to call if we are not in replica set mode. Returns
- * an empty vector if we do not have a valid config.
- */
- virtual std::vector<HostAndPort> getOtherNodesInReplSet() const = 0;
-
- /**
- * Returns a BSONObj containing a representation of the current default write concern.
- */
- virtual WriteConcernOptions getGetLastErrorDefault() = 0;
-
- /**
- * Checks that the --replSet flag was passed when starting up the node and that the node
- * has a valid replica set config.
- *
- * Returns a Status indicating whether those conditions are met with errorcode
- * NoReplicationEnabled if --replSet was not present during start up or with errorcode
- * NotYetInitialized in the absence of a valid config. Also adds error info to "result".
- */
- virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) = 0;
-
- /**
- * Chooses a viable sync source, or, if none available, returns empty HostAndPort.
- */
- virtual HostAndPort chooseNewSyncSource(const OpTime& lastOpTimeFetched) = 0;
-
- /**
- * Blacklists choosing 'host' as a sync source until time 'until'.
- */
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
-
- /**
- * Loads the optime from the last op in the oplog into the coordinator's lastOpApplied
- * value.
- */
- virtual void resetLastOpTimeFromOplog(OperationContext* txn) = 0;
-
- /**
- * Determines if a new sync source should be considered.
- * currentSource: the current sync source
- */
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource) = 0;
-
- /**
- * Writes into 'output' all the information needed to generate a summary of the current
- * replication state for use by the web interface.
- */
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
-
- protected:
-
- ReplicationCoordinator();
+ /**
+ * Returns true if it is valid for this node to accept writes on the given database.
+ * Currently this is true only if this node is Primary, master in master/slave,
+ * a standalone, or is writing to the local database.
+ *
+ * If a node was started with the replSet argument, but has not yet received a config, it
+ * will not be able to receive writes to a database other than local (it will not be treated
+     * as a standalone node).
+ *
+ * NOTE: This function can only be meaningfully called while the caller holds the global
+ * lock in some mode other than MODE_NONE.
+ */
+ virtual bool canAcceptWritesForDatabase(const StringData& dbName) = 0;
+
+ /**
+ * Checks if the current replica set configuration can satisfy the given write concern.
+ *
+ * Things that are taken into consideration include:
+ * 1. If the set has enough data-bearing members.
+ * 2. If the write concern mode exists.
+ * 3. If there are enough members for the write concern mode specified.
+ */
+ virtual Status checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const = 0;
+
+ /**
+ * Returns Status::OK() if it is valid for this node to serve reads on the given collection
+ * and an errorcode indicating why the node cannot if it cannot.
+ */
+ virtual Status checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk) = 0;
+
+ /**
+ * Returns true if this node should ignore unique index constraints on new documents.
+ * Currently this is needed for nodes in STARTUP2, RECOVERING, and ROLLBACK states.
+ */
+ virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx) = 0;
+
+ /**
+ * Updates our internal tracking of the last OpTime applied for the given slave
+ * identified by "rid". Only valid to call in master/slave mode
+ */
+ virtual Status setLastOptimeForSlave(const OID& rid, const OpTime& ts) = 0;
+ /**
+ * Updates our internal tracking of the last OpTime applied to this node.
+ *
+ * The new value of "ts" must be no less than any prior value passed to this method, and it
+ * is the caller's job to properly synchronize this behavior. The exception to this rule is
+ * that after calls to resetLastOpTimeFromOplog(), the minimum acceptable value for "ts" is
+ * reset based on the contents of the oplog, and may go backwards due to rollback.
+ */
+ virtual void setMyLastOptime(const OpTime& ts) = 0;
+
+ /**
+     * Same as above, but used in places where we need to zero our last optime.
+ */
+ virtual void resetMyLastOptime() = 0;
+
+ /**
+     * Updates the message we include in heartbeat responses.
+ */
+ virtual void setMyHeartbeatMessage(const std::string& msg) = 0;
+
+ /**
+ * Returns the last optime recorded by setMyLastOptime.
+ */
+ virtual OpTime getMyLastOptime() const = 0;
+
+ /**
+ * Retrieves and returns the current election id, which is a unique id that is local to
+ * this node and changes every time we become primary.
+ * TODO(spencer): Use term instead.
+ */
+ virtual OID getElectionId() = 0;
+
+ /**
+ * Returns the RID for this node. The RID is used to identify this node to our sync source
+ * when sending updates about our replication progress.
+ */
+ virtual OID getMyRID() const = 0;
+
+ /**
+ * Returns the id for this node as specified in the current replica set configuration.
+ */
+ virtual int getMyId() const = 0;
+
+ /**
+ * Sets this node into a specific follower mode.
+ *
+ * Returns true if the follower mode was successfully set. Returns false if the
+ * node is or becomes a leader before setFollowerMode completes.
+ *
+ * Follower modes are RS_STARTUP2 (initial sync), RS_SECONDARY, RS_ROLLBACK and
+ * RS_RECOVERING. They are the valid states of a node whose topology coordinator has the
+ * follower role.
+ *
+ * This is essentially an interface that allows the applier to prevent the node from
+ * becoming a candidate or accepting reads, depending on circumstances in the oplog
+ * application process.
+ */
+ virtual bool setFollowerMode(const MemberState& newState) = 0;
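
The applier-side usage this comment implies, as a hypothetical sketch (not part of this commit, with replCoord assumed in scope):

    // E.g. once rollback finishes, try to transition back to SECONDARY.
    if (!replCoord->setFollowerMode(MemberState::RS_SECONDARY)) {
        // The node became a leader before the transition completed; do not
        // force a follower state underneath an active primary. Re-evaluate.
    }
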
+
+ /**
+ * Returns true if the coordinator wants the applier to pause application.
+ *
+ * If this returns true, the applier should call signalDrainComplete() when it has
+ * completed draining its operation buffer and no further ops are being applied.
+ */
+ virtual bool isWaitingForApplierToDrain() = 0;
+
+ /**
+ * Signals that a previously requested pause and drain of the applier buffer
+ * has completed.
+ *
+ * This is an interface that allows the applier to reenable writes after
+ * a successful election triggers the draining of the applier buffer.
+ */
+ virtual void signalDrainComplete(OperationContext* txn) = 0;
+
+ /**
+ * Signals the sync source feedback thread to wake up and send a handshake and
+ * replSetUpdatePosition command to our sync source.
+ */
+ virtual void signalUpstreamUpdater() = 0;
+
+ /**
+ * Prepares a BSONObj describing an invocation of the replSetUpdatePosition command that can
+ * be sent to this node's sync source to update it about our progress in replication.
+ *
+ * The returned bool indicates whether or not the command was created.
+ */
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
+
+ /**
+ * For ourself and each secondary chaining off of us, adds a BSONObj to "handshakes"
+ * describing an invocation of the replSetUpdateCommand that can be sent to this node's
+ * sync source to handshake us and our chained secondaries, informing the sync source that
+ * we are replicating off of it.
+ */
+ virtual void prepareReplSetUpdatePositionCommandHandshakes(
+ std::vector<BSONObj>* handshakes) = 0;
+
+ /**
+ * Handles an incoming replSetGetStatus command. Adds BSON to 'result'.
+ */
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result) = 0;
+
+ /**
+ * Handles an incoming isMaster command for a replica set node. Should not be
+ * called on a master-slave or standalone node.
+ */
+ virtual void fillIsMasterForReplSet(IsMasterResponse* result) = 0;
+
+ /**
+ * Adds to "result" a description of the slaveInfo data structure used to map RIDs to their
+ * last known optimes.
+ */
+ virtual void appendSlaveInfoData(BSONObjBuilder* result) = 0;
+
+ /**
+ * Handles an incoming replSetGetConfig command. Adds BSON to 'result'.
+ */
+ virtual void processReplSetGetConfig(BSONObjBuilder* result) = 0;
+
+ /**
+ * Toggles maintenanceMode to the value expressed by 'activate'
+     * Returns Status::OK if the change worked, NotSecondary if it failed because we are
+     * PRIMARY, and OperationFailed if we are not currently in maintenance mode.
+ */
+ virtual Status setMaintenanceMode(bool activate) = 0;
+
+ /**
+ * Retrieves the current count of maintenanceMode and returns 'true' if greater than 0.
+ */
+ virtual bool getMaintenanceMode() = 0;
+
+ /**
+     * Handles an incoming replSetSyncFrom command. Adds BSON to 'resultObj';
+     * returns Status::OK if the sync target could be set, or an ErrorCode indicating why it
+ * couldn't otherwise.
+ */
+ virtual Status processReplSetSyncFrom(const HostAndPort& target, BSONObjBuilder* resultObj) = 0;
+
+ /**
+     * Handles an incoming replSetFreeze command. Adds BSON to 'resultObj';
+     * returns Status::OK() if the node is a member of a replica set with a config, and an
+     * error Status otherwise.
+ */
+ virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Handles an incoming heartbeat command with arguments 'args'. Populates 'response';
+ * returns a Status with either OK or an error message.
+ */
+ virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) = 0;
+
+ /**
+ * Arguments for the replSetReconfig command.
+ */
+ struct ReplSetReconfigArgs {
+ BSONObj newConfigObj;
+ bool force;
};
-} // namespace repl
-} // namespace mongo
+ /**
+ * Handles an incoming replSetReconfig command. Adds BSON to 'resultObj';
+ * returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) = 0;
+
+ /*
+ * Handles an incoming replSetInitiate command. If "configObj" is empty, generates a default
+ * configuration to use.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) = 0;
+
+ /*
+ * Handles an incoming replSetGetRBID command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Increments this process's rollback id. Called every time a rollback occurs.
+ */
+ virtual void incrementRollbackID() = 0;
+
+ /**
+ * Arguments to the replSetFresh command.
+ */
+ struct ReplSetFreshArgs {
+ StringData setName; // Name of the replset
+ HostAndPort who; // host and port of the member that sent the replSetFresh command
+ unsigned id; // replSet id of the member that sent the replSetFresh command
+ int cfgver; // replSet config version that the member who sent the command thinks it has
+ OpTime opTime; // last optime seen by the member who sent the replSetFresh command
+ };
+
+ /**
+ * Handles an incoming replSetFresh command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetFresh(const ReplSetFreshArgs& args, BSONObjBuilder* resultObj) = 0;
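+ // For illustration, the wire-level command these args are parsed from looks roughly like
+ // (field names assumed from the struct above, not taken from this change):
+ //
+ //     { replSetFresh: 1, set: "rs0", who: "h1:27017", id: 1, cfgver: 5, opTime: <OpTime> }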
+
+ /**
+ * Arguments to the replSetElect command.
+ */
+ struct ReplSetElectArgs {
+ StringData set; // Name of the replset
+ int whoid;  // replSet id of the member that sent the replSetElect command
+ int cfgver; // replSet config version that the member who sent the command thinks it has
+ OID round; // unique ID for this election
+ };
+
+ /**
+ * Handles an incoming replSetElect command.
+ * Adds BSON to 'resultObj'; returns a Status with either OK or an error message.
+ */
+ virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* resultObj) = 0;
+
+ /**
+ * Handles an incoming replSetUpdatePosition command, updating each node's oplog progress.
+ * Returns Status::OK() if all updates are processed correctly, NodeNotFound
+ * if any updating node cannot be found in the config, InvalidReplicaSetConfig if the
+ * "cfgver" sent in any of the updates doesn't match our config version, or
+ * NotMasterOrSecondaryCode if we are in state REMOVED or otherwise don't have a valid
+ * replica set config.
+ * If a non-OK status is returned, it is unspecified whether none or some of the updates
+ * were applied.
+ */
+ virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates) = 0;
+
+ /**
+ * Handles an incoming Handshake command (or a handshake from replSetUpdatePosition).
+ * Associates the node's 'remoteID' with its 'handshake' object. This association is used
+ * to update local.slaves and to forward the node's replication progress upstream when this
+ * node is being chained through.
+ *
+ * Returns ErrorCodes::NodeNotFound if no replica set member exists with the given member ID
+ * and ErrorCodes::NotMasterOrSecondaryCode if we're in state REMOVED or otherwise don't
+ * have a valid config.
+ */
+ virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake) = 0;
+
+ /**
+ * Returns a bool indicating whether or not this node builds indexes.
+ */
+ virtual bool buildsIndexes() = 0;
+
+ /**
+ * Returns a vector of members that have applied the operation with OpTime 'op'.
+ */
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) = 0;
+
+ /**
+ * Returns a vector of the members other than ourself in the replica set, as specified in
+ * the replica set config. Invalid to call if we are not in replica set mode. Returns
+ * an empty vector if we do not have a valid config.
+ */
+ virtual std::vector<HostAndPort> getOtherNodesInReplSet() const = 0;
+
+ /**
+ * Returns a BSONObj containing a representation of the current default write concern.
+ */
+ virtual WriteConcernOptions getGetLastErrorDefault() = 0;
+
+ /**
+ * Checks that the --replSet flag was passed when starting up the node and that the node
+ * has a valid replica set config.
+ *
+ * Returns a Status indicating whether those conditions are met with errorcode
+ * NoReplicationEnabled if --replSet was not present during start up or with errorcode
+ * NotYetInitialized in the absence of a valid config. Also adds error info to "result".
+ */
+ virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) = 0;
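+ // Typical guard at the top of a repl-only command handler (sketch; 'coordinator' and
+ // 'result' are assumed names):
+ //
+ //     Status status = coordinator->checkReplEnabledForCommand(&result);
+ //     if (!status.isOK()) {
+ //         return status;  // NoReplicationEnabled or NotYetInitialized
+ //     }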
+
+ /**
+ * Chooses a viable sync source, or, if none available, returns empty HostAndPort.
+ */
+ virtual HostAndPort chooseNewSyncSource(const OpTime& lastOpTimeFetched) = 0;
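+ // Caller-side sketch (illustrative):
+ //
+ //     HostAndPort source = coordinator->chooseNewSyncSource(lastOpTimeFetched);
+ //     if (source.empty()) {
+ //         // no viable sync source right now; try again later
+ //     }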
+
+ /**
+ * Blacklists choosing 'host' as a sync source until time 'until'.
+ */
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
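+ // Illustrative call; the 10-second window is an arbitrary example, not a value used in
+ // this change:
+ //
+ //     coordinator->blacklistSyncSource(badHost, Date_t(curTimeMillis64() + 10 * 1000));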
+
+ /**
+ * Loads the optime from the last op in the oplog into the coordinator's lastOpApplied
+ * value.
+ */
+ virtual void resetLastOpTimeFromOplog(OperationContext* txn) = 0;
+
+ /**
+ * Determines if a new sync source should be considered, given the current sync
+ * source 'currentSource'.
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource) = 0;
+
+ /**
+ * Writes into 'output' all the information needed to generate a summary of the current
+ * replication state for use by the web interface.
+ */
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
+
+protected:
+ ReplicationCoordinator();
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.cpp b/src/mongo/db/repl/replication_coordinator_external_state.cpp
index 68403755b07..fbeddfba68a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state.cpp
@@ -33,8 +33,8 @@
namespace mongo {
namespace repl {
- ReplicationCoordinatorExternalState::ReplicationCoordinatorExternalState() {}
- ReplicationCoordinatorExternalState::~ReplicationCoordinatorExternalState() {}
+ReplicationCoordinatorExternalState::ReplicationCoordinatorExternalState() {}
+ReplicationCoordinatorExternalState::~ReplicationCoordinatorExternalState() {}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index bb44acf5fbe..4d654dfc148 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -36,144 +36,145 @@
namespace mongo {
- class BSONObj;
- class OID;
- class OperationContext;
- class Status;
- struct HostAndPort;
- template <typename T> class StatusWith;
+class BSONObj;
+class OID;
+class OperationContext;
+class Status;
+struct HostAndPort;
+template <typename T>
+class StatusWith;
namespace repl {
+/**
+ * This class represents the interface the ReplicationCoordinator uses to interact with the
+ * rest of the system. All functionality of the ReplicationCoordinatorImpl that would introduce
+ * dependencies on large sections of the server code and thus break the unit testability of
+ * ReplicationCoordinatorImpl should be moved here.
+ */
+class ReplicationCoordinatorExternalState {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalState);
+
+public:
+ ReplicationCoordinatorExternalState();
+ virtual ~ReplicationCoordinatorExternalState();
+
+ /**
+ * Starts the background sync, producer, and sync source feedback threads
+ *
+ * NOTE: Only starts threads if they are not already started.
+ */
+ virtual void startThreads() = 0;
+
+ /**
+ * Starts the Master/Slave threads and sets up logOp
+ */
+ virtual void startMasterSlave(OperationContext* txn) = 0;
+
+ /**
+ * Performs any necessary external state specific shutdown tasks, such as cleaning up
+ * the threads it started.
+ */
+ virtual void shutdown() = 0;
+
+ /**
+ * Creates the oplog and writes the first entry.
+ */
+ virtual void initiateOplog(OperationContext* txn) = 0;
+
+ /**
+ * Simple wrapper around SyncSourceFeedback::forwardSlaveHandshake. Signals to the
+ * SyncSourceFeedback thread that it needs to wake up and send a replication handshake
+ * upstream.
+ */
+ virtual void forwardSlaveHandshake() = 0;
+
+ /**
+ * Simple wrapper around SyncSourceFeedback::forwardSlaveProgress. Signals to the
+ * SyncSourceFeedback thread that it needs to wake up and send a replSetUpdatePosition
+ * command upstream.
+ */
+ virtual void forwardSlaveProgress() = 0;
+
+ /**
+ * Queries the singleton document in local.me. If it exists and our hostname has not
+ * changed since we wrote, returns the RID stored in the object. If the document does not
+ * exist or our hostname doesn't match what was recorded in local.me, generates a new OID
+ * to use as our RID, stores it in local.me, and returns it.
+ */
+ virtual OID ensureMe(OperationContext*) = 0;
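+ // The stored singleton, as written by the impl elsewhere in this change, is roughly:
+ //     { _id: <OID rid>, host: "<hostname>" }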
+
+ /**
+ * Returns true if "host" is one of the network identities of this node.
+ */
+ virtual bool isSelf(const HostAndPort& host) = 0;
+
+ /**
+ * Gets the replica set config document from local storage, or returns an error.
+ */
+ virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn) = 0;
+
+ /**
+ * Stores the replica set config document in local storage, or returns an error.
+ */
+ virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config) = 0;
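+ // Round-trip sketch (illustrative; 'externalState' is an assumed name):
+ //
+ //     Status stored = externalState->storeLocalConfigDocument(txn, configObj);
+ //     if (stored.isOK()) {
+ //         StatusWith<BSONObj> reloaded = externalState->loadLocalConfigDocument(txn);
+ //         // reloaded.getValue() should now equal configObj
+ //     }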
+
+ /**
+ * Sets the global opTime to be 'newTime'.
+ */
+ virtual void setGlobalOpTime(const OpTime& newTime) = 0;
+
+ /**
+ * Gets the last optime of an operation performed on this host, from stable
+ * storage.
+ */
+ virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn) = 0;
+
+ /**
+ * Returns the HostAndPort of the remote client connected to us that initiated the operation
+ * represented by "txn".
+ */
+ virtual HostAndPort getClientHostAndPort(const OperationContext* txn) = 0;
+
+ /**
+ * Closes all connections except those marked with the keepOpen property, which should
+ * just be connections used for heartbeating.
+ * This is used during stepdown and transition out of primary.
+ */
+ virtual void closeConnections() = 0;
+
+ /**
+ * Kills all operations that have a Client that is associated with an incoming user
+ * connection. Used during stepdown.
+ */
+ virtual void killAllUserOperations(OperationContext* txn) = 0;
+
/**
- * This class represents the interface the ReplicationCoordinator uses to interact with the
- * rest of the system. All functionality of the ReplicationCoordinatorImpl that would introduce
- * dependencies on large sections of the server code and thus break the unit testability of
- * ReplicationCoordinatorImpl should be moved here.
- */
- class ReplicationCoordinatorExternalState {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalState);
- public:
-
- ReplicationCoordinatorExternalState();
- virtual ~ReplicationCoordinatorExternalState();
-
- /**
- * Starts the background sync, producer, and sync source feedback threads
- *
- * NOTE: Only starts threads if they are not already started,
- */
- virtual void startThreads() = 0;
-
- /**
- * Starts the Master/Slave threads and sets up logOp
- */
- virtual void startMasterSlave(OperationContext* txn) = 0;
-
- /**
- * Performs any necessary external state specific shutdown tasks, such as cleaning up
- * the threads it started.
- */
- virtual void shutdown() = 0;
-
- /**
- * Creates the oplog and writes the first entry.
- */
- virtual void initiateOplog(OperationContext* txn) = 0;
-
- /**
- * Simple wrapper around SyncSourceFeedback::forwardSlaveHandshake. Signals to the
- * SyncSourceFeedback thread that it needs to wake up and send a replication handshake
- * upstream.
- */
- virtual void forwardSlaveHandshake() = 0;
-
- /**
- * Simple wrapper around SyncSourceFeedback::forwardSlaveProgress. Signals to the
- * SyncSourceFeedback thread that it needs to wake up and send a replSetUpdatePosition
- * command upstream.
- */
- virtual void forwardSlaveProgress() = 0;
-
- /**
- * Queries the singleton document in local.me. If it exists and our hostname has not
- * changed since we wrote, returns the RID stored in the object. If the document does not
- * exist or our hostname doesn't match what was recorded in local.me, generates a new OID
- * to use as our RID, stores it in local.me, and returns it.
- */
- virtual OID ensureMe(OperationContext*) = 0;
-
- /**
- * Returns true if "host" is one of the network identities of this node.
- */
- virtual bool isSelf(const HostAndPort& host) = 0;
-
- /**
- * Gets the replica set config document from local storage, or returns an error.
- */
- virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn) = 0;
-
- /**
- * Stores the replica set config document in local storage, or returns an error.
- */
- virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config) = 0;
-
- /**
- * Sets the global opTime to be 'newTime'.
- */
- virtual void setGlobalOpTime(const OpTime& newTime) = 0;
-
- /**
- * Gets the last optime of an operation performed on this host, from stable
- * storage.
- */
- virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn) = 0;
-
- /**
- * Returns the HostAndPort of the remote client connected to us that initiated the operation
- * represented by "txn".
- */
- virtual HostAndPort getClientHostAndPort(const OperationContext* txn) = 0;
-
- /**
- * Closes all connections except those marked with the keepOpen property, which should
- * just be connections used for heartbeating.
- * This is used during stepdown, and transition out of primary.
- */
- virtual void closeConnections() = 0;
-
- /**
- * Kills all operations that have a Client that is associated with an incoming user
- * connection. Used during stepdown.
- */
- virtual void killAllUserOperations(OperationContext* txn) = 0;
-
- /**
- * Clears all cached sharding metadata on this server. This is called after stepDown to
- * ensure that if the node becomes primary again in the future it will reload an up-to-date
- * version of the sharding data.
- */
- virtual void clearShardingState() = 0;
-
- /**
- * Notifies the bgsync and syncSourceFeedback threads to choose a new sync source.
- */
- virtual void signalApplierToChooseNewSyncSource() = 0;
-
- /**
- * Returns an OperationContext, owned by the caller, that may be used in methods of
- * the same instance that require an OperationContext.
- */
- virtual OperationContext* createOperationContext(const std::string& threadName) = 0;
-
- /**
- * Drops all temporary collections on all databases except "local".
- *
- * The implementation may assume that the caller has acquired the global exclusive lock
- * for "txn".
- */
- virtual void dropAllTempCollections(OperationContext* txn) = 0;
- };
-
-} // namespace repl
-} // namespace mongo
+ * Clears all cached sharding metadata on this server. This is called after stepDown to
+ * ensure that if the node becomes primary again in the future it will reload an up-to-date
+ * version of the sharding data.
+ */
+ virtual void clearShardingState() = 0;
+
+ /**
+ * Notifies the bgsync and syncSourceFeedback threads to choose a new sync source.
+ */
+ virtual void signalApplierToChooseNewSyncSource() = 0;
+
+ /**
+ * Returns an OperationContext, owned by the caller, that may be used in methods of
+ * the same instance that require an OperationContext.
+ */
+ virtual OperationContext* createOperationContext(const std::string& threadName) = 0;
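+ // Ownership sketch (illustrative; the thread name is a made-up example): the caller must
+ // delete the returned context, e.g. by owning it in a smart pointer:
+ //
+ //     boost::scoped_ptr<OperationContext> txn(
+ //         externalState->createOperationContext("rsBackgroundSync"));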
+
+ /**
+ * Drops all temporary collections on all databases except "local".
+ *
+ * The implementation may assume that the caller has acquired the global exclusive lock
+ * for "txn".
+ */
+ virtual void dropAllTempCollections(OperationContext* txn) = 0;
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index e2472bd3406..03ad878aac9 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -65,229 +65,216 @@ namespace mongo {
namespace repl {
namespace {
- const char configCollectionName[] = "local.system.replset";
- const char configDatabaseName[] = "local";
- const char meCollectionName[] = "local.me";
- const char meDatabaseName[] = "local";
- const char tsFieldName[] = "ts";
+const char configCollectionName[] = "local.system.replset";
+const char configDatabaseName[] = "local";
+const char meCollectionName[] = "local.me";
+const char meDatabaseName[] = "local";
+const char tsFieldName[] = "ts";
} // namespace
- ReplicationCoordinatorExternalStateImpl::ReplicationCoordinatorExternalStateImpl() :
- _startedThreads(false)
- , _nextThreadId(0) {}
- ReplicationCoordinatorExternalStateImpl::~ReplicationCoordinatorExternalStateImpl() {}
+ReplicationCoordinatorExternalStateImpl::ReplicationCoordinatorExternalStateImpl()
+ : _startedThreads(false), _nextThreadId(0) {}
+ReplicationCoordinatorExternalStateImpl::~ReplicationCoordinatorExternalStateImpl() {}
- void ReplicationCoordinatorExternalStateImpl::startThreads() {
- boost::lock_guard<boost::mutex> lk(_threadMutex);
- if (_startedThreads) {
- return;
- }
- log() << "Starting replication applier threads";
- _applierThread.reset(new boost::thread(runSyncThread));
+void ReplicationCoordinatorExternalStateImpl::startThreads() {
+ boost::lock_guard<boost::mutex> lk(_threadMutex);
+ if (_startedThreads) {
+ return;
+ }
+ log() << "Starting replication applier threads";
+ _applierThread.reset(new boost::thread(runSyncThread));
+ BackgroundSync* bgsync = BackgroundSync::get();
+ _producerThread.reset(new boost::thread(stdx::bind(&BackgroundSync::producerThread, bgsync)));
+ _syncSourceFeedbackThread.reset(
+ new boost::thread(stdx::bind(&SyncSourceFeedback::run, &_syncSourceFeedback)));
+ _startedThreads = true;
+}
+
+void ReplicationCoordinatorExternalStateImpl::startMasterSlave(OperationContext* txn) {
+ repl::startMasterSlave(txn);
+}
+
+void ReplicationCoordinatorExternalStateImpl::shutdown() {
+ boost::lock_guard<boost::mutex> lk(_threadMutex);
+ if (_startedThreads) {
+ log() << "Stopping replication applier threads";
+ _syncSourceFeedback.shutdown();
+ _syncSourceFeedbackThread->join();
+ _applierThread->join();
BackgroundSync* bgsync = BackgroundSync::get();
- _producerThread.reset(new boost::thread(stdx::bind(&BackgroundSync::producerThread,
- bgsync)));
- _syncSourceFeedbackThread.reset(new boost::thread(stdx::bind(&SyncSourceFeedback::run,
- &_syncSourceFeedback)));
- _startedThreads = true;
+ bgsync->shutdown();
+ _producerThread->join();
}
-
- void ReplicationCoordinatorExternalStateImpl::startMasterSlave(OperationContext* txn) {
- repl::startMasterSlave(txn);
+}
+
+void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
+ createOplog(txn);
+
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction scopedXact(txn, MODE_X);
+ Lock::GlobalWrite globalWrite(txn->lockState());
+ WriteUnitOfWork wuow(txn);
+ logOpInitiate(txn,
+ BSON("msg"
+ << "initiating set"));
+ wuow.commit();
}
-
- void ReplicationCoordinatorExternalStateImpl::shutdown() {
- boost::lock_guard<boost::mutex> lk(_threadMutex);
- if (_startedThreads) {
- log() << "Stopping replication applier threads";
- _syncSourceFeedback.shutdown();
- _syncSourceFeedbackThread->join();
- _applierThread->join();
- BackgroundSync* bgsync = BackgroundSync::get();
- bgsync->shutdown();
- _producerThread->join();
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
+}
+
+void ReplicationCoordinatorExternalStateImpl::forwardSlaveHandshake() {
+ _syncSourceFeedback.forwardSlaveHandshake();
+}
+
+void ReplicationCoordinatorExternalStateImpl::forwardSlaveProgress() {
+ _syncSourceFeedback.forwardSlaveProgress();
+}
+
+OID ReplicationCoordinatorExternalStateImpl::ensureMe(OperationContext* txn) {
+ std::string myname = getHostName();
+ OID myRID;
+ {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock lock(txn->lockState(), meDatabaseName, MODE_X);
+
+ BSONObj me;
+ // local.me is an identifier for a server for getLastError w:2+
+ // TODO: handle WriteConflictExceptions below
+ if (!Helpers::getSingleton(txn, meCollectionName, me) || !me.hasField("host") ||
+ me["host"].String() != myname) {
+ myRID = OID::gen();
+
+ // clean out local.me
+ Helpers::emptyCollection(txn, meCollectionName);
+
+ // repopulate
+ BSONObjBuilder b;
+ b.append("_id", myRID);
+ b.append("host", myname);
+ Helpers::putSingleton(txn, meCollectionName, b.done());
+ } else {
+ myRID = me["_id"].OID();
}
}
+ return myRID;
+}
- void ReplicationCoordinatorExternalStateImpl::initiateOplog(OperationContext* txn) {
- createOplog(txn);
-
+StatusWith<BSONObj> ReplicationCoordinatorExternalStateImpl::loadLocalConfigDocument(
+ OperationContext* txn) {
+ try {
MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction scopedXact(txn, MODE_X);
- Lock::GlobalWrite globalWrite(txn->lockState());
- WriteUnitOfWork wuow(txn);
- logOpInitiate(txn, BSON("msg" << "initiating set"));
- wuow.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "initiate oplog entry", "local.oplog.rs");
- }
-
- void ReplicationCoordinatorExternalStateImpl::forwardSlaveHandshake() {
- _syncSourceFeedback.forwardSlaveHandshake();
- }
-
- void ReplicationCoordinatorExternalStateImpl::forwardSlaveProgress() {
- _syncSourceFeedback.forwardSlaveProgress();
- }
-
- OID ReplicationCoordinatorExternalStateImpl::ensureMe(OperationContext* txn) {
- std::string myname = getHostName();
- OID myRID;
- {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock lock(txn->lockState(), meDatabaseName, MODE_X);
-
- BSONObj me;
- // local.me is an identifier for a server for getLastError w:2+
- // TODO: handle WriteConflictExceptions below
- if (!Helpers::getSingleton(txn, meCollectionName, me) ||
- !me.hasField("host") ||
- me["host"].String() != myname) {
-
- myRID = OID::gen();
-
- // clean out local.me
- Helpers::emptyCollection(txn, meCollectionName);
-
- // repopulate
- BSONObjBuilder b;
- b.append("_id", myRID);
- b.append("host", myname);
- Helpers::putSingleton(txn, meCollectionName, b.done());
- } else {
- myRID = me["_id"].OID();
+ BSONObj config;
+ if (!Helpers::getSingleton(txn, configCollectionName, config)) {
+ return StatusWith<BSONObj>(
+ ErrorCodes::NoMatchingDocument,
+ str::stream() << "Did not find replica set configuration document in "
+ << configCollectionName);
}
+ return StatusWith<BSONObj>(config);
}
- return myRID;
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "load replica set config", configCollectionName);
+ } catch (const DBException& ex) {
+ return StatusWith<BSONObj>(ex.toStatus());
}
+}
- StatusWith<BSONObj> ReplicationCoordinatorExternalStateImpl::loadLocalConfigDocument(
- OperationContext* txn) {
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- BSONObj config;
- if (!Helpers::getSingleton(txn, configCollectionName, config)) {
- return StatusWith<BSONObj>(
- ErrorCodes::NoMatchingDocument,
- str::stream() << "Did not find replica set configuration document in "
- << configCollectionName);
- }
- return StatusWith<BSONObj>(config);
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn,
- "load replica set config",
- configCollectionName);
- }
- catch (const DBException& ex) {
- return StatusWith<BSONObj>(ex.toStatus());
+Status ReplicationCoordinatorExternalStateImpl::storeLocalConfigDocument(OperationContext* txn,
+ const BSONObj& config) {
+ try {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dbWriteLock(txn->lockState(), configDatabaseName, MODE_X);
+ Helpers::putSingleton(txn, configCollectionName, config);
+ return Status::OK();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "save replica set config", configCollectionName);
+ } catch (const DBException& ex) {
+ return ex.toStatus();
}
-
- Status ReplicationCoordinatorExternalStateImpl::storeLocalConfigDocument(
- OperationContext* txn,
- const BSONObj& config) {
- try {
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dbWriteLock(txn->lockState(), configDatabaseName, MODE_X);
- Helpers::putSingleton(txn, configCollectionName, config);
- return Status::OK();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn,
- "save replica set config",
- configCollectionName);
+}
+
+void ReplicationCoordinatorExternalStateImpl::setGlobalOpTime(const OpTime& newTime) {
+ setNewOptime(newTime);
+}
+
+StatusWith<OpTime> ReplicationCoordinatorExternalStateImpl::loadLastOpTime(OperationContext* txn) {
+ // TODO: handle WriteConflictExceptions below
+ try {
+ BSONObj oplogEntry;
+ if (!Helpers::getLast(txn, rsoplog, oplogEntry)) {
+ return StatusWith<OpTime>(ErrorCodes::NoMatchingDocument,
+ str::stream() << "Did not find any entries in " << rsoplog);
}
- catch (const DBException& ex) {
- return ex.toStatus();
+ BSONElement tsElement = oplogEntry[tsFieldName];
+ if (tsElement.eoo()) {
+ return StatusWith<OpTime>(ErrorCodes::NoSuchKey,
+ str::stream() << "Most recent entry in " << rsoplog
+ << " missing \"" << tsFieldName << "\" field");
}
-
- }
-
- void ReplicationCoordinatorExternalStateImpl::setGlobalOpTime(const OpTime& newTime) {
- setNewOptime(newTime);
- }
-
- StatusWith<OpTime> ReplicationCoordinatorExternalStateImpl::loadLastOpTime(
- OperationContext* txn) {
-
- // TODO: handle WriteConflictExceptions below
- try {
- BSONObj oplogEntry;
- if (!Helpers::getLast(txn, rsoplog, oplogEntry)) {
- return StatusWith<OpTime>(
- ErrorCodes::NoMatchingDocument,
- str::stream() << "Did not find any entries in " << rsoplog);
- }
- BSONElement tsElement = oplogEntry[tsFieldName];
- if (tsElement.eoo()) {
- return StatusWith<OpTime>(
- ErrorCodes::NoSuchKey,
- str::stream() << "Most recent entry in " << rsoplog << " missing \"" <<
- tsFieldName << "\" field");
- }
- if (tsElement.type() != Timestamp) {
- return StatusWith<OpTime>(
- ErrorCodes::TypeMismatch,
- str::stream() << "Expected type of \"" << tsFieldName <<
- "\" in most recent " << rsoplog <<
- " entry to have type Timestamp, but found " << typeName(tsElement.type()));
- }
- return StatusWith<OpTime>(tsElement._opTime());
- }
- catch (const DBException& ex) {
- return StatusWith<OpTime>(ex.toStatus());
+ if (tsElement.type() != Timestamp) {
+ return StatusWith<OpTime>(ErrorCodes::TypeMismatch,
+ str::stream() << "Expected type of \"" << tsFieldName
+ << "\" in most recent " << rsoplog
+ << " entry to have type Timestamp, but found "
+ << typeName(tsElement.type()));
}
+ return StatusWith<OpTime>(tsElement._opTime());
+ } catch (const DBException& ex) {
+ return StatusWith<OpTime>(ex.toStatus());
}
-
- bool ReplicationCoordinatorExternalStateImpl::isSelf(const HostAndPort& host) {
- return repl::isSelf(host);
-
- }
-
- HostAndPort ReplicationCoordinatorExternalStateImpl::getClientHostAndPort(
- const OperationContext* txn) {
- return HostAndPort(txn->getClient()->clientAddress(true));
- }
-
- void ReplicationCoordinatorExternalStateImpl::closeConnections() {
- MessagingPort::closeAllSockets(ScopedConn::keepOpen);
- }
-
- void ReplicationCoordinatorExternalStateImpl::killAllUserOperations(OperationContext* txn) {
- GlobalEnvironmentExperiment* environment = getGlobalEnvironment();
- environment->killAllUserOperations(txn);
- }
-
- void ReplicationCoordinatorExternalStateImpl::clearShardingState() {
- shardingState.resetShardingState();
- }
-
- void ReplicationCoordinatorExternalStateImpl::signalApplierToChooseNewSyncSource() {
- BackgroundSync::get()->clearSyncTarget();
- }
-
- OperationContext* ReplicationCoordinatorExternalStateImpl::createOperationContext(
- const std::string& threadName) {
- Client::initThreadIfNotAlready(threadName.c_str());
- return new OperationContextImpl;
- }
-
- void ReplicationCoordinatorExternalStateImpl::dropAllTempCollections(OperationContext* txn) {
- std::vector<std::string> dbNames;
- StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
- storageEngine->listDatabases(&dbNames);
-
- for (std::vector<std::string>::iterator it = dbNames.begin(); it != dbNames.end(); ++it) {
- // The local db is special because it isn't replicated. It is cleared at startup even on
- // replica set members.
- if (*it == "local")
- continue;
- LOG(2) << "Removing temporary collections from " << *it;
- Database* db = dbHolder().get(txn, *it);
- // Since we must be holding the global lock during this function, if listDatabases
- // returned this dbname, we should be able to get a reference to it - it can't have
- // been dropped.
- invariant(db);
- db->clearTmpCollections(txn);
- }
+}
+
+bool ReplicationCoordinatorExternalStateImpl::isSelf(const HostAndPort& host) {
+ return repl::isSelf(host);
+}
+
+HostAndPort ReplicationCoordinatorExternalStateImpl::getClientHostAndPort(
+ const OperationContext* txn) {
+ return HostAndPort(txn->getClient()->clientAddress(true));
+}
+
+void ReplicationCoordinatorExternalStateImpl::closeConnections() {
+ MessagingPort::closeAllSockets(ScopedConn::keepOpen);
+}
+
+void ReplicationCoordinatorExternalStateImpl::killAllUserOperations(OperationContext* txn) {
+ GlobalEnvironmentExperiment* environment = getGlobalEnvironment();
+ environment->killAllUserOperations(txn);
+}
+
+void ReplicationCoordinatorExternalStateImpl::clearShardingState() {
+ shardingState.resetShardingState();
+}
+
+void ReplicationCoordinatorExternalStateImpl::signalApplierToChooseNewSyncSource() {
+ BackgroundSync::get()->clearSyncTarget();
+}
+
+OperationContext* ReplicationCoordinatorExternalStateImpl::createOperationContext(
+ const std::string& threadName) {
+ Client::initThreadIfNotAlready(threadName.c_str());
+ return new OperationContextImpl;
+}
+
+void ReplicationCoordinatorExternalStateImpl::dropAllTempCollections(OperationContext* txn) {
+ std::vector<std::string> dbNames;
+ StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
+ storageEngine->listDatabases(&dbNames);
+
+ for (std::vector<std::string>::iterator it = dbNames.begin(); it != dbNames.end(); ++it) {
+ // The local db is special because it isn't replicated. It is cleared at startup even on
+ // replica set members.
+ if (*it == "local")
+ continue;
+ LOG(2) << "Removing temporary collections from " << *it;
+ Database* db = dbHolder().get(txn, *it);
+ // Since we must be holding the global lock during this function, if listDatabases
+ // returned this dbname, we should be able to get a reference to it - it can't have
+ // been dropped.
+ invariant(db);
+ db->clearTmpCollections(txn);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index ed4c01b5823..7918479cbc1 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -39,60 +39,60 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalStateImpl : public ReplicationCoordinatorExternalState {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateImpl);
- public:
+class ReplicationCoordinatorExternalStateImpl : public ReplicationCoordinatorExternalState {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateImpl);
- ReplicationCoordinatorExternalStateImpl();
- virtual ~ReplicationCoordinatorExternalStateImpl();
- virtual void startThreads();
- virtual void startMasterSlave(OperationContext* txn);
- virtual void shutdown();
- virtual void initiateOplog(OperationContext* txn);
- virtual void forwardSlaveHandshake();
- virtual void forwardSlaveProgress();
- virtual OID ensureMe(OperationContext* txn);
- virtual bool isSelf(const HostAndPort& host);
- virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
- virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
- virtual void setGlobalOpTime(const OpTime& newTime);
- virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
- virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
- virtual void closeConnections();
- virtual void killAllUserOperations(OperationContext* txn);
- virtual void clearShardingState();
- virtual void signalApplierToChooseNewSyncSource();
- virtual OperationContext* createOperationContext(const std::string& threadName);
- virtual void dropAllTempCollections(OperationContext* txn);
+public:
+ ReplicationCoordinatorExternalStateImpl();
+ virtual ~ReplicationCoordinatorExternalStateImpl();
+ virtual void startThreads();
+ virtual void startMasterSlave(OperationContext* txn);
+ virtual void shutdown();
+ virtual void initiateOplog(OperationContext* txn);
+ virtual void forwardSlaveHandshake();
+ virtual void forwardSlaveProgress();
+ virtual OID ensureMe(OperationContext* txn);
+ virtual bool isSelf(const HostAndPort& host);
+ virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
+ virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
+ virtual void setGlobalOpTime(const OpTime& newTime);
+ virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
+ virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
+ virtual void closeConnections();
+ virtual void killAllUserOperations(OperationContext* txn);
+ virtual void clearShardingState();
+ virtual void signalApplierToChooseNewSyncSource();
+ virtual OperationContext* createOperationContext(const std::string& threadName);
+ virtual void dropAllTempCollections(OperationContext* txn);
- std::string getNextOpContextThreadName();
+ std::string getNextOpContextThreadName();
- private:
- // Guards starting threads and setting _startedThreads
- boost::mutex _threadMutex;
+private:
+ // Guards starting threads and setting _startedThreads
+ boost::mutex _threadMutex;
- // True when the threads have been started
- bool _startedThreads;
+ // True when the threads have been started
+ bool _startedThreads;
- // The SyncSourceFeedback class is responsible for sending replSetUpdatePosition commands
- // for forwarding replication progress information upstream when there is chained
- // replication.
- SyncSourceFeedback _syncSourceFeedback;
+ // The SyncSourceFeedback class is responsible for sending replSetUpdatePosition commands
+ // for forwarding replication progress information upstream when there is chained
+ // replication.
+ SyncSourceFeedback _syncSourceFeedback;
- // Thread running SyncSourceFeedback::run().
- boost::scoped_ptr<boost::thread> _syncSourceFeedbackThread;
+ // Thread running SyncSourceFeedback::run().
+ boost::scoped_ptr<boost::thread> _syncSourceFeedbackThread;
- // Thread running runSyncThread().
- boost::scoped_ptr<boost::thread> _applierThread;
+ // Thread running runSyncThread().
+ boost::scoped_ptr<boost::thread> _applierThread;
- // Thread running BackgroundSync::producerThread().
- boost::scoped_ptr<boost::thread> _producerThread;
+ // Thread running BackgroundSync::producerThread().
+ boost::scoped_ptr<boost::thread> _producerThread;
- // Mutex guarding the _nextThreadId value to prevent concurrent incrementing.
- boost::mutex _nextThreadIdMutex;
- // Number used to uniquely name threads.
- long long _nextThreadId;
- };
+ // Mutex guarding the _nextThreadId value to prevent concurrent incrementing.
+ boost::mutex _nextThreadIdMutex;
+ // Number used to uniquely name threads.
+ long long _nextThreadId;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index e88cf78d9ce..941575a7f26 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -42,114 +42,108 @@
namespace mongo {
namespace repl {
- ReplicationCoordinatorExternalStateMock::ReplicationCoordinatorExternalStateMock()
- : _localRsConfigDocument(ErrorCodes::NoMatchingDocument, "No local config document"),
- _lastOpTime(ErrorCodes::NoMatchingDocument, "No last oplog entry"),
- _canAcquireGlobalSharedLock(true),
- _storeLocalConfigDocumentStatus(Status::OK()),
- _storeLocalConfigDocumentShouldHang(false),
- _connectionsClosed(false) {
- }
-
- ReplicationCoordinatorExternalStateMock::~ReplicationCoordinatorExternalStateMock() {}
-
- void ReplicationCoordinatorExternalStateMock::startThreads() {}
- void ReplicationCoordinatorExternalStateMock::startMasterSlave(OperationContext*) {}
- void ReplicationCoordinatorExternalStateMock::initiateOplog(OperationContext* txn) {}
- void ReplicationCoordinatorExternalStateMock::shutdown() {}
- void ReplicationCoordinatorExternalStateMock::forwardSlaveHandshake() {}
- void ReplicationCoordinatorExternalStateMock::forwardSlaveProgress() {}
-
- OID ReplicationCoordinatorExternalStateMock::ensureMe(OperationContext*) {
- return OID::gen();
- }
-
- bool ReplicationCoordinatorExternalStateMock::isSelf(const HostAndPort& host) {
- return sequenceContains(_selfHosts, host);
- }
-
- void ReplicationCoordinatorExternalStateMock::addSelf(const HostAndPort& host) {
- _selfHosts.push_back(host);
- }
-
- HostAndPort ReplicationCoordinatorExternalStateMock::getClientHostAndPort(
- const OperationContext* txn) {
- return _clientHostAndPort;
- }
-
- void ReplicationCoordinatorExternalStateMock::setClientHostAndPort(
- const HostAndPort& clientHostAndPort) {
- _clientHostAndPort = clientHostAndPort;
- }
-
- StatusWith<BSONObj> ReplicationCoordinatorExternalStateMock::loadLocalConfigDocument(
- OperationContext* txn) {
- return _localRsConfigDocument;
- }
-
- Status ReplicationCoordinatorExternalStateMock::storeLocalConfigDocument(
- OperationContext* txn,
- const BSONObj& config) {
- {
- boost::unique_lock<boost::mutex> lock(_shouldHangMutex);
- while (_storeLocalConfigDocumentShouldHang) {
- _shouldHangCondVar.wait(lock);
- }
- }
- if (_storeLocalConfigDocumentStatus.isOK()) {
- setLocalConfigDocument(StatusWith<BSONObj>(config));
- return Status::OK();
+ReplicationCoordinatorExternalStateMock::ReplicationCoordinatorExternalStateMock()
+ : _localRsConfigDocument(ErrorCodes::NoMatchingDocument, "No local config document"),
+ _lastOpTime(ErrorCodes::NoMatchingDocument, "No last oplog entry"),
+ _canAcquireGlobalSharedLock(true),
+ _storeLocalConfigDocumentStatus(Status::OK()),
+ _storeLocalConfigDocumentShouldHang(false),
+ _connectionsClosed(false) {}
+
+ReplicationCoordinatorExternalStateMock::~ReplicationCoordinatorExternalStateMock() {}
+
+void ReplicationCoordinatorExternalStateMock::startThreads() {}
+void ReplicationCoordinatorExternalStateMock::startMasterSlave(OperationContext*) {}
+void ReplicationCoordinatorExternalStateMock::initiateOplog(OperationContext* txn) {}
+void ReplicationCoordinatorExternalStateMock::shutdown() {}
+void ReplicationCoordinatorExternalStateMock::forwardSlaveHandshake() {}
+void ReplicationCoordinatorExternalStateMock::forwardSlaveProgress() {}
+
+OID ReplicationCoordinatorExternalStateMock::ensureMe(OperationContext*) {
+ return OID::gen();
+}
+
+bool ReplicationCoordinatorExternalStateMock::isSelf(const HostAndPort& host) {
+ return sequenceContains(_selfHosts, host);
+}
+
+void ReplicationCoordinatorExternalStateMock::addSelf(const HostAndPort& host) {
+ _selfHosts.push_back(host);
+}
+
+HostAndPort ReplicationCoordinatorExternalStateMock::getClientHostAndPort(
+ const OperationContext* txn) {
+ return _clientHostAndPort;
+}
+
+void ReplicationCoordinatorExternalStateMock::setClientHostAndPort(
+ const HostAndPort& clientHostAndPort) {
+ _clientHostAndPort = clientHostAndPort;
+}
+
+StatusWith<BSONObj> ReplicationCoordinatorExternalStateMock::loadLocalConfigDocument(
+ OperationContext* txn) {
+ return _localRsConfigDocument;
+}
+
+Status ReplicationCoordinatorExternalStateMock::storeLocalConfigDocument(OperationContext* txn,
+ const BSONObj& config) {
+ {
+ boost::unique_lock<boost::mutex> lock(_shouldHangMutex);
+ while (_storeLocalConfigDocumentShouldHang) {
+ _shouldHangCondVar.wait(lock);
}
- return _storeLocalConfigDocumentStatus;
}
-
- void ReplicationCoordinatorExternalStateMock::setLocalConfigDocument(
- const StatusWith<BSONObj>& localConfigDocument) {
-
- _localRsConfigDocument = localConfigDocument;
+ if (_storeLocalConfigDocumentStatus.isOK()) {
+ setLocalConfigDocument(StatusWith<BSONObj>(config));
+ return Status::OK();
}
+ return _storeLocalConfigDocumentStatus;
+}
- void ReplicationCoordinatorExternalStateMock::setGlobalOpTime(const OpTime& newTime) {
- }
+void ReplicationCoordinatorExternalStateMock::setLocalConfigDocument(
+ const StatusWith<BSONObj>& localConfigDocument) {
+ _localRsConfigDocument = localConfigDocument;
+}
- StatusWith<OpTime> ReplicationCoordinatorExternalStateMock::loadLastOpTime(
- OperationContext* txn) {
- return _lastOpTime;
- }
+void ReplicationCoordinatorExternalStateMock::setGlobalOpTime(const OpTime& newTime) {}
- void ReplicationCoordinatorExternalStateMock::setLastOpTime(
- const StatusWith<OpTime>& lastApplied) {
- _lastOpTime = lastApplied;
- }
+StatusWith<OpTime> ReplicationCoordinatorExternalStateMock::loadLastOpTime(OperationContext* txn) {
+ return _lastOpTime;
+}
- void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentStatus(Status status) {
- _storeLocalConfigDocumentStatus = status;
- }
+void ReplicationCoordinatorExternalStateMock::setLastOpTime(const StatusWith<OpTime>& lastApplied) {
+ _lastOpTime = lastApplied;
+}
- void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentToHang(bool hang) {
- boost::unique_lock<boost::mutex> lock(_shouldHangMutex);
- _storeLocalConfigDocumentShouldHang = hang;
- if (!hang) {
- _shouldHangCondVar.notify_all();
- }
- }
+void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentStatus(Status status) {
+ _storeLocalConfigDocumentStatus = status;
+}
- void ReplicationCoordinatorExternalStateMock::closeConnections() {
- _connectionsClosed = true;
+void ReplicationCoordinatorExternalStateMock::setStoreLocalConfigDocumentToHang(bool hang) {
+ boost::unique_lock<boost::mutex> lock(_shouldHangMutex);
+ _storeLocalConfigDocumentShouldHang = hang;
+ if (!hang) {
+ _shouldHangCondVar.notify_all();
}
+}
- void ReplicationCoordinatorExternalStateMock::killAllUserOperations(OperationContext* txn) {}
+void ReplicationCoordinatorExternalStateMock::closeConnections() {
+ _connectionsClosed = true;
+}
- void ReplicationCoordinatorExternalStateMock::clearShardingState() {}
+void ReplicationCoordinatorExternalStateMock::killAllUserOperations(OperationContext* txn) {}
- void ReplicationCoordinatorExternalStateMock::signalApplierToChooseNewSyncSource() {}
+void ReplicationCoordinatorExternalStateMock::clearShardingState() {}
- OperationContext* ReplicationCoordinatorExternalStateMock::createOperationContext(
- const std::string& threadName) {
- return new OperationContextReplMock;
- }
+void ReplicationCoordinatorExternalStateMock::signalApplierToChooseNewSyncSource() {}
+
+OperationContext* ReplicationCoordinatorExternalStateMock::createOperationContext(
+ const std::string& threadName) {
+ return new OperationContextReplMock;
+}
- void ReplicationCoordinatorExternalStateMock::dropAllTempCollections(OperationContext* txn) {}
+void ReplicationCoordinatorExternalStateMock::dropAllTempCollections(OperationContext* txn) {}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index 0e44b0cc2e7..dd648dd72e9 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -41,79 +41,80 @@
namespace mongo {
namespace repl {
- class ReplicationCoordinatorExternalStateMock : public ReplicationCoordinatorExternalState {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateMock);
- public:
- class GlobalSharedLockAcquirer;
+class ReplicationCoordinatorExternalStateMock : public ReplicationCoordinatorExternalState {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorExternalStateMock);
- ReplicationCoordinatorExternalStateMock();
- virtual ~ReplicationCoordinatorExternalStateMock();
- virtual void startThreads();
- virtual void startMasterSlave(OperationContext*);
- virtual void shutdown();
- virtual void initiateOplog(OperationContext* txn);
- virtual void forwardSlaveHandshake();
- virtual void forwardSlaveProgress();
- virtual OID ensureMe(OperationContext*);
- virtual bool isSelf(const HostAndPort& host);
- virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
- virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
- virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
- virtual void setGlobalOpTime(const OpTime& newTime);
- virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
- virtual void closeConnections();
- virtual void killAllUserOperations(OperationContext* txn);
- virtual void clearShardingState();
- virtual void signalApplierToChooseNewSyncSource();
- virtual OperationContext* createOperationContext(const std::string& threadName);
- virtual void dropAllTempCollections(OperationContext* txn);
+public:
+ class GlobalSharedLockAcquirer;
- /**
- * Adds "host" to the list of hosts that this mock will match when responding to "isSelf"
- * messages.
- */
- void addSelf(const HostAndPort& host);
+ ReplicationCoordinatorExternalStateMock();
+ virtual ~ReplicationCoordinatorExternalStateMock();
+ virtual void startThreads();
+ virtual void startMasterSlave(OperationContext*);
+ virtual void shutdown();
+ virtual void initiateOplog(OperationContext* txn);
+ virtual void forwardSlaveHandshake();
+ virtual void forwardSlaveProgress();
+ virtual OID ensureMe(OperationContext*);
+ virtual bool isSelf(const HostAndPort& host);
+ virtual HostAndPort getClientHostAndPort(const OperationContext* txn);
+ virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* txn);
+ virtual Status storeLocalConfigDocument(OperationContext* txn, const BSONObj& config);
+ virtual void setGlobalOpTime(const OpTime& newTime);
+ virtual StatusWith<OpTime> loadLastOpTime(OperationContext* txn);
+ virtual void closeConnections();
+ virtual void killAllUserOperations(OperationContext* txn);
+ virtual void clearShardingState();
+ virtual void signalApplierToChooseNewSyncSource();
+ virtual OperationContext* createOperationContext(const std::string& threadName);
+ virtual void dropAllTempCollections(OperationContext* txn);
- /**
- * Sets the return value for subsequent calls to loadLocalConfigDocument().
- */
- void setLocalConfigDocument(const StatusWith<BSONObj>& localConfigDocument);
+ /**
+ * Adds "host" to the list of hosts that this mock will match when responding to "isSelf"
+ * messages.
+ */
+ void addSelf(const HostAndPort& host);
- /**
- * Sets the return value for subsequent calls to getClientHostAndPort().
- */
- void setClientHostAndPort(const HostAndPort& clientHostAndPort);
+ /**
+ * Sets the return value for subsequent calls to loadLocalConfigDocument().
+ */
+ void setLocalConfigDocument(const StatusWith<BSONObj>& localConfigDocument);
- /**
- * Sets the return value for subsequent calls to loadLastOpTimeApplied.
- */
- void setLastOpTime(const StatusWith<OpTime>& lastApplied);
+ /**
+ * Sets the return value for subsequent calls to getClientHostAndPort().
+ */
+ void setClientHostAndPort(const HostAndPort& clientHostAndPort);
- /**
- * Sets the return value for subsequent calls to storeLocalConfigDocument().
- * If "status" is Status::OK(), the subsequent calls will call the underlying funtion.
- */
- void setStoreLocalConfigDocumentStatus(Status status);
+ /**
+ * Sets the return value for subsequent calls to loadLastOpTime().
+ */
+ void setLastOpTime(const StatusWith<OpTime>& lastApplied);
- /**
- * Sets whether or not subsequent calls to storeLocalConfigDocument() should hang
- * indefinitely or not based on the value of "hang".
- */
- void setStoreLocalConfigDocumentToHang(bool hang);
+ /**
+ * Sets the return value for subsequent calls to storeLocalConfigDocument().
+ * If "status" is Status::OK(), the subsequent calls will call the underlying funtion.
+ */
+ void setStoreLocalConfigDocumentStatus(Status status);
- private:
- StatusWith<BSONObj> _localRsConfigDocument;
- StatusWith<OpTime> _lastOpTime;
- std::vector<HostAndPort> _selfHosts;
- bool _canAcquireGlobalSharedLock;
- Status _storeLocalConfigDocumentStatus;
- // mutex and cond var for controlling stroeLocalConfigDocument()'s hanging
- boost::mutex _shouldHangMutex;
- boost::condition _shouldHangCondVar;
- bool _storeLocalConfigDocumentShouldHang;
- bool _connectionsClosed;
- HostAndPort _clientHostAndPort;
- };
+ /**
+ * Sets whether or not subsequent calls to storeLocalConfigDocument() should hang
+ * indefinitely or not based on the value of "hang".
+ */
+ void setStoreLocalConfigDocumentToHang(bool hang);
-} // namespace repl
-} // namespace mongo
+private:
+ StatusWith<BSONObj> _localRsConfigDocument;
+ StatusWith<OpTime> _lastOpTime;
+ std::vector<HostAndPort> _selfHosts;
+ bool _canAcquireGlobalSharedLock;
+ Status _storeLocalConfigDocumentStatus;
+ // mutex and cond var for controlling storeLocalConfigDocument()'s hanging
+ boost::mutex _shouldHangMutex;
+ boost::condition _shouldHangCondVar;
+ bool _storeLocalConfigDocumentShouldHang;
+ bool _connectionsClosed;
+ HostAndPort _clientHostAndPort;
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_global.cpp b/src/mongo/db/repl/replication_coordinator_global.cpp
index a586f65b437..03891d163d6 100644
--- a/src/mongo/db/repl/replication_coordinator_global.cpp
+++ b/src/mongo/db/repl/replication_coordinator_global.cpp
@@ -34,16 +34,16 @@ namespace mongo {
namespace repl {
namespace {
- ReplicationCoordinator* coordinator = NULL;
-} // namespace
+ReplicationCoordinator* coordinator = NULL;
+} // namespace
- ReplicationCoordinator* getGlobalReplicationCoordinator() {
- return coordinator;
- }
+ReplicationCoordinator* getGlobalReplicationCoordinator() {
+ return coordinator;
+}
- void setGlobalReplicationCoordinator(ReplicationCoordinator* newCoordinator) {
- coordinator = newCoordinator;
- }
+void setGlobalReplicationCoordinator(ReplicationCoordinator* newCoordinator) {
+ coordinator = newCoordinator;
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_global.h b/src/mongo/db/repl/replication_coordinator_global.h
index c107959dbf6..a18033fd162 100644
--- a/src/mongo/db/repl/replication_coordinator_global.h
+++ b/src/mongo/db/repl/replication_coordinator_global.h
@@ -33,8 +33,8 @@
namespace mongo {
namespace repl {
- ReplicationCoordinator* getGlobalReplicationCoordinator();
- void setGlobalReplicationCoordinator(ReplicationCoordinator* coordinator);
+ReplicationCoordinator* getGlobalReplicationCoordinator();
+void setGlobalReplicationCoordinator(ReplicationCoordinator* coordinator);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 40b28dbb546..adac75fab35 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -67,1620 +67,1568 @@ namespace mongo {
namespace repl {
namespace {
- typedef StatusWith<ReplicationExecutor::CallbackHandle> CBHStatus;
+typedef StatusWith<ReplicationExecutor::CallbackHandle> CBHStatus;
- void lockAndCall(boost::unique_lock<boost::mutex>* lk, const stdx::function<void ()>& fn) {
- if (!lk->owns_lock()) {
- lk->lock();
- }
- fn();
+void lockAndCall(boost::unique_lock<boost::mutex>* lk, const stdx::function<void()>& fn) {
+ if (!lk->owns_lock()) {
+ lk->lock();
}
+ fn();
+}
- /**
- * Implements the force-reconfig behavior of incrementing config version by a large random
- * number.
- */
- BSONObj incrementConfigVersionByRandom(BSONObj config) {
- BSONObjBuilder builder;
- for (BSONObjIterator iter(config); iter.more(); iter.next()) {
- BSONElement elem = *iter;
- if (elem.fieldNameStringData() == ReplicaSetConfig::kVersionFieldName &&
- elem.isNumber()) {
-
- boost::scoped_ptr<SecureRandom> generator(SecureRandom::create());
- const int random = std::abs(static_cast<int>(generator->nextInt64()) % 100000);
- builder.appendIntOrLL(ReplicaSetConfig::kVersionFieldName,
- elem.numberLong() + 10000 + random);
- }
- else {
- builder.append(elem);
- }
- }
- return builder.obj();
- }
-
-} //namespace
-
- struct ReplicationCoordinatorImpl::WaiterInfo {
-
- /**
- * Constructor takes the list of waiters and enqueues itself on the list, removing itself
- * in the destructor.
- */
- WaiterInfo(std::vector<WaiterInfo*>* _list,
- unsigned int _opID,
- const OpTime* _opTime,
- const WriteConcernOptions* _writeConcern,
- boost::condition_variable* _condVar) : list(_list),
- master(true),
- opID(_opID),
- opTime(_opTime),
- writeConcern(_writeConcern),
- condVar(_condVar) {
- list->push_back(this);
+/**
+ * Implements the force-reconfig behavior of incrementing config version by a large random
+ * number.
+ */
+BSONObj incrementConfigVersionByRandom(BSONObj config) {
+ BSONObjBuilder builder;
+ for (BSONObjIterator iter(config); iter.more(); iter.next()) {
+ BSONElement elem = *iter;
+ if (elem.fieldNameStringData() == ReplicaSetConfig::kVersionFieldName && elem.isNumber()) {
+ boost::scoped_ptr<SecureRandom> generator(SecureRandom::create());
+ const int random = std::abs(static_cast<int>(generator->nextInt64()) % 100000);
+ builder.appendIntOrLL(ReplicaSetConfig::kVersionFieldName,
+ elem.numberLong() + 10000 + random);
+ } else {
+ builder.append(elem);
}
+ }
+ return builder.obj();
+}
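+// Illustrative effect, derived from the arithmetic above: a config at version 3 is bumped to
+// a version in [10003, 110002], so a forced reconfig is overwhelmingly likely to win any
+// subsequent config-version comparison.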
- ~WaiterInfo() {
- list->erase(std::remove(list->begin(), list->end(), this), list->end());
- }
+} // namespace
- std::vector<WaiterInfo*>* list;
- bool master; // Set to false to indicate that stepDown was called while waiting
- const unsigned int opID;
- const OpTime* opTime;
- const WriteConcernOptions* writeConcern;
- boost::condition_variable* condVar;
- };
+struct ReplicationCoordinatorImpl::WaiterInfo {
+ /**
+ * Constructor takes the list of waiters and enqueues itself on the list, removing itself
+ * in the destructor.
+ */
+ WaiterInfo(std::vector<WaiterInfo*>* _list,
+ unsigned int _opID,
+ const OpTime* _opTime,
+ const WriteConcernOptions* _writeConcern,
+ boost::condition_variable* _condVar)
+ : list(_list),
+ master(true),
+ opID(_opID),
+ opTime(_opTime),
+ writeConcern(_writeConcern),
+ condVar(_condVar) {
+ list->push_back(this);
+ }
+
+ ~WaiterInfo() {
+ list->erase(std::remove(list->begin(), list->end(), this), list->end());
+ }
+
+ std::vector<WaiterInfo*>* list;
+ bool master; // Set to false to indicate that stepDown was called while waiting
+ const unsigned int opID;
+ const OpTime* opTime;
+ const WriteConcernOptions* writeConcern;
+ boost::condition_variable* condVar;
+};
namespace {
- ReplicationCoordinator::Mode getReplicationModeFromSettings(const ReplSettings& settings) {
- if (settings.usingReplSets()) {
- return ReplicationCoordinator::modeReplSet;
- }
- if (settings.master || settings.slave) {
- return ReplicationCoordinator::modeMasterSlave;
- }
- return ReplicationCoordinator::modeNone;
+ReplicationCoordinator::Mode getReplicationModeFromSettings(const ReplSettings& settings) {
+ if (settings.usingReplSets()) {
+ return ReplicationCoordinator::modeReplSet;
+ }
+ if (settings.master || settings.slave) {
+ return ReplicationCoordinator::modeMasterSlave;
}
+ return ReplicationCoordinator::modeNone;
+}
} // namespace
- ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
- const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- ReplicationExecutor::NetworkInterface* network,
- TopologyCoordinator* topCoord,
- int64_t prngSeed) :
- _settings(settings),
- _replMode(getReplicationModeFromSettings(settings)),
- _topCoord(topCoord),
- _replExecutor(network, prngSeed),
- _externalState(externalState),
- _inShutdown(false),
- _memberState(MemberState::RS_STARTUP),
- _isWaitingForDrainToComplete(false),
- _rsConfigState(kConfigPreStart),
- _selfIndex(-1),
- _sleptLastElection(false),
- _canAcceptNonLocalWrites(!(settings.usingReplSets() || settings.slave)),
- _canServeNonLocalReads(0U) {
-
- if (!isReplEnabled()) {
- return;
- }
-
- boost::scoped_ptr<SecureRandom> rbidGenerator(SecureRandom::create());
- _rbid = static_cast<int>(rbidGenerator->nextInt64());
- if (_rbid < 0) {
- // Ensure _rbid is always positive
- _rbid = -_rbid;
- }
-
- // Make sure there is always an entry in _slaveInfo for ourself.
- SlaveInfo selfInfo;
- selfInfo.self = true;
- _slaveInfo.push_back(selfInfo);
+ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
+ const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ ReplicationExecutor::NetworkInterface* network,
+ TopologyCoordinator* topCoord,
+ int64_t prngSeed)
+ : _settings(settings),
+ _replMode(getReplicationModeFromSettings(settings)),
+ _topCoord(topCoord),
+ _replExecutor(network, prngSeed),
+ _externalState(externalState),
+ _inShutdown(false),
+ _memberState(MemberState::RS_STARTUP),
+ _isWaitingForDrainToComplete(false),
+ _rsConfigState(kConfigPreStart),
+ _selfIndex(-1),
+ _sleptLastElection(false),
+ _canAcceptNonLocalWrites(!(settings.usingReplSets() || settings.slave)),
+ _canServeNonLocalReads(0U) {
+ if (!isReplEnabled()) {
+ return;
+ }
+
+ boost::scoped_ptr<SecureRandom> rbidGenerator(SecureRandom::create());
+ _rbid = static_cast<int>(rbidGenerator->nextInt64());
+ if (_rbid < 0) {
+ // Ensure _rbid is always positive
+ _rbid = -_rbid;
+ }
+
+ // Make sure there is always an entry in _slaveInfo for ourself.
+ SlaveInfo selfInfo;
+ selfInfo.self = true;
+ _slaveInfo.push_back(selfInfo);
+}
+
+ReplicationCoordinatorImpl::~ReplicationCoordinatorImpl() {}
+
+void ReplicationCoordinatorImpl::waitForStartUpComplete() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ _rsConfigStateChange.wait(lk);
+ }
+}
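+
+// Editor's note: the function above is the classic condition-variable idiom, sketched
+// here over plain types (illustrative only, not part of the original change). Waiting in
+// a loop that rechecks the predicate under the mutex handles both spurious wakeups and
+// state changes that happen before the wait begins. Hypothetical names.
+static void waitUntilReadyExample(boost::mutex& m, boost::condition_variable& cv, bool& ready) {
+    boost::unique_lock<boost::mutex> lk(m);
+    while (!ready) {
+        cv.wait(lk);  // atomically releases m while blocked, reacquires before returning
+    }
+}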
+
+ReplicaSetConfig ReplicationCoordinatorImpl::getReplicaSetConfig_forTest() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return _rsConfig;
+}
+
+bool ReplicationCoordinatorImpl::_startLoadLocalConfig(OperationContext* txn) {
+ StatusWith<BSONObj> cfg = _externalState->loadLocalConfigDocument(txn);
+ if (!cfg.isOK()) {
+ log() << "Did not find local replica set configuration document at startup; "
+ << cfg.getStatus();
+ return true;
}
-
- ReplicationCoordinatorImpl::~ReplicationCoordinatorImpl() {}
-
- void ReplicationCoordinatorImpl::waitForStartUpComplete() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- _rsConfigStateChange.wait(lk);
+ ReplicaSetConfig localConfig;
+ Status status = localConfig.initialize(cfg.getValue());
+ if (!status.isOK()) {
+ error() << "Locally stored replica set configuration does not parse; See "
+ "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config "
+ "for information on how to recover from this. Got \"" << status
+ << "\" while parsing " << cfg.getValue();
+ fassertFailedNoTrace(28545);
+ }
+
+ StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
+
+    // Use a callback here, because _finishLoadLocalConfig calls isself(), which requires
+ // that the server's networking layer be up and running and accepting connections, which
+ // doesn't happen until startReplication finishes.
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_finishLoadLocalConfig,
+ this,
+ stdx::placeholders::_1,
+ localConfig,
+ lastOpTimeStatus));
+ return false;
+}
+
+void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& localConfig,
+ const StatusWith<OpTime>& lastOpTimeStatus) {
+ if (!cbData.status.isOK()) {
+ LOG(1) << "Loading local replica set configuration failed due to " << cbData.status;
+ return;
+ }
+
+ StatusWith<int> myIndex =
+ validateConfigForStartUp(_externalState.get(), _rsConfig, localConfig);
+ if (!myIndex.isOK()) {
+ if (myIndex.getStatus() == ErrorCodes::NodeNotFound ||
+ myIndex.getStatus() == ErrorCodes::DuplicateKey) {
+ warning() << "Locally stored replica set configuration does not have a valid entry "
+ "for the current node; waiting for reconfig or remote heartbeat; Got \""
+ << myIndex.getStatus() << "\" while validating " << localConfig.toBSON();
+ myIndex = StatusWith<int>(-1);
+ } else {
+ error() << "Locally stored replica set configuration is invalid; See "
+ "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config"
+ " for information on how to recover from this. Got \"" << myIndex.getStatus()
+ << "\" while validating " << localConfig.toBSON();
+ fassertFailedNoTrace(28544);
}
}
- ReplicaSetConfig ReplicationCoordinatorImpl::getReplicaSetConfig_forTest() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- return _rsConfig;
+ if (localConfig.getReplSetName() != _settings.ourSetName()) {
+ warning() << "Local replica set configuration document reports set name of "
+ << localConfig.getReplSetName() << ", but command line reports "
+                  << _settings.ourSetName() << "; waiting for reconfig or remote heartbeat";
+ myIndex = StatusWith<int>(-1);
}
- bool ReplicationCoordinatorImpl::_startLoadLocalConfig(OperationContext* txn) {
-
- StatusWith<BSONObj> cfg = _externalState->loadLocalConfigDocument(txn);
- if (!cfg.isOK()) {
- log() << "Did not find local replica set configuration document at startup; " <<
- cfg.getStatus();
- return true;
- }
- ReplicaSetConfig localConfig;
- Status status = localConfig.initialize(cfg.getValue());
- if (!status.isOK()) {
- error() << "Locally stored replica set configuration does not parse; See "
- "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config "
- "for information on how to recover from this. Got \"" <<
- status << "\" while parsing " << cfg.getValue();
- fassertFailedNoTrace(28545);
+ // Do not check optime, if this node is an arbiter.
+ bool isArbiter =
+ myIndex.getValue() != -1 && localConfig.getMemberAt(myIndex.getValue()).isArbiter();
+ OpTime lastOpTime(0, 0);
+ if (!isArbiter) {
+ if (!lastOpTimeStatus.isOK()) {
+ warning() << "Failed to load timestamp of most recently applied operation; "
+ << lastOpTimeStatus.getStatus();
+ } else {
+ lastOpTime = lastOpTimeStatus.getValue();
}
-
- StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
-
- // Use a callback here, because _finishLoadLocalConfig calls isself() which requires
- // that the server's networking layer be up and running and accepting connections, which
- // doesn't happen until startReplication finishes.
- _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_finishLoadLocalConfig,
- this,
- stdx::placeholders::_1,
- localConfig,
- lastOpTimeStatus));
- return false;
}
- void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& localConfig,
- const StatusWith<OpTime>& lastOpTimeStatus) {
- if (!cbData.status.isOK()) {
- LOG(1) << "Loading local replica set configuration failed due to " << cbData.status;
- return;
- }
-
- StatusWith<int> myIndex = validateConfigForStartUp(_externalState.get(),
- _rsConfig,
- localConfig);
- if (!myIndex.isOK()) {
- if (myIndex.getStatus() == ErrorCodes::NodeNotFound ||
- myIndex.getStatus() == ErrorCodes::DuplicateKey) {
- warning() << "Locally stored replica set configuration does not have a valid entry "
- "for the current node; waiting for reconfig or remote heartbeat; Got \"" <<
- myIndex.getStatus() << "\" while validating " << localConfig.toBSON();
- myIndex = StatusWith<int>(-1);
- }
- else {
- error() << "Locally stored replica set configuration is invalid; See "
- "http://www.mongodb.org/dochub/core/recover-replica-set-from-invalid-config"
- " for information on how to recover from this. Got \"" <<
- myIndex.getStatus() << "\" while validating " << localConfig.toBSON();
- fassertFailedNoTrace(28544);
- }
- }
-
- if (localConfig.getReplSetName() != _settings.ourSetName()) {
- warning() << "Local replica set configuration document reports set name of " <<
- localConfig.getReplSetName() << ", but command line reports " <<
- _settings.ourSetName() << "; waitng for reconfig or remote heartbeat";
- myIndex = StatusWith<int>(-1);
- }
-
- // Do not check optime, if this node is an arbiter.
- bool isArbiter = myIndex.getValue() != -1 &&
- localConfig.getMemberAt(myIndex.getValue()).isArbiter();
- OpTime lastOpTime(0, 0);
- if (!isArbiter) {
- if (!lastOpTimeStatus.isOK()) {
- warning() << "Failed to load timestamp of most recently applied operation; " <<
- lastOpTimeStatus.getStatus();
- }
- else {
- lastOpTime = lastOpTimeStatus.getValue();
- }
- }
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigStartingUp);
- const PostMemberStateUpdateAction action =
- _setCurrentRSConfig_inlock(localConfig, myIndex.getValue());
- _setMyLastOptime_inlock(&lk, lastOpTime, false);
- _externalState->setGlobalOpTime(lastOpTime);
- if (lk.owns_lock()) {
- lk.unlock();
- }
- _performPostMemberStateUpdateAction(action);
- _externalState->startThreads();
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigStartingUp);
+ const PostMemberStateUpdateAction action =
+ _setCurrentRSConfig_inlock(localConfig, myIndex.getValue());
+ _setMyLastOptime_inlock(&lk, lastOpTime, false);
+ _externalState->setGlobalOpTime(lastOpTime);
+ if (lk.owns_lock()) {
+ lk.unlock();
}
+ _performPostMemberStateUpdateAction(action);
+ _externalState->startThreads();
+}
- void ReplicationCoordinatorImpl::startReplication(OperationContext* txn) {
- if (!isReplEnabled()) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- _setConfigState_inlock(kConfigReplicationDisabled);
- return;
- }
-
- {
- OID rid = _externalState->ensureMe(txn);
-
- boost::lock_guard<boost::mutex> lk(_mutex);
- fassert(18822, !_inShutdown);
- _setConfigState_inlock(kConfigStartingUp);
- _myRID = rid;
- _slaveInfo[_getMyIndexInSlaveInfo_inlock()].rid = rid;
- }
-
- if (!_settings.usingReplSets()) {
- // Must be Master/Slave
- invariant(_settings.master || _settings.slave);
- _externalState->startMasterSlave(txn);
- return;
- }
-
- _topCoordDriverThread.reset(new boost::thread(stdx::bind(&ReplicationExecutor::run,
- &_replExecutor)));
-
- bool doneLoadingConfig = _startLoadLocalConfig(txn);
- if (doneLoadingConfig) {
- // If we're not done loading the config, then the config state will be set by
- // _finishLoadLocalConfig.
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(!_rsConfig.isInitialized());
- _setConfigState_inlock(kConfigUninitialized);
- }
+void ReplicationCoordinatorImpl::startReplication(OperationContext* txn) {
+ if (!isReplEnabled()) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _setConfigState_inlock(kConfigReplicationDisabled);
+ return;
}
- void ReplicationCoordinatorImpl::shutdown() {
- // Shutdown must:
- // * prevent new threads from blocking in awaitReplication
- // * wake up all existing threads blocking in awaitReplication
- // * tell the ReplicationExecutor to shut down
- // * wait for the thread running the ReplicationExecutor to finish
-
- if (!_settings.usingReplSets()) {
- return;
- }
+ {
+ OID rid = _externalState->ensureMe(txn);
- boost::thread* hbReconfigThread = NULL;
- {
- boost::lock_guard<boost::mutex> lk(_mutex);
- fassert(28533, !_inShutdown);
- _inShutdown = true;
- if (_rsConfigState == kConfigPreStart) {
- warning() << "ReplicationCoordinatorImpl::shutdown() called before "
- "startReplication() finished. Shutting down without cleaning up the "
- "replication system";
- return;
- }
- fassert(18823, _rsConfigState != kConfigStartingUp);
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* waiter = *it;
- waiter->condVar->notify_all();
- }
-
- // Since we've set _inShutdown we know that _heartbeatReconfigThread will not be
- // changed again, which makes it safe to store the pointer to it to be accessed outside
- // of _mutex.
- hbReconfigThread = _heartbeatReconfigThread.get();
- }
-
- if (hbReconfigThread) {
- hbReconfigThread->join();
- }
-
- _replExecutor.shutdown();
- _topCoordDriverThread->join(); // must happen outside _mutex
- _externalState->shutdown();
- }
-
- const ReplSettings& ReplicationCoordinatorImpl::getSettings() const {
- return _settings;
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ fassert(18822, !_inShutdown);
+ _setConfigState_inlock(kConfigStartingUp);
+ _myRID = rid;
+ _slaveInfo[_getMyIndexInSlaveInfo_inlock()].rid = rid;
}
- ReplicationCoordinator::Mode ReplicationCoordinatorImpl::getReplicationMode() const {
- return _getReplicationMode_inlock();
+ if (!_settings.usingReplSets()) {
+ // Must be Master/Slave
+ invariant(_settings.master || _settings.slave);
+ _externalState->startMasterSlave(txn);
+ return;
}
- ReplicationCoordinator::Mode ReplicationCoordinatorImpl::_getReplicationMode_inlock() const {
- return _replMode;
- }
+ _topCoordDriverThread.reset(
+ new boost::thread(stdx::bind(&ReplicationExecutor::run, &_replExecutor)));
- MemberState ReplicationCoordinatorImpl::getMemberState() const {
+ bool doneLoadingConfig = _startLoadLocalConfig(txn);
+ if (doneLoadingConfig) {
+        // We're already done "loading" because no local config document was found;
+        // otherwise the config state will be set by _finishLoadLocalConfig when it runs.
boost::lock_guard<boost::mutex> lk(_mutex);
- return _getMemberState_inlock();
+ invariant(!_rsConfig.isInitialized());
+ _setConfigState_inlock(kConfigUninitialized);
}
+}
+
+void ReplicationCoordinatorImpl::shutdown() {
+ // Shutdown must:
+ // * prevent new threads from blocking in awaitReplication
+ // * wake up all existing threads blocking in awaitReplication
+ // * tell the ReplicationExecutor to shut down
+ // * wait for the thread running the ReplicationExecutor to finish
- MemberState ReplicationCoordinatorImpl::_getMemberState_inlock() const {
- return _memberState;
+ if (!_settings.usingReplSets()) {
+ return;
}
- Seconds ReplicationCoordinatorImpl::getSlaveDelaySecs() const {
+ boost::thread* hbReconfigThread = NULL;
+ {
boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(_rsConfig.isInitialized());
- uassert(28524,
- "Node not a member of the current set configuration",
- _selfIndex != -1);
- return _rsConfig.getMemberAt(_selfIndex).getSlaveDelay();
- }
-
- void ReplicationCoordinatorImpl::clearSyncSourceBlacklist() {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish,
- this,
- stdx::placeholders::_1));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ fassert(28533, !_inShutdown);
+ _inShutdown = true;
+ if (_rsConfigState == kConfigPreStart) {
+ warning() << "ReplicationCoordinatorImpl::shutdown() called before "
+ "startReplication() finished. Shutting down without cleaning up the "
+ "replication system";
return;
}
- fassert(18907, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
+ fassert(18823, _rsConfigState != kConfigStartingUp);
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* waiter = *it;
+ waiter->condVar->notify_all();
+ }
+
+ // Since we've set _inShutdown we know that _heartbeatReconfigThread will not be
+ // changed again, which makes it safe to store the pointer to it to be accessed outside
+ // of _mutex.
+ hbReconfigThread = _heartbeatReconfigThread.get();
+ }
+
+ if (hbReconfigThread) {
+ hbReconfigThread->join();
+ }
+
+ _replExecutor.shutdown();
+ _topCoordDriverThread->join(); // must happen outside _mutex
+ _externalState->shutdown();
+}
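+
+// Editor's note: an illustrative reduction (not part of the original change) of the
+// teardown ordering above: wake waiters and snapshot thread handles while holding the
+// mutex, but join only after releasing it, since a thread being joined may itself need
+// the mutex in order to finish. All names are hypothetical.
+static void shutdownOrderExample(boost::mutex& m,
+                                 boost::condition_variable& waiters,
+                                 boost::thread* worker) {
+    {
+        boost::lock_guard<boost::mutex> lk(m);
+        waiters.notify_all();  // unblock anyone sleeping on our state
+    }
+    if (worker) {
+        worker->join();  // must not hold m here, or the worker could deadlock on exit
+    }
+}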
+
+const ReplSettings& ReplicationCoordinatorImpl::getSettings() const {
+ return _settings;
+}
+
+ReplicationCoordinator::Mode ReplicationCoordinatorImpl::getReplicationMode() const {
+ return _getReplicationMode_inlock();
+}
+
+ReplicationCoordinator::Mode ReplicationCoordinatorImpl::_getReplicationMode_inlock() const {
+ return _replMode;
+}
+
+MemberState ReplicationCoordinatorImpl::getMemberState() const {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return _getMemberState_inlock();
+}
+
+MemberState ReplicationCoordinatorImpl::_getMemberState_inlock() const {
+ return _memberState;
+}
+
+Seconds ReplicationCoordinatorImpl::getSlaveDelaySecs() const {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_rsConfig.isInitialized());
+ uassert(28524, "Node not a member of the current set configuration", _selfIndex != -1);
+ return _rsConfig.getMemberAt(_selfIndex).getSlaveDelay();
+}
+
+void ReplicationCoordinatorImpl::clearSyncSourceBlacklist() {
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish,
+ this,
+ stdx::placeholders::_1));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18907, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
+
+void ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish(
+ const ReplicationExecutor::CallbackData& cbData) {
+ if (cbData.status == ErrorCodes::CallbackCanceled)
+ return;
+ _topCoord->clearSyncSourceBlacklist();
+}
+
+bool ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState) {
+ StatusWith<ReplicationExecutor::EventHandle> finishedSettingFollowerState =
+ _replExecutor.makeEvent();
+ if (finishedSettingFollowerState.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
-
- void ReplicationCoordinatorImpl::_clearSyncSourceBlacklist_finish(
- const ReplicationExecutor::CallbackData& cbData) {
- if (cbData.status == ErrorCodes::CallbackCanceled)
- return;
- _topCoord->clearSyncSourceBlacklist();
+ fassert(18812, finishedSettingFollowerState.getStatus());
+ bool success = false;
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
+ this,
+ stdx::placeholders::_1,
+ newState,
+ finishedSettingFollowerState.getValue(),
+ &success));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
+ fassert(18699, cbh.getStatus());
+ _replExecutor.waitForEvent(finishedSettingFollowerState.getValue());
+ return success;
+}
- bool ReplicationCoordinatorImpl::setFollowerMode(const MemberState& newState) {
- StatusWith<ReplicationExecutor::EventHandle> finishedSettingFollowerState =
- _replExecutor.makeEvent();
- if (finishedSettingFollowerState.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18812, finishedSettingFollowerState.getStatus());
- bool success = false;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
- this,
- stdx::placeholders::_1,
- newState,
- finishedSettingFollowerState.getValue(),
- &success));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18699, cbh.getStatus());
- _replExecutor.waitForEvent(finishedSettingFollowerState.getValue());
- return success;
+void ReplicationCoordinatorImpl::_setFollowerModeFinish(
+ const ReplicationExecutor::CallbackData& cbData,
+ const MemberState& newState,
+ const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
+ bool* success) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_setFollowerModeFinish(
- const ReplicationExecutor::CallbackData& cbData,
- const MemberState& newState,
- const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
- bool* success) {
-
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- if (newState == _topCoord->getMemberState()) {
- *success = true;
- _replExecutor.signalEvent(finishedSettingFollowerMode);
- return;
- }
- if (_topCoord->getRole() == TopologyCoordinator::Role::leader) {
- *success = false;
- _replExecutor.signalEvent(finishedSettingFollowerMode);
- return;
- }
-
- if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- // We are a candidate, which means _topCoord believs us to be in state RS_SECONDARY, and
- // we know that newState != RS_SECONDARY because we would have returned early, above if
- // the old and new state were equal. So, cancel the running election and try again to
- // finish setting the follower mode.
- invariant(_freshnessChecker);
- _freshnessChecker->cancel(&_replExecutor);
- if (_electCmdRunner) {
- _electCmdRunner->cancel(&_replExecutor);
- }
- _replExecutor.onEvent(
- _electionFinishedEvent,
- stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
- this,
- stdx::placeholders::_1,
- newState,
- finishedSettingFollowerMode,
- success));
- return;
- }
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- _topCoord->setFollowerMode(newState.s);
-
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
+ if (newState == _topCoord->getMemberState()) {
*success = true;
_replExecutor.signalEvent(finishedSettingFollowerMode);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ return;
}
-
- bool ReplicationCoordinatorImpl::isWaitingForApplierToDrain() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- return _isWaitingForDrainToComplete;
- }
-
- void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* txn) {
- // This logic is a little complicated in order to avoid acquiring the global exclusive lock
- // unnecessarily. This is important because the applier may call signalDrainComplete()
- // whenever it wants, not only when the ReplicationCoordinator is expecting it.
- //
- // The steps are:
- // 1.) Check to see if we're waiting for this signal. If not, return early.
- // 2.) Otherwise, release the mutex while acquiring the global exclusive lock,
- // since that might take a while (NB there's a deadlock cycle otherwise, too).
- // 3.) Re-check to see if we've somehow left drain mode. If we have not, clear
- // _isWaitingForDrainToComplete, set the flag allowing non-local database writes and
- // drop the mutex. At this point, no writes can occur from other threads, due to the
- // global exclusive lock.
- // 4.) Drop all temp collections.
- // 5.) Drop the global exclusive lock.
- //
- // Because replicatable writes are forbidden while in drain mode, and we don't exit drain
- // mode until we have the global exclusive lock, which forbids all other threads from making
- // writes, we know that from the time that _isWaitingForDrainToComplete is set in
- // _performPostMemberStateUpdateAction(kActionWinElection) until this method returns, no
- // external writes will be processed. This is important so that a new temp collection isn't
- // introduced on the new primary before we drop all the temp collections.
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- if (!_isWaitingForDrainToComplete) {
- return;
- }
- lk.unlock();
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite globalWriteLock(txn->lockState());
- lk.lock();
- if (!_isWaitingForDrainToComplete) {
- return;
+ if (_topCoord->getRole() == TopologyCoordinator::Role::leader) {
+ *success = false;
+ _replExecutor.signalEvent(finishedSettingFollowerMode);
+ return;
+ }
+
+ if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+        // We are a candidate, which means _topCoord believes us to be in state RS_SECONDARY,
+        // and we know that newState != RS_SECONDARY because we would have returned early
+        // above if the old and new state were equal. So, cancel the running election and try
+        // again to finish setting the follower mode.
+ invariant(_freshnessChecker);
+ _freshnessChecker->cancel(&_replExecutor);
+ if (_electCmdRunner) {
+ _electCmdRunner->cancel(&_replExecutor);
+ }
+ _replExecutor.onEvent(_electionFinishedEvent,
+ stdx::bind(&ReplicationCoordinatorImpl::_setFollowerModeFinish,
+ this,
+ stdx::placeholders::_1,
+ newState,
+ finishedSettingFollowerMode,
+ success));
+ return;
+ }
+
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ _topCoord->setFollowerMode(newState.s);
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ *success = true;
+ _replExecutor.signalEvent(finishedSettingFollowerMode);
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+bool ReplicationCoordinatorImpl::isWaitingForApplierToDrain() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return _isWaitingForDrainToComplete;
+}
+
+void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* txn) {
+ // This logic is a little complicated in order to avoid acquiring the global exclusive lock
+ // unnecessarily. This is important because the applier may call signalDrainComplete()
+ // whenever it wants, not only when the ReplicationCoordinator is expecting it.
+ //
+ // The steps are:
+ // 1.) Check to see if we're waiting for this signal. If not, return early.
+ // 2.) Otherwise, release the mutex while acquiring the global exclusive lock,
+ // since that might take a while (NB there's a deadlock cycle otherwise, too).
+ // 3.) Re-check to see if we've somehow left drain mode. If we have not, clear
+ // _isWaitingForDrainToComplete, set the flag allowing non-local database writes and
+ // drop the mutex. At this point, no writes can occur from other threads, due to the
+ // global exclusive lock.
+ // 4.) Drop all temp collections.
+ // 5.) Drop the global exclusive lock.
+ //
+ // Because replicatable writes are forbidden while in drain mode, and we don't exit drain
+ // mode until we have the global exclusive lock, which forbids all other threads from making
+ // writes, we know that from the time that _isWaitingForDrainToComplete is set in
+ // _performPostMemberStateUpdateAction(kActionWinElection) until this method returns, no
+ // external writes will be processed. This is important so that a new temp collection isn't
+ // introduced on the new primary before we drop all the temp collections.
+
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ if (!_isWaitingForDrainToComplete) {
+ return;
+ }
+ lk.unlock();
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite globalWriteLock(txn->lockState());
+ lk.lock();
+ if (!_isWaitingForDrainToComplete) {
+ return;
+ }
+ _isWaitingForDrainToComplete = false;
+ _canAcceptNonLocalWrites = true;
+ lk.unlock();
+ _externalState->dropAllTempCollections(txn);
+ log() << "transition to primary complete; database writes are now permitted" << rsLog;
+}
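+
+// Editor's note: an illustrative reduction (not part of the original change) of the lock
+// dance above. The coordinator mutex is dropped before acquiring the long-held global
+// exclusive lock (holding both in the wrong order is the deadlock cycle the comment
+// mentions), and the drain flag is rechecked after relocking because it may have changed
+// while the mutex was released. All names are hypothetical.
+template <typename AcquireGlobalX, typename Finish>
+void drainDanceExample(boost::mutex& m, bool& draining, AcquireGlobalX acquireX, Finish finish) {
+    boost::unique_lock<boost::mutex> lk(m);
+    if (!draining)
+        return;       // fast path: nothing to wait for
+    lk.unlock();      // never block on the global lock while holding the mutex
+    acquireX();       // may stall behind long-running writes
+    lk.lock();
+    if (!draining)
+        return;       // state moved on while the mutex was dropped
+    draining = false;
+    lk.unlock();
+    finish();         // safe: the global exclusive lock excludes all other writers
+}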
+
+void ReplicationCoordinatorImpl::signalUpstreamUpdater() {
+ _externalState->forwardSlaveHandshake();
+}
+
+ReplicationCoordinatorImpl::SlaveInfo* ReplicationCoordinatorImpl::_findSlaveInfoByMemberID_inlock(
+ int memberId) {
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ if (it->memberId == memberId) {
+ return &(*it);
+ }
+ }
+ return NULL;
+}
+
+ReplicationCoordinatorImpl::SlaveInfo* ReplicationCoordinatorImpl::_findSlaveInfoByRID_inlock(
+ const OID& rid) {
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ if (it->rid == rid) {
+ return &(*it);
+ }
+ }
+ return NULL;
+}
+
+void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo) {
+ invariant(_getReplicationMode_inlock() == modeMasterSlave);
+ _slaveInfo.push_back(slaveInfo);
+
+ // Wake up any threads waiting for replication that now have their replication
+ // check satisfied
+ _wakeReadyWaiters_inlock();
+}
+
+void ReplicationCoordinatorImpl::_updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, OpTime ts) {
+ slaveInfo->opTime = ts;
+
+ // Wake up any threads waiting for replication that now have their replication
+ // check satisfied
+ _wakeReadyWaiters_inlock();
+}
+
+void ReplicationCoordinatorImpl::_updateSlaveInfoFromConfig_inlock() {
+ invariant(_settings.usingReplSets());
+
+ SlaveInfoVector oldSlaveInfos;
+ _slaveInfo.swap(oldSlaveInfos);
+
+ if (_selfIndex == -1) {
+ // If we aren't in the config then the only data we care about is for ourself
+ for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin(); it != oldSlaveInfos.end();
+ ++it) {
+ if (it->self) {
+ SlaveInfo slaveInfo = *it;
+ slaveInfo.memberId = -1;
+ _slaveInfo.push_back(slaveInfo);
+ return;
+ }
}
- _isWaitingForDrainToComplete = false;
- _canAcceptNonLocalWrites = true;
- lk.unlock();
- _externalState->dropAllTempCollections(txn);
- log() << "transition to primary complete; database writes are now permitted" << rsLog;
+ invariant(false); // There should always have been an entry for ourself
}
- void ReplicationCoordinatorImpl::signalUpstreamUpdater() {
- _externalState->forwardSlaveHandshake();
- }
+ for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
+ const MemberConfig& memberConfig = _rsConfig.getMemberAt(i);
+ int memberId = memberConfig.getId();
+ const HostAndPort& memberHostAndPort = memberConfig.getHostAndPort();
- ReplicationCoordinatorImpl::SlaveInfo*
- ReplicationCoordinatorImpl::_findSlaveInfoByMemberID_inlock(int memberId) {
- for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
- if (it->memberId == memberId) {
- return &(*it);
- }
- }
- return NULL;
- }
+ SlaveInfo slaveInfo;
- ReplicationCoordinatorImpl::SlaveInfo*
- ReplicationCoordinatorImpl::_findSlaveInfoByRID_inlock(const OID& rid) {
- for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
- if (it->rid == rid) {
- return &(*it);
+ // Check if the node existed with the same member ID and hostname in the old data
+ for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin(); it != oldSlaveInfos.end();
+ ++it) {
+ if ((it->memberId == memberId && it->hostAndPort == memberHostAndPort) ||
+ (i == _selfIndex && it->self)) {
+ slaveInfo = *it;
}
}
- return NULL;
- }
- void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo) {
- invariant(_getReplicationMode_inlock() == modeMasterSlave);
+ // Make sure you have the most up-to-date info for member ID and hostAndPort.
+ slaveInfo.memberId = memberId;
+ slaveInfo.hostAndPort = memberHostAndPort;
_slaveInfo.push_back(slaveInfo);
-
- // Wake up any threads waiting for replication that now have their replication
- // check satisfied
- _wakeReadyWaiters_inlock();
}
+ invariant(static_cast<int>(_slaveInfo.size()) == _rsConfig.getNumMembers());
+}
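+
+// Editor's note: the carry-over rule in the loop above, reduced to a predicate over plain
+// values (illustrative only, not part of the original change): an old progress entry is
+// reused when its member ID and host both match the new config entry, or when it is our
+// own entry and the new entry's index is our own. Hypothetical names.
+static bool reuseOldEntryExample(int oldMemberId,
+                                 const std::string& oldHost,
+                                 bool oldIsSelf,
+                                 int newMemberId,
+                                 const std::string& newHost,
+                                 bool newIndexIsSelf) {
+    return (oldMemberId == newMemberId && oldHost == newHost) || (newIndexIsSelf && oldIsSelf);
+}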
- void ReplicationCoordinatorImpl::_updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo,
- OpTime ts) {
-
- slaveInfo->opTime = ts;
-
- // Wake up any threads waiting for replication that now have their replication
- // check satisfied
- _wakeReadyWaiters_inlock();
- }
-
- void ReplicationCoordinatorImpl::_updateSlaveInfoFromConfig_inlock() {
+size_t ReplicationCoordinatorImpl::_getMyIndexInSlaveInfo_inlock() const {
+ if (_getReplicationMode_inlock() == modeMasterSlave) {
+ // Self data always lives in the first entry in _slaveInfo for master/slave
+ return 0;
+ } else {
invariant(_settings.usingReplSets());
-
- SlaveInfoVector oldSlaveInfos;
- _slaveInfo.swap(oldSlaveInfos);
-
if (_selfIndex == -1) {
- // If we aren't in the config then the only data we care about is for ourself
- for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin();
- it != oldSlaveInfos.end(); ++it) {
- if (it->self) {
- SlaveInfo slaveInfo = *it;
- slaveInfo.memberId = -1;
- _slaveInfo.push_back(slaveInfo);
- return;
- }
- }
- invariant(false); // There should always have been an entry for ourself
- }
-
- for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
- const MemberConfig& memberConfig = _rsConfig.getMemberAt(i);
- int memberId = memberConfig.getId();
- const HostAndPort& memberHostAndPort = memberConfig.getHostAndPort();
-
- SlaveInfo slaveInfo;
-
- // Check if the node existed with the same member ID and hostname in the old data
- for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin();
- it != oldSlaveInfos.end(); ++it) {
- if ((it->memberId == memberId && it->hostAndPort == memberHostAndPort)
- || (i == _selfIndex && it->self)) {
- slaveInfo = *it;
- }
- }
-
- // Make sure you have the most up-to-date info for member ID and hostAndPort.
- slaveInfo.memberId = memberId;
- slaveInfo.hostAndPort = memberHostAndPort;
- _slaveInfo.push_back(slaveInfo);
- }
- invariant(static_cast<int>(_slaveInfo.size()) == _rsConfig.getNumMembers());
- }
-
- size_t ReplicationCoordinatorImpl::_getMyIndexInSlaveInfo_inlock() const {
- if (_getReplicationMode_inlock() == modeMasterSlave) {
- // Self data always lives in the first entry in _slaveInfo for master/slave
+ invariant(_slaveInfo.size() == 1);
return 0;
- }
- else {
- invariant(_settings.usingReplSets());
- if (_selfIndex == -1) {
- invariant(_slaveInfo.size() == 1);
- return 0;
- }
- else {
- return _selfIndex;
- }
+ } else {
+ return _selfIndex;
}
}
+}
- Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid,
- const OpTime& ts) {
- boost::unique_lock<boost::mutex> lock(_mutex);
- massert(28576,
- "Received an old style replication progress update, which is only used for Master/"
- "Slave replication now, but this node is not using Master/Slave replication. "
- "This is likely caused by an old (pre-2.6) member syncing from this node.",
- _getReplicationMode_inlock() == modeMasterSlave);
+Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid, const OpTime& ts) {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ massert(28576,
+ "Received an old style replication progress update, which is only used for Master/"
+ "Slave replication now, but this node is not using Master/Slave replication. "
+ "This is likely caused by an old (pre-2.6) member syncing from this node.",
+ _getReplicationMode_inlock() == modeMasterSlave);
- SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid);
- if (slaveInfo) {
- if (slaveInfo->opTime < ts) {
- _updateSlaveInfoOptime_inlock(slaveInfo, ts);
- }
- }
- else {
- SlaveInfo newSlaveInfo;
- newSlaveInfo.rid = rid;
- newSlaveInfo.opTime = ts;
- _addSlaveInfo_inlock(newSlaveInfo);
+ SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid);
+ if (slaveInfo) {
+ if (slaveInfo->opTime < ts) {
+ _updateSlaveInfoOptime_inlock(slaveInfo, ts);
}
+ } else {
+ SlaveInfo newSlaveInfo;
+ newSlaveInfo.rid = rid;
+ newSlaveInfo.opTime = ts;
+ _addSlaveInfo_inlock(newSlaveInfo);
+ }
+ return Status::OK();
+}
+
+void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
+ CBHStatus cbh = _replExecutor.scheduleWork(stdx::bind(
+ &TopologyCoordinator::setMyHeartbeatMessage, _topCoord.get(), _replExecutor.now(), msg));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28540, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
+
+void ReplicationCoordinatorImpl::setMyLastOptime(const OpTime& ts) {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ _setMyLastOptime_inlock(&lock, ts, false);
+}
+
+void ReplicationCoordinatorImpl::resetMyLastOptime() {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ _setMyLastOptime_inlock(&lock, OpTime(), true);
+}
+
+void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(boost::unique_lock<boost::mutex>* lock,
+ const OpTime& ts,
+ bool isRollbackAllowed) {
+ invariant(lock->owns_lock());
+ SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()];
+ invariant(isRollbackAllowed || mySlaveInfo->opTime <= ts);
+ _updateSlaveInfoOptime_inlock(mySlaveInfo, ts);
+
+ if (_getReplicationMode_inlock() != modeReplSet) {
+ return;
+ }
+ if (_getMemberState_inlock().primary()) {
+ return;
+ }
+ lock->unlock();
+ _externalState->forwardSlaveProgress(); // Must do this outside _mutex
+}
+
+OpTime ReplicationCoordinatorImpl::getMyLastOptime() const {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ return _getMyLastOptime_inlock();
+}
+
+OpTime ReplicationCoordinatorImpl::_getMyLastOptime_inlock() const {
+ return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].opTime;
+}
+
+Status ReplicationCoordinatorImpl::setLastOptime_forTest(const OID& rid, const OpTime& ts) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ invariant(_getReplicationMode_inlock() == modeReplSet);
+
+ const UpdatePositionArgs::UpdateInfo update(rid, ts, -1, -1);
+ return _setLastOptime_inlock(update);
+}
+
+Status ReplicationCoordinatorImpl::_setLastOptime_inlock(
+ const UpdatePositionArgs::UpdateInfo& args) {
+ if (_selfIndex == -1) {
+ // Ignore updates when we're in state REMOVED
+ return Status(ErrorCodes::NotMasterOrSecondaryCode,
+ "Received replSetUpdatePosition command but we are in state REMOVED");
+ }
+ invariant(_getReplicationMode_inlock() == modeReplSet);
+
+ if (args.rid == _getMyRID_inlock() ||
+ args.memberId == _rsConfig.getMemberAt(_selfIndex).getId()) {
+ // Do not let remote nodes tell us what our optime is.
return Status::OK();
}
- void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&TopologyCoordinator::setMyHeartbeatMessage,
- _topCoord.get(),
- _replExecutor.now(),
- msg));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28540, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- }
-
- void ReplicationCoordinatorImpl::setMyLastOptime(const OpTime& ts) {
- boost::unique_lock<boost::mutex> lock(_mutex);
- _setMyLastOptime_inlock(&lock, ts, false);
- }
-
- void ReplicationCoordinatorImpl::resetMyLastOptime() {
- boost::unique_lock<boost::mutex> lock(_mutex);
- _setMyLastOptime_inlock(&lock, OpTime(), true);
- }
+ LOG(2) << "received notification that node with RID " << args.rid
+ << " has reached optime: " << args.ts;
- void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(
- boost::unique_lock<boost::mutex>* lock, const OpTime& ts, bool isRollbackAllowed) {
- invariant(lock->owns_lock());
- SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()];
- invariant(isRollbackAllowed || mySlaveInfo->opTime <= ts);
- _updateSlaveInfoOptime_inlock(mySlaveInfo, ts);
-
- if (_getReplicationMode_inlock() != modeReplSet) {
- return;
+ SlaveInfo* slaveInfo = NULL;
+ if (args.memberId >= 0) {
+ if (args.cfgver != _rsConfig.getConfigVersion()) {
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " whose config version of " << args.cfgver
+ << " doesn't match our config version of " << _rsConfig.getConfigVersion();
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
}
- if (_getMemberState_inlock().primary()) {
+
+ slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId);
+ if (!slaveInfo) {
+ invariant(!_rsConfig.findMemberByID(args.memberId));
+
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " which doesn't exist in our config";
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::NodeNotFound, errmsg);
+ }
+ } else {
+        // The command we received didn't contain a memberId; most likely that is because it
+        // came from a member running something prior to 3.0.
+ // Fall back to finding the node by RID.
+ slaveInfo = _findSlaveInfoByRID_inlock(args.rid);
+ if (!slaveInfo) {
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with RID " << args.rid
+ << ", but we haven't yet received a handshake for that node.";
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::NodeNotFound, errmsg);
+ }
+ invariant(slaveInfo->memberId >= 0);
+ }
+ invariant(slaveInfo);
+ invariant(args.memberId < 0 || args.memberId == slaveInfo->memberId);
+
+ LOG(3) << "Node with RID " << args.rid << " and memberId " << slaveInfo->memberId
+ << " currently has optime " << slaveInfo->opTime << "; updating to " << args.ts;
+
+ // Only update remote optimes if they increase.
+ if (slaveInfo->opTime < args.ts) {
+ _updateSlaveInfoOptime_inlock(slaveInfo, args.ts);
+ }
+ return Status::OK();
+}
+
+void ReplicationCoordinatorImpl::interrupt(unsigned opId) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ if (info->opID == opId) {
+ info->condVar->notify_all();
return;
}
- lock->unlock();
- _externalState->forwardSlaveProgress(); // Must do this outside _mutex
}
- OpTime ReplicationCoordinatorImpl::getMyLastOptime() const {
- boost::lock_guard<boost::mutex> lock(_mutex);
- return _getMyLastOptime_inlock();
- }
+ _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
+ this,
+ stdx::placeholders::_1));
+}
- OpTime ReplicationCoordinatorImpl::_getMyLastOptime_inlock() const {
- return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].opTime;
+void ReplicationCoordinatorImpl::interruptAll() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ info->condVar->notify_all();
}
- Status ReplicationCoordinatorImpl::setLastOptime_forTest(const OID& rid, const OpTime& ts) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- invariant(_getReplicationMode_inlock() == modeReplSet);
+ _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
+ this,
+ stdx::placeholders::_1));
+}
- const UpdatePositionArgs::UpdateInfo update(rid, ts, -1, -1);
- return _setLastOptime_inlock(update);
+bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
+ const OpTime& opTime, const WriteConcernOptions& writeConcern) {
+ Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+ if (!status.isOK()) {
+ return true;
}
- Status ReplicationCoordinatorImpl::_setLastOptime_inlock(
- const UpdatePositionArgs::UpdateInfo& args) {
-
- if (_selfIndex == -1) {
- // Ignore updates when we're in state REMOVED
- return Status(ErrorCodes::NotMasterOrSecondaryCode,
- "Received replSetUpdatePosition command but we are in state REMOVED");
- }
- invariant(_getReplicationMode_inlock() == modeReplSet);
-
- if (args.rid == _getMyRID_inlock() ||
- args.memberId == _rsConfig.getMemberAt(_selfIndex).getId()) {
- // Do not let remote nodes tell us what our optime is.
- return Status::OK();
- }
-
- LOG(2) << "received notification that node with RID " << args.rid <<
- " has reached optime: " << args.ts;
-
- SlaveInfo* slaveInfo = NULL;
- if (args.memberId >= 0) {
- if (args.cfgver != _rsConfig.getConfigVersion()) {
- std::string errmsg = str::stream()
- << "Received replSetUpdatePosition for node with memberId "
- << args.memberId << " whose config version of " << args.cfgver
- << " doesn't match our config version of "
- << _rsConfig.getConfigVersion();
- LOG(1) << errmsg;
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
-
- slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId);
- if (!slaveInfo) {
- invariant(!_rsConfig.findMemberByID(args.memberId));
-
- std::string errmsg = str::stream()
- << "Received replSetUpdatePosition for node with memberId "
- << args.memberId << " which doesn't exist in our config";
- LOG(1) << errmsg;
- return Status(ErrorCodes::NodeNotFound, errmsg);
- }
- }
- else {
- // The command we received didn't contain a memberId, most likely this is because it
- // came from a member running something prior to 3.0.
- // Fall back to finding the node by RID.
- slaveInfo = _findSlaveInfoByRID_inlock(args.rid);
- if (!slaveInfo) {
- std::string errmsg = str::stream()
- << "Received replSetUpdatePosition for node with RID " << args.rid
- << ", but we haven't yet received a handshake for that node.";
- LOG(1) << errmsg;
- return Status(ErrorCodes::NodeNotFound, errmsg);
- }
- invariant(slaveInfo->memberId >= 0);
+ if (!writeConcern.wMode.empty()) {
+ StringData patternName;
+ if (writeConcern.wMode == "majority") {
+ patternName = "$majority";
+ } else {
+ patternName = writeConcern.wMode;
}
- invariant(slaveInfo);
- invariant(args.memberId < 0 || args.memberId == slaveInfo->memberId);
-
- LOG(3) << "Node with RID " << args.rid << " and memberId " << slaveInfo->memberId
- << " currently has optime " << slaveInfo->opTime << "; updating to " << args.ts;
-
- // Only update remote optimes if they increase.
- if (slaveInfo->opTime < args.ts) {
- _updateSlaveInfoOptime_inlock(slaveInfo, args.ts);
+ StatusWith<ReplicaSetTagPattern> tagPattern = _rsConfig.findCustomWriteMode(patternName);
+ if (!tagPattern.isOK()) {
+ return true;
}
- return Status::OK();
+ return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue());
+ } else {
+ return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes);
}
+}
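+
+// Editor's note: the mode-name resolution above, sketched in isolation (not part of the
+// original change). The reserved name "majority" maps to the internal "$majority" tag
+// pattern; any other non-empty wMode is looked up as a custom write mode, and an empty
+// wMode falls back to counting nodes. "resolvePatternNameExample" is hypothetical.
+static std::string resolvePatternNameExample(const std::string& wMode) {
+    return wMode == "majority" ? std::string("$majority") : wMode;
+}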
- void ReplicationCoordinatorImpl::interrupt(unsigned opId) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- if (info->opID == opId) {
- info->condVar->notify_all();
- return;
- }
- }
-
- _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
- this,
- stdx::placeholders::_1));
+bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& opTime,
+ int numNodes) {
+ if (_getMyLastOptime_inlock() < opTime) {
+ // Secondaries that are for some reason ahead of us should not allow us to
+ // satisfy a write concern if we aren't caught up ourselves.
+ return false;
}
- void ReplicationCoordinatorImpl::interruptAll() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- info->condVar->notify_all();
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ const OpTime& slaveTime = it->opTime;
+ if (slaveTime >= opTime) {
+ --numNodes;
}
- _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
- this,
- stdx::placeholders::_1));
- }
-
- bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
- const OpTime& opTime, const WriteConcernOptions& writeConcern) {
- Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
- if (!status.isOK()) {
+ if (numNodes <= 0) {
return true;
}
-
- if (!writeConcern.wMode.empty()) {
- StringData patternName;
- if (writeConcern.wMode == "majority") {
- patternName = "$majority";
- }
- else {
- patternName = writeConcern.wMode;
- }
- StatusWith<ReplicaSetTagPattern> tagPattern =
- _rsConfig.findCustomWriteMode(patternName);
- if (!tagPattern.isOK()) {
- return true;
- }
- return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue());
- }
- else {
- return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes);
- }
- }
-
- bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& opTime,
- int numNodes) {
- if (_getMyLastOptime_inlock() < opTime) {
- // Secondaries that are for some reason ahead of us should not allow us to
- // satisfy a write concern if we aren't caught up ourselves.
- return false;
- }
-
- for (SlaveInfoVector::iterator it = _slaveInfo.begin();
- it != _slaveInfo.end(); ++it) {
-
- const OpTime& slaveTime = it->opTime;
- if (slaveTime >= opTime) {
- --numNodes;
- }
-
- if (numNodes <= 0) {
- return true;
- }
- }
- return false;
}
-
- bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock(
- const OpTime& opTime, const ReplicaSetTagPattern& tagPattern) {
-
- ReplicaSetTagMatch matcher(tagPattern);
- for (SlaveInfoVector::iterator it = _slaveInfo.begin();
- it != _slaveInfo.end(); ++it) {
-
- const OpTime& slaveTime = it->opTime;
- if (slaveTime >= opTime) {
- // This node has reached the desired optime, now we need to check if it is a part
- // of the tagPattern.
- const MemberConfig* memberConfig = _rsConfig.findMemberByID(it->memberId);
- invariant(memberConfig);
- for (MemberConfig::TagIterator it = memberConfig->tagsBegin();
- it != memberConfig->tagsEnd(); ++it) {
- if (matcher.update(*it)) {
- return true;
- }
+ return false;
+}
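+
+// Editor's note: the counting loop above, over plain ints (illustrative only, not part of
+// the original change). Worked example: times {5, 3, 5}, target 4, numNodes 2 returns
+// true, since two entries have reached the target. Hypothetical names.
+static bool haveNumNodesReachedExample(const std::vector<int>& times, int target, int numNodes) {
+    for (size_t i = 0; i < times.size(); ++i) {
+        if (times[i] >= target) {
+            --numNodes;  // one more node accounted for
+        }
+        if (numNodes <= 0) {
+            return true;  // write concern satisfied as early as possible
+        }
+    }
+    return false;
+}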
+
+bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock(
+ const OpTime& opTime, const ReplicaSetTagPattern& tagPattern) {
+ ReplicaSetTagMatch matcher(tagPattern);
+ for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
+ const OpTime& slaveTime = it->opTime;
+ if (slaveTime >= opTime) {
+ // This node has reached the desired optime, now we need to check if it is a part
+ // of the tagPattern.
+ const MemberConfig* memberConfig = _rsConfig.findMemberByID(it->memberId);
+ invariant(memberConfig);
+ for (MemberConfig::TagIterator it = memberConfig->tagsBegin();
+ it != memberConfig->tagsEnd();
+ ++it) {
+ if (matcher.update(*it)) {
+ return true;
}
}
}
- return false;
}
-
- ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
- const OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern) {
- Timer timer;
- boost::unique_lock<boost::mutex> lock(_mutex);
- return _awaitReplication_inlock(&timer, &lock, txn, opTime, writeConcern);
+ return false;
+}
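+
+// Editor's note (illustrative worked example, not part of the original change): suppose a
+// custom write mode's pattern requires two distinct values of the "dc" tag, and the set's
+// nodes carry tags {dc: "east"}, {dc: "east"}, {dc: "west"}. The loop above feeds the tags
+// of each node that has reached opTime into the matcher; matcher.update() first reports
+// true only once both an "east" node and the "west" node have reached it.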
+
+ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
+ const OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern) {
+ Timer timer;
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ return _awaitReplication_inlock(&timer, &lock, txn, opTime, writeConcern);
+}
+
+ReplicationCoordinator::StatusAndDuration
+ReplicationCoordinatorImpl::awaitReplicationOfLastOpForClient(
+ const OperationContext* txn, const WriteConcernOptions& writeConcern) {
+ Timer timer;
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ return _awaitReplication_inlock(
+ &timer, &lock, txn, txn->getClient()->getLastOp(), writeConcern);
+}
+
+ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitReplication_inlock(
+ const Timer* timer,
+ boost::unique_lock<boost::mutex>* lock,
+ const OperationContext* txn,
+ const OpTime& opTime,
+ const WriteConcernOptions& writeConcern) {
+ const Mode replMode = _getReplicationMode_inlock();
+ if (replMode == modeNone || serverGlobalParams.configsvr) {
+ // no replication check needed (validated above)
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- ReplicationCoordinator::StatusAndDuration
- ReplicationCoordinatorImpl::awaitReplicationOfLastOpForClient(
- const OperationContext* txn,
- const WriteConcernOptions& writeConcern) {
- Timer timer;
- boost::unique_lock<boost::mutex> lock(_mutex);
- return _awaitReplication_inlock(
- &timer, &lock, txn, txn->getClient()->getLastOp(), writeConcern);
+ if (replMode == modeMasterSlave && writeConcern.wMode == "majority") {
+ // with master/slave, majority is equivalent to w=1
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitReplication_inlock(
- const Timer* timer,
- boost::unique_lock<boost::mutex>* lock,
- const OperationContext* txn,
- const OpTime& opTime,
- const WriteConcernOptions& writeConcern) {
-
- const Mode replMode = _getReplicationMode_inlock();
- if (replMode == modeNone || serverGlobalParams.configsvr) {
- // no replication check needed (validated above)
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
-
- if (replMode == modeMasterSlave && writeConcern.wMode == "majority") {
- // with master/slave, majority is equivalent to w=1
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
-
- if (opTime.isNull()) {
- // If waiting for the empty optime, always say it's been replicated.
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
-
- if (replMode == modeReplSet && !_memberState.primary()) {
- return StatusAndDuration(Status(ErrorCodes::NotMaster,
- "Not master while waiting for replication"),
- Milliseconds(timer->millis()));
- }
-
- if (writeConcern.wMode.empty()) {
- if (writeConcern.wNumNodes < 1) {
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
- else if (writeConcern.wNumNodes == 1 && _getMyLastOptime_inlock() >= opTime) {
- return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- }
- }
-
- // Must hold _mutex before constructing waitInfo as it will modify _replicationWaiterList
- boost::condition_variable condVar;
- WaiterInfo waitInfo(
- &_replicationWaiterList, txn->getOpID(), &opTime, &writeConcern, &condVar);
- while (!_doneWaitingForReplication_inlock(opTime, writeConcern)) {
- const int elapsed = timer->millis();
-
- Status interruptedStatus = txn->checkForInterruptNoAssert();
- if (!interruptedStatus.isOK()) {
- return StatusAndDuration(interruptedStatus, Milliseconds(elapsed));
- }
-
- if (!waitInfo.master) {
- return StatusAndDuration(Status(ErrorCodes::NotMaster,
- "Not master anymore while waiting for replication"
- " - this most likely means that a step down"
- " occurred while waiting for replication"),
- Milliseconds(elapsed));
- }
-
- if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout &&
- elapsed > writeConcern.wTimeout) {
- return StatusAndDuration(Status(ErrorCodes::ExceededTimeLimit,
- "waiting for replication timed out"),
- Milliseconds(elapsed));
- }
-
- if (_inShutdown) {
- return StatusAndDuration(Status(ErrorCodes::ShutdownInProgress,
- "Replication is being shut down"),
- Milliseconds(elapsed));
- }
-
- try {
- if (writeConcern.wTimeout == WriteConcernOptions::kNoTimeout) {
- condVar.wait(*lock);
- }
- else {
- condVar.timed_wait(*lock, Milliseconds(writeConcern.wTimeout - elapsed));
- }
- } catch (const boost::thread_interrupted&) {}
- }
-
- Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
- if (!status.isOK()) {
- return StatusAndDuration(status, Milliseconds(timer->millis()));
- }
-
+ if (opTime.isNull()) {
+ // If waiting for the empty optime, always say it's been replicated.
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- Status ReplicationCoordinatorImpl::stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime) {
- const Date_t startTime = _replExecutor.now();
- const Date_t stepDownUntil(startTime.millis + stepdownTime.total_milliseconds());
- const Date_t waitUntil(startTime.millis + waitTime.total_milliseconds());
-
- if (!getMemberState().primary()) {
- // Note this check is inherently racy - it's always possible for the node to
- // stepdown from some other path before we acquire the global shared lock, but
- // that's okay because we are resiliant to that happening in _stepDownContinue.
- return Status(ErrorCodes::NotMaster, "not primary so can't step down");
- }
-
- LockResult lockState = txn->lockState()->lockGlobalBegin(MODE_S);
- // We've requested the global shared lock which will stop new writes from coming in,
- // but existing writes could take a long time to finish, so kill all user operations
- // to help us get the global lock faster.
- _externalState->killAllUserOperations(txn);
-
- if (lockState == LOCK_WAITING) {
- lockState = txn->lockState()->lockGlobalComplete(stepdownTime.total_milliseconds());
- if (lockState == LOCK_TIMEOUT) {
- return Status(ErrorCodes::ExceededTimeLimit,
- "Could not acquire the global shared lock within the amount of time "
- "specified that we should step down for");
- }
- }
- invariant(lockState == LOCK_OK);
- ON_BLOCK_EXIT(&Locker::unlockAll, txn->lockState());
- // From this point onward we are guaranteed to be holding the global shared lock.
-
- StatusWith<ReplicationExecutor::EventHandle> finishedEvent = _replExecutor.makeEvent();
- if (finishedEvent.getStatus() == ErrorCodes::ShutdownInProgress) {
- return finishedEvent.getStatus();
- }
- fassert(26000, finishedEvent.getStatus());
- Status result(ErrorCodes::InternalError, "didn't set status in _stepDownContinue");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
- this,
- stdx::placeholders::_1,
- finishedEvent.getValue(),
- txn,
- waitUntil,
- stepDownUntil,
- force,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(18809, cbh.getStatus());
- cbh = _replExecutor.scheduleWorkAt(
- waitUntil,
- stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
- this,
- stdx::placeholders::_1));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(26001, cbh.getStatus());
- _replExecutor.waitForEvent(finishedEvent.getValue());
- return result;
+ if (replMode == modeReplSet && !_memberState.primary()) {
+ return StatusAndDuration(
+ Status(ErrorCodes::NotMaster, "Not master while waiting for replication"),
+ Milliseconds(timer->millis()));
}
- void ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback(
- const ReplicationExecutor::CallbackData& cbData) {
- if (!cbData.status.isOK()) {
- return;
- }
-
- _signalStepDownWaiters();
- }
-
- void ReplicationCoordinatorImpl::_signalStepDownWaiters() {
- std::for_each(_stepDownWaiters.begin(),
- _stepDownWaiters.end(),
- stdx::bind(&ReplicationExecutor::signalEvent,
- &_replExecutor,
- stdx::placeholders::_1));
- _stepDownWaiters.clear();
- }
-
- void ReplicationCoordinatorImpl::_stepDownContinue(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicationExecutor::EventHandle finishedEvent,
- OperationContext* txn,
- const Date_t waitUntil,
- const Date_t stepDownUntil,
- bool force,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- // Cancelation only occurs on shutdown, which will also handle signaling the event.
- *result = Status(ErrorCodes::ShutdownInProgress, "Shutting down replication");
- return;
+ if (writeConcern.wMode.empty()) {
+ if (writeConcern.wNumNodes < 1) {
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
+ } else if (writeConcern.wNumNodes == 1 && _getMyLastOptime_inlock() >= opTime) {
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
+ }
- ScopeGuard allFinishedGuard = MakeGuard(
- stdx::bind(&ReplicationExecutor::signalEvent, &_replExecutor, finishedEvent));
- if (!cbData.status.isOK()) {
- *result = cbData.status;
- return;
- }
+ // Must hold _mutex before constructing waitInfo as it will modify _replicationWaiterList
+ boost::condition_variable condVar;
+ WaiterInfo waitInfo(&_replicationWaiterList, txn->getOpID(), &opTime, &writeConcern, &condVar);
+ while (!_doneWaitingForReplication_inlock(opTime, writeConcern)) {
+ const int elapsed = timer->millis();
Status interruptedStatus = txn->checkForInterruptNoAssert();
if (!interruptedStatus.isOK()) {
- *result = interruptedStatus;
- return;
+ return StatusAndDuration(interruptedStatus, Milliseconds(elapsed));
}
- if (_topCoord->getRole() != TopologyCoordinator::Role::leader) {
- *result = Status(ErrorCodes::NotMaster,
- "Already stepped down from primary while processing step down "
- "request");
- return;
- }
- const Date_t now = _replExecutor.now();
- if (now >= stepDownUntil) {
- *result = Status(ErrorCodes::ExceededTimeLimit,
- "By the time we were ready to step down, we were already past the "
- "time we were supposed to step down until");
- return;
+ if (!waitInfo.master) {
+ return StatusAndDuration(Status(ErrorCodes::NotMaster,
+ "Not master anymore while waiting for replication"
+ " - this most likely means that a step down"
+ " occurred while waiting for replication"),
+ Milliseconds(elapsed));
}
- bool forceNow = now >= waitUntil ? force : false;
- if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastOptime())) {
- // Schedule work to (potentially) step back up once the stepdown period has ended.
- _replExecutor.scheduleWorkAt(stepDownUntil,
- stdx::bind(&ReplicationCoordinatorImpl::_handleTimePassing,
- this,
- stdx::placeholders::_1));
- boost::unique_lock<boost::mutex> lk(_mutex);
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
- *result = Status::OK();
- return;
+ if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout &&
+ elapsed > writeConcern.wTimeout) {
+ return StatusAndDuration(
+ Status(ErrorCodes::ExceededTimeLimit, "waiting for replication timed out"),
+ Milliseconds(elapsed));
}
- // Step down failed. Keep waiting if we can, otherwise finish.
- if (now >= waitUntil) {
- *result = Status(ErrorCodes::ExceededTimeLimit, str::stream() <<
- "No electable secondaries caught up as of " <<
- dateToISOStringLocal(now));
- return;
+ if (_inShutdown) {
+ return StatusAndDuration(
+ Status(ErrorCodes::ShutdownInProgress, "Replication is being shut down"),
+ Milliseconds(elapsed));
}
- if (_stepDownWaiters.empty()) {
- StatusWith<ReplicationExecutor::EventHandle> reschedEvent =
- _replExecutor.makeEvent();
- if (!reschedEvent.isOK()) {
- *result = reschedEvent.getStatus();
- return;
+ try {
+ if (writeConcern.wTimeout == WriteConcernOptions::kNoTimeout) {
+ condVar.wait(*lock);
+ } else {
+ condVar.timed_wait(*lock, Milliseconds(writeConcern.wTimeout - elapsed));
}
- _stepDownWaiters.push_back(reschedEvent.getValue());
- }
- CBHStatus cbh = _replExecutor.onEvent(
- _stepDownWaiters.back(),
- stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
- this,
- stdx::placeholders::_1,
- finishedEvent,
- txn,
- waitUntil,
- stepDownUntil,
- force,
- result));
- if (!cbh.isOK()) {
- *result = cbh.getStatus();
- return;
- }
- allFinishedGuard.Dismiss();
- }
-
- void ReplicationCoordinatorImpl::_handleTimePassing(
- const ReplicationExecutor::CallbackData& cbData) {
- if (!cbData.status.isOK()) {
- return;
- }
+ } catch (const boost::thread_interrupted&) {
+ }
+ }
+
+ Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+ if (!status.isOK()) {
+ return StatusAndDuration(status, Milliseconds(timer->millis()));
+ }
+
+ return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
+}
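
The loop above is the classic condition-variable wait pattern: re-check the predicate and the deadline on every wake-up, never trust a single wait. A minimal standalone sketch of that pattern, using std:: primitives rather than the boost/stdx shims in this file (all names here are illustrative, not the real API):

    #include <chrono>
    #include <condition_variable>
    #include <functional>
    #include <mutex>

    // Returns true once done() holds, false on timeout -- analogous to the
    // ExceededTimeLimit path above. done() plays the role of
    // _doneWaitingForReplication_inlock and is re-evaluated on each wake-up.
    bool waitUntilSatisfied(std::mutex& mtx,
                            std::condition_variable& cv,
                            const std::function<bool()>& done,
                            std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lock(mtx);
        const auto deadline = std::chrono::steady_clock::now() + timeout;
        while (!done()) {
            if (cv.wait_until(lock, deadline) == std::cv_status::timeout) {
                return false;  // waiting for replication timed out
            }
        }
        return true;
    }
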
+
+Status ReplicationCoordinatorImpl::stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime) {
+ const Date_t startTime = _replExecutor.now();
+ const Date_t stepDownUntil(startTime.millis + stepdownTime.total_milliseconds());
+ const Date_t waitUntil(startTime.millis + waitTime.total_milliseconds());
+
+ if (!getMemberState().primary()) {
+ // Note this check is inherently racy - it's always possible for the node to
+ // stepdown from some other path before we acquire the global shared lock, but
+        // that's okay because we are resilient to that happening in _stepDownContinue.
+ return Status(ErrorCodes::NotMaster, "not primary so can't step down");
+ }
+
+ LockResult lockState = txn->lockState()->lockGlobalBegin(MODE_S);
+ // We've requested the global shared lock which will stop new writes from coming in,
+ // but existing writes could take a long time to finish, so kill all user operations
+ // to help us get the global lock faster.
+ _externalState->killAllUserOperations(txn);
+
+ if (lockState == LOCK_WAITING) {
+ lockState = txn->lockState()->lockGlobalComplete(stepdownTime.total_milliseconds());
+ if (lockState == LOCK_TIMEOUT) {
+ return Status(ErrorCodes::ExceededTimeLimit,
+ "Could not acquire the global shared lock within the amount of time "
+ "specified that we should step down for");
+ }
+ }
+ invariant(lockState == LOCK_OK);
+ ON_BLOCK_EXIT(&Locker::unlockAll, txn->lockState());
+ // From this point onward we are guaranteed to be holding the global shared lock.
+
+ StatusWith<ReplicationExecutor::EventHandle> finishedEvent = _replExecutor.makeEvent();
+ if (finishedEvent.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return finishedEvent.getStatus();
+ }
+ fassert(26000, finishedEvent.getStatus());
+ Status result(ErrorCodes::InternalError, "didn't set status in _stepDownContinue");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
+ this,
+ stdx::placeholders::_1,
+ finishedEvent.getValue(),
+ txn,
+ waitUntil,
+ stepDownUntil,
+ force,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(18809, cbh.getStatus());
+ cbh = _replExecutor.scheduleWorkAt(
+ waitUntil,
+ stdx::bind(&ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback,
+ this,
+ stdx::placeholders::_1));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(26001, cbh.getStatus());
+ _replExecutor.waitForEvent(finishedEvent.getValue());
+ return result;
+}
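
stepDown() uses a schedule-then-wait rendezvous: the executor thread runs _stepDownContinue, which writes into `result` and signals the finished event, while the calling thread blocks on waitForEvent. A rough standalone sketch of the same rendezvous using std::promise/std::future instead of ReplicationExecutor events (illustrative only):

    #include <future>
    #include <thread>

    int scheduleAndWait() {
        std::promise<int> done;                  // stands in for the EventHandle plus &result
        std::future<int> finished = done.get_future();
        std::thread worker([&done] {
            done.set_value(42);                  // like _stepDownContinue writing *result
        });
        int result = finished.get();             // like _replExecutor.waitForEvent(...)
        worker.join();
        return result;
    }
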
+
+void ReplicationCoordinatorImpl::_signalStepDownWaitersFromCallback(
+ const ReplicationExecutor::CallbackData& cbData) {
+ if (!cbData.status.isOK()) {
+ return;
+ }
+
+ _signalStepDownWaiters();
+}
+
+void ReplicationCoordinatorImpl::_signalStepDownWaiters() {
+ std::for_each(
+ _stepDownWaiters.begin(),
+ _stepDownWaiters.end(),
+ stdx::bind(&ReplicationExecutor::signalEvent, &_replExecutor, stdx::placeholders::_1));
+ _stepDownWaiters.clear();
+}
+
+void ReplicationCoordinatorImpl::_stepDownContinue(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplicationExecutor::EventHandle finishedEvent,
+ OperationContext* txn,
+ const Date_t waitUntil,
+ const Date_t stepDownUntil,
+ bool force,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ // Cancelation only occurs on shutdown, which will also handle signaling the event.
+ *result = Status(ErrorCodes::ShutdownInProgress, "Shutting down replication");
+ return;
+ }
+
+ ScopeGuard allFinishedGuard =
+ MakeGuard(stdx::bind(&ReplicationExecutor::signalEvent, &_replExecutor, finishedEvent));
+ if (!cbData.status.isOK()) {
+ *result = cbData.status;
+ return;
+ }
+
+ Status interruptedStatus = txn->checkForInterruptNoAssert();
+ if (!interruptedStatus.isOK()) {
+ *result = interruptedStatus;
+ return;
+ }
+
+ if (_topCoord->getRole() != TopologyCoordinator::Role::leader) {
+ *result = Status(ErrorCodes::NotMaster,
+ "Already stepped down from primary while processing step down "
+ "request");
+ return;
+ }
+ const Date_t now = _replExecutor.now();
+ if (now >= stepDownUntil) {
+ *result = Status(ErrorCodes::ExceededTimeLimit,
+ "By the time we were ready to step down, we were already past the "
+ "time we were supposed to step down until");
+ return;
+ }
+ bool forceNow = now >= waitUntil ? force : false;
+ if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastOptime())) {
+ // Schedule work to (potentially) step back up once the stepdown period has ended.
+ _replExecutor.scheduleWorkAt(stepDownUntil,
+ stdx::bind(&ReplicationCoordinatorImpl::_handleTimePassing,
+ this,
+ stdx::placeholders::_1));
- if (_topCoord->becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(_replExecutor.now())) {
- _performPostMemberStateUpdateAction(kActionWinElection);
- }
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ const PostMemberStateUpdateAction action =
+ _updateMemberStateFromTopologyCoordinator_inlock();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+ *result = Status::OK();
+ return;
}
- bool ReplicationCoordinatorImpl::isMasterForReportingPurposes() {
- if (_settings.usingReplSets()) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- if (_getReplicationMode_inlock() == modeReplSet &&
- _getMemberState_inlock().primary()) {
- return true;
- }
- return false;
- }
-
- if (!_settings.slave)
- return true;
-
-
- // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
- if (replAllDead) {
- return false;
- }
-
- if (_settings.master) {
- // if running with --master --slave, allow.
- return true;
- }
-
- return false;
+ // Step down failed. Keep waiting if we can, otherwise finish.
+ if (now >= waitUntil) {
+ *result = Status(ErrorCodes::ExceededTimeLimit,
+ str::stream() << "No electable secondaries caught up as of "
+ << dateToISOStringLocal(now));
+ return;
}
- bool ReplicationCoordinatorImpl::canAcceptWritesForDatabase(const StringData& dbName) {
- // _canAcceptNonLocalWrites is always true for standalone nodes, always false for nodes
- // started with --slave, and adjusted based on primary+drain state in replica sets.
- //
- // That is, stand-alone nodes, non-slave nodes and drained replica set primaries can always
- // accept writes. Similarly, writes are always permitted to the "local" database. Finally,
- // in the event that a node is started with --slave and --master, we allow writes unless the
- // master/slave system has set the replAllDead flag.
- if (_canAcceptNonLocalWrites) {
- return true;
- }
- if (dbName == "local") {
- return true;
+ if (_stepDownWaiters.empty()) {
+ StatusWith<ReplicationExecutor::EventHandle> reschedEvent = _replExecutor.makeEvent();
+ if (!reschedEvent.isOK()) {
+ *result = reschedEvent.getStatus();
+ return;
}
- return !replAllDead && _settings.master;
+ _stepDownWaiters.push_back(reschedEvent.getValue());
+ }
+ CBHStatus cbh = _replExecutor.onEvent(_stepDownWaiters.back(),
+ stdx::bind(&ReplicationCoordinatorImpl::_stepDownContinue,
+ this,
+ stdx::placeholders::_1,
+ finishedEvent,
+ txn,
+ waitUntil,
+ stepDownUntil,
+ force,
+ result));
+ if (!cbh.isOK()) {
+ *result = cbh.getStatus();
+ return;
}
+ allFinishedGuard.Dismiss();
+}
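
_stepDownContinue depends on a dismissable scope guard: every early return signals the finished event via the guard's destructor, and Dismiss() is called only on the reschedule path, where the next callback takes over responsibility for signaling. A minimal sketch of such a guard (the real MakeGuard/Dismiss live elsewhere in the codebase; this is an illustrative reduction):

    #include <functional>
    #include <utility>

    class ScopeGuardSketch {
    public:
        explicit ScopeGuardSketch(std::function<void()> onExit) : _onExit(std::move(onExit)) {}
        ~ScopeGuardSketch() {
            if (_armed)
                _onExit();  // runs on every exit path unless dismissed
        }
        void dismiss() {    // analogous to allFinishedGuard.Dismiss() above
            _armed = false;
        }

    private:
        std::function<void()> _onExit;
        bool _armed = true;
    };
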
- Status ReplicationCoordinatorImpl::checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk) {
- if (txn->getClient()->isInDirectClient()) {
- return Status::OK();
- }
- if (canAcceptWritesForDatabase(ns.db())) {
- return Status::OK();
- }
- if (_settings.slave || _settings.master) {
- return Status::OK();
- }
- if (slaveOk) {
- if (_canServeNonLocalReads.loadRelaxed()) {
- return Status::OK();
- }
- return Status(
- ErrorCodes::NotMasterOrSecondaryCode,
- "not master or secondary; cannot currently read from this replSet member");
- }
- return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
+void ReplicationCoordinatorImpl::_handleTimePassing(
+ const ReplicationExecutor::CallbackData& cbData) {
+ if (!cbData.status.isOK()) {
+ return;
}
- bool ReplicationCoordinatorImpl::isInPrimaryOrSecondaryState() const {
- return _canServeNonLocalReads.loadRelaxed();
+ if (_topCoord->becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(_replExecutor.now())) {
+ _performPostMemberStateUpdateAction(kActionWinElection);
}
+}
- bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
- if (!idx->unique()) {
- return false;
- }
- // Never ignore _id index
- if (idx->isIdIndex()) {
- return false;
- }
- if (nsToDatabaseSubstring(idx->parentNS()) == "local" ) {
- // always enforce on local
- return false;
- }
+bool ReplicationCoordinatorImpl::isMasterForReportingPurposes() {
+ if (_settings.usingReplSets()) {
boost::lock_guard<boost::mutex> lock(_mutex);
- if (_getReplicationMode_inlock() != modeReplSet) {
- return false;
- }
- // see SERVER-6671
- MemberState ms = _getMemberState_inlock();
- switch ( ms.s ) {
- case MemberState::RS_SECONDARY:
- case MemberState::RS_RECOVERING:
- case MemberState::RS_ROLLBACK:
- case MemberState::RS_STARTUP2:
+ if (_getReplicationMode_inlock() == modeReplSet && _getMemberState_inlock().primary()) {
return true;
- default:
- return false;
}
+ return false;
}
- OID ReplicationCoordinatorImpl::getElectionId() {
- boost::lock_guard<boost::mutex> lock(_mutex);
- return _electionId;
- }
+ if (!_settings.slave)
+ return true;
- OID ReplicationCoordinatorImpl::getMyRID() const {
- boost::lock_guard<boost::mutex> lock(_mutex);
- return _getMyRID_inlock();
- }
- OID ReplicationCoordinatorImpl::_getMyRID_inlock() const {
- return _myRID;
+ // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
+ if (replAllDead) {
+ return false;
}
- int ReplicationCoordinatorImpl::getMyId() const {
- boost::lock_guard<boost::mutex> lock(_mutex);
- return _getMyId_inlock();
+ if (_settings.master) {
+ // if running with --master --slave, allow.
+ return true;
}
- int ReplicationCoordinatorImpl::_getMyId_inlock() const {
- const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
- return self.getId();
- }
+ return false;
+}
- bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(
- BSONObjBuilder* cmdBuilder) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- invariant(_rsConfig.isInitialized());
- // do not send updates if we have been removed from the config
- if (_selfIndex == -1) {
- return false;
- }
- cmdBuilder->append("replSetUpdatePosition", 1);
-        // create an array containing an object for each member connected to us, and one for ourself
- BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
- {
- for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin();
- itr != _slaveInfo.end(); ++itr) {
- if (itr->opTime.isNull()) {
- // Don't include info on members we haven't heard from yet.
- continue;
- }
- BSONObjBuilder entry(arrayBuilder.subobjStart());
- entry.append("_id", itr->rid);
- entry.append("optime", itr->opTime);
- entry.append("memberId", itr->memberId);
- entry.append("cfgver", _rsConfig.getConfigVersion());
- // SERVER-14550 Even though the "config" field isn't used on the other end in 3.0,
- // we need to keep sending it for 2.6 compatibility.
- // TODO(spencer): Remove this after 3.0 is released.
- const MemberConfig* member = _rsConfig.findMemberByID(itr->memberId);
- fassert(18651, member); // We ensured the member existed in processHandshake.
- entry.append("config", member->toBSON(_rsConfig.getTagConfig()));
- }
- }
+bool ReplicationCoordinatorImpl::canAcceptWritesForDatabase(const StringData& dbName) {
+ // _canAcceptNonLocalWrites is always true for standalone nodes, always false for nodes
+ // started with --slave, and adjusted based on primary+drain state in replica sets.
+ //
+ // That is, stand-alone nodes, non-slave nodes and drained replica set primaries can always
+ // accept writes. Similarly, writes are always permitted to the "local" database. Finally,
+ // in the event that a node is started with --slave and --master, we allow writes unless the
+ // master/slave system has set the replAllDead flag.
+ if (_canAcceptNonLocalWrites) {
return true;
}
-
- void ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommandHandshakes(
- std::vector<BSONObj>* handshakes) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- // do not send handshakes if we have been removed from the config
- if (_selfIndex == -1) {
- return;
- }
- // handshake objs for ourself and all chained members
- for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin();
- itr != _slaveInfo.end(); ++itr) {
- if (!itr->rid.isSet()) {
- // Don't include info on members we haven't heard from yet.
- continue;
- }
-
- BSONObjBuilder cmd;
- cmd.append("replSetUpdatePosition", 1);
- {
- BSONObjBuilder subCmd (cmd.subobjStart("handshake"));
- subCmd.append("handshake", itr->rid);
- subCmd.append("member", itr->memberId);
- // SERVER-14550 Even though the "config" field isn't used on the other end in 3.0,
- // we need to keep sending it for 2.6 compatibility.
- // TODO(spencer): Remove this after 3.0 is released.
- const MemberConfig* member = _rsConfig.findMemberByID(itr->memberId);
- fassert(18650, member); // We ensured the member existed in processHandshake.
- subCmd.append("config", member->toBSON(_rsConfig.getTagConfig()));
- }
- handshakes->push_back(cmd.obj());
- }
+ if (dbName == "local") {
+ return true;
}
+ return !replAllDead && _settings.master;
+}
- Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) {
- Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&TopologyCoordinator::prepareStatusResponse,
- _topCoord.get(),
- stdx::placeholders::_1,
- _replExecutor.now(),
- time(0) - serverGlobalParams.started,
- getMyLastOptime(),
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18640, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
-
- return result;
+Status ReplicationCoordinatorImpl::checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk) {
+ if (txn->getClient()->isInDirectClient()) {
+ return Status::OK();
}
-
- void ReplicationCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
- invariant(getSettings().usingReplSets());
-
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish,
- this,
- stdx::placeholders::_1,
- response));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- response->markAsShutdownInProgress();
- return;
- }
- fassert(28602, cbh.getStatus());
-
- _replExecutor.wait(cbh.getValue());
- if (isWaitingForApplierToDrain()) {
- // Report that we are secondary to ismaster callers until drain completes.
- response->setIsMaster(false);
- response->setIsSecondary(true);
- }
+ if (canAcceptWritesForDatabase(ns.db())) {
+ return Status::OK();
}
-
- void ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish(
- const ReplicationExecutor::CallbackData& cbData, IsMasterResponse* response) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- response->markAsShutdownInProgress();
- return;
- }
- _topCoord->fillIsMasterForReplSet(response);
+ if (_settings.slave || _settings.master) {
+ return Status::OK();
}
-
- void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- BSONArrayBuilder slaves(result->subarrayStart("slaves"));
- {
- for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin();
- itr != _slaveInfo.end(); ++itr) {
- BSONObjBuilder entry(slaves.subobjStart());
- entry.append("rid", itr->rid);
- entry.append("optime", itr->opTime);
- entry.append("host", itr->hostAndPort.toString());
- if (_getReplicationMode_inlock() == modeReplSet) {
- if (_selfIndex == -1) {
- continue;
- }
- invariant(itr->memberId >= 0);
- entry.append("memberId", itr->memberId);
- }
- }
+ if (slaveOk) {
+ if (_canServeNonLocalReads.loadRelaxed()) {
+ return Status::OK();
}
+ return Status(ErrorCodes::NotMasterOrSecondaryCode,
+ "not master or secondary; cannot currently read from this replSet member");
}
+ return Status(ErrorCodes::NotMasterNoSlaveOkCode, "not master and slaveOk=false");
+}
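
The read gate above applies its checks in strict precedence order: direct-client reads always pass, then write-capable nodes, then legacy master/slave settings, then slaveOk against the non-local-reads flag. A condensed standalone restatement of that precedence (parameter names and the enum are illustrative):

    enum class ReadGate { Ok, NotMasterNoSlaveOk, NotMasterOrSecondary };

    ReadGate checkReads(bool directClient,
                        bool canWriteToDb,
                        bool masterSlaveMode,
                        bool slaveOk,
                        bool canServeNonLocalReads) {
        if (directClient) return ReadGate::Ok;    // internal reads bypass the gate
        if (canWriteToDb) return ReadGate::Ok;    // primaries and standalones
        if (masterSlaveMode) return ReadGate::Ok; // legacy --master / --slave
        if (slaveOk) {
            return canServeNonLocalReads ? ReadGate::Ok
                                         : ReadGate::NotMasterOrSecondary;
        }
        return ReadGate::NotMasterNoSlaveOk;
    }
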
- void ReplicationCoordinatorImpl::processReplSetGetConfig(BSONObjBuilder* result) {
- boost::lock_guard<boost::mutex> lock(_mutex);
- result->append("config", _rsConfig.toBSON());
- }
+bool ReplicationCoordinatorImpl::isInPrimaryOrSecondaryState() const {
+ return _canServeNonLocalReads.loadRelaxed();
+}
- bool ReplicationCoordinatorImpl::getMaintenanceMode() {
- bool maintenanceMode(false);
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_getMaintenanceMode_helper,
- this,
- stdx::placeholders::_1,
- &maintenanceMode));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18811, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return maintenanceMode;
+bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
+ if (!idx->unique()) {
+ return false;
}
-
- void ReplicationCoordinatorImpl::_getMaintenanceMode_helper(
- const ReplicationExecutor::CallbackData& cbData,
- bool* maintenanceMode) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- *maintenanceMode = _topCoord->getMaintenanceCount() > 0;
+ // Never ignore _id index
+ if (idx->isIdIndex()) {
+ return false;
}
-
- Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
- if (_getReplicationMode_inlock() != modeReplSet) {
- return Status(ErrorCodes::NoReplicationEnabled,
- "can only set maintenance mode on replica set members");
- }
-
- Status result(ErrorCodes::InternalError, "didn't set status in _setMaintenanceMode_helper");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_setMaintenanceMode_helper,
- this,
- stdx::placeholders::_1,
- activate,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(18698, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
+ if (nsToDatabaseSubstring(idx->parentNS()) == "local") {
+ // always enforce on local
+ return false;
}
-
- void ReplicationCoordinatorImpl::_setMaintenanceMode_helper(
- const ReplicationExecutor::CallbackData& cbData,
- bool activate,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- if (_getMemberState_inlock().primary()) {
- *result = Status(ErrorCodes::NotSecondary, "primaries can't modify maintenance mode");
- return;
- }
-
- int curMaintenanceCalls = _topCoord->getMaintenanceCount();
- if (activate) {
- log() << "replSet going into maintenance mode with " << curMaintenanceCalls
- << " other maintenance mode tasks in progress" << rsLog;
- _topCoord->adjustMaintenanceCountBy(1);
- }
- else if (curMaintenanceCalls > 0) {
- invariant(_topCoord->getRole() == TopologyCoordinator::Role::follower);
-
- _topCoord->adjustMaintenanceCountBy(-1);
-
- log() << "leaving maintenance mode (" << curMaintenanceCalls-1
- << " other maintenance mode tasks ongoing)" << rsLog;
- } else {
- warning() << "Attempted to leave maintenance mode but it is not currently active";
- *result = Status(ErrorCodes::OperationFailed, "already out of maintenance mode");
- return;
- }
-
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- *result = Status::OK();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ if (_getReplicationMode_inlock() != modeReplSet) {
+ return false;
}
-
- Status ReplicationCoordinatorImpl::processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) {
- Status result(ErrorCodes::InternalError, "didn't set status in prepareSyncFromResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&TopologyCoordinator::prepareSyncFromResponse,
- _topCoord.get(),
- stdx::placeholders::_1,
- target,
- _getMyLastOptime_inlock(),
- resultObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18649, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- Status ReplicationCoordinatorImpl::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
- Status result(ErrorCodes::InternalError, "didn't set status in prepareFreezeResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFreeze_finish,
- this,
- stdx::placeholders::_1,
- secs,
- resultObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return cbh.getStatus();
- }
- fassert(18641, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- void ReplicationCoordinatorImpl::_processReplSetFreeze_finish(
- const ReplicationExecutor::CallbackData& cbData,
- int secs,
- BSONObjBuilder* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- _topCoord->prepareFreezeResponse(_replExecutor.now(), secs, response);
-
- if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- // If we just unfroze and ended our stepdown period and we are a one node replica set,
- // the topology coordinator will have gone into the candidate role to signal that we
- // need to elect ourself.
- _performPostMemberStateUpdateAction(kActionWinElection);
- }
- *result = Status::OK();
+ // see SERVER-6671
+ MemberState ms = _getMemberState_inlock();
+ switch (ms.s) {
+ case MemberState::RS_SECONDARY:
+ case MemberState::RS_RECOVERING:
+ case MemberState::RS_ROLLBACK:
+ case MemberState::RS_STARTUP2:
+ return true;
+ default:
+ return false;
}
-
- Status ReplicationCoordinatorImpl::processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) {
+}
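
Per SERVER-6671, unique-index enforcement is relaxed only while a node is applying replicated operations (SECONDARY, RECOVERING, ROLLBACK, STARTUP2) and never for the _id index, the local database, or outside replica-set mode. A compact restatement of that decision with a standalone, illustrative enum:

    enum class State { Primary, Secondary, Recovering, Rollback, Startup2, Other };

    bool ignoreUniqueIndex(bool unique, bool idIndex, bool localDb, bool replSet, State s) {
        if (!unique || idIndex || localDb || !replSet)
            return false;  // these cases always enforce uniqueness
        switch (s) {       // SERVER-6671: relax only while applying replicated ops
            case State::Secondary:
            case State::Recovering:
            case State::Rollback:
            case State::Startup2:
                return true;
            default:
                return false;
        }
    }
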
+
+OID ReplicationCoordinatorImpl::getElectionId() {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ return _electionId;
+}
+
+OID ReplicationCoordinatorImpl::getMyRID() const {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ return _getMyRID_inlock();
+}
+
+OID ReplicationCoordinatorImpl::_getMyRID_inlock() const {
+ return _myRID;
+}
+
+int ReplicationCoordinatorImpl::getMyId() const {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ return _getMyId_inlock();
+}
+
+int ReplicationCoordinatorImpl::_getMyId_inlock() const {
+ const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
+ return self.getId();
+}
+
+bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ invariant(_rsConfig.isInitialized());
+ // do not send updates if we have been removed from the config
+ if (_selfIndex == -1) {
+ return false;
+ }
+ cmdBuilder->append("replSetUpdatePosition", 1);
+    // create an array containing an object for each member connected to us, and one for ourself
+ BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
+ {
+ for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end();
+ ++itr) {
+ if (itr->opTime.isNull()) {
+ // Don't include info on members we haven't heard from yet.
+ continue;
+ }
+ BSONObjBuilder entry(arrayBuilder.subobjStart());
+ entry.append("_id", itr->rid);
+ entry.append("optime", itr->opTime);
+ entry.append("memberId", itr->memberId);
+ entry.append("cfgver", _rsConfig.getConfigVersion());
+ // SERVER-14550 Even though the "config" field isn't used on the other end in 3.0,
+ // we need to keep sending it for 2.6 compatibility.
+ // TODO(spencer): Remove this after 3.0 is released.
+ const MemberConfig* member = _rsConfig.findMemberByID(itr->memberId);
+ fassert(18651, member); // We ensured the member existed in processHandshake.
+ entry.append("config", member->toBSON(_rsConfig.getTagConfig()));
+ }
+ }
+ return true;
+}
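
Reading the builder calls above, the resulting command document is shaped roughly as follows (field values illustrative; members with a null optime are omitted):

    {
        replSetUpdatePosition: 1,
        optimes: [
            { _id: <rid>, optime: <opTime>, memberId: <id>, cfgver: <configVersion>, config: { ... } },
            ...
        ]
    }
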
+
+void ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommandHandshakes(
+ std::vector<BSONObj>* handshakes) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ // do not send handshakes if we have been removed from the config
+ if (_selfIndex == -1) {
+ return;
+ }
+ // handshake objs for ourself and all chained members
+ for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end(); ++itr) {
+ if (!itr->rid.isSet()) {
+ // Don't include info on members we haven't heard from yet.
+ continue;
+ }
+
+ BSONObjBuilder cmd;
+ cmd.append("replSetUpdatePosition", 1);
{
- boost::lock_guard<boost::mutex> lock(_mutex);
- if (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- return Status(ErrorCodes::NotYetInitialized,
- "Received heartbeat while still initializing replication system");
+ BSONObjBuilder subCmd(cmd.subobjStart("handshake"));
+ subCmd.append("handshake", itr->rid);
+ subCmd.append("member", itr->memberId);
+ // SERVER-14550 Even though the "config" field isn't used on the other end in 3.0,
+ // we need to keep sending it for 2.6 compatibility.
+ // TODO(spencer): Remove this after 3.0 is released.
+ const MemberConfig* member = _rsConfig.findMemberByID(itr->memberId);
+ fassert(18650, member); // We ensured the member existed in processHandshake.
+ subCmd.append("config", member->toBSON(_rsConfig.getTagConfig()));
+ }
+ handshakes->push_back(cmd.obj());
+ }
+}
+
+Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&TopologyCoordinator::prepareStatusResponse,
+ _topCoord.get(),
+ stdx::placeholders::_1,
+ _replExecutor.now(),
+ time(0) - serverGlobalParams.started,
+ getMyLastOptime(),
+ response,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18640, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+
+ return result;
+}
+
+void ReplicationCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
+ invariant(getSettings().usingReplSets());
+
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish,
+ this,
+ stdx::placeholders::_1,
+ response));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ response->markAsShutdownInProgress();
+ return;
+ }
+ fassert(28602, cbh.getStatus());
+
+ _replExecutor.wait(cbh.getValue());
+ if (isWaitingForApplierToDrain()) {
+ // Report that we are secondary to ismaster callers until drain completes.
+ response->setIsMaster(false);
+ response->setIsSecondary(true);
+ }
+}
+
+void ReplicationCoordinatorImpl::_fillIsMasterForReplSet_finish(
+ const ReplicationExecutor::CallbackData& cbData, IsMasterResponse* response) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ response->markAsShutdownInProgress();
+ return;
+ }
+ _topCoord->fillIsMasterForReplSet(response);
+}
+
+void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ BSONArrayBuilder slaves(result->subarrayStart("slaves"));
+ {
+ for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end();
+ ++itr) {
+ BSONObjBuilder entry(slaves.subobjStart());
+ entry.append("rid", itr->rid);
+ entry.append("optime", itr->opTime);
+ entry.append("host", itr->hostAndPort.toString());
+ if (_getReplicationMode_inlock() == modeReplSet) {
+ if (_selfIndex == -1) {
+ continue;
+ }
+ invariant(itr->memberId >= 0);
+ entry.append("memberId", itr->memberId);
}
}
-
- Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processHeartbeatFinish,
- this,
- stdx::placeholders::_1,
- args,
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18508, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
}
+}
- void ReplicationCoordinatorImpl::_processHeartbeatFinish(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response,
- Status* outStatus) {
+void ReplicationCoordinatorImpl::processReplSetGetConfig(BSONObjBuilder* result) {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ result->append("config", _rsConfig.toBSON());
+}
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *outStatus = Status(ErrorCodes::ShutdownInProgress, "Replication shutdown in progress");
- return;
- }
- fassert(18910, cbData.status);
- const Date_t now = _replExecutor.now();
- *outStatus = _topCoord->prepareHeartbeatResponse(
- now,
- args,
- _settings.ourSetName(),
- getMyLastOptime(),
- response);
- if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
- _selfIndex < 0) {
- // If this node does not belong to the configuration it knows about, send heartbeats
- // back to any node that sends us a heartbeat, in case one of those remote nodes has
- // a configuration that contains us. Chances are excellent that it will, since that
- // is the only reason for a remote node to send this node a heartbeat request.
- if (!args.getSenderHost().empty() && _seedList.insert(args.getSenderHost()).second) {
- _scheduleHeartbeatToTarget(args.getSenderHost(), -1, now);
- }
- }
+bool ReplicationCoordinatorImpl::getMaintenanceMode() {
+ bool maintenanceMode(false);
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_getMaintenanceMode_helper,
+ this,
+ stdx::placeholders::_1,
+ &maintenanceMode));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
-
- Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) {
-
- log() << "replSetReconfig admin command received from client";
-
- boost::unique_lock<boost::mutex> lk(_mutex);
-
- while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- _rsConfigStateChange.wait(lk);
- }
-
- switch (_rsConfigState) {
+ fassert(18811, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return maintenanceMode;
+}
+
+void ReplicationCoordinatorImpl::_getMaintenanceMode_helper(
+ const ReplicationExecutor::CallbackData& cbData, bool* maintenanceMode) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ *maintenanceMode = _topCoord->getMaintenanceCount() > 0;
+}
+
+Status ReplicationCoordinatorImpl::setMaintenanceMode(bool activate) {
+ if (_getReplicationMode_inlock() != modeReplSet) {
+ return Status(ErrorCodes::NoReplicationEnabled,
+ "can only set maintenance mode on replica set members");
+ }
+
+ Status result(ErrorCodes::InternalError, "didn't set status in _setMaintenanceMode_helper");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_setMaintenanceMode_helper,
+ this,
+ stdx::placeholders::_1,
+ activate,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(18698, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_setMaintenanceMode_helper(
+ const ReplicationExecutor::CallbackData& cbData, bool activate, Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ if (_getMemberState_inlock().primary()) {
+ *result = Status(ErrorCodes::NotSecondary, "primaries can't modify maintenance mode");
+ return;
+ }
+
+ int curMaintenanceCalls = _topCoord->getMaintenanceCount();
+ if (activate) {
+ log() << "replSet going into maintenance mode with " << curMaintenanceCalls
+ << " other maintenance mode tasks in progress" << rsLog;
+ _topCoord->adjustMaintenanceCountBy(1);
+ } else if (curMaintenanceCalls > 0) {
+ invariant(_topCoord->getRole() == TopologyCoordinator::Role::follower);
+
+ _topCoord->adjustMaintenanceCountBy(-1);
+
+ log() << "leaving maintenance mode (" << curMaintenanceCalls - 1
+ << " other maintenance mode tasks ongoing)" << rsLog;
+ } else {
+ warning() << "Attempted to leave maintenance mode but it is not currently active";
+ *result = Status(ErrorCodes::OperationFailed, "already out of maintenance mode");
+ return;
+ }
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ *result = Status::OK();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+Status ReplicationCoordinatorImpl::processReplSetSyncFrom(const HostAndPort& target,
+ BSONObjBuilder* resultObj) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareSyncFromResponse");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&TopologyCoordinator::prepareSyncFromResponse,
+ _topCoord.get(),
+ stdx::placeholders::_1,
+ target,
+ _getMyLastOptime_inlock(),
+ resultObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18649, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+Status ReplicationCoordinatorImpl::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareFreezeResponse");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFreeze_finish,
+ this,
+ stdx::placeholders::_1,
+ secs,
+ resultObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return cbh.getStatus();
+ }
+ fassert(18641, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetFreeze_finish(
+ const ReplicationExecutor::CallbackData& cbData,
+ int secs,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ _topCoord->prepareFreezeResponse(_replExecutor.now(), secs, response);
+
+ if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+ // If we just unfroze and ended our stepdown period and we are a one node replica set,
+ // the topology coordinator will have gone into the candidate role to signal that we
+ // need to elect ourself.
+ _performPostMemberStateUpdateAction(kActionWinElection);
+ }
+ *result = Status::OK();
+}
+
+Status ReplicationCoordinatorImpl::processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) {
+ {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ if (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ return Status(ErrorCodes::NotYetInitialized,
+ "Received heartbeat while still initializing replication system");
+ }
+ }
+
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareHeartbeatResponse");
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_processHeartbeatFinish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ response,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18508, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processHeartbeatFinish(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response,
+ Status* outStatus) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *outStatus = Status(ErrorCodes::ShutdownInProgress, "Replication shutdown in progress");
+ return;
+ }
+ fassert(18910, cbData.status);
+ const Date_t now = _replExecutor.now();
+ *outStatus = _topCoord->prepareHeartbeatResponse(
+ now, args, _settings.ourSetName(), getMyLastOptime(), response);
+ if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
+ _selfIndex < 0) {
+ // If this node does not belong to the configuration it knows about, send heartbeats
+ // back to any node that sends us a heartbeat, in case one of those remote nodes has
+ // a configuration that contains us. Chances are excellent that it will, since that
+ // is the only reason for a remote node to send this node a heartbeat request.
+ if (!args.getSenderHost().empty() && _seedList.insert(args.getSenderHost()).second) {
+ _scheduleHeartbeatToTarget(args.getSenderHost(), -1, now);
+ }
+ }
+}
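
The `_seedList.insert(...).second` test above schedules at most one heartbeat per previously unseen sender, since std::set::insert reports through its returned pair whether the element was newly added. A tiny standalone sketch of the idiom (host strings illustrative):

    #include <iostream>
    #include <set>
    #include <string>

    int main() {
        std::set<std::string> seeds;
        for (const char* host : {"a:27017", "b:27017", "a:27017"}) {
            if (seeds.insert(host).second) {  // .second is true only on first insertion
                std::cout << "scheduling heartbeat to " << host << "\n";
            }
        }
        // prints a:27017 and b:27017 once each; the duplicate is skipped
    }
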
+
+Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) {
+ log() << "replSetReconfig admin command received from client";
+
+ boost::unique_lock<boost::mutex> lk(_mutex);
+
+ while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ _rsConfigStateChange.wait(lk);
+ }
+
+ switch (_rsConfigState) {
case kConfigSteady:
break;
case kConfigUninitialized:
return Status(ErrorCodes::NotYetInitialized,
"Node not yet initialized; use the replSetInitiate command");
case kConfigReplicationDisabled:
- invariant(false); // should be unreachable due to !_settings.usingReplSets() check above
+ invariant(
+ false); // should be unreachable due to !_settings.usingReplSets() check above
case kConfigInitiating:
case kConfigReconfiguring:
case kConfigHBReconfiguring:
@@ -1690,279 +1638,260 @@ namespace {
default:
severe() << "Unexpected _rsConfigState " << int(_rsConfigState);
fassertFailed(18914);
- }
+ }
- invariant(_rsConfig.isInitialized());
+ invariant(_rsConfig.isInitialized());
- if (!args.force && !_getMemberState_inlock().primary()) {
- return Status(ErrorCodes::NotMaster, str::stream() <<
- "replSetReconfig should only be run on PRIMARY, but my state is " <<
- _getMemberState_inlock().toString() <<
- "; use the \"force\" argument to override");
- }
+ if (!args.force && !_getMemberState_inlock().primary()) {
+ return Status(ErrorCodes::NotMaster,
+ str::stream()
+ << "replSetReconfig should only be run on PRIMARY, but my state is "
+ << _getMemberState_inlock().toString()
+ << "; use the \"force\" argument to override");
+ }
- _setConfigState_inlock(kConfigReconfiguring);
- ScopeGuard configStateGuard = MakeGuard(
- lockAndCall,
- &lk,
- stdx::bind(&ReplicationCoordinatorImpl::_setConfigState_inlock,
- this,
- kConfigSteady));
+ _setConfigState_inlock(kConfigReconfiguring);
+ ScopeGuard configStateGuard = MakeGuard(
+ lockAndCall,
+ &lk,
+ stdx::bind(&ReplicationCoordinatorImpl::_setConfigState_inlock, this, kConfigSteady));
- ReplicaSetConfig oldConfig = _rsConfig;
- lk.unlock();
+ ReplicaSetConfig oldConfig = _rsConfig;
+ lk.unlock();
- ReplicaSetConfig newConfig;
- BSONObj newConfigObj = args.newConfigObj;
- if (args.force) {
- newConfigObj = incrementConfigVersionByRandom(newConfigObj);
- }
- Status status = newConfig.initialize(newConfigObj);
- if (!status.isOK()) {
- error() << "replSetReconfig got " << status << " while parsing " << newConfigObj;
- return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());;
- }
- if (newConfig.getReplSetName() != _settings.ourSetName()) {
- str::stream errmsg;
- errmsg << "Attempting to reconfigure a replica set with name " <<
- newConfig.getReplSetName() << ", but command line reports " <<
- _settings.ourSetName() << "; rejecting";
- error() << std::string(errmsg);
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
-
- StatusWith<int> myIndex = validateConfigForReconfig(
- _externalState.get(),
- oldConfig,
- newConfig,
- args.force);
- if (!myIndex.isOK()) {
- error() << "replSetReconfig got " << myIndex.getStatus() << " while validating " <<
- newConfigObj;
- return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- myIndex.getStatus().reason());
- }
+ ReplicaSetConfig newConfig;
+ BSONObj newConfigObj = args.newConfigObj;
+ if (args.force) {
+ newConfigObj = incrementConfigVersionByRandom(newConfigObj);
+ }
+ Status status = newConfig.initialize(newConfigObj);
+ if (!status.isOK()) {
+ error() << "replSetReconfig got " << status << " while parsing " << newConfigObj;
+ return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
+ }
+ if (newConfig.getReplSetName() != _settings.ourSetName()) {
+ str::stream errmsg;
+ errmsg << "Attempting to reconfigure a replica set with name " << newConfig.getReplSetName()
+ << ", but command line reports " << _settings.ourSetName() << "; rejecting";
+ error() << std::string(errmsg);
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
- log() << "replSetReconfig config object with " << newConfig.getNumMembers() <<
- " members parses ok";
+ StatusWith<int> myIndex =
+ validateConfigForReconfig(_externalState.get(), oldConfig, newConfig, args.force);
+ if (!myIndex.isOK()) {
+ error() << "replSetReconfig got " << myIndex.getStatus() << " while validating "
+ << newConfigObj;
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ myIndex.getStatus().reason());
+ }
- if (!args.force) {
- status = checkQuorumForReconfig(&_replExecutor,
- newConfig,
- myIndex.getValue());
- if (!status.isOK()) {
- error() << "replSetReconfig failed; " << status;
- return status;
- }
- }
+ log() << "replSetReconfig config object with " << newConfig.getNumMembers()
+ << " members parses ok";
- status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
+ if (!args.force) {
+ status = checkQuorumForReconfig(&_replExecutor, newConfig, myIndex.getValue());
if (!status.isOK()) {
- error() << "replSetReconfig failed to store config document; " << status;
+ error() << "replSetReconfig failed; " << status;
return status;
}
-
- const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
- stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetReconfig,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex.getValue()));
-
- // If it's a force reconfig, the primary node may not be electable after the configuration
- // change. In case we are that primary node, finish the reconfig under the global lock,
- // so that the step down occurs safely.
- CBHStatus cbh =
- args.force ?
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn) :
- _replExecutor.scheduleWork(reconfigFinishFn);
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return status;
- }
- fassert(18824, cbh.getStatus());
- configStateGuard.Dismiss();
- _replExecutor.wait(cbh.getValue());
- return Status::OK();
}
- void ReplicationCoordinatorImpl::_finishReplSetReconfig(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex) {
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigReconfiguring);
- invariant(_rsConfig.isInitialized());
- const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
+ if (!status.isOK()) {
+ error() << "replSetReconfig failed to store config document; " << status;
+ return status;
}
- Status ReplicationCoordinatorImpl::processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) {
- log() << "replSetInitiate admin command received from client";
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- if (!_settings.usingReplSets()) {
- return Status(ErrorCodes::NoReplicationEnabled, "server is not running with --replSet");
- }
-
- while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
- _rsConfigStateChange.wait(lk);
- }
-
- if (_rsConfigState != kConfigUninitialized) {
- resultObj->append("info",
- "try querying local.system.replset to see current configuration");
- return Status(ErrorCodes::AlreadyInitialized, "already initialized");
- }
- invariant(!_rsConfig.isInitialized());
- _setConfigState_inlock(kConfigInitiating);
- ScopeGuard configStateGuard = MakeGuard(
- lockAndCall,
- &lk,
- stdx::bind(&ReplicationCoordinatorImpl::_setConfigState_inlock,
- this,
- kConfigUninitialized));
- lk.unlock();
-
- ReplicaSetConfig newConfig;
- Status status = newConfig.initialize(configObj);
- if (!status.isOK()) {
- error() << "replSet initiate got " << status << " while parsing " << configObj;
- return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());;
- }
- if (newConfig.getReplSetName() != _settings.ourSetName()) {
- str::stream errmsg;
- errmsg << "Attempting to initiate a replica set with name " <<
- newConfig.getReplSetName() << ", but command line reports " <<
- _settings.ourSetName() << "; rejecting";
- error() << std::string(errmsg);
- return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
- }
-
- StatusWith<int> myIndex = validateConfigForInitiate(_externalState.get(), newConfig);
- if (!myIndex.isOK()) {
- error() << "replSet initiate got " << myIndex.getStatus() << " while validating " <<
- configObj;
- return Status(ErrorCodes::InvalidReplicaSetConfig, myIndex.getStatus().reason());
- }
-
- log() << "replSet replSetInitiate config object with " << newConfig.getNumMembers() <<
- " members parses ok";
-
- status = checkQuorumForInitiate(
- &_replExecutor,
- newConfig,
- myIndex.getValue());
-
- if (!status.isOK()) {
- error() << "replSet replSetInitiate failed; " << status;
- return status;
- }
+ const stdx::function<void(const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
+ stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetReconfig,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex.getValue()));
- status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
- if (!status.isOK()) {
- error() << "replSet replSetInitiate failed to store config document; " << status;
- return status;
- }
-
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetInitiate,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex.getValue()));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return status;
- }
- configStateGuard.Dismiss();
- fassert(18654, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
-
- if (status.isOK()) {
- // Create the oplog with the first entry, and start repl threads.
- _externalState->initiateOplog(txn);
- _externalState->startThreads();
- }
+ // If it's a force reconfig, the primary node may not be electable after the configuration
+ // change. In case we are that primary node, finish the reconfig under the global lock,
+ // so that the step down occurs safely.
+ CBHStatus cbh = args.force ? _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn)
+ : _replExecutor.scheduleWork(reconfigFinishFn);
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return status;
+ }
+ fassert(18824, cbh.getStatus());
+ configStateGuard.Dismiss();
+ _replExecutor.wait(cbh.getValue());
+ return Status::OK();
+}
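
For a force reconfig, incrementConfigVersionByRandom bumps the config version so the forced config is very likely to compare newer than any config installed concurrently elsewhere. A sketch of that idea only, not the actual implementation (offset range is an assumption):

    #include <cstdint>
    #include <random>

    // Illustrative: add a random positive offset to the version so a forced
    // config wins version comparisons against concurrently installed configs.
    std::int64_t bumpVersionRandomly(std::int64_t currentVersion) {
        std::mt19937 gen{std::random_device{}()};
        std::uniform_int_distribution<std::int64_t> dist(1, 10000);  // range illustrative
        return currentVersion + dist(gen);
    }
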
+
+void ReplicationCoordinatorImpl::_finishReplSetReconfig(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigReconfiguring);
+ invariant(_rsConfig.isInitialized());
+ const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+Status ReplicationCoordinatorImpl::processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) {
+ log() << "replSetInitiate admin command received from client";
+
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ if (!_settings.usingReplSets()) {
+ return Status(ErrorCodes::NoReplicationEnabled, "server is not running with --replSet");
+ }
+
+ while (_rsConfigState == kConfigPreStart || _rsConfigState == kConfigStartingUp) {
+ _rsConfigStateChange.wait(lk);
+ }
+
+ if (_rsConfigState != kConfigUninitialized) {
+ resultObj->append("info", "try querying local.system.replset to see current configuration");
+ return Status(ErrorCodes::AlreadyInitialized, "already initialized");
+ }
+ invariant(!_rsConfig.isInitialized());
+ _setConfigState_inlock(kConfigInitiating);
+ ScopeGuard configStateGuard = MakeGuard(
+ lockAndCall,
+ &lk,
+ stdx::bind(
+ &ReplicationCoordinatorImpl::_setConfigState_inlock, this, kConfigUninitialized));
+ lk.unlock();
+
+ ReplicaSetConfig newConfig;
+ Status status = newConfig.initialize(configObj);
+ if (!status.isOK()) {
+ error() << "replSet initiate got " << status << " while parsing " << configObj;
+ return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
+ }
+ if (newConfig.getReplSetName() != _settings.ourSetName()) {
+ str::stream errmsg;
+ errmsg << "Attempting to initiate a replica set with name " << newConfig.getReplSetName()
+ << ", but command line reports " << _settings.ourSetName() << "; rejecting";
+ error() << std::string(errmsg);
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
+
+ StatusWith<int> myIndex = validateConfigForInitiate(_externalState.get(), newConfig);
+ if (!myIndex.isOK()) {
+ error() << "replSet initiate got " << myIndex.getStatus() << " while validating "
+ << configObj;
+ return Status(ErrorCodes::InvalidReplicaSetConfig, myIndex.getStatus().reason());
+ }
+
+ log() << "replSet replSetInitiate config object with " << newConfig.getNumMembers()
+ << " members parses ok";
+
+ status = checkQuorumForInitiate(&_replExecutor, newConfig, myIndex.getValue());
+
+ if (!status.isOK()) {
+ error() << "replSet replSetInitiate failed; " << status;
return status;
}
- void ReplicationCoordinatorImpl::_finishReplSetInitiate(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex) {
-
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigInitiating);
- invariant(!_rsConfig.isInitialized());
- const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ status = _externalState->storeLocalConfigDocument(txn, newConfig.toBSON());
+ if (!status.isOK()) {
+ error() << "replSet replSetInitiate failed to store config document; " << status;
+ return status;
}
- void ReplicationCoordinatorImpl::_setConfigState_inlock(ConfigState newState) {
- if (newState != _rsConfigState) {
- _rsConfigState = newState;
- _rsConfigStateChange.notify_all();
- }
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_finishReplSetInitiate,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex.getValue()));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return status;
}
+ configStateGuard.Dismiss();
+ fassert(18654, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
- ReplicationCoordinatorImpl::PostMemberStateUpdateAction
- ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock() {
- const MemberState newState = _topCoord->getMemberState();
- if (newState == _memberState) {
- if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- invariant(_rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(0).isElectable());
- return kActionWinElection;
- }
- return kActionNone;
- }
- PostMemberStateUpdateAction result;
- if (_memberState.primary() || newState.removed()) {
- // Wake up any threads blocked in awaitReplication, close connections, etc.
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- info->master = false;
- info->condVar->notify_all();
- }
- _canAcceptNonLocalWrites = false;
- result = kActionCloseAllConnections;
- }
- else {
- if (_memberState.secondary() && !newState.primary()) {
- // Switching out of SECONDARY, but not to PRIMARY.
- _canServeNonLocalReads.store(0U);
- }
- else if (newState.secondary()) {
- // Switching into SECONDARY, but not from PRIMARY.
- _canServeNonLocalReads.store(1U);
- }
- result = kActionChooseNewSyncSource;
- }
- if (newState.secondary() && _topCoord->getRole() == TopologyCoordinator::Role::candidate) {
- // When transitioning to SECONDARY, the only way for _topCoord to report the candidate
- // role is if the configuration represents a single-node replica set. In that case, the
- // overriding requirement is to elect this singleton node primary.
- invariant(_rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
+ if (status.isOK()) {
+ // Create the oplog with the first entry, and start repl threads.
+ _externalState->initiateOplog(txn);
+ _externalState->startThreads();
+ }
+ return status;
+}
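
The MakeGuard/Dismiss pair above is what keeps the config state consistent across the many early returns: any failure path destroys the guard while it is still armed, rolling _rsConfigState back to kConfigUninitialized, and only the success path dismisses it. A minimal sketch of that idiom (StateGuard is a hypothetical stand-in, not MongoDB's ScopeGuard):

    #include <functional>
    #include <utility>

    class StateGuard {
    public:
        explicit StateGuard(std::function<void()> onExit) : _onExit(std::move(onExit)) {}
        ~StateGuard() {
            if (_armed) {
                _onExit();  // early return: roll the state back
            }
        }
        void dismiss() {
            _armed = false;  // success: keep the new state
        }

    private:
        std::function<void()> _onExit;
        bool _armed = true;
    };

    // Usage, mirroring the shape of processReplSetInitiate:
    //     StateGuard configStateGuard([&] { setConfigState(kUninitialized); });
    //     if (!parseOk)  return;       // guard fires, state rolled back
    //     if (!quorumOk) return;       // guard fires, state rolled back
    //     configStateGuard.dismiss();  // reached only when initiate succeeds
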
+
+void ReplicationCoordinatorImpl::_finishReplSetInitiate(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigInitiating);
+ invariant(!_rsConfig.isInitialized());
+ const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(newConfig, myIndex);
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+void ReplicationCoordinatorImpl::_setConfigState_inlock(ConfigState newState) {
+ if (newState != _rsConfigState) {
+ _rsConfigState = newState;
+ _rsConfigStateChange.notify_all();
+ }
+}
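
_setConfigState_inlock and the wait loop at the top of processReplSetInitiate are the two halves of a standard monitor: the setter changes the guarded state and notifies only on an actual change, and waiters re-check a predicate under the mutex. A self-contained sketch of the same pattern with std:: primitives (the real code uses boost; the class and state names here are illustrative):

    #include <condition_variable>
    #include <mutex>

    class ConfigStateTracker {
    public:
        enum State { kPreStart, kStartingUp, kUninitialized, kSteady };

        void set(State newState) {
            std::lock_guard<std::mutex> lk(_mutex);
            if (newState != _state) {
                _state = newState;
                _stateChange.notify_all();  // wake all predicate re-checkers
            }
        }

        // Mirrors the wait loop at the top of processReplSetInitiate: block
        // until startup has finished deciding what the initial state is.
        void waitForStartupComplete() {
            std::unique_lock<std::mutex> lk(_mutex);
            _stateChange.wait(
                lk, [&] { return _state != kPreStart && _state != kStartingUp; });
        }

    private:
        std::mutex _mutex;
        std::condition_variable _stateChange;
        State _state = kPreStart;
    };
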
+
+ReplicationCoordinatorImpl::PostMemberStateUpdateAction
+ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock() {
+ const MemberState newState = _topCoord->getMemberState();
+ if (newState == _memberState) {
+ if (_topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+ invariant(_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
_rsConfig.getMemberAt(0).isElectable());
- result = kActionWinElection;
+ return kActionWinElection;
}
-
- _memberState = newState;
- log() << "transition to " << newState.toString() << rsLog;
- return result;
+ return kActionNone;
}
-
- void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
- PostMemberStateUpdateAction action) {
-
- switch (action) {
+ PostMemberStateUpdateAction result;
+ if (_memberState.primary() || newState.removed()) {
+ // Wake up any threads blocked in awaitReplication, close connections, etc.
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ info->master = false;
+ info->condVar->notify_all();
+ }
+ _canAcceptNonLocalWrites = false;
+ result = kActionCloseAllConnections;
+ } else {
+ if (_memberState.secondary() && !newState.primary()) {
+ // Switching out of SECONDARY, but not to PRIMARY.
+ _canServeNonLocalReads.store(0U);
+ } else if (newState.secondary()) {
+ // Switching into SECONDARY, but not from PRIMARY.
+ _canServeNonLocalReads.store(1U);
+ }
+ result = kActionChooseNewSyncSource;
+ }
+ if (newState.secondary() && _topCoord->getRole() == TopologyCoordinator::Role::candidate) {
+ // When transitioning to SECONDARY, the only way for _topCoord to report the candidate
+ // role is if the configuration represents a single-node replica set. In that case, the
+ // overriding requirement is to elect this singleton node primary.
+ invariant(_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
+ _rsConfig.getMemberAt(0).isElectable());
+ result = kActionWinElection;
+ }
+
+ _memberState = newState;
+ log() << "transition to " << newState.toString() << rsLog;
+ return result;
+}
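
The transition handling above is also what gates client traffic: leaving PRIMARY (or being removed) stops non-local writes, while moving in or out of SECONDARY toggles whether non-local reads are served. A simplified sketch of that gating, with hypothetical names and an unsigned-backed atomic standing in for _canServeNonLocalReads:

    #include <atomic>

    enum class State { Startup, Primary, Secondary, Removed, Rollback };

    struct ReadWriteGates {
        bool canAcceptNonLocalWrites = false;
        std::atomic<unsigned> canServeNonLocalReads{0};

        void onStateChange(State oldState, State newState) {
            if (oldState == State::Primary || newState == State::Removed) {
                // Stepping down or removed: writes stop; connections close upstream.
                canAcceptNonLocalWrites = false;
            } else if (oldState == State::Secondary && newState != State::Primary) {
                canServeNonLocalReads.store(0U);  // leaving SECONDARY, not to PRIMARY
            } else if (newState == State::Secondary) {
                canServeNonLocalReads.store(1U);  // entering SECONDARY, not from PRIMARY
            }
        }
    };
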
+
+void ReplicationCoordinatorImpl::_performPostMemberStateUpdateAction(
+ PostMemberStateUpdateAction action) {
+ switch (action) {
case kActionNone:
break;
case kActionChooseNewSyncSource:
@@ -1987,441 +1916,426 @@ namespace {
default:
severe() << "Unknown post member state update action " << static_cast<int>(action);
fassertFailed(26010);
- }
- }
-
- Status ReplicationCoordinatorImpl::processReplSetGetRBID(BSONObjBuilder* resultObj) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- resultObj->append("rbid", _rbid);
- return Status::OK();
- }
-
- void ReplicationCoordinatorImpl::incrementRollbackID() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- ++_rbid;
- }
-
- Status ReplicationCoordinatorImpl::processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) {
-
- Status result(ErrorCodes::InternalError, "didn't set status in prepareFreshResponse");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFresh_finish,
- this,
- stdx::placeholders::_1,
- args,
- resultObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18652, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- void ReplicationCoordinatorImpl::_processReplSetFresh_finish(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplSetFreshArgs& args,
- BSONObjBuilder* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- return;
- }
-
- _topCoord->prepareFreshResponse(
- args, _replExecutor.now(), getMyLastOptime(), response, result);
- }
-
- Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* responseObj) {
- Status result = Status(ErrorCodes::InternalError, "status not set by callback");
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_processReplSetElect_finish,
- this,
- stdx::placeholders::_1,
- args,
- responseObj,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18657, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return result;
- }
-
- void ReplicationCoordinatorImpl::_processReplSetElect_finish(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplSetElectArgs& args,
- BSONObjBuilder* response,
- Status* result) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- return;
- }
-
- _topCoord->prepareElectResponse(
- args, _replExecutor.now(), getMyLastOptime(), response, result);
- }
-
- ReplicationCoordinatorImpl::PostMemberStateUpdateAction
- ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(
- const ReplicaSetConfig& newConfig,
- int myIndex) {
- invariant(_settings.usingReplSets());
- _cancelHeartbeats();
- _setConfigState_inlock(kConfigSteady);
- OpTime myOptime = _getMyLastOptime_inlock(); // Must get this before changing our config.
- _topCoord->updateConfig(
- newConfig,
- myIndex,
- _replExecutor.now(),
- myOptime);
- _rsConfig = newConfig;
- log() << "New replica set config in use: " << _rsConfig.toBSON() << rsLog;
- _selfIndex = myIndex;
- if (_selfIndex >= 0) {
- log() << "This node is " <<
- _rsConfig.getMemberAt(_selfIndex).getHostAndPort() << " in the config";
- }
- else {
- log() << "This node is not a member of the config";
- }
-
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- _updateSlaveInfoFromConfig_inlock();
- if (_selfIndex >= 0) {
- // Don't send heartbeats if we're not in the config, if we get re-added one of the
- // nodes in the set will contact us.
- _startHeartbeats();
- }
- _wakeReadyWaiters_inlock();
- return action;
- }
-
- void ReplicationCoordinatorImpl::_wakeReadyWaiters_inlock(){
- for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
- it != _replicationWaiterList.end(); ++it) {
- WaiterInfo* info = *it;
- if (_doneWaitingForReplication_inlock(*info->opTime, *info->writeConcern)) {
- info->condVar->notify_all();
- }
- }
- }
-
- Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(
- const UpdatePositionArgs& updates) {
-
- boost::unique_lock<boost::mutex> lock(_mutex);
- Status status = Status::OK();
- bool somethingChanged = false;
- for (UpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
- update != updates.updatesEnd();
- ++update) {
- status = _setLastOptime_inlock(*update);
- if (!status.isOK()) {
- break;
- }
- somethingChanged = true;
- }
-
- if (somethingChanged && !_getMemberState_inlock().primary()) {
- lock.unlock();
- _externalState->forwardSlaveProgress(); // Must do this outside _mutex
- }
- return status;
}
-
- Status ReplicationCoordinatorImpl::processHandshake(OperationContext* txn,
- const HandshakeArgs& handshake) {
- LOG(2) << "Received handshake " << handshake.toBSON();
-
- boost::unique_lock<boost::mutex> lock(_mutex);
- if (_getReplicationMode_inlock() == modeReplSet) {
- if (_selfIndex == -1) {
- // Ignore updates when we're in state REMOVED
- return Status(ErrorCodes::NotMasterOrSecondaryCode,
- "Received replSetUpdatePosition command but we are in state REMOVED");
- }
-
- int memberId = handshake.getMemberId();
- const MemberConfig* member = _rsConfig.findMemberByID(memberId);
- if (!member) {
- return Status(ErrorCodes::NodeNotFound,
- str::stream() << "Node with replica set memberId " << memberId <<
- " could not be found in replica set config while attempting"
- " to associate it with RID " << handshake.getRid() <<
- " in replication handshake. ReplSet Config: " <<
- _rsConfig.toBSON().toString());
- }
- SlaveInfo* slaveInfo = _findSlaveInfoByMemberID_inlock(handshake.getMemberId());
- invariant(slaveInfo); // If it's in the config it must be in _slaveInfo
- slaveInfo->rid = handshake.getRid();
- slaveInfo->hostAndPort = member->getHostAndPort();
-
- if (!_getMemberState_inlock().primary()) {
- lock.unlock();
- _externalState->forwardSlaveHandshake(); // must do outside _mutex
- }
- return Status::OK();
- }
-
- // master-slave from here down
- SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(handshake.getRid());
- if (slaveInfo) {
- return Status::OK(); // nothing to do
+}

+
+Status ReplicationCoordinatorImpl::processReplSetGetRBID(BSONObjBuilder* resultObj) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ resultObj->append("rbid", _rbid);
+ return Status::OK();
+}
+
+void ReplicationCoordinatorImpl::incrementRollbackID() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ ++_rbid;
+}
+
+Status ReplicationCoordinatorImpl::processReplSetFresh(const ReplSetFreshArgs& args,
+ BSONObjBuilder* resultObj) {
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareFreshResponse");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetFresh_finish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ resultObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18652, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
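
processReplSetFresh, processReplSetElect, chooseNewSyncSource, and several methods below all follow the same schedule-then-wait idiom: the public method schedules a _finish callback on the single-threaded replication executor, blocks in wait() until it has run, and only then reads the Status or response the callback filled in. A minimal sketch of why that is safe, using a hypothetical single-threaded executor built on std:: primitives:

    #include <condition_variable>
    #include <deque>
    #include <functional>
    #include <mutex>
    #include <thread>

    // All tasks run in order on one worker thread, so a scheduled callback can
    // safely touch executor-only state, and the caller can safely read results
    // once the wait for completion returns.
    class MiniExecutor {
    public:
        MiniExecutor() : _worker(&MiniExecutor::_run, this) {}
        ~MiniExecutor() {
            schedule(nullptr);  // empty task acts as the shutdown sentinel
            _worker.join();
        }

        void schedule(std::function<void()> task) {
            std::lock_guard<std::mutex> lk(_mutex);
            _tasks.push_back(std::move(task));
            _cv.notify_one();
        }

        // Analogue of scheduleWork() followed by _replExecutor.wait(handle).
        void scheduleAndWait(std::function<void()> task) {
            std::mutex doneMutex;
            std::condition_variable doneCv;
            bool done = false;
            schedule([&] {
                task();
                std::lock_guard<std::mutex> lk(doneMutex);
                done = true;
                doneCv.notify_one();
            });
            std::unique_lock<std::mutex> lk(doneMutex);
            doneCv.wait(lk, [&] { return done; });
        }

    private:
        void _run() {
            for (;;) {
                std::function<void()> task;
                {
                    std::unique_lock<std::mutex> lk(_mutex);
                    _cv.wait(lk, [&] { return !_tasks.empty(); });
                    task = std::move(_tasks.front());
                    _tasks.pop_front();
                }
                if (!task)
                    return;  // shutdown sentinel
                task();
            }
        }

        std::mutex _mutex;
        std::condition_variable _cv;
        std::deque<std::function<void()>> _tasks;
        std::thread _worker;
    };
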
+
+void ReplicationCoordinatorImpl::_processReplSetFresh_finish(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplSetFreshArgs& args,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ return;
+ }
+
+ _topCoord->prepareFreshResponse(args, _replExecutor.now(), getMyLastOptime(), response, result);
+}
+
+Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& args,
+ BSONObjBuilder* responseObj) {
+ Status result = Status(ErrorCodes::InternalError, "status not set by callback");
+ CBHStatus cbh = _replExecutor.scheduleWork(
+ stdx::bind(&ReplicationCoordinatorImpl::_processReplSetElect_finish,
+ this,
+ stdx::placeholders::_1,
+ args,
+ responseObj,
+ &result));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ }
+ fassert(18657, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return result;
+}
+
+void ReplicationCoordinatorImpl::_processReplSetElect_finish(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplSetElectArgs& args,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
+ return;
+ }
+
+ _topCoord->prepareElectResponse(args, _replExecutor.now(), getMyLastOptime(), response, result);
+}
+
+ReplicationCoordinatorImpl::PostMemberStateUpdateAction
+ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(const ReplicaSetConfig& newConfig,
+ int myIndex) {
+ invariant(_settings.usingReplSets());
+ _cancelHeartbeats();
+ _setConfigState_inlock(kConfigSteady);
+ OpTime myOptime = _getMyLastOptime_inlock(); // Must get this before changing our config.
+ _topCoord->updateConfig(newConfig, myIndex, _replExecutor.now(), myOptime);
+ _rsConfig = newConfig;
+ log() << "New replica set config in use: " << _rsConfig.toBSON() << rsLog;
+ _selfIndex = myIndex;
+ if (_selfIndex >= 0) {
+ log() << "This node is " << _rsConfig.getMemberAt(_selfIndex).getHostAndPort()
+ << " in the config";
+ } else {
+ log() << "This node is not a member of the config";
+ }
+
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ _updateSlaveInfoFromConfig_inlock();
+ if (_selfIndex >= 0) {
+        // Don't send heartbeats if we're not in the config; if we get re-added, one of the
+        // nodes in the set will contact us.
+ _startHeartbeats();
+ }
+ _wakeReadyWaiters_inlock();
+ return action;
+}
+
+void ReplicationCoordinatorImpl::_wakeReadyWaiters_inlock() {
+ for (std::vector<WaiterInfo*>::iterator it = _replicationWaiterList.begin();
+ it != _replicationWaiterList.end();
+ ++it) {
+ WaiterInfo* info = *it;
+ if (_doneWaitingForReplication_inlock(*info->opTime, *info->writeConcern)) {
+ info->condVar->notify_all();
}
-
- SlaveInfo newSlaveInfo;
- newSlaveInfo.rid = handshake.getRid();
- newSlaveInfo.memberId = -1;
- newSlaveInfo.hostAndPort = _externalState->getClientHostAndPort(txn);
- // Don't call _addSlaveInfo_inlock as that would wake sleepers unnecessarily.
- _slaveInfo.push_back(newSlaveInfo);
-
- return Status::OK();
}
+}
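
The waiter list that _wakeReadyWaiters_inlock walks is the other half of awaitReplication: each blocked writer parks a WaiterInfo with its target optime and a condition variable, and progress updates wake only the waiters whose condition is now met. A reduced sketch, assuming a plain integer stands in for OpTime and a threshold check stands in for the full write-concern test:

    #include <algorithm>
    #include <condition_variable>
    #include <mutex>
    #include <vector>

    struct WaiterInfo {
        long long opTime;                  // point this waiter needs replicated
        std::condition_variable* condVar;  // lives on the waiting thread's stack
    };

    class ReplicationWaiters {
    public:
        // All three methods assume the caller holds the coordinator mutex,
        // mirroring the _inlock convention above.
        void add(WaiterInfo* info) {
            _waiters.push_back(info);
        }
        void remove(WaiterInfo* info) {
            _waiters.erase(std::remove(_waiters.begin(), _waiters.end(), info),
                           _waiters.end());
        }
        // Mirrors _wakeReadyWaiters_inlock: wake only the satisfied waiters;
        // each woken thread re-checks its own predicate under the mutex.
        void wakeReady(long long lastReplicated) {
            for (WaiterInfo* info : _waiters) {
                if (lastReplicated >= info->opTime) {
                    info->condVar->notify_all();
                }
            }
        }

    private:
        std::vector<WaiterInfo*> _waiters;
    };
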
- bool ReplicationCoordinatorImpl::buildsIndexes() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (_selfIndex == -1) {
- return true;
+Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(const UpdatePositionArgs& updates) {
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ Status status = Status::OK();
+ bool somethingChanged = false;
+ for (UpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
+ update != updates.updatesEnd();
+ ++update) {
+ status = _setLastOptime_inlock(*update);
+ if (!status.isOK()) {
+ break;
}
- const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
- return self.shouldBuildIndexes();
+ somethingChanged = true;
}
- std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op) {
- std::vector<HostAndPort> hosts;
- boost::lock_guard<boost::mutex> lk(_mutex);
- for (size_t i = 0; i < _slaveInfo.size(); ++i) {
- const SlaveInfo& slaveInfo = _slaveInfo[i];
- if (slaveInfo.opTime < op) {
- continue;
- }
-
- if (_getReplicationMode_inlock() == modeMasterSlave &&
- slaveInfo.rid == _getMyRID_inlock()) {
- // Master-slave doesn't know the HostAndPort for itself at this point.
- continue;
- }
- hosts.push_back(slaveInfo.hostAndPort);
- }
- return hosts;
+ if (somethingChanged && !_getMemberState_inlock().primary()) {
+ lock.unlock();
+ _externalState->forwardSlaveProgress(); // Must do this outside _mutex
}
+ return status;
+}
- std::vector<HostAndPort> ReplicationCoordinatorImpl::getOtherNodesInReplSet() const {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(_settings.usingReplSets());
+Status ReplicationCoordinatorImpl::processHandshake(OperationContext* txn,
+ const HandshakeArgs& handshake) {
+ LOG(2) << "Received handshake " << handshake.toBSON();
- std::vector<HostAndPort> nodes;
+ boost::unique_lock<boost::mutex> lock(_mutex);
+ if (_getReplicationMode_inlock() == modeReplSet) {
if (_selfIndex == -1) {
- return nodes;
+ // Ignore updates when we're in state REMOVED
+ return Status(ErrorCodes::NotMasterOrSecondaryCode,
+ "Received replSetUpdatePosition command but we are in state REMOVED");
}
- for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
- if (i == _selfIndex)
- continue;
+ int memberId = handshake.getMemberId();
+ const MemberConfig* member = _rsConfig.findMemberByID(memberId);
+ if (!member) {
+ return Status(ErrorCodes::NodeNotFound,
+ str::stream()
+ << "Node with replica set memberId " << memberId
+ << " could not be found in replica set config while attempting"
+ " to associate it with RID " << handshake.getRid()
+ << " in replication handshake. ReplSet Config: "
+ << _rsConfig.toBSON().toString());
+ }
+ SlaveInfo* slaveInfo = _findSlaveInfoByMemberID_inlock(handshake.getMemberId());
+ invariant(slaveInfo); // If it's in the config it must be in _slaveInfo
+ slaveInfo->rid = handshake.getRid();
+ slaveInfo->hostAndPort = member->getHostAndPort();
- nodes.push_back(_rsConfig.getMemberAt(i).getHostAndPort());
+ if (!_getMemberState_inlock().primary()) {
+ lock.unlock();
+ _externalState->forwardSlaveHandshake(); // must do outside _mutex
}
- return nodes;
+ return Status::OK();
}
- Status ReplicationCoordinatorImpl::checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const {
- boost::lock_guard<boost::mutex> lock(_mutex);
- return _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+ // master-slave from here down
+ SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(handshake.getRid());
+ if (slaveInfo) {
+ return Status::OK(); // nothing to do
}
- Status ReplicationCoordinatorImpl::_checkIfWriteConcernCanBeSatisfied_inlock(
- const WriteConcernOptions& writeConcern) const {
- if (_getReplicationMode_inlock() == modeNone) {
- return Status(ErrorCodes::NoReplicationEnabled,
- "No replication enabled when checking if write concern can be satisfied");
- }
+ SlaveInfo newSlaveInfo;
+ newSlaveInfo.rid = handshake.getRid();
+ newSlaveInfo.memberId = -1;
+ newSlaveInfo.hostAndPort = _externalState->getClientHostAndPort(txn);
+ // Don't call _addSlaveInfo_inlock as that would wake sleepers unnecessarily.
+ _slaveInfo.push_back(newSlaveInfo);
- if (_getReplicationMode_inlock() == modeMasterSlave) {
- if (!writeConcern.wMode.empty()) {
- return Status(ErrorCodes::UnknownReplWriteConcern,
- "Cannot use named write concern modes in master-slave");
- }
- // No way to know how many slaves there are, so assume any numeric mode is possible.
- return Status::OK();
- }
+ return Status::OK();
+}
- invariant(_getReplicationMode_inlock() == modeReplSet);
- return _rsConfig.checkIfWriteConcernCanBeSatisfied(writeConcern);
+bool ReplicationCoordinatorImpl::buildsIndexes() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (_selfIndex == -1) {
+ return true;
}
+ const MemberConfig& self = _rsConfig.getMemberAt(_selfIndex);
+ return self.shouldBuildIndexes();
+}
- WriteConcernOptions ReplicationCoordinatorImpl::getGetLastErrorDefault() {
- boost::mutex::scoped_lock lock(_mutex);
- if (_rsConfig.isInitialized()) {
- return _rsConfig.getDefaultWriteConcern();
+std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op) {
+ std::vector<HostAndPort> hosts;
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ for (size_t i = 0; i < _slaveInfo.size(); ++i) {
+ const SlaveInfo& slaveInfo = _slaveInfo[i];
+ if (slaveInfo.opTime < op) {
+ continue;
}
- return WriteConcernOptions();
- }
- Status ReplicationCoordinatorImpl::checkReplEnabledForCommand(BSONObjBuilder* result) {
- if (!_settings.usingReplSets()) {
- if (serverGlobalParams.configsvr) {
- result->append("info", "configsvr"); // for shell prompt
- }
- return Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
+ if (_getReplicationMode_inlock() == modeMasterSlave &&
+ slaveInfo.rid == _getMyRID_inlock()) {
+ // Master-slave doesn't know the HostAndPort for itself at this point.
+ continue;
}
+ hosts.push_back(slaveInfo.hostAndPort);
+ }
+ return hosts;
+}
- if (getMemberState().startup()) {
- result->append("info", "run rs.initiate(...) if not yet done for the set");
- return Status(ErrorCodes::NotYetInitialized, "no replset config has been received");
- }
+std::vector<HostAndPort> ReplicationCoordinatorImpl::getOtherNodesInReplSet() const {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(_settings.usingReplSets());
- return Status::OK();
+ std::vector<HostAndPort> nodes;
+ if (_selfIndex == -1) {
+ return nodes;
}
- bool ReplicationCoordinatorImpl::isReplEnabled() const {
- return getReplicationMode() != modeNone;
- }
+ for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
+ if (i == _selfIndex)
+ continue;
- void ReplicationCoordinatorImpl::_chooseNewSyncSource(
- const ReplicationExecutor::CallbackData& cbData,
- const OpTime& lastOpTimeFetched,
- HostAndPort* newSyncSource) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- *newSyncSource = _topCoord->chooseNewSyncSource(_replExecutor.now(), lastOpTimeFetched);
- }
-
- HostAndPort ReplicationCoordinatorImpl::chooseNewSyncSource(const OpTime& lastOpTimeFetched) {
- HostAndPort newSyncSource;
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_chooseNewSyncSource,
- this,
- stdx::placeholders::_1,
- lastOpTimeFetched,
- &newSyncSource));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return newSyncSource; // empty
- }
- fassert(18740, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return newSyncSource;
+ nodes.push_back(_rsConfig.getMemberAt(i).getHostAndPort());
}
+ return nodes;
+}
- void ReplicationCoordinatorImpl::_blacklistSyncSource(
- const ReplicationExecutor::CallbackData& cbData,
- const HostAndPort& host,
- Date_t until) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- _topCoord->blacklistSyncSource(host, until);
-
- CBHStatus cbh = _replExecutor.scheduleWorkAt(
- until,
- stdx::bind(&ReplicationCoordinatorImpl::_unblacklistSyncSource,
- this,
- stdx::placeholders::_1,
- host));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28610, cbh.getStatus());
- }
+Status ReplicationCoordinatorImpl::checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const {
+ boost::lock_guard<boost::mutex> lock(_mutex);
+ return _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
+}
- void ReplicationCoordinatorImpl::_unblacklistSyncSource(
- const ReplicationExecutor::CallbackData& cbData,
- const HostAndPort& host) {
- if (cbData.status == ErrorCodes::CallbackCanceled)
- return;
- _topCoord->unblacklistSyncSource(host, _replExecutor.now());
+Status ReplicationCoordinatorImpl::_checkIfWriteConcernCanBeSatisfied_inlock(
+ const WriteConcernOptions& writeConcern) const {
+ if (_getReplicationMode_inlock() == modeNone) {
+ return Status(ErrorCodes::NoReplicationEnabled,
+ "No replication enabled when checking if write concern can be satisfied");
}
- void ReplicationCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_blacklistSyncSource,
- this,
- stdx::placeholders::_1,
- host,
- until));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
+ if (_getReplicationMode_inlock() == modeMasterSlave) {
+ if (!writeConcern.wMode.empty()) {
+ return Status(ErrorCodes::UnknownReplWriteConcern,
+ "Cannot use named write concern modes in master-slave");
}
- fassert(18741, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
+ // No way to know how many slaves there are, so assume any numeric mode is possible.
+ return Status::OK();
}
- void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) {
- StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
- OpTime lastOpTime(0, 0);
- if (!lastOpTimeStatus.isOK()) {
- warning() << "Failed to load timestamp of most recently applied operation; " <<
- lastOpTimeStatus.getStatus();
- }
- else {
- lastOpTime = lastOpTimeStatus.getValue();
- }
- boost::unique_lock<boost::mutex> lk(_mutex);
- _setMyLastOptime_inlock(&lk, lastOpTime, true);
- _externalState->setGlobalOpTime(lastOpTime);
+ invariant(_getReplicationMode_inlock() == modeReplSet);
+ return _rsConfig.checkIfWriteConcernCanBeSatisfied(writeConcern);
+}
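
The branch structure above reduces to a small decision table: no replication means nothing is satisfiable, master-slave rejects named modes but accepts any numeric w (the slave count is unknown), and replica sets defer to the config. A compressed sketch under simplified types (Mode, WriteConcern, and the helper are illustrative stand-ins, not MongoDB's):

    #include <set>
    #include <string>

    enum class Mode { None, MasterSlave, ReplSet };

    struct WriteConcern {
        int wNumNodes = 0;   // used when wMode is empty
        std::string wMode;   // named mode, e.g. "majority"
    };

    // Returns an empty string when satisfiable, else an error message.
    std::string checkWriteConcern(Mode mode,
                                  const WriteConcern& wc,
                                  const std::set<std::string>& knownModes,
                                  int numMembers) {
        if (mode == Mode::None)
            return "no replication enabled";
        if (mode == Mode::MasterSlave) {
            if (!wc.wMode.empty())
                return "cannot use named write concern modes in master-slave";
            return "";  // slave count unknown: assume any numeric w is possible
        }
        // Replica set: a named mode must exist in the config, and a numeric w
        // cannot exceed the number of members.
        if (!wc.wMode.empty())
            return knownModes.count(wc.wMode) ? "" : "unknown write concern mode";
        return wc.wNumNodes <= numMembers ? "" : "not enough members";
    }
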
+
+WriteConcernOptions ReplicationCoordinatorImpl::getGetLastErrorDefault() {
+ boost::mutex::scoped_lock lock(_mutex);
+ if (_rsConfig.isInitialized()) {
+ return _rsConfig.getDefaultWriteConcern();
+ }
+ return WriteConcernOptions();
+}
+
+Status ReplicationCoordinatorImpl::checkReplEnabledForCommand(BSONObjBuilder* result) {
+ if (!_settings.usingReplSets()) {
+ if (serverGlobalParams.configsvr) {
+ result->append("info", "configsvr"); // for shell prompt
+ }
+ return Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
+ }
+
+ if (getMemberState().startup()) {
+ result->append("info", "run rs.initiate(...) if not yet done for the set");
+ return Status(ErrorCodes::NotYetInitialized, "no replset config has been received");
+ }
+
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorImpl::isReplEnabled() const {
+ return getReplicationMode() != modeNone;
+}
+
+void ReplicationCoordinatorImpl::_chooseNewSyncSource(
+ const ReplicationExecutor::CallbackData& cbData,
+ const OpTime& lastOpTimeFetched,
+ HostAndPort* newSyncSource) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ *newSyncSource = _topCoord->chooseNewSyncSource(_replExecutor.now(), lastOpTimeFetched);
+}
+
+HostAndPort ReplicationCoordinatorImpl::chooseNewSyncSource(const OpTime& lastOpTimeFetched) {
+ HostAndPort newSyncSource;
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_chooseNewSyncSource,
+ this,
+ stdx::placeholders::_1,
+ lastOpTimeFetched,
+ &newSyncSource));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return newSyncSource; // empty
+ }
+ fassert(18740, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return newSyncSource;
+}
+
+void ReplicationCoordinatorImpl::_blacklistSyncSource(
+ const ReplicationExecutor::CallbackData& cbData, const HostAndPort& host, Date_t until) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ _topCoord->blacklistSyncSource(host, until);
+
+ CBHStatus cbh =
+ _replExecutor.scheduleWorkAt(until,
+ stdx::bind(&ReplicationCoordinatorImpl::_unblacklistSyncSource,
+ this,
+ stdx::placeholders::_1,
+ host));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(28610, cbh.getStatus());
+}
+
+void ReplicationCoordinatorImpl::_unblacklistSyncSource(
+ const ReplicationExecutor::CallbackData& cbData, const HostAndPort& host) {
+ if (cbData.status == ErrorCodes::CallbackCanceled)
+ return;
+ _topCoord->unblacklistSyncSource(host, _replExecutor.now());
+}
+
+void ReplicationCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_blacklistSyncSource,
+ this,
+ stdx::placeholders::_1,
+ host,
+ until));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18741, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
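
Blacklisting above is time-bounded: _blacklistSyncSource records the host and immediately schedules _unblacklistSyncSource to run at "until" via scheduleWorkAt. An equivalent way to see the semantics is a map from host to expiry, as in this small sketch (the names and the string-keyed map are illustrative):

    #include <chrono>
    #include <map>
    #include <string>

    using Clock = std::chrono::steady_clock;

    class SyncSourceBlacklist {
    public:
        void blacklist(const std::string& host, Clock::time_point until) {
            _until[host] = until;  // real code also schedules the un-blacklist
        }
        void unblacklist(const std::string& host) {
            _until.erase(host);
        }
        // A source is rejected only while "now" is before its expiry.
        bool isBlacklisted(const std::string& host, Clock::time_point now) const {
            auto it = _until.find(host);
            return it != _until.end() && now < it->second;
        }

    private:
        std::map<std::string, Clock::time_point> _until;
    };
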
+
+void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) {
+ StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
+ OpTime lastOpTime(0, 0);
+ if (!lastOpTimeStatus.isOK()) {
+ warning() << "Failed to load timestamp of most recently applied operation; "
+ << lastOpTimeStatus.getStatus();
+ } else {
+ lastOpTime = lastOpTimeStatus.getValue();
+ }
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ _setMyLastOptime_inlock(&lk, lastOpTime, true);
+ _externalState->setGlobalOpTime(lastOpTime);
+}
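
resetLastOpTimeFromOplog shows the coordinator's usual StatusWith fallback: a failed load is logged as a warning and the null optime (0, 0) is used instead of propagating the error. A stripped-down sketch with stand-in types (the StatusWith and OpTime here are simplified, not MongoDB's):

    #include <iostream>
    #include <string>
    #include <utility>

    template <typename T>
    struct StatusWith {
        bool ok;
        std::string reason;  // meaningful only when !ok
        T value;             // meaningful only when ok
    };

    using OpTime = std::pair<unsigned, unsigned>;  // (seconds, increment)

    OpTime lastOpTimeOrNull(const StatusWith<OpTime>& loaded) {
        if (!loaded.ok) {
            std::cerr << "Failed to load timestamp of most recently applied operation; "
                      << loaded.reason << '\n';
            return OpTime(0, 0);  // fall back to the null optime
        }
        return loaded.value;
    }
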
+
+void ReplicationCoordinatorImpl::_shouldChangeSyncSource(
+ const ReplicationExecutor::CallbackData& cbData,
+ const HostAndPort& currentSource,
+ bool* shouldChange) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+
+ *shouldChange = _topCoord->shouldChangeSyncSource(currentSource, _replExecutor.now());
+}
+
+bool ReplicationCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource) {
+ bool shouldChange(false);
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_shouldChangeSyncSource,
+ this,
+ stdx::placeholders::_1,
+ currentSource,
+ &shouldChange));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return false;
}
+ fassert(18906, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+ return shouldChange;
+}
- void ReplicationCoordinatorImpl::_shouldChangeSyncSource(
- const ReplicationExecutor::CallbackData& cbData,
- const HostAndPort& currentSource,
- bool* shouldChange) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
-
- *shouldChange = _topCoord->shouldChangeSyncSource(currentSource, _replExecutor.now());
+void ReplicationCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
+ CBHStatus cbh =
+ _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_summarizeAsHtml_finish,
+ this,
+ stdx::placeholders::_1,
+ output));
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
}
+ fassert(28638, cbh.getStatus());
+ _replExecutor.wait(cbh.getValue());
+}
- bool ReplicationCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource) {
- bool shouldChange(false);
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_shouldChangeSyncSource,
- this,
- stdx::placeholders::_1,
- currentSource,
- &shouldChange));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return false;
- }
- fassert(18906, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
- return shouldChange;
- }
-
- void ReplicationCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
- CBHStatus cbh = _replExecutor.scheduleWork(
- stdx::bind(&ReplicationCoordinatorImpl::_summarizeAsHtml_finish,
- this,
- stdx::placeholders::_1,
- output));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(28638, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
+void ReplicationCoordinatorImpl::_summarizeAsHtml_finish(
+ const ReplicationExecutor::CallbackData& cbData, ReplSetHtmlSummary* output) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ReplicationCoordinatorImpl::_summarizeAsHtml_finish(
- const ReplicationExecutor::CallbackData& cbData,
- ReplSetHtmlSummary* output) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
+ output->setSelfOptime(getMyLastOptime());
+ output->setSelfUptime(time(0) - serverGlobalParams.started);
+ output->setNow(_replExecutor.now());
- output->setSelfOptime(getMyLastOptime());
- output->setSelfUptime(time(0) - serverGlobalParams.started);
- output->setNow(_replExecutor.now());
-
- _topCoord->summarizeAsHtml(output);
- }
+ _topCoord->summarizeAsHtml(output);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index ae08703b628..bdf6166cbcd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -50,836 +50,819 @@
namespace mongo {
- class Timer;
- template <typename T> class StatusWith;
+class Timer;
+template <typename T>
+class StatusWith;
namespace repl {
- class ElectCmdRunner;
- class FreshnessChecker;
- class HeartbeatResponseAction;
- class OplogReader;
- class SyncSourceFeedback;
- class TopologyCoordinator;
+class ElectCmdRunner;
+class FreshnessChecker;
+class HeartbeatResponseAction;
+class OplogReader;
+class SyncSourceFeedback;
+class TopologyCoordinator;
- class ReplicationCoordinatorImpl : public ReplicationCoordinator,
- public KillOpListenerInterface {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorImpl);
+class ReplicationCoordinatorImpl : public ReplicationCoordinator, public KillOpListenerInterface {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorImpl);
- public:
+public:
+ // Takes ownership of the "externalState", "topCoord" and "network" objects.
+ ReplicationCoordinatorImpl(const ReplSettings& settings,
+ ReplicationCoordinatorExternalState* externalState,
+ ReplicationExecutor::NetworkInterface* network,
+ TopologyCoordinator* topoCoord,
+ int64_t prngSeed);
+ virtual ~ReplicationCoordinatorImpl();
- // Takes ownership of the "externalState", "topCoord" and "network" objects.
- ReplicationCoordinatorImpl(const ReplSettings& settings,
- ReplicationCoordinatorExternalState* externalState,
- ReplicationExecutor::NetworkInterface* network,
- TopologyCoordinator* topoCoord,
- int64_t prngSeed);
- virtual ~ReplicationCoordinatorImpl();
+ // ================== Members of public ReplicationCoordinator API ===================
- // ================== Members of public ReplicationCoordinator API ===================
+ virtual void startReplication(OperationContext* txn);
- virtual void startReplication(OperationContext* txn);
+ virtual void shutdown();
- virtual void shutdown();
+ virtual const ReplSettings& getSettings() const;
- virtual const ReplSettings& getSettings() const;
+ virtual Mode getReplicationMode() const;
- virtual Mode getReplicationMode() const;
+ virtual MemberState getMemberState() const;
- virtual MemberState getMemberState() const;
+ virtual bool isInPrimaryOrSecondaryState() const;
- virtual bool isInPrimaryOrSecondaryState() const;
+ virtual Seconds getSlaveDelaySecs() const;
- virtual Seconds getSlaveDelaySecs() const;
+ virtual void clearSyncSourceBlacklist();
- virtual void clearSyncSourceBlacklist();
+ /*
+ * Implementation of the KillOpListenerInterface interrupt method so that we can wake up
+ * threads blocked in awaitReplication() when a killOp command comes in.
+ */
+ virtual void interrupt(unsigned opId);
- /*
- * Implementation of the KillOpListenerInterface interrupt method so that we can wake up
- * threads blocked in awaitReplication() when a killOp command comes in.
- */
- virtual void interrupt(unsigned opId);
+ /*
+ * Implementation of the KillOpListenerInterface interruptAll method so that we can wake up
+ * threads blocked in awaitReplication() when we kill all operations.
+ */
+ virtual void interruptAll();
- /*
- * Implementation of the KillOpListenerInterface interruptAll method so that we can wake up
- * threads blocked in awaitReplication() when we kill all operations.
- */
- virtual void interruptAll();
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
+ const OperationContext* txn, const OpTime& ts, const WriteConcernOptions& writeConcern);
- virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
- const OperationContext* txn,
- const OpTime& ts,
- const WriteConcernOptions& writeConcern);
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
+ const OperationContext* txn, const WriteConcernOptions& writeConcern);
- virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
- const OperationContext* txn,
- const WriteConcernOptions& writeConcern);
+ virtual Status stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime);
- virtual Status stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime);
+ virtual bool isMasterForReportingPurposes();
- virtual bool isMasterForReportingPurposes();
+ virtual bool canAcceptWritesForDatabase(const StringData& dbName);
- virtual bool canAcceptWritesForDatabase(const StringData& dbName);
+ virtual Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
- virtual Status checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const;
+ virtual Status checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk);
- virtual Status checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk);
+ virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
- virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
+ virtual Status setLastOptimeForSlave(const OID& rid, const OpTime& ts);
- virtual Status setLastOptimeForSlave(const OID& rid, const OpTime& ts);
+ virtual void setMyLastOptime(const OpTime& ts);
- virtual void setMyLastOptime(const OpTime& ts);
+ virtual void resetMyLastOptime();
- virtual void resetMyLastOptime();
+ virtual void setMyHeartbeatMessage(const std::string& msg);
- virtual void setMyHeartbeatMessage(const std::string& msg);
+ virtual OpTime getMyLastOptime() const;
- virtual OpTime getMyLastOptime() const;
+ virtual OID getElectionId();
- virtual OID getElectionId();
+ virtual OID getMyRID() const;
- virtual OID getMyRID() const;
+ virtual int getMyId() const;
- virtual int getMyId() const;
+ virtual bool setFollowerMode(const MemberState& newState);
- virtual bool setFollowerMode(const MemberState& newState);
+ virtual bool isWaitingForApplierToDrain();
- virtual bool isWaitingForApplierToDrain();
+ virtual void signalDrainComplete(OperationContext* txn);
- virtual void signalDrainComplete(OperationContext* txn);
+ virtual void signalUpstreamUpdater();
- virtual void signalUpstreamUpdater();
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
+ virtual void prepareReplSetUpdatePositionCommandHandshakes(std::vector<BSONObj>* handshakes);
- virtual void prepareReplSetUpdatePositionCommandHandshakes(
- std::vector<BSONObj>* handshakes);
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result);
- virtual Status processReplSetGetStatus(BSONObjBuilder* result);
+ virtual void fillIsMasterForReplSet(IsMasterResponse* result);
- virtual void fillIsMasterForReplSet(IsMasterResponse* result);
+ virtual void appendSlaveInfoData(BSONObjBuilder* result);
- virtual void appendSlaveInfoData(BSONObjBuilder* result);
+ virtual void processReplSetGetConfig(BSONObjBuilder* result);
- virtual void processReplSetGetConfig(BSONObjBuilder* result);
+ virtual Status setMaintenanceMode(bool activate);
- virtual Status setMaintenanceMode(bool activate);
+ virtual bool getMaintenanceMode();
- virtual bool getMaintenanceMode();
+ virtual Status processReplSetSyncFrom(const HostAndPort& target, BSONObjBuilder* resultObj);
- virtual Status processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj);
- virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj);
+ virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response);
- virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response);
+ virtual Status processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj);
- virtual Status processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj);
- virtual Status processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj);
- virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj);
+ virtual void incrementRollbackID();
- virtual void incrementRollbackID();
+ virtual Status processReplSetFresh(const ReplSetFreshArgs& args, BSONObjBuilder* resultObj);
- virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* response);
- virtual Status processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* response);
+ virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates);
- virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates);
+ virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake);
- virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake);
+ virtual bool buildsIndexes();
- virtual bool buildsIndexes();
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
+ virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
- virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
+ virtual WriteConcernOptions getGetLastErrorDefault();
- virtual WriteConcernOptions getGetLastErrorDefault();
+ virtual Status checkReplEnabledForCommand(BSONObjBuilder* result);
- virtual Status checkReplEnabledForCommand(BSONObjBuilder* result);
+ virtual bool isReplEnabled() const;
- virtual bool isReplEnabled() const;
+ virtual HostAndPort chooseNewSyncSource(const OpTime& lastOpTimeFetched);
- virtual HostAndPort chooseNewSyncSource(const OpTime& lastOpTimeFetched);
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
+ virtual void resetLastOpTimeFromOplog(OperationContext* txn);
- virtual void resetLastOpTimeFromOplog(OperationContext* txn);
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource);
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource);
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* s);
- virtual void summarizeAsHtml(ReplSetHtmlSummary* s);
+ // ================== Test support API ===================
- // ================== Test support API ===================
+ /**
+ * If called after startReplication(), blocks until all asynchronous
+ * activities associated with replication start-up complete.
+ */
+ void waitForStartUpComplete();
- /**
- * If called after startReplication(), blocks until all asynchronous
- * activities associated with replication start-up complete.
- */
- void waitForStartUpComplete();
+ /**
+ * Gets the replica set configuration in use by the node.
+ */
+ ReplicaSetConfig getReplicaSetConfig_forTest();
- /**
- * Gets the replica set configuration in use by the node.
- */
- ReplicaSetConfig getReplicaSetConfig_forTest();
+ /**
+ * Simple wrapper around _setLastOptime_inlock to make it easier to test.
+ */
+ Status setLastOptime_forTest(const OID& rid, const OpTime& ts);
- /**
- * Simple wrapper around _setLastOptime_inlock to make it easier to test.
- */
- Status setLastOptime_forTest(const OID& rid, const OpTime& ts);
+private:
+ /**
+ * Configuration states for a replica set node.
+ *
+ * Transition diagram:
+ *
+ * PreStart ------------------> ReplicationDisabled
+ * |
+ * |
+ * v
+ * StartingUp -------> Uninitialized <------> Initiating
+ * \ ^ |
+ * ------- | |
+ * | | |
+ * v v |
+ * Reconfig <---> Steady <----> HBReconfig |
+ * ^ /
+ * | /
+ * \ /
+ * -----------------------
+ */
+ enum ConfigState {
+ kConfigPreStart,
+ kConfigStartingUp,
+ kConfigReplicationDisabled,
+ kConfigUninitialized,
+ kConfigSteady,
+ kConfigInitiating,
+ kConfigReconfiguring,
+ kConfigHBReconfiguring
+ };
- private:
+ /**
+ * Type describing actions to take after a change to the MemberState _memberState.
+ */
+ enum PostMemberStateUpdateAction {
+ kActionNone,
+ kActionCloseAllConnections, // Also indicates that we should clear sharding state.
+ kActionChooseNewSyncSource,
+ kActionWinElection
+ };
- /**
- * Configuration states for a replica set node.
- *
- * Transition diagram:
- *
- * PreStart ------------------> ReplicationDisabled
- * |
- * |
- * v
- * StartingUp -------> Uninitialized <------> Initiating
- * \ ^ |
- * ------- | |
- * | | |
- * v v |
- * Reconfig <---> Steady <----> HBReconfig |
- * ^ /
- * | /
- * \ /
- * -----------------------
- */
- enum ConfigState {
- kConfigPreStart,
- kConfigStartingUp,
- kConfigReplicationDisabled,
- kConfigUninitialized,
- kConfigSteady,
- kConfigInitiating,
- kConfigReconfiguring,
- kConfigHBReconfiguring
- };
-
- /**
- * Type describing actions to take after a change to the MemberState _memberState.
- */
- enum PostMemberStateUpdateAction {
- kActionNone,
- kActionCloseAllConnections, // Also indicates that we should clear sharding state.
- kActionChooseNewSyncSource,
- kActionWinElection
- };
-
- // Struct that holds information about clients waiting for replication.
- struct WaiterInfo;
-
- // Struct that holds information about nodes in this replication group, mainly used for
- // tracking replication progress for write concern satisfaction.
- struct SlaveInfo {
- OpTime opTime; // Our last known OpTime that this slave has replicated to.
- HostAndPort hostAndPort; // Client address of the slave.
- int memberId; // Id of the node in the replica set config, or -1 if we're not a replSet.
- OID rid; // RID of the node.
- bool self; // Whether this SlaveInfo stores the information about ourself
- SlaveInfo() : memberId(-1), self(false) {}
- };
-
- typedef std::vector<SlaveInfo> SlaveInfoVector;
-
- typedef std::vector<ReplicationExecutor::CallbackHandle> HeartbeatHandles;
-
- /**
- * Looks up the SlaveInfo in _slaveInfo associated with the given RID and returns a pointer
- * to it, or returns NULL if there is no SlaveInfo with the given RID.
- */
- SlaveInfo* _findSlaveInfoByRID_inlock(const OID& rid);
-
- /**
- * Looks up the SlaveInfo in _slaveInfo associated with the given member ID and returns a
- * pointer to it, or returns NULL if there is no SlaveInfo with the given member ID.
- */
- SlaveInfo* _findSlaveInfoByMemberID_inlock(int memberID);
-
- /**
- * Adds the given SlaveInfo to _slaveInfo and wakes up any threads waiting for replication
- * that now have their write concern satisfied. Only valid to call in master/slave setups.
- */
- void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo);
-
- /**
- * Updates the item in _slaveInfo pointed to by 'slaveInfo' with the given OpTime 'ts'
- * and wakes up any threads waiting for replication that now have their write concern
- * satisfied.
- */
- void _updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, OpTime ts);
-
- /**
- * Returns the index into _slaveInfo where data corresponding to ourself is stored.
- * For more info on the rules about how we know where our entry is, see the comment for
- * _slaveInfo.
- */
- size_t _getMyIndexInSlaveInfo_inlock() const;
-
- /**
- * Helper method that removes entries from _slaveInfo if they correspond to a node
- * with a member ID that is not in the current replica set config. Will always leave an
- * entry for ourself at the beginning of _slaveInfo, even if we aren't present in the
- * config.
- */
- void _updateSlaveInfoFromConfig_inlock();
-
- /**
- * Helper to update our saved config, cancel any pending heartbeats, and kick off sending
- * new heartbeats based on the new config. Must *only* be called from within the
- * ReplicationExecutor context.
- *
- * Returns an action to be performed after unlocking _mutex, via
- * _performPostMemberStateUpdateAction.
- */
- PostMemberStateUpdateAction _setCurrentRSConfig_inlock(
- const ReplicaSetConfig& newConfig,
- int myIndex);
-
- /**
- * Helper to wake waiters in _replicationWaiterList that are doneWaitingForReplication.
- */
- void _wakeReadyWaiters_inlock();
-
- /**
- * Helper method for setting/unsetting maintenance mode. Scheduled by setMaintenanceMode()
- * to run in a global write lock in the replication executor thread.
- */
- void _setMaintenanceMode_helper(const ReplicationExecutor::CallbackData& cbData,
- bool activate,
- Status* result);
-
- /**
- * Helper method for retrieving maintenance mode. Scheduled by getMaintenanceMode() to run
- * in the replication executor thread.
- */
- void _getMaintenanceMode_helper(const ReplicationExecutor::CallbackData& cbData,
- bool* maintenanceMode);
-
- /**
- * Bottom half of fillIsMasterForReplSet.
- */
- void _fillIsMasterForReplSet_finish(const ReplicationExecutor::CallbackData& cbData,
- IsMasterResponse* result);
-
- /**
- * Bottom half of processReplSetFresh.
- */
- void _processReplSetFresh_finish(const ReplicationExecutor::CallbackData& cbData,
- const ReplSetFreshArgs& args,
- BSONObjBuilder* response,
- Status* result);
-
- /**
- * Bottom half of processReplSetElect.
- */
- void _processReplSetElect_finish(const ReplicationExecutor::CallbackData& cbData,
- const ReplSetElectArgs& args,
- BSONObjBuilder* response,
- Status* result);
-
- /**
- * Bottom half of processReplSetFreeze.
- */
- void _processReplSetFreeze_finish(const ReplicationExecutor::CallbackData& cbData,
- int secs,
- BSONObjBuilder* response,
- Status* result);
- /*
- * Bottom half of clearSyncSourceBlacklist
- */
- void _clearSyncSourceBlacklist_finish(const ReplicationExecutor::CallbackData& cbData);
-
- /**
- * Scheduled to cause the ReplicationCoordinator to reconsider any state that might
- * need to change as a result of time passing - for instance becoming PRIMARY when a single
- * node replica set member's stepDown period ends.
- */
- void _handleTimePassing(const ReplicationExecutor::CallbackData& cbData);
-
- /**
- * Helper method for _awaitReplication that takes an already locked unique_lock and a
- * Timer for timing the operation which has been counting since before the lock was
- * acquired.
- */
- ReplicationCoordinator::StatusAndDuration _awaitReplication_inlock(
- const Timer* timer,
- boost::unique_lock<boost::mutex>* lock,
- const OperationContext* txn,
- const OpTime& ts,
- const WriteConcernOptions& writeConcern);
-
- /*
- * Returns true if the given writeConcern is satisfied up to "optime" or is unsatisfiable.
- */
- bool _doneWaitingForReplication_inlock(const OpTime& opTime,
- const WriteConcernOptions& writeConcern);
-
- /**
- * Helper for _doneWaitingForReplication_inlock that takes an integer write concern.
- */
- bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes);
-
- /**
- * Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a
- * named write concern mode.
- */
- bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime,
- const ReplicaSetTagPattern& tagPattern);
-
- Status _checkIfWriteConcernCanBeSatisfied_inlock(
- const WriteConcernOptions& writeConcern) const;
-
- /**
- * Triggers all callbacks that are blocked waiting for new heartbeat data
- * to decide whether or not to finish a step down.
- * Should only be called from executor callbacks.
- */
- void _signalStepDownWaitersFromCallback(const ReplicationExecutor::CallbackData& cbData);
- void _signalStepDownWaiters();
-
- /**
- * Helper for stepDown run within a ReplicationExecutor callback. This method assumes
- * it is running within a global shared lock, and thus that no writes are going on at the
- * same time.
- */
- void _stepDownContinue(const ReplicationExecutor::CallbackData& cbData,
- const ReplicationExecutor::EventHandle finishedEvent,
- OperationContext* txn,
- Date_t waitUntil,
- Date_t stepdownUntil,
- bool force,
- Status* result);
-
- OID _getMyRID_inlock() const;
-
- int _getMyId_inlock() const;
-
- OpTime _getMyLastOptime_inlock() const;
-
-
- /**
- * Bottom half of setFollowerMode.
- *
- * May reschedule itself after the current election, so it is not sufficient to
- * wait for a callback scheduled to execute this method to complete. Instead,
- * supply an event, "finishedSettingFollowerMode", and wait for that event to
- * be signaled. Do not observe "*success" until after the event is signaled.
- */
- void _setFollowerModeFinish(
- const ReplicationExecutor::CallbackData& cbData,
- const MemberState& newState,
- const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
- bool* success);
-
- /**
- * Helper method for updating our tracking of the last optime applied by a given node.
- * This is only valid to call on replica sets.
- */
- Status _setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args);
-
- /**
- * Helper method for setMyLastOptime that takes in a unique lock on
- * _mutex. The passed in lock must already be locked. It is unspecified what state the
- * lock will be in after this method finishes.
- *
- * This function has the same rules for "ts" as setMyLastOptime(), unless
- * "isRollbackAllowed" is true.
- */
- void _setMyLastOptime_inlock(boost::unique_lock<boost::mutex>* lock,
- const OpTime& ts,
- bool isRollbackAllowed);
-
- /**
- * Schedules a heartbeat to be sent to "target" at "when". "targetIndex" is the index
- * into the replica set config members array that corresponds to the "target", or -1 if
- * "target" is not in _rsConfig.
- */
- void _scheduleHeartbeatToTarget(const HostAndPort& target, int targetIndex, Date_t when);
-
- /**
- * Processes each heartbeat response.
- *
- * Schedules additional heartbeats, triggers elections and step downs, etc.
- */
- void _handleHeartbeatResponse(const ReplicationExecutor::RemoteCommandCallbackData& cbData,
- int targetIndex);
-
- void _trackHeartbeatHandle(const StatusWith<ReplicationExecutor::CallbackHandle>& handle);
-
- void _untrackHeartbeatHandle(const ReplicationExecutor::CallbackHandle& handle);
-
- /**
- * Helper for _handleHeartbeatResponse.
- *
- * Updates the optime associated with the member at "memberIndex" in our config.
- */
- void _updateOpTimeFromHeartbeat_inlock(int memberIndex, OpTime optime);
-
- /**
- * Starts a heartbeat for each member in the current config. Called within the executor
- * context.
- */
- void _startHeartbeats();
-
- /**
- * Cancels all heartbeats. Called within executor context.
- */
- void _cancelHeartbeats();
-
- /**
- * Asynchronously sends a heartbeat to "target". "targetIndex" is the index
- * into the replica set config members array that corresponds to the "target", or -1 if
- * we don't have a valid replica set config.
- *
- * Scheduled by _scheduleHeartbeatToTarget.
- */
- void _doMemberHeartbeat(ReplicationExecutor::CallbackData cbData,
- const HostAndPort& target,
- int targetIndex);
-
-
- MemberState _getMemberState_inlock() const;
-
- /**
- * Returns the current replication mode. This method requires the caller to be holding
- * "_mutex" to be called safely.
- */
- Mode _getReplicationMode_inlock() const;
-
- /**
- * Starts loading the replication configuration from local storage, and if it is valid,
- * schedules a callback (of _finishLoadLocalConfig) to set it as the current replica set
- * config (sets _rsConfig and _thisMembersConfigIndex).
- * Returns true if it finishes loading the local config, which most likely means there
- * was no local config at all or it was invalid in some way, and false if there was a valid
- * config detected but more work is needed to set it as the local config (which will be
- * handled by the callback to _finishLoadLocalConfig).
- */
- bool _startLoadLocalConfig(OperationContext* txn);
-
- /**
- * Callback that finishes the work started in _startLoadLocalConfig and sets _rsConfigState
- * to kConfigSteady, so that we can begin processing heartbeats and reconfigs.
- */
- void _finishLoadLocalConfig(const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& localConfig,
- const StatusWith<OpTime>& lastOpTimeStatus);
-
- /**
- * Callback that finishes the work of processReplSetInitiate() inside the replication
- * executor context, in the event of a successful quorum check.
- */
- void _finishReplSetInitiate(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex);
-
- /**
- * Callback that finishes the work of processReplSetReconfig inside the replication
- * executor context, in the event of a successful quorum check.
- */
- void _finishReplSetReconfig(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig,
- int myIndex);
-
- /**
- * Changes _rsConfigState to newState, and notifies any waiters.
- */
- void _setConfigState_inlock(ConfigState newState);
-
- /**
- * Updates the cached value, _memberState, to match _topCoord's reported
- * member state, from getMemberState().
- *
- * Returns an enum indicating what action to take after releasing _mutex, if any.
- * Call performPostMemberStateUpdateAction on the return value after releasing
- * _mutex.
- */
- PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator_inlock();
-
- /**
- * Performs a post member-state update action. Do not call while holding _mutex.
- */
- void _performPostMemberStateUpdateAction(PostMemberStateUpdateAction action);
-
- /**
- * Begins an attempt to elect this node.
- * Called after an incoming heartbeat changes this node's view of the set such that it
- * believes it can be elected PRIMARY.
- * For proper concurrency, must be called via a ReplicationExecutor callback.
- */
- void _startElectSelf();
-
- /**
- * Callback called when the FreshnessChecker has completed; checks the results and
- * decides whether to continue election proceedings.
- * _electionFinishedEvent is signaled when the election is complete.
- */
- void _onFreshnessCheckComplete();
-
- /**
- * Callback called when the ElectCmdRunner has completed; checks the results and
- * decides whether to complete the election and change state to primary.
- * _electionFinishedEvent is signaled when the election is complete.
- */
- void _onElectCmdRunnerComplete();
-
- /**
- * Callback called after a random delay, to prevent repeated election ties.
- */
- void _recoverFromElectionTie(const ReplicationExecutor::CallbackData& cbData);
-
- /**
- * Chooses a new sync source. Must be scheduled as a callback.
- *
- * Calls into the Topology Coordinator, which uses its current view of the set to choose
- * the most appropriate sync source.
- */
- void _chooseNewSyncSource(const ReplicationExecutor::CallbackData& cbData,
- const OpTime& lastOpTimeFetched,
- HostAndPort* newSyncSource);
-
- /**
- * Adds 'host' to the sync source blacklist until 'until'. A blacklisted source cannot
- * be chosen as a sync source. Schedules a callback to unblacklist the sync source to be
- * run at 'until'.
- *
- * Must be scheduled as a callback.
- */
- void _blacklistSyncSource(const ReplicationExecutor::CallbackData& cbData,
- const HostAndPort& host,
- Date_t until);
-
- /**
- * Removes 'host' from the sync source blacklist. If 'host' isn't found, it's simply
- * ignored and no error is thrown.
- *
- * Must be scheduled as a callback.
- */
- void _unblacklistSyncSource(const ReplicationExecutor::CallbackData& cbData,
- const HostAndPort& host);
-
- /**
- * Determines if a new sync source should be considered.
- *
- * Must be scheduled as a callback.
- */
- void _shouldChangeSyncSource(const ReplicationExecutor::CallbackData& cbData,
- const HostAndPort& currentSource,
- bool* shouldChange);
-
- /**
- * Schedules a request that the given host step down; logs any errors.
- */
- void _requestRemotePrimaryStepdown(const HostAndPort& target);
-
- void _heartbeatStepDownStart();
-
- /**
- * Completes a step-down of the current node triggered by a heartbeat. Must
- * be run with a global shared or global exclusive lock.
- */
- void _heartbeatStepDownFinish(const ReplicationExecutor::CallbackData& cbData);
-
- /**
- * Schedules a replica set config change.
- */
- void _scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig);
-
- /**
- * Callback that continues a heartbeat-initiated reconfig after a running election
- * completes.
- */
- void _heartbeatReconfigAfterElectionCanceled(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig);
-
- /**
- * Method to write a configuration transmitted via heartbeat message to stable storage.
- */
- void _heartbeatReconfigStore(const ReplicaSetConfig& newConfig);
-
- /**
- * Conclusion actions of a heartbeat-triggered reconfiguration.
- */
- void _heartbeatReconfigFinish(const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig,
- StatusWith<int> myIndex);
-
- /**
- * Utility method that schedules or performs actions specified by a HeartbeatResponseAction
- * returned by a TopologyCoordinator::processHeartbeatResponse call with the given
- * value of "responseStatus".
- */
- void _handleHeartbeatResponseAction(
- const HeartbeatResponseAction& action,
- const StatusWith<ReplSetHeartbeatResponse>& responseStatus);
-
- /**
- * Bottom half of processHeartbeat(), which runs in the replication executor.
- */
- void _processHeartbeatFinish(const ReplicationExecutor::CallbackData& cbData,
- const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response,
- Status* outStatus);
-
- void _summarizeAsHtml_finish(const ReplicationExecutor::CallbackData& cbData,
- ReplSetHtmlSummary* output);
-
- //
- // All member variables are labeled with one of the following codes indicating the
- // synchronization rules for accessing them.
- //
- // (R) Read-only in concurrent operation; no synchronization required.
- // (S) Self-synchronizing; access in any way from any context.
- // (PS) Pointer is read-only in concurrent operation, item pointed to is self-synchronizing;
- // Access in any context.
- // (M) Reads and writes guarded by _mutex
- // (X) Reads and writes must be performed in a callback in _replExecutor
- // (MX) Must hold _mutex and be in a callback in _replExecutor to write; must either hold
- // _mutex or be in a callback in _replExecutor to read.
- // (GX) Readable under a global intent lock. Must either hold global lock in exclusive
- // mode (MODE_X) or both hold global lock in shared mode (MODE_S) and be in executor
- // context to write.
- // (I) Independently synchronized, see member variable comment.
-
- // Protects member data of this ReplicationCoordinator.
- mutable boost::mutex _mutex; // (S)
-
- // Handles to actively queued heartbeats.
- HeartbeatHandles _heartbeatHandles; // (X)
-
- // When this node does not know itself to be a member of a config, it adds
- // every host that sends it a heartbeat request to this set, and also starts
- // sending heartbeat requests to that host. This set is cleared whenever
- // a node discovers that it is a member of a config.
- unordered_set<HostAndPort> _seedList; // (X)
-
- // Parsed command line arguments related to replication.
- const ReplSettings _settings; // (R)
-
- // Mode of replication specified by _settings.
- const Mode _replMode; // (R)
-
- // Pointer to the TopologyCoordinator owned by this ReplicationCoordinator.
- boost::scoped_ptr<TopologyCoordinator> _topCoord; // (X)
-
- // Executor that drives the topology coordinator.
- ReplicationExecutor _replExecutor; // (S)
-
- // Pointer to the ReplicationCoordinatorExternalState owned by this ReplicationCoordinator.
- boost::scoped_ptr<ReplicationCoordinatorExternalState> _externalState; // (PS)
-
- // Thread that drives actions in the topology coordinator
- // Set in startReplication() and thereafter accessed in shutdown.
- boost::scoped_ptr<boost::thread> _topCoordDriverThread; // (I)
-
- // Thread that is used to write new configs received via a heartbeat reconfig
- // to stable storage. It is an error to change this if _inShutdown is true.
- boost::scoped_ptr<boost::thread> _heartbeatReconfigThread; // (M)
-
- // Our RID, used to identify us to our sync source when sending replication progress
- // updates upstream. Set once in startReplication() and then never modified again.
- OID _myRID; // (M)
-
- // Rollback ID. Used to check if a rollback happened during some interval of time
- // TODO: ideally this should only change on rollbacks, not also on mongod restarts.
- int _rbid; // (M)
-
- // List of information about clients waiting on replication. Does *not* own the
- // WaiterInfos.
- std::vector<WaiterInfo*> _replicationWaiterList; // (M)
-
- // Set to true when we are in the process of shutting down replication.
- bool _inShutdown; // (M)
-
- // Election ID of the last election that resulted in this node becoming primary.
- OID _electionId; // (M)
-
- // Vector containing known information about each member (such as replication
- // progress and member ID) in our replica set or each member replicating from
- // us in a master-slave deployment. In master/slave, the first entry is
- // guaranteed to correspond to ourself. In replica sets where we don't have a
- // valid config or are in state REMOVED then the vector will be a single element
- // just with info about ourself. In replica sets with a valid config the elements
- // will be in the same order as the members in the replica set config, thus
- // the entry for ourself will be at _thisMemberConfigIndex.
- SlaveInfoVector _slaveInfo; // (M)
-
- // Current ReplicaSet state.
- MemberState _memberState; // (MX)
-
- // True if we are waiting for the applier to finish draining.
- bool _isWaitingForDrainToComplete; // (M)
-
- // Used to signal threads waiting for changes to _rsConfigState.
- boost::condition_variable _rsConfigStateChange; // (M)
-
- // Represents the configuration state of the coordinator, which controls how and when
- // _rsConfig may change. See the state transition diagram in the type definition of
- // ConfigState for details.
- ConfigState _rsConfigState; // (M)
-
- // The current ReplicaSet configuration object, including the information about tag groups
- // that is used to satisfy write concern requests with named gle modes.
- ReplicaSetConfig _rsConfig; // (MX)
-
- // This member's index position in the current config.
- int _selfIndex; // (MX)
-
- // Vector of events that should be signaled whenever new heartbeat data comes in.
- std::vector<ReplicationExecutor::EventHandle> _stepDownWaiters; // (X)
-
- // State for conducting an election of this node.
- // The presence of a non-null _freshnessChecker pointer indicates that an election is
- // currently in progress. Only one election is allowed at once.
- boost::scoped_ptr<FreshnessChecker> _freshnessChecker; // (X)
-
- boost::scoped_ptr<ElectCmdRunner> _electCmdRunner; // (X)
+ // Struct that holds information about clients waiting for replication.
+ struct WaiterInfo;
+
+ // Struct that holds information about nodes in this replication group, mainly used for
+ // tracking replication progress for write concern satisfaction.
+ struct SlaveInfo {
+ OpTime opTime; // Our last known OpTime that this slave has replicated to.
+ HostAndPort hostAndPort; // Client address of the slave.
+ int memberId; // Id of the node in the replica set config, or -1 if we're not a replSet.
+ OID rid; // RID of the node.
+ bool self; // Whether this SlaveInfo stores the information about ourself.
+ SlaveInfo() : memberId(-1), self(false) {}
+ };
- // Event that the election code will signal when the in-progress election completes.
- // Unspecified value when _freshnessChecker is NULL.
- ReplicationExecutor::EventHandle _electionFinishedEvent; // (X)
+ typedef std::vector<SlaveInfo> SlaveInfoVector;
+
+ typedef std::vector<ReplicationExecutor::CallbackHandle> HeartbeatHandles;
+
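+ // Illustrative sketch (not a verbatim excerpt from this header): in a
+ // master/slave deployment a handshaking client might be recorded roughly like
+ // this, with hypothetical names "handshakeRid" and "clientAddress":
+ //
+ //     SlaveInfo info;
+ //     info.rid = handshakeRid;          // RID announced by the slave
+ //     info.hostAndPort = clientAddress; // where the slave connected from
+ //     _addSlaveInfo_inlock(info);       // memberId stays -1 outside a replSet
+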
+ /**
+ * Looks up the SlaveInfo in _slaveInfo associated with the given RID and returns a pointer
+ * to it, or returns NULL if there is no SlaveInfo with the given RID.
+ */
+ SlaveInfo* _findSlaveInfoByRID_inlock(const OID& rid);
+
+ /**
+ * Looks up the SlaveInfo in _slaveInfo associated with the given member ID and returns a
+ * pointer to it, or returns NULL if there is no SlaveInfo with the given member ID.
+ */
+ SlaveInfo* _findSlaveInfoByMemberID_inlock(int memberID);
+
+ /**
+ * Adds the given SlaveInfo to _slaveInfo and wakes up any threads waiting for replication
+ * that now have their write concern satisfied. Only valid to call in master/slave setups.
+ */
+ void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo);
+
+ /**
+ * Updates the item in _slaveInfo pointed to by 'slaveInfo' with the given OpTime 'ts'
+ * and wakes up any threads waiting for replication that now have their write concern
+ * satisfied.
+ */
+ void _updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, OpTime ts);
+
+ /**
+ * Returns the index into _slaveInfo where data corresponding to ourself is stored.
+ * For more info on the rules about how we know where our entry is, see the comment for
+ * _slaveInfo.
+ */
+ size_t _getMyIndexInSlaveInfo_inlock() const;
+
+ /**
+ * Helper method that removes entries from _slaveInfo if they correspond to a node
+ * with a member ID that is not in the current replica set config. Will always leave an
+ * entry for ourself at the beginning of _slaveInfo, even if we aren't present in the
+ * config.
+ */
+ void _updateSlaveInfoFromConfig_inlock();
+
+ /**
+ * Helper to update our saved config, cancel any pending heartbeats, and kick off sending
+ * new heartbeats based on the new config. Must *only* be called from within the
+ * ReplicationExecutor context.
+ *
+ * Returns an action to be performed after unlocking _mutex, via
+ * _performPostMemberStateUpdateAction.
+ */
+ PostMemberStateUpdateAction _setCurrentRSConfig_inlock(const ReplicaSetConfig& newConfig,
+ int myIndex);
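+
+ // Illustrative call pattern for the contract above (a hedged sketch, not a
+ // verbatim excerpt), run from inside a ReplicationExecutor callback:
+ //
+ //     PostMemberStateUpdateAction action;
+ //     {
+ //         boost::lock_guard<boost::mutex> lk(_mutex);
+ //         action = _setCurrentRSConfig_inlock(newConfig, myIndex);
+ //     }
+ //     _performPostMemberStateUpdateAction(action); // only after releasing _mutex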
+
+ /**
+ * Helper to wake waiters in _replicationWaiterList that are doneWaitingForReplication.
+ */
+ void _wakeReadyWaiters_inlock();
+
+ /**
+ * Helper method for setting/unsetting maintenance mode. Scheduled by setMaintenanceMode()
+ * to run in a global write lock in the replication executor thread.
+ */
+ void _setMaintenanceMode_helper(const ReplicationExecutor::CallbackData& cbData,
+ bool activate,
+ Status* result);
+
+ /**
+ * Helper method for retrieving maintenance mode. Scheduled by getMaintenanceMode() to run
+ * in the replication executor thread.
+ */
+ void _getMaintenanceMode_helper(const ReplicationExecutor::CallbackData& cbData,
+ bool* maintenanceMode);
+
+ /**
+ * Bottom half of fillIsMasterForReplSet.
+ */
+ void _fillIsMasterForReplSet_finish(const ReplicationExecutor::CallbackData& cbData,
+ IsMasterResponse* result);
+
+ /**
+ * Bottom half of processReplSetFresh.
+ */
+ void _processReplSetFresh_finish(const ReplicationExecutor::CallbackData& cbData,
+ const ReplSetFreshArgs& args,
+ BSONObjBuilder* response,
+ Status* result);
+
+ /**
+ * Bottom half of processReplSetElect.
+ */
+ void _processReplSetElect_finish(const ReplicationExecutor::CallbackData& cbData,
+ const ReplSetElectArgs& args,
+ BSONObjBuilder* response,
+ Status* result);
+
+ /**
+ * Bottom half of processReplSetFreeze.
+ */
+ void _processReplSetFreeze_finish(const ReplicationExecutor::CallbackData& cbData,
+ int secs,
+ BSONObjBuilder* response,
+ Status* result);
+
+ /**
+ * Bottom half of clearSyncSourceBlacklist.
+ */
+ void _clearSyncSourceBlacklist_finish(const ReplicationExecutor::CallbackData& cbData);
+
+ /**
+ * Scheduled to cause the ReplicationCoordinator to reconsider any state that might
+ * need to change as a result of time passing - for instance becoming PRIMARY when a single
+ * node replica set member's stepDown period ends.
+ */
+ void _handleTimePassing(const ReplicationExecutor::CallbackData& cbData);
+
+ /**
+ * Helper method for _awaitReplication that takes an already locked unique_lock and a
+ * Timer for timing the operation which has been counting since before the lock was
+ * acquired.
+ */
+ ReplicationCoordinator::StatusAndDuration _awaitReplication_inlock(
+ const Timer* timer,
+ boost::unique_lock<boost::mutex>* lock,
+ const OperationContext* txn,
+ const OpTime& ts,
+ const WriteConcernOptions& writeConcern);
+
+ /**
+ * Returns true if the given writeConcern is satisfied up to "opTime" or is unsatisfiable.
+ */
+ bool _doneWaitingForReplication_inlock(const OpTime& opTime,
+ const WriteConcernOptions& writeConcern);
+
+ /**
+ * Helper for _doneWaitingForReplication_inlock that takes an integer write concern.
+ */
+ bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes);
+
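+ // Conceptually (an illustrative sketch, not the literal implementation), the
+ // integer form above counts _slaveInfo entries that have reached the optime:
+ //
+ //     int numSatisfied = 0;
+ //     for (size_t i = 0; i < _slaveInfo.size(); ++i) {
+ //         if (opTime <= _slaveInfo[i].opTime)
+ //             ++numSatisfied;
+ //     }
+ //     return numSatisfied >= numNodes;
+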
+ /**
+ * Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a
+ * named write concern mode.
+ */
+ bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime,
+ const ReplicaSetTagPattern& tagPattern);
+
+ Status _checkIfWriteConcernCanBeSatisfied_inlock(const WriteConcernOptions& writeConcern) const;
+
+ /**
+ * Triggers all callbacks that are blocked waiting for new heartbeat data
+ * to decide whether or not to finish a step down.
+ * Should only be called from executor callbacks.
+ */
+ void _signalStepDownWaitersFromCallback(const ReplicationExecutor::CallbackData& cbData);
+ void _signalStepDownWaiters();
+
+ /**
+ * Helper for stepDown run within a ReplicationExecutor callback. This method assumes
+ * it is running within a global shared lock, and thus that no writes are going on at the
+ * same time.
+ */
+ void _stepDownContinue(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicationExecutor::EventHandle finishedEvent,
+ OperationContext* txn,
+ Date_t waitUntil,
+ Date_t stepdownUntil,
+ bool force,
+ Status* result);
+
+ OID _getMyRID_inlock() const;
+
+ int _getMyId_inlock() const;
+
+ OpTime _getMyLastOptime_inlock() const;
+
+
+ /**
+ * Bottom half of setFollowerMode.
+ *
+ * May reschedule itself after the current election, so it is not sufficient to
+ * wait for a callback scheduled to execute this method to complete. Instead,
+ * supply an event, "finishedSettingFollowerMode", and wait for that event to
+ * be signaled. Do not observe "*success" until after the event is signaled.
+ */
+ void _setFollowerModeFinish(const ReplicationExecutor::CallbackData& cbData,
+ const MemberState& newState,
+ const ReplicationExecutor::EventHandle& finishedSettingFollowerMode,
+ bool* success);
+
+ /**
+ * Helper method for updating our tracking of the last optime applied by a given node.
+ * This is only valid to call on replica sets.
+ */
+ Status _setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args);
+
+ /**
+ * Helper method for setMyLastOptime that takes in a unique lock on
+ * _mutex. The passed in lock must already be locked. It is unspecified what state the
+ * lock will be in after this method finishes.
+ *
+ * This function has the same rules for "ts" as setMyLastOptime(), unless
+ * "isRollbackAllowed" is true.
+ */
+ void _setMyLastOptime_inlock(boost::unique_lock<boost::mutex>* lock,
+ const OpTime& ts,
+ bool isRollbackAllowed);
+
+ /**
+ * Schedules a heartbeat to be sent to "target" at "when". "targetIndex" is the index
+ * into the replica set config members array that corresponds to the "target", or -1 if
+ * "target" is not in _rsConfig.
+ */
+ void _scheduleHeartbeatToTarget(const HostAndPort& target, int targetIndex, Date_t when);
+
+ /**
+ * Processes each heartbeat response.
+ *
+ * Schedules additional heartbeats, triggers elections and step downs, etc.
+ */
+ void _handleHeartbeatResponse(const ReplicationExecutor::RemoteCommandCallbackData& cbData,
+ int targetIndex);
+
+ void _trackHeartbeatHandle(const StatusWith<ReplicationExecutor::CallbackHandle>& handle);
+
+ void _untrackHeartbeatHandle(const ReplicationExecutor::CallbackHandle& handle);
+
+ /**
+ * Helper for _handleHeartbeatResponse.
+ *
+ * Updates the optime associated with the member at "memberIndex" in our config.
+ */
+ void _updateOpTimeFromHeartbeat_inlock(int memberIndex, OpTime optime);
+
+ /**
+ * Starts a heartbeat for each member in the current config. Called within the executor
+ * context.
+ */
+ void _startHeartbeats();
+
+ /**
+ * Cancels all heartbeats. Called within executor context.
+ */
+ void _cancelHeartbeats();
+
+ /**
+ * Asynchronously sends a heartbeat to "target". "targetIndex" is the index
+ * into the replica set config members array that corresponds to the "target", or -1 if
+ * we don't have a valid replica set config.
+ *
+ * Scheduled by _scheduleHeartbeatToTarget.
+ */
+ void _doMemberHeartbeat(ReplicationExecutor::CallbackData cbData,
+ const HostAndPort& target,
+ int targetIndex);
+
+
+ MemberState _getMemberState_inlock() const;
+
+ /**
+ * Returns the current replication mode. This method requires the caller to be holding
+ * "_mutex" to be called safely.
+ */
+ Mode _getReplicationMode_inlock() const;
+
+ /**
+ * Starts loading the replication configuration from local storage, and if it is valid,
+ * schedules a callback (of _finishLoadLocalConfig) to set it as the current replica set
+ * config (sets _rsConfig and _thisMembersConfigIndex).
+ * Returns true if it finishes loading the local config, which most likely means there
+ * was no local config at all or it was invalid in some way, and false if there was a valid
+ * config detected but more work is needed to set it as the local config (which will be
+ * handled by the callback to _finishLoadLocalConfig).
+ */
+ bool _startLoadLocalConfig(OperationContext* txn);
+
+ /**
+ * Callback that finishes the work started in _startLoadLocalConfig and sets _rsConfigState
+ * to kConfigSteady, so that we can begin processing heartbeats and reconfigs.
+ */
+ void _finishLoadLocalConfig(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& localConfig,
+ const StatusWith<OpTime>& lastOpTimeStatus);
+
+ /**
+ * Callback that finishes the work of processReplSetInitiate() inside the replication
+ * executor context, in the event of a successful quorum check.
+ */
+ void _finishReplSetInitiate(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex);
+
+ /**
+ * Callback that finishes the work of processReplSetReconfig inside the replication
+ * executor context, in the event of a successful quorum check.
+ */
+ void _finishReplSetReconfig(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig,
+ int myIndex);
+
+ /**
+ * Changes _rsConfigState to newState, and notifies any waiters.
+ */
+ void _setConfigState_inlock(ConfigState newState);
+
+ /**
+ * Updates the cached value, _memberState, to match _topCoord's reported
+ * member state, from getMemberState().
+ *
+ * Returns an enum indicating what action to take after releasing _mutex, if any.
+ * Call performPostMemberStateUpdateAction on the return value after releasing
+ * _mutex.
+ */
+ PostMemberStateUpdateAction _updateMemberStateFromTopologyCoordinator_inlock();
+
+ /**
+ * Performs a post member-state update action. Do not call while holding _mutex.
+ */
+ void _performPostMemberStateUpdateAction(PostMemberStateUpdateAction action);
+
+ /**
+ * Begins an attempt to elect this node.
+ * Called after an incoming heartbeat changes this node's view of the set such that it
+ * believes it can be elected PRIMARY.
+ * For proper concurrency, must be called via a ReplicationExecutor callback.
+ */
+ void _startElectSelf();
+
+ /**
+ * Callback called when the FreshnessChecker has completed; checks the results and
+ * decides whether to continue election proceedings.
+ * _electionFinishedEvent is signaled when the election is complete.
+ */
+ void _onFreshnessCheckComplete();
+
+ /**
+ * Callback called when the ElectCmdRunner has completed; checks the results and
+ * decides whether to complete the election and change state to primary.
+ * _electionFinishedEvent is signaled when the election is complete.
+ */
+ void _onElectCmdRunnerComplete();
+
+ /**
+ * Callback called after a random delay, to prevent repeated election ties.
+ */
+ void _recoverFromElectionTie(const ReplicationExecutor::CallbackData& cbData);
+
+ /**
+ * Chooses a new sync source. Must be scheduled as a callback.
+ *
+ * Calls into the Topology Coordinator, which uses its current view of the set to choose
+ * the most appropriate sync source.
+ */
+ void _chooseNewSyncSource(const ReplicationExecutor::CallbackData& cbData,
+ const OpTime& lastOpTimeFetched,
+ HostAndPort* newSyncSource);
+
+ /**
+ * Adds 'host' to the sync source blacklist until 'until'. A blacklisted source cannot
+ * be chosen as a sync source. Schedules a callback to unblacklist the sync source to be
+ * run at 'until'.
+ *
+ * Must be scheduled as a callback.
+ */
+ void _blacklistSyncSource(const ReplicationExecutor::CallbackData& cbData,
+ const HostAndPort& host,
+ Date_t until);
+
+ /**
+ * Removes 'host' from the sync source blacklist. If 'host' isn't found, it's simply
+ * ignored and no error is thrown.
+ *
+ * Must be scheduled as a callback.
+ */
+ void _unblacklistSyncSource(const ReplicationExecutor::CallbackData& cbData,
+ const HostAndPort& host);
+
+ /**
+ * Determines if a new sync source should be considered.
+ *
+ * Must be scheduled as a callback.
+ */
+ void _shouldChangeSyncSource(const ReplicationExecutor::CallbackData& cbData,
+ const HostAndPort& currentSource,
+ bool* shouldChange);
+
+ /**
+ * Schedules a request that the given host step down; logs any errors.
+ */
+ void _requestRemotePrimaryStepdown(const HostAndPort& target);
+
+ void _heartbeatStepDownStart();
+
+ /**
+ * Completes a step-down of the current node triggered by a heartbeat. Must
+ * be run with a global shared or global exclusive lock.
+ */
+ void _heartbeatStepDownFinish(const ReplicationExecutor::CallbackData& cbData);
+
+ /**
+ * Schedules a replica set config change.
+ */
+ void _scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig);
+
+ /**
+ * Callback that continues a heartbeat-initiated reconfig after a running election
+ * completes.
+ */
+ void _heartbeatReconfigAfterElectionCanceled(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig);
+
+ /**
+ * Method to write a configuration transmitted via heartbeat message to stable storage.
+ */
+ void _heartbeatReconfigStore(const ReplicaSetConfig& newConfig);
+
+ /**
+ * Conclusion actions of a heartbeat-triggered reconfiguration.
+ */
+ void _heartbeatReconfigFinish(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig,
+ StatusWith<int> myIndex);
+
+ /**
+ * Utility method that schedules or performs actions specified by a HeartbeatResponseAction
+ * returned by a TopologyCoordinator::processHeartbeatResponse call with the given
+ * value of "responseStatus".
+ */
+ void _handleHeartbeatResponseAction(const HeartbeatResponseAction& action,
+ const StatusWith<ReplSetHeartbeatResponse>& responseStatus);
+
+ /**
+ * Bottom half of processHeartbeat(), which runs in the replication executor.
+ */
+ void _processHeartbeatFinish(const ReplicationExecutor::CallbackData& cbData,
+ const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response,
+ Status* outStatus);
+
+ void _summarizeAsHtml_finish(const ReplicationExecutor::CallbackData& cbData,
+ ReplSetHtmlSummary* output);
+
+ //
+ // All member variables are labeled with one of the following codes indicating the
+ // synchronization rules for accessing them.
+ //
+ // (R) Read-only in concurrent operation; no synchronization required.
+ // (S) Self-synchronizing; access in any way from any context.
+ // (PS) Pointer is read-only in concurrent operation, item pointed to is self-synchronizing;
+ // Access in any context.
+ // (M) Reads and writes guarded by _mutex
+ // (X) Reads and writes must be performed in a callback in _replExecutor
+ // (MX) Must hold _mutex and be in a callback in _replExecutor to write; must either hold
+ // _mutex or be in a callback in _replExecutor to read.
+ // (GX) Readable under a global intent lock. Must either hold global lock in exclusive
+ // mode (MODE_X) or both hold global lock in shared mode (MODE_S) and be in executor
+ // context to write.
+ // (I) Independently synchronized, see member variable comment.
+
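+ // For example (an illustrative reading of the codes, not additional rules):
+ // _rsConfig below is (MX), so writing it requires holding _mutex from inside a
+ // _replExecutor callback, while reading requires only one of the two:
+ //
+ //     boost::lock_guard<boost::mutex> lk(_mutex); // inside an executor callback
+ //     _rsConfig = newConfig;                      // write now permitted by (MX)
+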
+ // Protects member data of this ReplicationCoordinator.
+ mutable boost::mutex _mutex; // (S)
+
+ // Handles to actively queued heartbeats.
+ HeartbeatHandles _heartbeatHandles; // (X)
+
+ // When this node does not know itself to be a member of a config, it adds
+ // every host that sends it a heartbeat request to this set, and also starts
+ // sending heartbeat requests to that host. This set is cleared whenever
+ // a node discovers that it is a member of a config.
+ unordered_set<HostAndPort> _seedList; // (X)
+
+ // Parsed command line arguments related to replication.
+ const ReplSettings _settings; // (R)
+
+ // Mode of replication specified by _settings.
+ const Mode _replMode; // (R)
+
+ // Pointer to the TopologyCoordinator owned by this ReplicationCoordinator.
+ boost::scoped_ptr<TopologyCoordinator> _topCoord; // (X)
+
+ // Executor that drives the topology coordinator.
+ ReplicationExecutor _replExecutor; // (S)
+
+ // Pointer to the ReplicationCoordinatorExternalState owned by this ReplicationCoordinator.
+ boost::scoped_ptr<ReplicationCoordinatorExternalState> _externalState; // (PS)
+
+ // Thread that drives actions in the topology coordinator
+ // Set in startReplication() and thereafter accessed in shutdown.
+ boost::scoped_ptr<boost::thread> _topCoordDriverThread; // (I)
+
+ // Thread that is used to write new configs received via a heartbeat reconfig
+ // to stable storage. It is an error to change this if _inShutdown is true.
+ boost::scoped_ptr<boost::thread> _heartbeatReconfigThread; // (M)
+
+ // Our RID, used to identify us to our sync source when sending replication progress
+ // updates upstream. Set once in startReplication() and then never modified again.
+ OID _myRID; // (M)
+
+ // Rollback ID. Used to check if a rollback happened during some interval of time
+ // TODO: ideally this should only change on rollbacks, not also on mongod restarts.
+ int _rbid; // (M)
+
+ // List of information about clients waiting on replication. Does *not* own the
+ // WaiterInfos.
+ std::vector<WaiterInfo*> _replicationWaiterList; // (M)
+
+ // Set to true when we are in the process of shutting down replication.
+ bool _inShutdown; // (M)
+
+ // Election ID of the last election that resulted in this node becoming primary.
+ OID _electionId; // (M)
+
+ // Vector containing known information about each member (such as replication
+ // progress and member ID) in our replica set or each member replicating from
+ // us in a master-slave deployment. In master/slave, the first entry is
+ // guaranteed to correspond to ourself. In replica sets where we don't have a
+ // valid config or are in state REMOVED then the vector will be a single element
+ // just with info about ourself. In replica sets with a valid config the elements
+ // will be in the same order as the members in the replica set config, thus
+ // the entry for ourself will be at _thisMemberConfigIndex.
+ SlaveInfoVector _slaveInfo; // (M)
+
+ // Current ReplicaSet state.
+ MemberState _memberState; // (MX)
+
+ // True if we are waiting for the applier to finish draining.
+ bool _isWaitingForDrainToComplete; // (M)
+
+ // Used to signal threads waiting for changes to _rsConfigState.
+ boost::condition_variable _rsConfigStateChange; // (M)
+
+ // Represents the configuration state of the coordinator, which controls how and when
+ // _rsConfig may change. See the state transition diagram in the type definition of
+ // ConfigState for details.
+ ConfigState _rsConfigState; // (M)
+
+ // The current ReplicaSet configuration object, including the information about tag groups
+ // that is used to satisfy write concern requests with named gle modes.
+ ReplicaSetConfig _rsConfig; // (MX)
+
+ // This member's index position in the current config.
+ int _selfIndex; // (MX)
+
+ // Vector of events that should be signaled whenever new heartbeat data comes in.
+ std::vector<ReplicationExecutor::EventHandle> _stepDownWaiters; // (X)
+
+ // State for conducting an election of this node.
+ // The presence of a non-null _freshnessChecker pointer indicates that an election is
+ // currently in progress. Only one election is allowed at once.
+ boost::scoped_ptr<FreshnessChecker> _freshnessChecker; // (X)
+
+ boost::scoped_ptr<ElectCmdRunner> _electCmdRunner; // (X)
- // Whether we slept last time we attempted an election but possibly tied with other nodes.
- bool _sleptLastElection; // (X)
+ // Event that the election code will signal when the in-progress election completes.
+ // Unspecified value when _freshnessChecker is NULL.
+ ReplicationExecutor::EventHandle _electionFinishedEvent; // (X)
- // Flag that indicates whether writes to databases other than "local" are allowed. Used to
- // answer the canAcceptWritesForDatabase() question. Always true for standalone nodes and
- // masters in master-slave relationships.
- bool _canAcceptNonLocalWrites; // (GX)
+ // Whether we slept last time we attempted an election but possibly tied with other nodes.
+ bool _sleptLastElection; // (X)
- // Flag that indicates whether reads from databases other than "local" are allowed. Unlike
- // _canAcceptNonLocalWrites, above, this question is about admission control on secondaries,
- // and we do not require that its observers be strongly synchronized. Accidentally
- // providing the prior value for a limited period of time is acceptable. Also unlike
- // _canAcceptNonLocalWrites, its value is only meaningful on replica set secondaries.
- AtomicUInt32 _canServeNonLocalReads; // (S)
- };
+ // Flag that indicates whether writes to databases other than "local" are allowed. Used to
+ // answer the canAcceptWritesForDatabase() question. Always true for standalone nodes and
+ // masters in master-slave relationships.
+ bool _canAcceptNonLocalWrites; // (GX)
+
+ // Flag that indicates whether reads from databases other than "local" are allowed. Unlike
+ // _canAcceptNonLocalWrites, above, this question is about admission control on secondaries,
+ // and we do not require that its observers be strongly synchronized. Accidentally
+ // providing the prior value for a limited period of time is acceptable. Also unlike
+ // _canAcceptNonLocalWrites, its value is only meaningful on replica set secondaries.
+ AtomicUInt32 _canServeNonLocalReads; // (S)
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
index aa4618210b5..cb34f89c2d7 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
@@ -42,54 +42,55 @@ namespace mongo {
namespace repl {
namespace {
- class LoseElectionGuard {
- MONGO_DISALLOW_COPYING(LoseElectionGuard);
- public:
- LoseElectionGuard(
- TopologyCoordinator* topCoord,
- ReplicationExecutor* executor,
- boost::scoped_ptr<FreshnessChecker>* freshnessChecker,
- boost::scoped_ptr<ElectCmdRunner>* electCmdRunner,
- ReplicationExecutor::EventHandle* electionFinishedEvent)
- : _topCoord(topCoord),
- _executor(executor),
- _freshnessChecker(freshnessChecker),
- _electCmdRunner(electCmdRunner),
- _electionFinishedEvent(electionFinishedEvent),
- _dismissed(false) {
+class LoseElectionGuard {
+ MONGO_DISALLOW_COPYING(LoseElectionGuard);
+
+public:
+ LoseElectionGuard(TopologyCoordinator* topCoord,
+ ReplicationExecutor* executor,
+ boost::scoped_ptr<FreshnessChecker>* freshnessChecker,
+ boost::scoped_ptr<ElectCmdRunner>* electCmdRunner,
+ ReplicationExecutor::EventHandle* electionFinishedEvent)
+ : _topCoord(topCoord),
+ _executor(executor),
+ _freshnessChecker(freshnessChecker),
+ _electCmdRunner(electCmdRunner),
+ _electionFinishedEvent(electionFinishedEvent),
+ _dismissed(false) {}
+
+ ~LoseElectionGuard() {
+ if (_dismissed) {
+ return;
}
-
- ~LoseElectionGuard() {
- if (_dismissed) {
- return;
- }
- _topCoord->processLoseElection();
- _freshnessChecker->reset(NULL);
- _electCmdRunner->reset(NULL);
- if (_electionFinishedEvent->isValid()) {
- _executor->signalEvent(*_electionFinishedEvent);
- }
+ _topCoord->processLoseElection();
+ _freshnessChecker->reset(NULL);
+ _electCmdRunner->reset(NULL);
+ if (_electionFinishedEvent->isValid()) {
+ _executor->signalEvent(*_electionFinishedEvent);
}
+ }
- void dismiss() { _dismissed = true; }
+ void dismiss() {
+ _dismissed = true;
+ }
- private:
- TopologyCoordinator* const _topCoord;
- ReplicationExecutor* const _executor;
- boost::scoped_ptr<FreshnessChecker>* const _freshnessChecker;
- boost::scoped_ptr<ElectCmdRunner>* const _electCmdRunner;
- const ReplicationExecutor::EventHandle* _electionFinishedEvent;
- bool _dismissed;
- };
+private:
+ TopologyCoordinator* const _topCoord;
+ ReplicationExecutor* const _executor;
+ boost::scoped_ptr<FreshnessChecker>* const _freshnessChecker;
+ boost::scoped_ptr<ElectCmdRunner>* const _electCmdRunner;
+ const ReplicationExecutor::EventHandle* _electionFinishedEvent;
+ bool _dismissed;
+};
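+
+// Illustrative usage, mirroring the election phases below: the guard treats any
+// early return as a lost election unless dismiss() is reached on the success path.
+//
+//     LoseElectionGuard lossGuard(_topCoord.get(), &_replExecutor,
+//                                 &_freshnessChecker, &_electCmdRunner,
+//                                 &_electionFinishedEvent);
+//     ...                  // phase work; any early return loses the election
+//     lossGuard.dismiss(); // success: suppress the guard's cleanup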
} // namespace
- void ReplicationCoordinatorImpl::_startElectSelf() {
- invariant(!_freshnessChecker);
- invariant(!_electCmdRunner);
+void ReplicationCoordinatorImpl::_startElectSelf() {
+ invariant(!_freshnessChecker);
+ invariant(!_electCmdRunner);
- boost::unique_lock<boost::mutex> lk(_mutex);
- switch (_rsConfigState) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ switch (_rsConfigState) {
case kConfigSteady:
break;
case kConfigInitiating:
@@ -100,183 +101,183 @@ namespace {
_topCoord->processLoseElection();
return;
default:
- severe() << "Entered replica set election code while in illegal config state " <<
- int(_rsConfigState);
+ severe() << "Entered replica set election code while in illegal config state "
+ << int(_rsConfigState);
fassertFailed(18913);
- }
+ }
- log() << "Standing for election";
- const StatusWith<ReplicationExecutor::EventHandle> finishEvh = _replExecutor.makeEvent();
- if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18680, finishEvh.getStatus());
- _electionFinishedEvent = finishEvh.getValue();
- LoseElectionGuard lossGuard(_topCoord.get(),
- &_replExecutor,
- &_freshnessChecker,
- &_electCmdRunner,
- &_electionFinishedEvent);
+ log() << "Standing for election";
+ const StatusWith<ReplicationExecutor::EventHandle> finishEvh = _replExecutor.makeEvent();
+ if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18680, finishEvh.getStatus());
+ _electionFinishedEvent = finishEvh.getValue();
+ LoseElectionGuard lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_freshnessChecker,
+ &_electCmdRunner,
+ &_electionFinishedEvent);
+
+
+ invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
+ OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+
+ if (lastOpTimeApplied == OpTime()) {
+ log() << "replSet info not trying to elect self, "
+ "do not yet have a complete set of data from any point in time";
+ return;
+ }
+ _freshnessChecker.reset(new FreshnessChecker);
+
+ // This is necessary because the freshnessChecker may call directly into winning an
+ // election, if there are no other MaybeUp nodes. Winning an election attempts to lock
+ // _mutex again.
+ lk.unlock();
+
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _freshnessChecker->start(
+ &_replExecutor,
+ lastOpTimeApplied,
+ _rsConfig,
+ _selfIndex,
+ _topCoord->getMaybeUpHostAndPorts(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onFreshnessCheckComplete, this));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18681, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onFreshnessCheckComplete() {
+ invariant(_freshnessChecker);
+ invariant(!_electCmdRunner);
+ LoseElectionGuard lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_freshnessChecker,
+ &_electCmdRunner,
+ &_electionFinishedEvent);
+
+ if (_freshnessChecker->isCanceled()) {
+ LOG(2) << "Election canceled during freshness check phase";
+ return;
+ }
- invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
- OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+ const Date_t now(_replExecutor.now());
+ const FreshnessChecker::ElectionAbortReason abortReason =
+ _freshnessChecker->shouldAbortElection();
- if (lastOpTimeApplied == OpTime()) {
- log() << "replSet info not trying to elect self, "
- "do not yet have a complete set of data from any point in time";
+ // Only sleep on a freshness tie if we didn't already sleep for the last attempt.
+ switch (abortReason) {
+ case FreshnessChecker::None:
+ break;
+ case FreshnessChecker::FreshnessTie:
+ if ((_selfIndex != 0) && !_sleptLastElection) {
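+ // Randomized backoff: nextRandomInt64(1000) yields 0-999ms, so the
+ // delay is 50-1049ms, staggering candidates that tied this round.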
+ const long long ms = _replExecutor.nextRandomInt64(1000) + 50;
+ const Date_t nextCandidateTime = now + ms;
+ log() << "replSet possible election tie; sleeping " << ms << "ms until "
+ << dateToISOStringLocal(nextCandidateTime);
+ _topCoord->setElectionSleepUntil(nextCandidateTime);
+ _replExecutor.scheduleWorkAt(
+ nextCandidateTime,
+ stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
+ this,
+ stdx::placeholders::_1));
+ _sleptLastElection = true;
+ return;
+ }
+ _sleptLastElection = false;
+ break;
+ case FreshnessChecker::FresherNodeFound:
+ log() << "not electing self, we are not freshest";
return;
- }
-
- _freshnessChecker.reset(new FreshnessChecker);
-
- // This is necessary because the freshnessChecker may call directly into winning an
- // election, if there are no other MaybeUp nodes. Winning an election attempts to lock
- // _mutex again.
- lk.unlock();
-
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _freshnessChecker->start(
- &_replExecutor,
- lastOpTimeApplied,
- _rsConfig,
- _selfIndex,
- _topCoord->getMaybeUpHostAndPorts(),
- stdx::bind(&ReplicationCoordinatorImpl::_onFreshnessCheckComplete, this));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ case FreshnessChecker::QuorumUnreachable:
+ log() << "not electing self, we could not contact enough voting members";
+ return;
+ default:
+ log() << "not electing self due to election abort message :"
+ << static_cast<int>(abortReason);
return;
- }
- fassert(18681, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
}
- void ReplicationCoordinatorImpl::_onFreshnessCheckComplete() {
- invariant(_freshnessChecker);
- invariant(!_electCmdRunner);
- LoseElectionGuard lossGuard(_topCoord.get(),
- &_replExecutor,
- &_freshnessChecker,
- &_electCmdRunner,
- &_electionFinishedEvent);
-
- if (_freshnessChecker->isCanceled()) {
- LOG(2) << "Election canceled during freshness check phase";
- return;
- }
+ log() << "replSet info electSelf";
+ // Secure our vote for ourself first
+ if (!_topCoord->voteForMyself(now)) {
+ return;
+ }
- const Date_t now(_replExecutor.now());
- const FreshnessChecker::ElectionAbortReason abortReason =
- _freshnessChecker->shouldAbortElection();
-
- // Only sleep on a freshness tie if we didn't already sleep for the last attempt.
- switch (abortReason) {
- case FreshnessChecker::None:
- break;
- case FreshnessChecker::FreshnessTie:
- if ((_selfIndex != 0) && !_sleptLastElection) {
- const long long ms = _replExecutor.nextRandomInt64(1000) + 50;
- const Date_t nextCandidateTime = now + ms;
- log() << "replSet possible election tie; sleeping " << ms << "ms until " <<
- dateToISOStringLocal(nextCandidateTime);
- _topCoord->setElectionSleepUntil(nextCandidateTime);
- _replExecutor.scheduleWorkAt(
- nextCandidateTime,
- stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
- this,
- stdx::placeholders::_1));
- _sleptLastElection = true;
- return;
- }
- _sleptLastElection = false;
- break;
- case FreshnessChecker::FresherNodeFound:
- log() << "not electing self, we are not freshest";
- return;
- case FreshnessChecker::QuorumUnreachable:
- log() << "not electing self, we could not contact enough voting members";
- return;
- default:
- log() << "not electing self due to election abort message :"
- << static_cast<int>(abortReason);
- return;
- }
+ _electCmdRunner.reset(new ElectCmdRunner);
+ StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electCmdRunner->start(
+ &_replExecutor,
+ _rsConfig,
+ _selfIndex,
+ _topCoord->getMaybeUpHostAndPorts(),
+ stdx::bind(&ReplicationCoordinatorImpl::_onElectCmdRunnerComplete, this));
+ if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
+ }
+ fassert(18685, nextPhaseEvh.getStatus());
+ lossGuard.dismiss();
+}
+
+void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
+ LoseElectionGuard lossGuard(_topCoord.get(),
+ &_replExecutor,
+ &_freshnessChecker,
+ &_electCmdRunner,
+ &_electionFinishedEvent);
+
+ invariant(_freshnessChecker);
+ invariant(_electCmdRunner);
+ if (_electCmdRunner->isCanceled()) {
+ LOG(2) << "Election canceled during elect self phase";
+ return;
+ }
- log() << "replSet info electSelf";
- // Secure our vote for ourself first
- if (!_topCoord->voteForMyself(now)) {
- return;
- }
+ const int receivedVotes = _electCmdRunner->getReceivedVotes();
- _electCmdRunner.reset(new ElectCmdRunner);
- StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electCmdRunner->start(
- &_replExecutor,
- _rsConfig,
- _selfIndex,
- _topCoord->getMaybeUpHostAndPorts(),
- stdx::bind(&ReplicationCoordinatorImpl::_onElectCmdRunnerComplete, this));
- if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18685, nextPhaseEvh.getStatus());
- lossGuard.dismiss();
+ if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
+ log() << "replSet couldn't elect self, only received " << receivedVotes
+ << " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
+ // Suppress ourselves from standing for election again, giving other nodes a chance
+ // to win their elections.
+ const long long ms = _replExecutor.nextRandomInt64(1000) + 50;
+ const Date_t now(_replExecutor.now());
+ const Date_t nextCandidateTime = now + ms;
+ log() << "waiting until " << nextCandidateTime << " before standing for election again";
+ _topCoord->setElectionSleepUntil(nextCandidateTime);
+ _replExecutor.scheduleWorkAt(
+ nextCandidateTime,
+ stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
+ this,
+ stdx::placeholders::_1));
+ return;
}
- void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
- LoseElectionGuard lossGuard(_topCoord.get(),
- &_replExecutor,
- &_freshnessChecker,
- &_electCmdRunner,
- &_electionFinishedEvent);
-
- invariant(_freshnessChecker);
- invariant(_electCmdRunner);
- if (_electCmdRunner->isCanceled()) {
- LOG(2) << "Election canceled during elect self phase";
- return;
- }
+ if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
+ log() << "replSet config version changed during our election, ignoring result";
+ return;
+ }
- const int receivedVotes = _electCmdRunner->getReceivedVotes();
-
- if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
- log() << "replSet couldn't elect self, only received " << receivedVotes <<
- " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
- // Suppress ourselves from standing for election again, giving other nodes a chance
- // to win their elections.
- const long long ms = _replExecutor.nextRandomInt64(1000) + 50;
- const Date_t now(_replExecutor.now());
- const Date_t nextCandidateTime = now + ms;
- log() << "waiting until " << nextCandidateTime << " before standing for election again";
- _topCoord->setElectionSleepUntil(nextCandidateTime);
- _replExecutor.scheduleWorkAt(
- nextCandidateTime,
- stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
- this,
- stdx::placeholders::_1));
- return;
- }
+ log() << "replSet election succeeded, assuming primary role";
- if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
- log() << "replSet config version changed during our election, ignoring result";
- return;
- }
-
- log() << "replSet election succeeded, assuming primary role";
+ lossGuard.dismiss();
+ _freshnessChecker.reset(NULL);
+ _electCmdRunner.reset(NULL);
+ _performPostMemberStateUpdateAction(kActionWinElection);
+ _replExecutor.signalEvent(_electionFinishedEvent);
+}
- lossGuard.dismiss();
- _freshnessChecker.reset(NULL);
- _electCmdRunner.reset(NULL);
- _performPostMemberStateUpdateAction(kActionWinElection);
- _replExecutor.signalEvent(_electionFinishedEvent);
+void ReplicationCoordinatorImpl::_recoverFromElectionTie(
+ const ReplicationExecutor::CallbackData& cbData) {
+ if (!cbData.status.isOK()) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_recoverFromElectionTie(
- const ReplicationExecutor::CallbackData& cbData) {
- if (!cbData.status.isOK()) {
- return;
- }
- if (_topCoord->checkShouldStandForElection(_replExecutor.now(), getMyLastOptime())) {
- _startElectSelf();
- }
+ if (_topCoord->checkShouldStandForElection(_replExecutor.now(), getMyLastOptime())) {
+ _startElectSelf();
}
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
index ac36d2802ec..7f005cdb0e3 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
@@ -48,364 +48,369 @@ namespace mongo {
namespace repl {
namespace {
- typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
-
- class ReplCoordElectTest : public ReplCoordTest {
- protected:
- void simulateEnoughHeartbeatsForElectability();
- void simulateFreshEnoughForElectability();
- };
-
- void ReplCoordElectTest::simulateEnoughHeartbeatsForElectability() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
+
+class ReplCoordElectTest : public ReplCoordTest {
+protected:
+ void simulateEnoughHeartbeatsForElectability();
+ void simulateFreshEnoughForElectability();
+};
+
+void ReplCoordElectTest::simulateEnoughHeartbeatsForElectability() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+}
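+
+// Usage note (illustrative, mirroring the tests below): a test drives one mock
+// heartbeat round per remote member before checking electability, e.g.:
+//
+//     assertStartSuccess(configBson, HostAndPort("node1", 12345));
+//     ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+//     simulateEnoughHeartbeatsForElectability(); // peers answer as RS_SECONDARY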
- void ReplCoordElectTest::simulateFreshEnoughForElectability() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "fresher" << false <<
- "opTime" << Date_t(OpTime(0, 0).asDate()) <<
- "veto" << false)));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+void ReplCoordElectTest::simulateFreshEnoughForElectability() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime"
+ << Date_t(OpTime(0, 0).asDate()) << "veto" << false)));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- net->exitNetwork();
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+}
- TEST_F(ReplCoordElectTest, ElectTooSoon) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- // Election never starts because we haven't set a lastOpTimeApplied value yet, via a
- // heartbeat.
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- simulateEnoughHeartbeatsForElectability();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
- }
+TEST_F(ReplCoordElectTest, ElectTooSoon) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ // Election never starts because we haven't set a lastOpTimeApplied value yet, via a
+ // heartbeat.
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ simulateEnoughHeartbeatsForElectability();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("node has no applied oplog entries"));
+}
- /**
- * This test checks that an election can happen when only one node is up, and it has the
- * vote(s) to win.
- */
- TEST_F(ReplCoordElectTest, ElectTwoNodesWithOneZeroVoter) {
- OperationContextReplMock txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345" <<
- "votes" << 0 << "hidden" << true <<
- "priority" << 0))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT(getReplCoord()->getMemberState().secondary()) <<
- getReplCoord()->getMemberState().toString();
-
- getReplCoord()->setMyLastOptime(OpTime(10,0));
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
- net->runReadyNetworkOperations();
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2,
- net->now(),
- ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
- net->runReadyNetworkOperations();
- net->exitNetwork();
-
- ASSERT(getReplCoord()->getMemberState().primary()) <<
- getReplCoord()->getMemberState().toString();
- ASSERT(getReplCoord()->isWaitingForApplierToDrain());
-
- // Since we're still in drain mode, expect that we report ismaster: false, issecondary:true.
- IsMasterResponse imResponse;
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- getReplCoord()->signalDrainComplete(&txn);
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- }
+/**
+ * This test checks that an election can happen when only one node is up, and it has the
+ * vote(s) to win.
+ */
+TEST_F(ReplCoordElectTest, ElectTwoNodesWithOneZeroVoter) {
+ OperationContextReplMock txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "votes" << 0 << "hidden" << true << "priority" << 0))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT(getReplCoord()->getMemberState().secondary())
+ << getReplCoord()->getMemberState().toString();
+
+ getReplCoord()->setMyLastOptime(OpTime(10, 0));
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi, net->now(), ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
+ net->runReadyNetworkOperations();
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), ResponseStatus(ErrorCodes::OperationFailed, "timeout"));
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT(getReplCoord()->isWaitingForApplierToDrain());
+
+ // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
+ IsMasterResponse imResponse;
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ getReplCoord()->signalDrainComplete(&txn);
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+}
- TEST_F(ReplCoordElectTest, Elect1NodeSuccess) {
- OperationContextReplMock txn;
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
-
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT(getReplCoord()->getMemberState().primary()) <<
- getReplCoord()->getMemberState().toString();
- ASSERT(getReplCoord()->isWaitingForApplierToDrain());
-
- // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
- IsMasterResponse imResponse;
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- getReplCoord()->signalDrainComplete(&txn);
- getReplCoord()->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- }
+TEST_F(ReplCoordElectTest, Elect1NodeSuccess) {
+ OperationContextReplMock txn;
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT(getReplCoord()->getMemberState().primary())
+ << getReplCoord()->getMemberState().toString();
+ ASSERT(getReplCoord()->isWaitingForApplierToDrain());
+
+ // Since we're still in drain mode, expect that we report ismaster: false, issecondary: true.
+ IsMasterResponse imResponse;
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ getReplCoord()->signalDrainComplete(&txn);
+ getReplCoord()->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+}
- TEST_F(ReplCoordElectTest, ElectManyNodesSuccess) {
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- OperationContextNoop txn;
- getReplCoord()->setMyLastOptime(OpTime (100, 1));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- startCapturingLogMessages();
- simulateSuccessfulElection();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
- }
+TEST_F(ReplCoordElectTest, ElectManyNodesSuccess) {
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ OperationContextNoop txn;
+ getReplCoord()->setMyLastOptime(OpTime(100, 1));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ startCapturingLogMessages();
+ simulateSuccessfulElection();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("election succeeded"));
+}
- TEST_F(ReplCoordElectTest, ElectNotEnoughVotes) {
- // One node responds with -10000 votes, another doesn't respond, and we are not elected.
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(100, 1);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateFreshEnoughForElectability();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.target != HostAndPort("node2", 12345)) {
- net->blackHole(noi);
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "vote" << -10000 <<
- "round" << OID())));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectTest, ElectNotEnoughVotes) {
+ // One node responds with -10000 votes, another doesn't respond, and we are not elected.
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(100, 1);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateFreshEnoughForElectability();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.target != HostAndPort("node2", 12345)) {
+ net->blackHole(noi);
+ } else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "vote" << -10000 << "round" << OID())));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("replSet couldn't elect self, only received -9999 votes"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("replSet couldn't elect self, only received -9999 votes"));
+}
- TEST_F(ReplCoordElectTest, ElectWrongTypeForVote) {
- // One node responds with a bad 'vote' field, another doesn't respond, and we are not elected.
- startCapturingLogMessages();
- BSONObj configObj = BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345")
- << BSON("_id" << 2 << "host" << "node2:12345")
- << BSON("_id" << 3 << "host" << "node3:12345")
- ));
- assertStartSuccess(configObj, HostAndPort("node1", 12345));
- ReplicaSetConfig config = assertMakeRSConfig(configObj);
-
- OperationContextNoop txn;
- OpTime time1(100, 1);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- simulateEnoughHeartbeatsForElectability();
- simulateFreshEnoughForElectability();
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- while (net->hasReadyRequests()) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- if (request.target != HostAndPort("node2", 12345)) {
- net->blackHole(noi);
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
- net->blackHole(noi);
- }
- else {
- net->scheduleResponse(
- noi,
- net->now(),
- makeResponseStatus(BSON("ok" << 1 <<
- "vote" << "yea" <<
- "round" << OID())));
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectTest, ElectWrongTypeForVote) {
+ // One node responds with a bad 'vote' field, another doesn't respond, and we are not elected.
+ startCapturingLogMessages();
+ BSONObj configObj = BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")));
+ assertStartSuccess(configObj, HostAndPort("node1", 12345));
+ ReplicaSetConfig config = assertMakeRSConfig(configObj);
+
+ OperationContextNoop txn;
+ OpTime time1(100, 1);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ simulateEnoughHeartbeatsForElectability();
+ simulateFreshEnoughForElectability();
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ while (net->hasReadyRequests()) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ if (request.target != HostAndPort("node2", 12345)) {
+ net->blackHole(noi);
+ } else if (request.cmdObj.firstElement().fieldNameStringData() != "replSetElect") {
+ net->blackHole(noi);
+ } else {
+ net->scheduleResponse(noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "vote"
+ << "yea"
+ << "round" << OID())));
}
- net->exitNetwork();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("wrong type for vote argument in replSetElect command"));
+ net->runReadyNetworkOperations();
}
+ net->exitNetwork();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("wrong type for vote argument in replSetElect command"));
+}
- TEST_F(ReplCoordElectTest, ElectionDuringHBReconfigFails) {
- // Start up and receive a reconfig via heartbeat while simultaneously becoming a candidate;
- // the candidate state should be cleared.
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") <<
- BSON("_id" << 3 << "host" << "node3:12345") <<
- BSON("_id" << 4 << "host" << "node4:12345") <<
- BSON("_id" << 5 << "host" << "node5:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100,0));
-
- // set hbreconfig to hang while in progress
- getExternalState()->setStoreLocalConfigDocumentToHang(true);
-
- // hb reconfig
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ReplSetHeartbeatResponse hbResp2;
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345"))));
- hbResp2.setConfig(config);
- hbResp2.setVersion(3);
- hbResp2.setSetName("mySet");
- hbResp2.setState(MemberState::RS_SECONDARY);
- BSONObjBuilder respObj2;
- respObj2 << "ok" << 1;
- hbResp2.addToBSON(&respObj2);
- net->runUntil(net->now() + 10*1000); // run until we've sent a heartbeat request
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
-
- // prepare candidacy
- BSONObjBuilder result;
- ReplicationCoordinator::ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = config.toBSON();
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
- startCapturingLogMessages();
-
- // receive sufficient heartbeats to trigger an election
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- net->enterNetwork();
- for (int i = 0; i < 2; ++i) {
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
+TEST_F(ReplCoordElectTest, ElectionDuringHBReconfigFails) {
+ // Start up and receive a reconfig via heartbeat while simultaneously becoming a candidate;
+ // the candidate state should be cleared.
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345") << BSON("_id" << 3 << "host"
+ << "node3:12345")
+ << BSON("_id" << 4 << "host"
+ << "node4:12345") << BSON("_id" << 5 << "host"
+ << "node5:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+
+ // set hbreconfig to hang while in progress
+ getExternalState()->setStoreLocalConfigDocumentToHang(true);
+
+ // hb reconfig
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ReplSetHeartbeatResponse hbResp2;
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))));
+ hbResp2.setConfig(config);
+ hbResp2.setVersion(3);
+ hbResp2.setSetName("mySet");
+ hbResp2.setState(MemberState::RS_SECONDARY);
+ BSONObjBuilder respObj2;
+ respObj2 << "ok" << 1;
+ hbResp2.addToBSON(&respObj2);
+ net->runUntil(net->now() + 10 * 1000); // run until we've sent a heartbeat request
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+
+ // prepare candidacy
+ BSONObjBuilder result;
+ ReplicationCoordinator::ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = config.toBSON();
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(2));
+ startCapturingLogMessages();
+
+ // receive sufficient heartbeats to trigger an election
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ net->enterNetwork();
+ for (int i = 0; i < 2; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
-
- stopCapturingLogMessages();
- // ensure node does not stand for election
- ASSERT_EQUALS(1,
- countLogLinesContaining("Not standing for election; processing "
- "a configuration change"));
- getExternalState()->setStoreLocalConfigDocumentToHang(false);
+ net->runReadyNetworkOperations();
}
+ stopCapturingLogMessages();
+ // ensure node does not stand for election
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "Not standing for election; processing "
+ "a configuration change"));
+ getExternalState()->setStoreLocalConfigDocumentToHang(false);
+}
}
}
}
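
Every election test above drives the coordinator through the same NetworkInterfaceMock loop: enter the mock network, drain the ready requests, answer or black-hole each one, and run the resulting callbacks. A minimal sketch of that pattern follows; it uses only fixture helpers visible in this diff (getNet(), makeResponseStatus()), and the affirmative vote in the response body is illustrative, not taken from any specific test.

    // Sketch: answer every pending replSetElect request with a granted vote and
    // black-hole everything else. Assumes the ReplCoordElectTest fixture above.
    NetworkInterfaceMock* net = getNet();
    net->enterNetwork();
    while (net->hasReadyRequests()) {
        const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
        if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") {
            // Grant one vote; "vote" and "round" mirror the fields the tests above inspect.
            net->scheduleResponse(
                noi,
                net->now(),
                makeResponseStatus(BSON("ok" << 1 << "vote" << 1 << "round" << OID())));
        } else {
            net->blackHole(noi);  // ignore requests the test did not anticipate
        }
        net->runReadyNetworkOperations();
    }
    net->exitNetwork();
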
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index f80d2a5c1a7..cd247f9a864 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -55,146 +55,125 @@ namespace mongo {
namespace repl {
namespace {
- typedef StatusWith<ReplicationExecutor::CallbackHandle> CBHStatus;
- typedef ReplicationExecutor::RemoteCommandRequest CmdRequest;
- typedef ReplicationExecutor::CallbackHandle CBHandle;
+typedef StatusWith<ReplicationExecutor::CallbackHandle> CBHStatus;
+typedef ReplicationExecutor::RemoteCommandRequest CmdRequest;
+typedef ReplicationExecutor::CallbackHandle CBHandle;
-} //namespace
-
- void ReplicationCoordinatorImpl::_doMemberHeartbeat(ReplicationExecutor::CallbackData cbData,
- const HostAndPort& target,
- int targetIndex) {
-
- _untrackHeartbeatHandle(cbData.myHandle);
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
+} // namespace
- const Date_t now = _replExecutor.now();
- const std::pair<ReplSetHeartbeatArgs, Milliseconds> hbRequest =
- _topCoord->prepareHeartbeatRequest(
- now,
- _settings.ourSetName(),
- target);
-
- const CmdRequest request(target, "admin", hbRequest.first.toBSON(), hbRequest.second);
- const ReplicationExecutor::RemoteCommandCallbackFn callback = stdx::bind(
- &ReplicationCoordinatorImpl::_handleHeartbeatResponse,
- this,
- stdx::placeholders::_1,
- targetIndex);
-
- _trackHeartbeatHandle(_replExecutor.scheduleRemoteCommand(request, callback));
+void ReplicationCoordinatorImpl::_doMemberHeartbeat(ReplicationExecutor::CallbackData cbData,
+ const HostAndPort& target,
+ int targetIndex) {
+ _untrackHeartbeatHandle(cbData.myHandle);
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ReplicationCoordinatorImpl::_scheduleHeartbeatToTarget(
- const HostAndPort& target,
- int targetIndex,
- Date_t when) {
-
- LOG(2) << "Scheduling heartbeat to " << target << " at " << dateToISOStringUTC(when);
- _trackHeartbeatHandle(
- _replExecutor.scheduleWorkAt(
- when,
- stdx::bind(&ReplicationCoordinatorImpl::_doMemberHeartbeat,
- this,
- stdx::placeholders::_1,
- target,
- targetIndex)));
+ const Date_t now = _replExecutor.now();
+ const std::pair<ReplSetHeartbeatArgs, Milliseconds> hbRequest =
+ _topCoord->prepareHeartbeatRequest(now, _settings.ourSetName(), target);
+
+ const CmdRequest request(target, "admin", hbRequest.first.toBSON(), hbRequest.second);
+ const ReplicationExecutor::RemoteCommandCallbackFn callback =
+ stdx::bind(&ReplicationCoordinatorImpl::_handleHeartbeatResponse,
+ this,
+ stdx::placeholders::_1,
+ targetIndex);
+
+ _trackHeartbeatHandle(_replExecutor.scheduleRemoteCommand(request, callback));
+}
+
+void ReplicationCoordinatorImpl::_scheduleHeartbeatToTarget(const HostAndPort& target,
+ int targetIndex,
+ Date_t when) {
+ LOG(2) << "Scheduling heartbeat to " << target << " at " << dateToISOStringUTC(when);
+ _trackHeartbeatHandle(
+ _replExecutor.scheduleWorkAt(when,
+ stdx::bind(&ReplicationCoordinatorImpl::_doMemberHeartbeat,
+ this,
+ stdx::placeholders::_1,
+ target,
+ targetIndex)));
+}
+
+void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
+ const ReplicationExecutor::RemoteCommandCallbackData& cbData, int targetIndex) {
+ // remove handle from queued heartbeats
+ _untrackHeartbeatHandle(cbData.myHandle);
+
+ // Parse and validate the response. At the end of this step, if responseStatus is OK then
+ // hbResponse is valid.
+ Status responseStatus = cbData.response.getStatus();
+ if (responseStatus == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
- const ReplicationExecutor::RemoteCommandCallbackData& cbData, int targetIndex) {
-
- // remove handle from queued heartbeats
- _untrackHeartbeatHandle(cbData.myHandle);
-
- // Parse and validate the response. At the end of this step, if responseStatus is OK then
- // hbResponse is valid.
- Status responseStatus = cbData.response.getStatus();
- if (responseStatus == ErrorCodes::CallbackCanceled) {
- return;
+ const HostAndPort& target = cbData.request.target;
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj resp;
+ if (responseStatus.isOK()) {
+ resp = cbData.response.getValue().data;
+ responseStatus = hbResponse.initialize(resp);
+ }
+ const bool isUnauthorized = (responseStatus.code() == ErrorCodes::Unauthorized) ||
+ (responseStatus.code() == ErrorCodes::AuthenticationFailed);
+ const Date_t now = _replExecutor.now();
+ const OpTime lastApplied = getMyLastOptime(); // Locks and unlocks _mutex.
+ Milliseconds networkTime(0);
+ StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse);
+
+ if (responseStatus.isOK()) {
+ networkTime = cbData.response.getValue().elapsedMillis;
+ } else {
+ log() << "Error in heartbeat request to " << target << "; " << responseStatus;
+ if (!resp.isEmpty()) {
+ LOG(3) << "heartbeat response: " << resp;
}
- const HostAndPort& target = cbData.request.target;
- ReplSetHeartbeatResponse hbResponse;
- BSONObj resp;
- if (responseStatus.isOK()) {
- resp = cbData.response.getValue().data;
- responseStatus = hbResponse.initialize(resp);
- }
- const bool isUnauthorized = (responseStatus.code() == ErrorCodes::Unauthorized) ||
- (responseStatus.code() == ErrorCodes::AuthenticationFailed);
- const Date_t now = _replExecutor.now();
- const OpTime lastApplied = getMyLastOptime(); // Locks and unlocks _mutex.
- Milliseconds networkTime(0);
- StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse);
-
- if (responseStatus.isOK()) {
+ if (isUnauthorized) {
networkTime = cbData.response.getValue().elapsedMillis;
}
- else {
- log() << "Error in heartbeat request to " << target << "; " << responseStatus;
- if (!resp.isEmpty()) {
- LOG(3) << "heartbeat response: " << resp;
- }
+ hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus);
+ }
- if (isUnauthorized) {
- networkTime = cbData.response.getValue().elapsedMillis;
- }
- hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus);
- }
+ HeartbeatResponseAction action = _topCoord->processHeartbeatResponse(
+ now, networkTime, target, hbStatusResponse, lastApplied);
- HeartbeatResponseAction action =
- _topCoord->processHeartbeatResponse(
- now,
- networkTime,
- target,
- hbStatusResponse,
- lastApplied);
-
- if (action.getAction() == HeartbeatResponseAction::NoAction &&
- hbStatusResponse.isOK() &&
- hbStatusResponse.getValue().hasOpTime() &&
- targetIndex >= 0 &&
- hbStatusResponse.getValue().hasState() &&
- hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (hbStatusResponse.getValue().getVersion() == _rsConfig.getConfigVersion()) {
- _updateOpTimeFromHeartbeat_inlock(targetIndex,
- hbStatusResponse.getValue().getOpTime());
- }
+ if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() &&
+ hbStatusResponse.getValue().hasOpTime() && targetIndex >= 0 &&
+ hbStatusResponse.getValue().hasState() &&
+ hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (hbStatusResponse.getValue().getVersion() == _rsConfig.getConfigVersion()) {
+ _updateOpTimeFromHeartbeat_inlock(targetIndex, hbStatusResponse.getValue().getOpTime());
}
+ }
- _signalStepDownWaiters();
+ _signalStepDownWaiters();
- _scheduleHeartbeatToTarget(
- target,
- targetIndex,
- std::max(now, action.getNextHeartbeatStartDate()));
+ _scheduleHeartbeatToTarget(
+ target, targetIndex, std::max(now, action.getNextHeartbeatStartDate()));
- _handleHeartbeatResponseAction(action, hbStatusResponse);
- }
+ _handleHeartbeatResponseAction(action, hbStatusResponse);
+}
- void ReplicationCoordinatorImpl::_updateOpTimeFromHeartbeat_inlock(int targetIndex,
- OpTime optime) {
- invariant(_selfIndex >= 0);
- invariant(targetIndex >= 0);
-
- SlaveInfo& slaveInfo = _slaveInfo[targetIndex];
- if (optime > slaveInfo.opTime && slaveInfo.rid.isSet()) {
- // TODO(spencer): The second part of the above if-statement can be removed after 3.0
- // but for now, to maintain compatibility with 2.6, we can't record optimes for any
- // nodes we haven't heard from via replSetUpdatePosition yet to associate an RID.
- _updateSlaveInfoOptime_inlock(&slaveInfo, optime);
- }
- }
+void ReplicationCoordinatorImpl::_updateOpTimeFromHeartbeat_inlock(int targetIndex, OpTime optime) {
+ invariant(_selfIndex >= 0);
+ invariant(targetIndex >= 0);
- void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
- const HeartbeatResponseAction& action,
- const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {
+ SlaveInfo& slaveInfo = _slaveInfo[targetIndex];
+ if (optime > slaveInfo.opTime && slaveInfo.rid.isSet()) {
+ // TODO(spencer): The second part of the above if-statement can be removed after 3.0
+ // but for now, to maintain compatibility with 2.6, we can't record optimes for any
+ // nodes we haven't heard from via replSetUpdatePosition yet to associate an RID.
+ _updateSlaveInfoOptime_inlock(&slaveInfo, optime);
+ }
+}
- switch (action.getAction()) {
+void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
+ const HeartbeatResponseAction& action,
+ const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {
+ switch (action.getAction()) {
case HeartbeatResponseAction::NoAction:
// Update the cached member state if different than the current topology member state
if (_memberState != _topCoord->getMemberState()) {
@@ -219,331 +198,309 @@ namespace {
case HeartbeatResponseAction::StepDownRemotePrimary: {
invariant(action.getPrimaryConfigIndex() != _selfIndex);
_requestRemotePrimaryStepdown(
- _rsConfig.getMemberAt(action.getPrimaryConfigIndex()).getHostAndPort());
+ _rsConfig.getMemberAt(action.getPrimaryConfigIndex()).getHostAndPort());
break;
}
default:
severe() << "Illegal heartbeat response action code " << int(action.getAction());
invariant(false);
- }
}
+}
namespace {
- /**
- * This callback is purely for logging and has no effect on any other operations
- */
- void remoteStepdownCallback(const ReplicationExecutor::RemoteCommandCallbackData& cbData) {
-
- const Status status = cbData.response.getStatus();
- if (status == ErrorCodes::CallbackCanceled) {
- return;
- }
+/**
+ * This callback is purely for logging and has no effect on any other operations
+ */
+void remoteStepdownCallback(const ReplicationExecutor::RemoteCommandCallbackData& cbData) {
+ const Status status = cbData.response.getStatus();
+ if (status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
- if (status.isOK()) {
- LOG(1) << "replset: stepdown of primary(" << cbData.request.target
- << ") succeeded with response -- "
- << cbData.response.getValue().data;
- }
- else {
- warning() << "replset: stepdown of primary(" << cbData.request.target
- << ") failed due to " << cbData.response.getStatus();
- }
+ if (status.isOK()) {
+ LOG(1) << "replset: stepdown of primary(" << cbData.request.target
+ << ") succeeded with response -- " << cbData.response.getValue().data;
+ } else {
+ warning() << "replset: stepdown of primary(" << cbData.request.target << ") failed due to "
+ << cbData.response.getStatus();
}
+}
} // namespace
- void ReplicationCoordinatorImpl::_requestRemotePrimaryStepdown(const HostAndPort& target) {
- CmdRequest request(target, "admin", BSON("replSetStepDown" << 1));
+void ReplicationCoordinatorImpl::_requestRemotePrimaryStepdown(const HostAndPort& target) {
+ CmdRequest request(target, "admin", BSON("replSetStepDown" << 1));
- log() << "Requesting " << target << " step down from primary";
- CBHStatus cbh = _replExecutor.scheduleRemoteCommand(
- request, remoteStepdownCallback);
- if (cbh.getStatus() != ErrorCodes::ShutdownInProgress) {
- fassert(18808, cbh.getStatus());
- }
+ log() << "Requesting " << target << " step down from primary";
+ CBHStatus cbh = _replExecutor.scheduleRemoteCommand(request, remoteStepdownCallback);
+ if (cbh.getStatus() != ErrorCodes::ShutdownInProgress) {
+ fassert(18808, cbh.getStatus());
}
-
- void ReplicationCoordinatorImpl::_heartbeatStepDownStart() {
- log() << "Stepping down from primary in response to heartbeat";
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatStepDownFinish,
- this,
- stdx::placeholders::_1));
+}
+
+void ReplicationCoordinatorImpl::_heartbeatStepDownStart() {
+ log() << "Stepping down from primary in response to heartbeat";
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(stdx::bind(
+ &ReplicationCoordinatorImpl::_heartbeatStepDownFinish, this, stdx::placeholders::_1));
+}
+
+void ReplicationCoordinatorImpl::_heartbeatStepDownFinish(
+ const ReplicationExecutor::CallbackData& cbData) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_heartbeatStepDownFinish(
- const ReplicationExecutor::CallbackData& cbData) {
-
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- invariant(cbData.txn);
- // TODO Add invariant that we've got global shared or global exclusive lock, when supported
- // by lock manager.
- boost::unique_lock<boost::mutex> lk(_mutex);
- _topCoord->stepDownIfPending();
- const PostMemberStateUpdateAction action =
- _updateMemberStateFromTopologyCoordinator_inlock();
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ invariant(cbData.txn);
+ // TODO Add invariant that we've got global shared or global exclusive lock, when supported
+ // by lock manager.
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ _topCoord->stepDownIfPending();
+ const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock();
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (_inShutdown) {
+ return;
}
- void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (_inShutdown) {
- return;
- }
-
- switch (_rsConfigState) {
+ switch (_rsConfigState) {
case kConfigStartingUp:
- LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
- " because still attempting to load local configuration information";
+ LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion()
+ << " because still attempting to load local configuration information";
return;
case kConfigUninitialized:
case kConfigSteady:
- LOG(1) << "Received new config via heartbeat with version " <<
- newConfig.getConfigVersion();
+ LOG(1) << "Received new config via heartbeat with version "
+ << newConfig.getConfigVersion();
break;
case kConfigInitiating:
case kConfigReconfiguring:
case kConfigHBReconfiguring:
- LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
- " because already in the midst of a configuration process";
+ LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion()
+ << " because already in the midst of a configuration process";
return;
default:
- severe() << "Reconfiguration request occurred while _rsConfigState == " <<
- int(_rsConfigState) << "; aborting.";
+ severe() << "Reconfiguration request occurred while _rsConfigState == "
+ << int(_rsConfigState) << "; aborting.";
fassertFailed(18807);
- }
- _setConfigState_inlock(kConfigHBReconfiguring);
- invariant(!_rsConfig.isInitialized() ||
- _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
- if (_freshnessChecker) {
- _freshnessChecker->cancel(&_replExecutor);
- if (_electCmdRunner) {
- _electCmdRunner->cancel(&_replExecutor);
- }
- _replExecutor.onEvent(
- _electionFinishedEvent,
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
- this,
- stdx::placeholders::_1,
- newConfig));
- return;
- }
- invariant(!_heartbeatReconfigThread.get());
- _heartbeatReconfigThread.reset(
- new boost::thread(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigStore,
- this,
- newConfig)));;
}
-
- void ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
- fassert(18911, cbData.status);
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (_inShutdown) {
- return;
+ _setConfigState_inlock(kConfigHBReconfiguring);
+ invariant(!_rsConfig.isInitialized() ||
+ _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
+ if (_freshnessChecker) {
+ _freshnessChecker->cancel(&_replExecutor);
+ if (_electCmdRunner) {
+ _electCmdRunner->cancel(&_replExecutor);
}
-
- invariant(!_heartbeatReconfigThread.get());
- _heartbeatReconfigThread.reset(
- new boost::thread(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigStore,
- this,
- newConfig)));
+ _replExecutor.onEvent(
+ _electionFinishedEvent,
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
+ this,
+ stdx::placeholders::_1,
+ newConfig));
+ return;
+ }
+ invariant(!_heartbeatReconfigThread.get());
+ _heartbeatReconfigThread.reset(new boost::thread(
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigStore, this, newConfig)));
+}
+
+void ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled(
+ const ReplicationExecutor::CallbackData& cbData, const ReplicaSetConfig& newConfig) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+ fassert(18911, cbData.status);
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (_inShutdown) {
+ return;
}
- void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
- class StoreThreadGuard {
- public:
- StoreThreadGuard(boost::unique_lock<boost::mutex>* lk,
- boost::scoped_ptr<boost::thread>* thread,
- bool* inShutdown) :
- _lk(lk),
- _thread(thread),
- _inShutdown(inShutdown) {}
- ~StoreThreadGuard() {
- if (!_lk->owns_lock()) {
- _lk->lock();
- }
- if (*_inShutdown) {
- return;
- }
- _thread->get()->detach();
- _thread->reset(NULL);
+ invariant(!_heartbeatReconfigThread.get());
+ _heartbeatReconfigThread.reset(new boost::thread(
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigStore, this, newConfig)));
+}
+
+void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
+ class StoreThreadGuard {
+ public:
+ StoreThreadGuard(boost::unique_lock<boost::mutex>* lk,
+ boost::scoped_ptr<boost::thread>* thread,
+ bool* inShutdown)
+ : _lk(lk), _thread(thread), _inShutdown(inShutdown) {}
+ ~StoreThreadGuard() {
+ if (!_lk->owns_lock()) {
+ _lk->lock();
}
-
- private:
- boost::unique_lock<boost::mutex>* const _lk;
- boost::scoped_ptr<boost::thread>* const _thread;
- bool* const _inShutdown;
- };
-
- boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());
- StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown);
-
- const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
- _externalState.get(),
- newConfig);
-
- if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
- lk.lock();
- // If this node is absent in newConfig, and this node was not previously initialized,
- // return to kConfigUninitialized immediately, rather than storing the config and
- // transitioning into the RS_REMOVED state. See SERVER-15740.
- if (!_rsConfig.isInitialized()) {
- invariant(_rsConfigState == kConfigHBReconfiguring);
- LOG(1) << "Ignoring new configuration in heartbeat response because we are "
- "uninitialized and not a member of the new configuration";
- _setConfigState_inlock(kConfigUninitialized);
+ if (*_inShutdown) {
return;
}
- lk.unlock();
+ _thread->get()->detach();
+ _thread->reset(NULL);
}
- if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
- warning() << "Not persisting new configuration in heartbeat response to disk because "
- "it is invalid: "<< myIndex.getStatus();
- }
- else {
- boost::scoped_ptr<OperationContext> txn(
- _externalState->createOperationContext("WriteReplSetConfig"));
- Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());
-
- lk.lock();
- if (!status.isOK()) {
- error() << "Ignoring new configuration in heartbeat response because we failed to"
- " write it to stable storage; " << status;
- invariant(_rsConfigState == kConfigHBReconfiguring);
- if (_rsConfig.isInitialized()) {
- _setConfigState_inlock(kConfigSteady);
- }
- else {
- _setConfigState_inlock(kConfigUninitialized);
- }
- return;
- }
-
- lk.unlock();
+ private:
+ boost::unique_lock<boost::mutex>* const _lk;
+ boost::scoped_ptr<boost::thread>* const _thread;
+ bool* const _inShutdown;
+ };
- _externalState->startThreads();
- }
+ boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());
+ StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown);
- const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex));
+ const StatusWith<int> myIndex =
+ validateConfigForHeartbeatReconfig(_externalState.get(), newConfig);
- // Make sure that the reconfigFinishFn doesn't finish until we've reset
- // _heartbeatReconfigThread.
+ if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
lk.lock();
- if (_memberState.primary()) {
- // If the primary is receiving a heartbeat reconfig, that strongly suggests
- // that there has been a force reconfiguration. In any event, it might lead
- // to this node stepping down as primary, so we'd better do it with the global
- // lock.
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
- }
- else {
- _replExecutor.scheduleWork(reconfigFinishFn);
+ // If this node is absent in newConfig, and this node was not previously initialized,
+ // return to kConfigUninitialized immediately, rather than storing the config and
+ // transitioning into the RS_REMOVED state. See SERVER-15740.
+ if (!_rsConfig.isInitialized()) {
+ invariant(_rsConfigState == kConfigHBReconfiguring);
+ LOG(1) << "Ignoring new configuration in heartbeat response because we are "
+ "uninitialized and not a member of the new configuration";
+ _setConfigState_inlock(kConfigUninitialized);
+ return;
}
+ lk.unlock();
}
- void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicaSetConfig& newConfig,
- StatusWith<int> myIndex) {
- if (cbData.status == ErrorCodes::CallbackCanceled) {
- return;
- }
+ if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
+ warning() << "Not persisting new configuration in heartbeat response to disk because "
+ "it is invalid: " << myIndex.getStatus();
+ } else {
+ boost::scoped_ptr<OperationContext> txn(
+ _externalState->createOperationContext("WriteReplSetConfig"));
+ Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_rsConfigState == kConfigHBReconfiguring);
- invariant(!_rsConfig.isInitialized() ||
- _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
-
- if (_getMemberState_inlock().primary() && !cbData.txn) {
- // Not having an OperationContext in the CallbackData means we definitely aren't holding
- // the global lock. Since we're primary and this reconfig could cause us to stepdown,
- // reschedule this work with the global exclusive lock so the stepdown is safe.
- // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that
- // we are indeed holding the global lock.
- _replExecutor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
- this,
- stdx::placeholders::_1,
- newConfig,
- myIndex));
+ lk.lock();
+ if (!status.isOK()) {
+ error() << "Ignoring new configuration in heartbeat response because we failed to"
+ " write it to stable storage; " << status;
+ invariant(_rsConfigState == kConfigHBReconfiguring);
+ if (_rsConfig.isInitialized()) {
+ _setConfigState_inlock(kConfigSteady);
+ } else {
+ _setConfigState_inlock(kConfigUninitialized);
+ }
return;
}
- if (!myIndex.isOK()) {
- switch (myIndex.getStatus().code()) {
+ lk.unlock();
+
+ _externalState->startThreads();
+ }
+
+ const stdx::function<void(const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex));
+
+ // Make sure that the reconfigFinishFn doesn't finish until we've reset
+ // _heartbeatReconfigThread.
+ lk.lock();
+ if (_memberState.primary()) {
+ // If the primary is receiving a heartbeat reconfig, that strongly suggests
+ // that there has been a force reconfiguration. In any event, it might lead
+ // to this node stepping down as primary, so we'd better do it with the global
+ // lock.
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
+ } else {
+ _replExecutor.scheduleWork(reconfigFinishFn);
+ }
+}
+
+void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
+ const ReplicationExecutor::CallbackData& cbData,
+ const ReplicaSetConfig& newConfig,
+ StatusWith<int> myIndex) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ return;
+ }
+
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_rsConfigState == kConfigHBReconfiguring);
+ invariant(!_rsConfig.isInitialized() ||
+ _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
+
+ if (_getMemberState_inlock().primary() && !cbData.txn) {
+ // Not having an OperationContext in the CallbackData means we definitely aren't holding
+ // the global lock. Since we're primary and this reconfig could cause us to stepdown,
+ // reschedule this work with the global exclusive lock so the stepdown is safe.
+ // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that
+ // we are indeed holding the global lock.
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
+ this,
+ stdx::placeholders::_1,
+ newConfig,
+ myIndex));
+ return;
+ }
+
+ if (!myIndex.isOK()) {
+ switch (myIndex.getStatus().code()) {
case ErrorCodes::NodeNotFound:
- log() << "Cannot find self in new replica set configuration; I must be removed; " <<
- myIndex.getStatus();
+ log() << "Cannot find self in new replica set configuration; I must be removed; "
+ << myIndex.getStatus();
break;
case ErrorCodes::DuplicateKey:
error() << "Several entries in new config represent this node; "
- "Removing self until an acceptable configuration arrives; " <<
- myIndex.getStatus();
+ "Removing self until an acceptable configuration arrives; "
+ << myIndex.getStatus();
break;
default:
error() << "Could not validate configuration received from remote node; "
- "Removing self until an acceptable configuration arrives; " <<
- myIndex.getStatus();
+ "Removing self until an acceptable configuration arrives; "
+ << myIndex.getStatus();
break;
- }
- myIndex = StatusWith<int>(-1);
}
- const PostMemberStateUpdateAction action =
- _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
- lk.unlock();
- _performPostMemberStateUpdateAction(action);
+ myIndex = StatusWith<int>(-1);
}
-
- void ReplicationCoordinatorImpl::_trackHeartbeatHandle(const StatusWith<CBHandle>& handle) {
- if (handle.getStatus() == ErrorCodes::ShutdownInProgress) {
- return;
- }
- fassert(18912, handle.getStatus());
- _heartbeatHandles.push_back(handle.getValue());
+ const PostMemberStateUpdateAction action =
+ _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
+ lk.unlock();
+ _performPostMemberStateUpdateAction(action);
+}
+
+void ReplicationCoordinatorImpl::_trackHeartbeatHandle(const StatusWith<CBHandle>& handle) {
+ if (handle.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return;
}
-
- void ReplicationCoordinatorImpl::_untrackHeartbeatHandle(const CBHandle& handle) {
- const HeartbeatHandles::iterator newEnd = std::remove(
- _heartbeatHandles.begin(),
- _heartbeatHandles.end(),
- handle);
- invariant(newEnd != _heartbeatHandles.end());
- _heartbeatHandles.erase(newEnd, _heartbeatHandles.end());
- }
-
- void ReplicationCoordinatorImpl::_cancelHeartbeats() {
- std::for_each(_heartbeatHandles.begin(),
- _heartbeatHandles.end(),
- stdx::bind(&ReplicationExecutor::cancel,
- &_replExecutor,
- stdx::placeholders::_1));
- // Heartbeat callbacks will remove themselves from _heartbeatHandles when they execute with
- // CallbackCanceled status, so it's better to leave the handles in the list, for now.
- }
-
- void ReplicationCoordinatorImpl::_startHeartbeats() {
- const Date_t now = _replExecutor.now();
- _seedList.clear();
- for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
- if (i == _selfIndex) {
- continue;
- }
- _scheduleHeartbeatToTarget(_rsConfig.getMemberAt(i).getHostAndPort(), i, now);
+ fassert(18912, handle.getStatus());
+ _heartbeatHandles.push_back(handle.getValue());
+}
+
+void ReplicationCoordinatorImpl::_untrackHeartbeatHandle(const CBHandle& handle) {
+ const HeartbeatHandles::iterator newEnd =
+ std::remove(_heartbeatHandles.begin(), _heartbeatHandles.end(), handle);
+ invariant(newEnd != _heartbeatHandles.end());
+ _heartbeatHandles.erase(newEnd, _heartbeatHandles.end());
+}
+
+void ReplicationCoordinatorImpl::_cancelHeartbeats() {
+ std::for_each(_heartbeatHandles.begin(),
+ _heartbeatHandles.end(),
+ stdx::bind(&ReplicationExecutor::cancel, &_replExecutor, stdx::placeholders::_1));
+ // Heartbeat callbacks will remove themselves from _heartbeatHandles when they execute with
+ // CallbackCanceled status, so it's better to leave the handles in the list, for now.
+}
+
+void ReplicationCoordinatorImpl::_startHeartbeats() {
+ const Date_t now = _replExecutor.now();
+ _seedList.clear();
+ for (int i = 0; i < _rsConfig.getNumMembers(); ++i) {
+ if (i == _selfIndex) {
+ continue;
}
+ _scheduleHeartbeatToTarget(_rsConfig.getMemberAt(i).getHostAndPort(), i, now);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
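
The heartbeat code above forms a self-rescheduling loop: _scheduleHeartbeatToTarget() arms a timer for _doMemberHeartbeat(), which sends the command with _handleHeartbeatResponse() registered as its callback, and on every non-canceled response that handler calls _scheduleHeartbeatToTarget() again. The standalone sketch below shows the same pattern with a toy scheduler standing in for ReplicationExecutor; every name in it is invented for illustration and is not MongoDB's API.

    #include <cstdio>
    #include <functional>
    #include <map>

    int main() {
        // time -> task; stands in for ReplicationExecutor's work queue
        std::multimap<int, std::function<void(int)>> schedule;

        // Self-rescheduling heartbeat: handling a "response" always re-arms the
        // timer, mirroring _handleHeartbeatResponse() -> _scheduleHeartbeatToTarget().
        std::function<void(int, int)> scheduleHeartbeat = [&](int target, int when) {
            schedule.emplace(when, [&, target](int now) {
                std::printf("t=%d heartbeat -> node%d\n", now, target);
                scheduleHeartbeat(target, now + 2);  // next probe; handler picks the cadence
            });
        };

        scheduleHeartbeat(1, 0);
        scheduleHeartbeat(2, 0);

        // Drain a few scheduler ticks; the real loop runs until _cancelHeartbeats().
        for (int processed = 0; processed < 6 && !schedule.empty(); ++processed) {
            auto it = schedule.begin();
            const int now = it->first;
            const auto task = it->second;
            schedule.erase(it);
            task(now);
        }
        return 0;
    }
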
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp
index 9008dbb9854..a2edb95ad6b 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_test.cpp
@@ -47,200 +47,208 @@ namespace mongo {
namespace repl {
namespace {
- class ReplCoordHBTest : public ReplCoordTest {
- protected:
- void assertMemberState(MemberState expected, std::string msg = "");
- ReplSetHeartbeatResponse receiveHeartbeatFrom(
- const ReplicaSetConfig& rsConfig,
- int sourceId,
- const HostAndPort& source);
- };
-
- void ReplCoordHBTest::assertMemberState(const MemberState expected, std::string msg) {
- const MemberState actual = getReplCoord()->getMemberState();
- ASSERT(expected == actual) << "Expected coordinator to report state " <<
- expected.toString() << " but found " << actual.toString() << " - " << msg;
+class ReplCoordHBTest : public ReplCoordTest {
+protected:
+ void assertMemberState(MemberState expected, std::string msg = "");
+ ReplSetHeartbeatResponse receiveHeartbeatFrom(const ReplicaSetConfig& rsConfig,
+ int sourceId,
+ const HostAndPort& source);
+};
+
+void ReplCoordHBTest::assertMemberState(const MemberState expected, std::string msg) {
+ const MemberState actual = getReplCoord()->getMemberState();
+ ASSERT(expected == actual) << "Expected coordinator to report state " << expected.toString()
+ << " but found " << actual.toString() << " - " << msg;
+}
+
+ReplSetHeartbeatResponse ReplCoordHBTest::receiveHeartbeatFrom(const ReplicaSetConfig& rsConfig,
+ int sourceId,
+ const HostAndPort& source) {
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(rsConfig.getConfigVersion());
+ hbArgs.setSetName(rsConfig.getReplSetName());
+ hbArgs.setSenderHost(source);
+ hbArgs.setSenderId(sourceId);
+ ASSERT(hbArgs.isInitialized());
+
+ ReplSetHeartbeatResponse response;
+ ASSERT_OK(getReplCoord()->processHeartbeat(hbArgs, &response));
+ return response;
+}
+
+TEST_F(ReplCoordHBTest, JoinExistingReplSet) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ init("mySet");
+ addSelf(HostAndPort("h2", 1));
+ const Date_t startDate = getNet()->now();
+ start();
+ enterNetwork();
+ assertMemberState(MemberState::RS_STARTUP);
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_FALSE(net->hasReadyRequests());
+ exitNetwork();
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
+
+ enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.noteReplSet();
+ hbResp.setVersion(rsConfig.getConfigVersion());
+ hbResp.setConfig(rsConfig);
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder);
+ net->scheduleResponse(noi, startDate + 200, makeResponseStatus(responseBuilder.obj()));
+ assertRunUntil(startDate + 200);
+
+ // Because the new config is stored using an out-of-band thread, we need to perform some
+ // extra synchronization to let the executor finish the heartbeat reconfig. We know that
+ // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
+ // other network operations get scheduled during or before the reconfig, though this may
+ // cease to be true in the future.
+ noi = net->getNextReadyRequest();
+
+ assertMemberState(MemberState::RS_STARTUP2);
+ OperationContextNoop txn;
+ ReplicaSetConfig storedConfig;
+ ASSERT_OK(storedConfig.initialize(
+ unittest::assertGet(getExternalState()->loadLocalConfigDocument(&txn))));
+ ASSERT_OK(storedConfig.validate());
+ ASSERT_EQUALS(3, storedConfig.getConfigVersion());
+ ASSERT_EQUALS(3, storedConfig.getNumMembers());
+ exitNetwork();
+}
+
+TEST_F(ReplCoordHBTest, DoNotJoinReplSetIfNotAMember) {
+ // Tests that a node in RS_STARTUP will not transition to RS_REMOVED if it receives a
+ // configuration that does not contain it.
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1"))));
+ init("mySet");
+ addSelf(HostAndPort("h4", 1));
+ const Date_t startDate = getNet()->now();
+ start();
+ enterNetwork();
+ assertMemberState(MemberState::RS_STARTUP, "1");
+ NetworkInterfaceMock* net = getNet();
+ ASSERT_FALSE(net->hasReadyRequests());
+ exitNetwork();
+ receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
+
+ enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ ASSERT_EQUALS("mySet", hbArgs.getSetName());
+ ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.noteReplSet();
+ hbResp.setVersion(rsConfig.getConfigVersion());
+ hbResp.setConfig(rsConfig);
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder);
+ net->scheduleResponse(noi, startDate + 200, makeResponseStatus(responseBuilder.obj()));
+ assertRunUntil(startDate + 2200);
+
+ // Because the new config is stored using an out-of-band thread, we need to perform some
+ // extra synchronization to let the executor finish the heartbeat reconfig. We know that
+ // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
+ // other network operations get scheduled during or before the reconfig, though this may
+ // cease to be true in the future.
+ noi = net->getNextReadyRequest();
+
+ assertMemberState(MemberState::RS_STARTUP, "2");
+ OperationContextNoop txn;
+
+ StatusWith<BSONObj> loadedConfig(getExternalState()->loadLocalConfigDocument(&txn));
+ ASSERT_NOT_OK(loadedConfig.getStatus()) << loadedConfig.getValue();
+ exitNetwork();
+}
+
+TEST_F(ReplCoordHBTest, NotYetInitializedConfigStateEarlyReturn) {
+ // ensure that if we've yet to receive an initial config, we return NotYetInitialized
+ init("mySet");
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(3);
+ hbArgs.setSetName("mySet");
+ hbArgs.setSenderHost(HostAndPort("h1:1"));
+ hbArgs.setSenderId(1);
+ ASSERT(hbArgs.isInitialized());
+
+ ReplSetHeartbeatResponse response;
+ Status status = getReplCoord()->processHeartbeat(hbArgs, &response);
+ ASSERT_EQUALS(ErrorCodes::NotYetInitialized, status.code());
+}
+
+TEST_F(ReplCoordHBTest, OnlyUnauthorizedUpCausesRecovering) {
+    // Tests that a node whose heartbeats all fail with authorization errors goes into RECOVERING
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ // process heartbeat
+ enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ getNet()->scheduleResponse(
+ noi,
+ getNet()->now(),
+ makeResponseStatus(BSON("ok" << 0.0 << "errmsg"
+ << "unauth'd"
+ << "code" << ErrorCodes::Unauthorized)));
+
+ if (request.target != HostAndPort("node2", 12345) &&
+ request.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ getNet()->blackHole(noi);
}
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
- ReplSetHeartbeatResponse ReplCoordHBTest::receiveHeartbeatFrom(
- const ReplicaSetConfig& rsConfig,
- int sourceId,
- const HostAndPort& source) {
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(rsConfig.getConfigVersion());
- hbArgs.setSetName(rsConfig.getReplSetName());
- hbArgs.setSenderHost(source);
- hbArgs.setSenderId(sourceId);
- ASSERT(hbArgs.isInitialized());
-
- ReplSetHeartbeatResponse response;
- ASSERT_OK(getReplCoord()->processHeartbeat(hbArgs, &response));
- return response;
- }
-
- TEST_F(ReplCoordHBTest, JoinExistingReplSet) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- init("mySet");
- addSelf(HostAndPort("h2", 1));
- const Date_t startDate = getNet()->now();
- start();
- enterNetwork();
- assertMemberState(MemberState::RS_STARTUP);
- NetworkInterfaceMock* net = getNet();
- ASSERT_FALSE(net->hasReadyRequests());
- exitNetwork();
- receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
-
- enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
- ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- ASSERT_EQUALS("mySet", hbArgs.getSetName());
- ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_PRIMARY);
- hbResp.noteReplSet();
- hbResp.setVersion(rsConfig.getConfigVersion());
- hbResp.setConfig(rsConfig);
- BSONObjBuilder responseBuilder;
- responseBuilder << "ok" << 1;
- hbResp.addToBSON(&responseBuilder);
- net->scheduleResponse(noi, startDate + 200, makeResponseStatus(responseBuilder.obj()));
- assertRunUntil(startDate + 200);
-
- // Because the new config is stored using an out-of-band thread, we need to perform some
- // extra synchronization to let the executor finish the heartbeat reconfig. We know that
- // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
- // other network operations get scheduled during or before the reconfig, though this may
- // cease to be true in the future.
- noi = net->getNextReadyRequest();
-
- assertMemberState(MemberState::RS_STARTUP2);
- OperationContextNoop txn;
- ReplicaSetConfig storedConfig;
- ASSERT_OK(storedConfig.initialize(
- unittest::assertGet(getExternalState()->loadLocalConfigDocument(&txn))));
- ASSERT_OK(storedConfig.validate());
- ASSERT_EQUALS(3, storedConfig.getConfigVersion());
- ASSERT_EQUALS(3, storedConfig.getNumMembers());
- exitNetwork();
- }
-
- TEST_F(ReplCoordHBTest, DoNotJoinReplSetIfNotAMember) {
- // Tests that a node in RS_STARTUP will not transition to RS_REMOVED if it receives a
- // configuration that does not contain it.
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- ReplicaSetConfig rsConfig = assertMakeRSConfig(
- BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "h1:1") <<
- BSON("_id" << 2 << "host" << "h2:1") <<
- BSON("_id" << 3 << "host" << "h3:1"))));
- init("mySet");
- addSelf(HostAndPort("h4", 1));
- const Date_t startDate = getNet()->now();
- start();
- enterNetwork();
- assertMemberState(MemberState::RS_STARTUP, "1");
- NetworkInterfaceMock* net = getNet();
- ASSERT_FALSE(net->hasReadyRequests());
- exitNetwork();
- receiveHeartbeatFrom(rsConfig, 1, HostAndPort("h1", 1));
-
- enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
- ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- ASSERT_EQUALS("mySet", hbArgs.getSetName());
- ASSERT_EQUALS(-2, hbArgs.getConfigVersion());
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_PRIMARY);
- hbResp.noteReplSet();
- hbResp.setVersion(rsConfig.getConfigVersion());
- hbResp.setConfig(rsConfig);
- BSONObjBuilder responseBuilder;
- responseBuilder << "ok" << 1;
- hbResp.addToBSON(&responseBuilder);
- net->scheduleResponse(noi, startDate + 200, makeResponseStatus(responseBuilder.obj()));
- assertRunUntil(startDate + 2200);
-
- // Because the new config is stored using an out-of-band thread, we need to perform some
- // extra synchronization to let the executor finish the heartbeat reconfig. We know that
- // after the out-of-band thread completes, it schedules new heartbeats. We assume that no
- // other network operations get scheduled during or before the reconfig, though this may
- // cease to be true in the future.
- noi = net->getNextReadyRequest();
-
- assertMemberState(MemberState::RS_STARTUP, "2");
- OperationContextNoop txn;
-
- StatusWith<BSONObj> loadedConfig(getExternalState()->loadLocalConfigDocument(&txn));
- ASSERT_NOT_OK(loadedConfig.getStatus()) << loadedConfig.getValue();
- exitNetwork();
- }
-
- TEST_F(ReplCoordHBTest, NotYetInitializedConfigStateEarlyReturn) {
- // ensure that if we've yet to receive an initial config, we return NotYetInitialized
- init("mySet");
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(3);
- hbArgs.setSetName("mySet");
- hbArgs.setSenderHost(HostAndPort("h1:1"));
- hbArgs.setSenderId(1);
- ASSERT(hbArgs.isInitialized());
-
- ReplSetHeartbeatResponse response;
- Status status = getReplCoord()->processHeartbeat(hbArgs, &response);
- ASSERT_EQUALS(ErrorCodes::NotYetInitialized, status.code());
- }
-
- TEST_F(ReplCoordHBTest, OnlyUnauthorizedUpCausesRecovering) {
-        // Tests that a node whose heartbeats all fail with authorization errors goes into RECOVERING
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
- // process heartbeat
- enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(
- BSON("ok" << 0.0 <<
- "errmsg" << "unauth'd" <<
- "code" << ErrorCodes::Unauthorized)));
-
- if (request.target != HostAndPort("node2", 12345)
- && request.cmdObj.firstElement().fieldNameStringData() != "replSetHeartbeat") {
- error() << "Black holing unexpected request to "
- << request.target << ": " << request.cmdObj;
- getNet()->blackHole(noi);
- }
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
- ASSERT_TRUE(getTopoCoord().getMemberState().recovering());
- assertMemberState(MemberState::RS_RECOVERING, "0");
- }
+ ASSERT_TRUE(getTopoCoord().getMemberState().recovering());
+ assertMemberState(MemberState::RS_RECOVERING, "0");
+}
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 39ba0557b18..079f6e2227a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -39,7 +39,7 @@
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
-#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
+#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
#include "mongo/unittest/unittest.h"
#include "mongo/util/log.h"
@@ -47,391 +47,418 @@ namespace mongo {
namespace repl {
namespace {
- typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
- typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
-
- TEST_F(ReplCoordTest, ReconfigBeforeInitialized) {
- // start up but do not initiate
- OperationContextNoop txn;
- init();
- start();
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
-
- ASSERT_EQUALS(ErrorCodes::NotYetInitialized,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigWhileNotPrimary) {
- // start up, become secondary, receive reconfig
- OperationContextNoop txn;
- init();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- ASSERT_EQUALS(ErrorCodes::NotMaster,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigWithUninitializableConfig) {
- // start up, become primary, receive uninitializable config
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 2 <<
- "invalidlyNamedField" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 2 <<
- "host" << "node2:12345" <<
- "arbiterOnly" << true)));
- // ErrorCodes::BadValue should be propagated from ReplicaSetConfig::initialize()
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigWithWrongReplSetName) {
- // start up, become primary, receive config with incorrect replset name
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "notMySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- TEST_F(ReplCoordTest, ReconfigValidateFails) {
- // start up, become primary, validate fails
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << -3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
- }
-
- void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- *status = replCoord->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- &garbage);
- }
-
- void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345" <<
- "priority" << 3)));
- *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
- }
-
- TEST_F(ReplCoordTest, ReconfigQuorumCheckFails) {
- // start up, become primary, fail during quorum check due to a heartbeat
- // containing a higher config version
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(5);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
- }
-
- TEST_F(ReplCoordTest, ReconfigStoreLocalConfigDocumentFails) {
- // start up, become primary, saving the config fails
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- getExternalState()->setStoreLocalConfigDocumentStatus(Status(ErrorCodes::OutOfDiskSpace,
- "The test set this"));
- boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_EQUALS(ErrorCodes::OutOfDiskSpace, status);
- }
-
- TEST_F(ReplCoordTest, ReconfigWhileReconfiggingFails) {
- // start up, become primary, reconfig, then before that reconfig concludes, reconfig again
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- // first reconfig
- boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
- getNet()->enterNetwork();
- getNet()->blackHole(getNet()->getNextReadyRequest());
- getNet()->exitNetwork();
-
- // second reconfig
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
-
- shutdown();
- reconfigThread.join();
- }
-
- TEST_F(ReplCoordTest, ReconfigWhileInitializingFails) {
- // start up, initiate, then before that initiate concludes, reconfig
- OperationContextNoop txn;
- init();
- start(HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
-
- // initiate
- Status status(ErrorCodes::InternalError, "Not Set");
-        boost::thread initiateThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
- getNet()->enterNetwork();
- getNet()->blackHole(getNet()->getNextReadyRequest());
- getNet()->exitNetwork();
-
- // reconfig
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
-
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
- ASSERT_TRUE(result.obj().isEmpty());
-
- shutdown();
-        initiateThread.join();
- }
-
- TEST_F(ReplCoordTest, ReconfigSuccessful) {
- // start up, become primary, reconfig successfully
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345"))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- Status status(ErrorCodes::InternalError, "Not Set");
- boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
- }
-
- TEST_F(ReplCoordTest, ReconfigDuringHBReconfigFails) {
- // start up, become primary, receive reconfig via heartbeat, then a second one
- // from reconfig
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100,0));
- simulateSuccessfulElection();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // set hbreconfig to hang while in progress
- getExternalState()->setStoreLocalConfigDocumentToHang(true);
-
- // hb reconfig
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ReplSetHeartbeatResponse hbResp2;
- ReplicaSetConfig config;
- config.initialize(BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345"))));
- hbResp2.setConfig(config);
- hbResp2.setVersion(3);
- hbResp2.setSetName("mySet");
- hbResp2.setState(MemberState::RS_SECONDARY);
- BSONObjBuilder respObj2;
- respObj2 << "ok" << 1;
- hbResp2.addToBSON(&respObj2);
- net->runUntil(net->now() + 10*1000); // run until we've sent a heartbeat request
- const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
- net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
-
- // reconfig
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = config.toBSON();
- ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- getExternalState()->setStoreLocalConfigDocumentToHang(false);
- }
+typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
+typedef ReplicationExecutor::RemoteCommandRequest RemoteCommandRequest;
+
+TEST_F(ReplCoordTest, ReconfigBeforeInitialized) {
+ // start up but do not initiate
+ OperationContextNoop txn;
+ init();
+ start();
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+
+ ASSERT_EQUALS(ErrorCodes::NotYetInitialized,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigWhileNotPrimary) {
+ // start up, become secondary, receive reconfig
+ OperationContextNoop txn;
+ init();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ ASSERT_EQUALS(ErrorCodes::NotMaster,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigWithUninitializableConfig) {
+ // start up, become primary, receive uninitializable config
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 2 << "invalidlyNamedField" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "arbiterOnly" << true)));
+ // ErrorCodes::BadValue should be propagated from ReplicaSetConfig::initialize()
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigWithWrongReplSetName) {
+ // start up, become primary, receive config with incorrect replset name
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "notMySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, ReconfigValidateFails) {
+ // start up, become primary, validate fails
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << -3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
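+// The helpers below are run on a separate thread by the tests that use them, because
+// processReplSetInitiate and processReplSetReconfig block on a quorum check that the
+// main test thread must service through the mock network.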
+void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ *status =
+ replCoord->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ &garbage);
+}
+
+void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "priority" << 3)));
+ *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, ReconfigQuorumCheckFails) {
+ // start up, become primary, fail during quorum check due to a heartbeat
+ // containing a higher config version
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
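+    // Advertise config version 5, higher than the proposed version 3, so the quorum
+    // check rejects the reconfig as incompatible.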
+ hbResp.setVersion(5);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+}
+
+TEST_F(ReplCoordTest, ReconfigStoreLocalConfigDocumentFails) {
+ // start up, become primary, saving the config fails
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ getExternalState()->setStoreLocalConfigDocumentStatus(
+ Status(ErrorCodes::OutOfDiskSpace, "The test set this"));
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_EQUALS(ErrorCodes::OutOfDiskSpace, status);
+}
+
+TEST_F(ReplCoordTest, ReconfigWhileReconfiggingFails) {
+ // start up, become primary, reconfig, then before that reconfig concludes, reconfig again
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ // first reconfig
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+ getNet()->enterNetwork();
+ getNet()->blackHole(getNet()->getNextReadyRequest());
+ getNet()->exitNetwork();
+
+ // second reconfig
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+
+ shutdown();
+ reconfigThread.join();
+}
+
+TEST_F(ReplCoordTest, ReconfigWhileInitializingFails) {
+ // start up, initiate, then before that initiate concludes, reconfig
+ OperationContextNoop txn;
+ init();
+ start(HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+
+ // initiate
+ Status status(ErrorCodes::InternalError, "Not Set");
+    boost::thread initiateThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
+ getNet()->enterNetwork();
+ getNet()->blackHole(getNet()->getNextReadyRequest());
+ getNet()->exitNetwork();
+
+ // reconfig
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+
+ shutdown();
+    initiateThread.join();
+}
+
+TEST_F(ReplCoordTest, ReconfigSuccessful) {
+ // start up, become primary, reconfig successfully
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ Status status(ErrorCodes::InternalError, "Not Set");
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+}
+
+TEST_F(ReplCoordTest, ReconfigDuringHBReconfigFails) {
+ // start up, become primary, receive reconfig via heartbeat, then a second one
+ // from reconfig
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // set hbreconfig to hang while in progress
+ getExternalState()->setStoreLocalConfigDocumentToHang(true);
+
+ // hb reconfig
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ReplSetHeartbeatResponse hbResp2;
+ ReplicaSetConfig config;
+ config.initialize(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))));
+ hbResp2.setConfig(config);
+ hbResp2.setVersion(3);
+ hbResp2.setSetName("mySet");
+ hbResp2.setState(MemberState::RS_SECONDARY);
+ BSONObjBuilder respObj2;
+ respObj2 << "ok" << 1;
+ hbResp2.addToBSON(&respObj2);
+ net->runUntil(net->now() + 10 * 1000); // run until we've sent a heartbeat request
+ const NetworkInterfaceMock::NetworkOperationIterator noi2 = net->getNextReadyRequest();
+ net->scheduleResponse(noi2, net->now(), makeResponseStatus(respObj2.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+
+ // reconfig
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = config.toBSON();
+ ASSERT_EQUALS(ErrorCodes::ConfigurationInProgress,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ getExternalState()->setStoreLocalConfigDocumentToHang(false);
+}
// TEST_F(ReplCoordTest, HBReconfigDuringReconfigFails) {
// // start up, become primary, reconfig, while reconfigging receive reconfig via heartbeat
@@ -446,7 +473,7 @@ namespace {
// getReplCoord()->setMyLastOptime(OpTime(100,0));
// simulateSuccessfulElection();
// ASSERT_TRUE(getReplCoord()->getCurrentMemberState().primary());
-//
+//
// // schedule hb reconfig
// NetworkInterfaceMock* net = getNet();
// net->enterNetwork();
@@ -468,7 +495,7 @@ namespace {
// respObj2 << "ok" << 1;
// hbResp.addToBSON(&respObj2);
// net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj2.obj()));
-//
+//
// // start reconfig thread
// Status status2(ErrorCodes::InternalError, "Not Set");
// boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status2));
@@ -499,41 +526,44 @@ namespace {
// logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
// }
- TEST_F(ReplCoordTest, ForceReconfigWhileNotPrimarySuccessful) {
- // start up, become a secondary, receive a forced reconfig
- OperationContextNoop txn;
- init();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:12345") )),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
-
- // fail before forced
- BSONObjBuilder result;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345") <<
- BSON("_id" << 2 <<
- "host" << "node2:12345")));
- ASSERT_EQUALS(ErrorCodes::NotMaster,
- getReplCoord()->processReplSetReconfig(&txn, args, &result));
-
- // forced should succeed
- args.force = true;
- ASSERT_OK(getReplCoord()->processReplSetReconfig(&txn, args, &result));
- getReplCoord()->processReplSetGetConfig(&result);
-
- // ensure forced reconfig results in a random larger version
- ASSERT_GREATER_THAN(result.obj()["config"].Obj()["version"].numberInt(), 3);
- }
-
-} // anonymous namespace
-} // namespace repl
-} // namespace mongo
+TEST_F(ReplCoordTest, ForceReconfigWhileNotPrimarySuccessful) {
+ // start up, become a secondary, receive a forced reconfig
+ OperationContextNoop txn;
+ init();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+
+ // fail before forced
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")));
+ ASSERT_EQUALS(ErrorCodes::NotMaster,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+
+ // forced should succeed
+ args.force = true;
+ ASSERT_OK(getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ getReplCoord()->processReplSetGetConfig(&result);
+
+ // ensure forced reconfig results in a random larger version
+ ASSERT_GREATER_THAN(result.obj()["config"].Obj()["version"].numberInt(), 3);
+}
+
+} // anonymous namespace
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 205a2a9ff2a..cc256ca6fe3 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -45,7 +45,7 @@
#include "mongo/db/repl/repl_set_heartbeat_args.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replica_set_config.h"
-#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
+#include "mongo/db/repl/replication_coordinator.h" // ReplSetReconfigArgs
#include "mongo/db/repl/replication_coordinator_external_state_mock.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/replication_coordinator_test_fixture.h"
@@ -62,2148 +62,2238 @@ namespace mongo {
namespace repl {
namespace {
- typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
+typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
+
+TEST_F(ReplCoordTest, StartupWithValidLocalConfig) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+}
+
+TEST_F(ReplCoordTest, StartupWithConfigMissingSelf) {
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:54321"))),
+ HostAndPort("node3", 12345));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("NodeNotFound"));
+}
+
+TEST_F(ReplCoordTest, StartupWithLocalConfigSetNameMismatch) {
+ init("mySet");
+ startCapturingLogMessages();
+ assertStartSuccess(BSON("_id"
+ << "notMySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("reports set name of notMySet,"));
+}
+
+TEST_F(ReplCoordTest, StartupWithNoLocalConfig) {
+ startCapturingLogMessages();
+ start();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("Did not find local "));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWithEmptyConfig) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ BSONObjBuilder result;
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateSucceedsWithOneNodeConfig) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ // Starting uninitialized, show that we can perform the initiate behavior.
+ BSONObjBuilder result1;
+ ASSERT_OK(
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+
+ // Show that initiate fails after it has already succeeded.
+ BSONObjBuilder result2;
+ ASSERT_EQUALS(
+ ErrorCodes::AlreadyInitialized,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result2));
+
+ // Still in repl set mode, even after failed reinitiate.
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, InitiateSucceedsAfterFailing) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ BSONObjBuilder result;
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ // Having failed to initiate once, show that we can now initiate.
+ BSONObjBuilder result1;
+ ASSERT_OK(
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, InitiateFailsIfAlreadyInitialized) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ HostAndPort("node1", 12345));
+ BSONObjBuilder result;
+ ASSERT_EQUALS(
+ ErrorCodes::AlreadyInitialized,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345"))),
+ &result));
+}
+
+TEST_F(ReplCoordTest, InitiateFailsIfSelfMissing) {
+ OperationContextNoop txn;
+ BSONObjBuilder result;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(
+ ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node4"))),
+ &result));
+}
+
+void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ *status =
+ replCoord->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345")
+ << BSON("_id" << 1 << "host"
+ << "node2:54321"))),
+ &garbage);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsIfQuorumNotMet) {
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("mySet");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(1);
+ hbArgs.setCheckEmpty(true);
+ hbArgs.setSenderHost(HostAndPort("node1", 12345));
+ hbArgs.setSenderId(0);
+
+ Status status(ErrorCodes::InternalError, "Not set");
+ boost::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
+ const Date_t startDate = getNet()->now();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
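+    // Fail node2's heartbeat with an error response; with the only other member
+    // unreachable, the quorum check fails and initiate returns NodeNotFound.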
+ getNet()->scheduleResponse(
+ noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey, "No response"));
+ getNet()->runUntil(startDate + 10);
+ getNet()->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, getNet()->now());
+ prsiThread.join();
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiatePassesIfQuorumMet) {
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("mySet");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(1);
+ hbArgs.setCheckEmpty(true);
+ hbArgs.setSenderHost(HostAndPort("node1", 12345));
+ hbArgs.setSenderId(0);
+
+ Status status(ErrorCodes::InternalError, "Not set");
+ boost::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
+ const Date_t startDate = getNet()->now();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
+ ASSERT_EQUALS("admin", noi->getRequest().dbname);
+ ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
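+    // This time node2 returns a well-formed heartbeat response, so the quorum check
+    // passes and initiate succeeds.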
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setVersion(0);
+ getNet()->scheduleResponse(noi,
+ startDate + 10,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ hbResp.toBSON(), Milliseconds(8))));
+ getNet()->runUntil(startDate + 10);
+ getNet()->exitNetwork();
+ ASSERT_EQUALS(startDate + 10, getNet()->now());
+ prsiThread.join();
+ ASSERT_OK(status);
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWithSetNameMismatch) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ BSONObjBuilder result1;
+ ASSERT_EQUALS(
+ ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "wrongSet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWithoutReplSetFlag) {
+ OperationContextNoop txn;
+ init("");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ BSONObjBuilder result1;
+ ASSERT_EQUALS(
+ ErrorCodes::NoReplicationEnabled,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, InitiateFailsWhileStoringLocalConfigDocument) {
+ OperationContextNoop txn;
+ init("mySet");
+ start(HostAndPort("node1", 12345));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+
+ BSONObjBuilder result1;
+ getExternalState()->setStoreLocalConfigDocumentStatus(
+ Status(ErrorCodes::OutOfDiskSpace, "The test set this"));
+ ASSERT_EQUALS(
+ ErrorCodes::OutOfDiskSpace,
+ getReplCoord()->processReplSetInitiate(&txn,
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"))),
+ &result1));
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, CheckReplEnabledForCommandNotRepl) {
+ // pass in settings to avoid having a replSet
+ ReplSettings settings;
+ init(settings);
+ start();
+
+ // check status NoReplicationEnabled and empty result
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, checkReplEnabledForCommandConfigSvr) {
+ ReplSettings settings;
+ serverGlobalParams.configsvr = true;
+ init(settings);
+ start();
+
+ // check status NoReplicationEnabled and result mentions configsrv
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
+ ASSERT_EQUALS(result.obj()["info"].String(), "configsvr");
+ serverGlobalParams.configsvr = false;
+}
+
+TEST_F(ReplCoordTest, checkReplEnabledForCommandNoConfig) {
+ start();
+
+ // check status NotYetInitialized and result mentions rs.initiate
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, ErrorCodes::NotYetInitialized);
+ ASSERT_TRUE(result.obj()["info"].String().find("rs.initiate") != std::string::npos);
+}
+
+TEST_F(ReplCoordTest, checkReplEnabledForCommandWorking) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+
+ // check status OK and result is empty
+ BSONObjBuilder result;
+ Status status = getReplCoord()->checkReplEnabledForCommand(&result);
+ ASSERT_EQUALS(status, Status::OK());
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
+TEST_F(ReplCoordTest, BasicRBIDUsage) {
+ start();
+ BSONObjBuilder result;
+ getReplCoord()->processReplSetGetRBID(&result);
+ long long initialValue = result.obj()["rbid"].Int();
+ getReplCoord()->incrementRollbackID();
+
+ BSONObjBuilder result2;
+ getReplCoord()->processReplSetGetRBID(&result2);
+ long long incrementedValue = result2.obj()["rbid"].Int();
+ ASSERT_EQUALS(incrementedValue, initialValue + 1);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationNoReplEnabled) {
+ init("");
+ OperationContextNoop txn;
+ OpTime time(100, 1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 2;
+
+ // Because we didn't set ReplSettings.replSet, it will think we're a standalone so
+ // awaitReplication will always work.
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationMasterSlaveMajorityBaseCase) {
+ ReplSettings settings;
+ settings.master = true;
+ init(settings);
+ OperationContextNoop txn;
+ OpTime time(100, 1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 2;
+
+
+ writeConcern.wNumNodes = 0;
+ writeConcern.wMode = "majority";
+ // w:majority always works on master/slave
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReplSetBaseCases) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+
+ OperationContextNoop txn;
+ OpTime time(100, 1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 0; // Waiting for 0 nodes always works
+ writeConcern.wMode = "";
+
+ // Should fail when not primary
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
+
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesNonBlocking) {
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2) << BSON("host"
+ << "node4:12345"
+ << "_id" << 3))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OID client3 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
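+    // Handshakes register each client RID against a member id so the coordinator can
+    // attribute replication progress to members 1-3 below.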
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+ HandshakeArgs handshake3;
+ ASSERT_OK(handshake3.initialize(BSON("handshake" << client3 << "member" << 3)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ // 1 node waiting for time 1
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ getReplCoord()->setMyLastOptime(time1);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 2 nodes waiting for time1
+ writeConcern.wNumNodes = 2;
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 2 nodes waiting for time2
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ getReplCoord()->setMyLastOptime(time2);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client3, time2));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 3 nodes waiting for time2
+ writeConcern.wNumNodes = 3;
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time2));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationNamedModesNonBlocking) {
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node0"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA1"))
+ << BSON("_id" << 1 << "host"
+ << "node1"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA2"))
+ << BSON("_id" << 2 << "host"
+ << "node2"
+ << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rackNA3"))
+ << BSON("_id" << 3 << "host"
+ << "node3"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU1"))
+ << BSON("_id" << 4 << "host"
+ << "node4"
+ << "tags" << BSON("dc"
+ << "EU"
+ << "rack"
+ << "rackEU2"))) << "settings"
+ << BSON("getLastErrorModes" << BSON("multiDC" << BSON("dc" << 2) << "multiDCAndRack"
+ << BSON("dc" << 2 << "rack" << 3)))),
+ HostAndPort("node0"));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ OID clientRID1 = OID::gen();
+ OID clientRID2 = OID::gen();
+ OID clientRID3 = OID::gen();
+ OID clientRID4 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << clientRID1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << clientRID2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+ HandshakeArgs handshake3;
+ ASSERT_OK(handshake3.initialize(BSON("handshake" << clientRID3 << "member" << 3)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
+ HandshakeArgs handshake4;
+ ASSERT_OK(handshake4.initialize(BSON("handshake" << clientRID4 << "member" << 4)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake4));
+
+ // Test invalid write concern
+ WriteConcernOptions invalidWriteConcern;
+ invalidWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ invalidWriteConcern.wMode = "fakemode";
+
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time1, invalidWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern, statusAndDur.status);
+
+
+ // Set up valid write concerns for the rest of the test
+ WriteConcernOptions majorityWriteConcern;
+ majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ majorityWriteConcern.wMode = "majority";
+
+ WriteConcernOptions multiDCWriteConcern;
+ multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ multiDCWriteConcern.wMode = "multiDC";
+
+ WriteConcernOptions multiRackWriteConcern;
+ multiRackWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ multiRackWriteConcern.wMode = "multiDCAndRack";
+
+
+ // Nothing satisfied
+ getReplCoord()->setMyLastOptime(time1);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+
+ // Majority satisfied but not either custom mode
+ getReplCoord()->setLastOptime_forTest(clientRID1, time1);
+ getReplCoord()->setLastOptime_forTest(clientRID2, time1);
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+
+ // All modes satisfied
+ getReplCoord()->setLastOptime_forTest(clientRID3, time1);
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // multiDC satisfied but not majority or multiRack
+ getReplCoord()->setMyLastOptime(time2);
+ getReplCoord()->setLastOptime_forTest(clientRID3, time2);
+
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, majorityWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiDCWriteConcern);
+ ASSERT_OK(statusAndDur.status);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiRackWriteConcern);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+}
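
For reference, the two custom getLastErrorModes defined in the config above resolve
against the member tags as follows (a worked example inferred from the config
document and the assertions in this test, using the node names defined above):

    // multiDC: {dc: 2} is satisfied once the write is on members covering two
    // distinct "dc" tag values, e.g. node0 (dc:NA) plus node3 (dc:EU).
    // multiDCAndRack: {dc: 2, rack: 3} additionally requires three distinct
    // "rack" values, e.g. node0 (rackNA1), node1 (rackNA2), node3 (rackEU1).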
- TEST_F(ReplCoordTest, StartupWithValidLocalConfig) {
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
- }
-
- TEST_F(ReplCoordTest, StartupWithConfigMissingSelf) {
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node2:54321"))),
- HostAndPort("node3", 12345));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("NodeNotFound"));
- }
-
- TEST_F(ReplCoordTest, StartupWithLocalConfigSetNameMismatch) {
- init("mySet");
- startCapturingLogMessages();
- assertStartSuccess(
- BSON("_id" << "notMySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("reports set name of notMySet,"));
- }
-
- TEST_F(ReplCoordTest, StartupWithNoLocalConfig) {
- startCapturingLogMessages();
- start();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("Did not find local "));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiateFailsWithEmptyConfig) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- BSONObjBuilder result;
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiateSucceedsWithOneNodeConfig) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- // Starting uninitialized, show that we can perform the initiate behavior.
- BSONObjBuilder result1;
- ASSERT_OK(getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
-
- // Show that initiate fails after it has already succeeded.
- BSONObjBuilder result2;
- ASSERT_EQUALS(ErrorCodes::AlreadyInitialized,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result2));
-
- // Still in repl set mode, even after failed reinitiate.
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- }
-
- TEST_F(ReplCoordTest, InitiateSucceedsAfterFailing) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- BSONObjBuilder result;
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(&txn, BSONObj(), &result));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- // Having failed to initiate once, show that we can now initiate.
- BSONObjBuilder result1;
- ASSERT_OK(getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- }
-
- TEST_F(ReplCoordTest, InitiateFailsIfAlreadyInitialized) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 << "host" << "node1:12345"))),
- HostAndPort("node1", 12345));
- BSONObjBuilder result;
- ASSERT_EQUALS(ErrorCodes::AlreadyInitialized,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 1 <<
- "host" << "node1:12345"))),
- &result));
- }
-
- TEST_F(ReplCoordTest, InitiateFailsIfSelfMissing) {
- OperationContextNoop txn;
- BSONObjBuilder result;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node4"))),
- &result));
- }
-
- void doReplSetInitiate(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- *status = replCoord->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345") <<
- BSON("_id" << 1 << "host" << "node2:54321"))),
- &garbage);
- }
-
- TEST_F(ReplCoordTest, InitiateFailsIfQuorumNotMet) {
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName("mySet");
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(1);
- hbArgs.setCheckEmpty(true);
- hbArgs.setSenderHost(HostAndPort("node1", 12345));
- hbArgs.setSenderId(0);
-
- Status status(ErrorCodes::InternalError, "Not set");
- boost::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
- const Date_t startDate = getNet()->now();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
- getNet()->scheduleResponse(noi, startDate + 10, ResponseStatus(ErrorCodes::NoSuchKey,
- "No response"));
- getNet()->runUntil(startDate + 10);
- getNet()->exitNetwork();
- ASSERT_EQUALS(startDate + 10, getNet()->now());
- prsiThread.join();
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, status);
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, InitiatePassesIfQuorumMet) {
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName("mySet");
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(1);
- hbArgs.setCheckEmpty(true);
- hbArgs.setSenderHost(HostAndPort("node1", 12345));
- hbArgs.setSenderId(0);
-
- Status status(ErrorCodes::InternalError, "Not set");
- boost::thread prsiThread(stdx::bind(doReplSetInitiate, getReplCoord(), &status));
- const Date_t startDate = getNet()->now();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- ASSERT_EQUALS(HostAndPort("node2", 54321), noi->getRequest().target);
- ASSERT_EQUALS("admin", noi->getRequest().dbname);
- ASSERT_EQUALS(hbArgs.toBSON(), noi->getRequest().cmdObj);
- ReplSetHeartbeatResponse hbResp;
- hbResp.setVersion(0);
- getNet()->scheduleResponse(
- noi,
- startDate + 10,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(hbResp.toBSON(),
- Milliseconds(8))));
- getNet()->runUntil(startDate + 10);
- getNet()->exitNetwork();
- ASSERT_EQUALS(startDate + 10, getNet()->now());
- prsiThread.join();
- ASSERT_OK(status);
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+/**
+ * Used to wait for replication in a separate thread without blocking execution of the test.
+ * To use, set the optime and write concern to be passed to awaitReplication and then call
+ * start(), which will spawn a thread that calls awaitReplication. No calls may be made
+ * on the ReplicationAwaiter instance between calling start and getResult(). After returning
+ * from getResult(), you can call reset() to allow the awaiter to be reused for another
+ * awaitReplication call.
+ */
+class ReplicationAwaiter {
+public:
+ ReplicationAwaiter(ReplicationCoordinatorImpl* replCoord, OperationContext* txn)
+ : _replCoord(replCoord),
+ _finished(false),
+ _result(ReplicationCoordinator::StatusAndDuration(
+ Status::OK(), ReplicationCoordinator::Milliseconds(0))) {}
+
+ void setOpTime(const OpTime& ot) {
+ _optime = ot;
}
- TEST_F(ReplCoordTest, InitiateFailsWithSetNameMismatch) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- BSONObjBuilder result1;
- ASSERT_EQUALS(
- ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "wrongSet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+ void setWriteConcern(const WriteConcernOptions& wc) {
+ _writeConcern = wc;
}
- TEST_F(ReplCoordTest, InitiateFailsWithoutReplSetFlag) {
- OperationContextNoop txn;
- init("");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- BSONObjBuilder result1;
- ASSERT_EQUALS(
- ErrorCodes::NoReplicationEnabled,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+ // may block
+ ReplicationCoordinator::StatusAndDuration getResult() {
+ _thread->join();
+ ASSERT(_finished);
+ return _result;
}
- TEST_F(ReplCoordTest, InitiateFailsWhileStoringLocalConfigDocument) {
- OperationContextNoop txn;
- init("mySet");
- start(HostAndPort("node1", 12345));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
-
- BSONObjBuilder result1;
- getExternalState()->setStoreLocalConfigDocumentStatus(Status(ErrorCodes::OutOfDiskSpace,
- "The test set this"));
- ASSERT_EQUALS(
- ErrorCodes::OutOfDiskSpace,
- getReplCoord()->processReplSetInitiate(
- &txn,
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345"))),
- &result1));
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+ void start(OperationContext* txn) {
+ ASSERT(!_finished);
+ _thread.reset(
+ new boost::thread(stdx::bind(&ReplicationAwaiter::_awaitReplication, this, txn)));
}
- TEST_F(ReplCoordTest, CheckReplEnabledForCommandNotRepl) {
- // pass in settings to avoid having a replSet
- ReplSettings settings;
- init(settings);
- start();
-
- // check status NoReplicationEnabled and empty result
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
- ASSERT_TRUE(result.obj().isEmpty());
+ void reset() {
+ ASSERT(_finished);
+ _finished = false;
+ _result = ReplicationCoordinator::StatusAndDuration(
+ Status::OK(), ReplicationCoordinator::Milliseconds(0));
}
- TEST_F(ReplCoordTest, checkReplEnabledForCommandConfigSvr) {
- ReplSettings settings;
- serverGlobalParams.configsvr = true;
- init(settings);
- start();
-
- // check status NoReplicationEnabled and result mentions configsrv
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, ErrorCodes::NoReplicationEnabled);
- ASSERT_EQUALS(result.obj()["info"].String(), "configsvr");
- serverGlobalParams.configsvr = false;
+private:
+ void _awaitReplication(OperationContext* txn) {
+ _result = _replCoord->awaitReplication(txn, _optime, _writeConcern);
+ _finished = true;
}
- TEST_F(ReplCoordTest, checkReplEnabledForCommandNoConfig) {
- start();
-
- // check status NotYetInitialized and result mentions rs.initiate
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, ErrorCodes::NotYetInitialized);
- ASSERT_TRUE(result.obj()["info"].String().find("rs.initiate") != std::string::npos);
+ ReplicationCoordinatorImpl* _replCoord;
+ bool _finished;
+ OpTime _optime;
+ WriteConcernOptions _writeConcern;
+ ReplicationCoordinator::StatusAndDuration _result;
+ boost::scoped_ptr<boost::thread> _thread;
+};
+
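The tests below all drive this helper the same way; the following is a minimal
sketch of one round trip (hypothetical test body, assuming the ReplCoordTest
fixture and helpers used throughout this file):

    OperationContextNoop txn;
    ReplicationAwaiter awaiter(getReplCoord(), &txn);
    WriteConcernOptions wc;
    wc.wTimeout = WriteConcernOptions::kNoTimeout;  // block until satisfied
    wc.wNumNodes = 2;
    awaiter.setOpTime(OpTime(100, 1));
    awaiter.setWriteConcern(wc);
    awaiter.start(&txn);  // spawns the thread that blocks in awaitReplication
    // ... advance node optimes from the test thread until the concern is met ...
    ReplicationCoordinator::StatusAndDuration res = awaiter.getResult();  // joins
    awaiter.reset();  // the awaiter may now be reused for another wait
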
+TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesBlocking) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time1
+ awaiter.setOpTime(time1);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ getReplCoord()->setMyLastOptime(time1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.start(&txn);
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time2));
+ statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+
+ // 3 nodes waiting for time2
+ writeConcern.wNumNodes = 3;
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time2));
+ statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationTimeout) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OID client = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake;
+ ASSERT_OK(handshake.initialize(BSON("handshake" << client << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = 50;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client, time1));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationShutdown) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time1));
+ shutdown();
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationStepDown) {
+ // Test that a thread blocked in awaitReplication will be woken up and return NotMaster
+ // if the node steps down while it is waiting.
+ OperationContextReplMock txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time1));
+ getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
+ awaiter.reset();
+}
+
+class OperationContextNoopWithInterrupt : public OperationContextReplMock {
+public:
+ OperationContextNoopWithInterrupt() : _opID(0), _interruptOp(false) {}
+
+ virtual unsigned int getOpID() const {
+ return _opID;
}
- TEST_F(ReplCoordTest, checkReplEnabledForCommandWorking) {
- assertStartSuccess(BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" <<
- "_id" << 0 ))),
- HostAndPort("node1", 12345));
-
- // check status OK and result is empty
- BSONObjBuilder result;
- Status status = getReplCoord()->checkReplEnabledForCommand(&result);
- ASSERT_EQUALS(status, Status::OK());
- ASSERT_TRUE(result.obj().isEmpty());
+ /**
+ * Can only be called before any multi-threaded access to this object has begun.
+ */
+ void setOpID(unsigned int opID) {
+ _opID = opID;
}
- TEST_F(ReplCoordTest, BasicRBIDUsage) {
- start();
- BSONObjBuilder result;
- getReplCoord()->processReplSetGetRBID(&result);
- long long initialValue = result.obj()["rbid"].Int();
- getReplCoord()->incrementRollbackID();
-
- BSONObjBuilder result2;
- getReplCoord()->processReplSetGetRBID(&result2);
- long long incrementedValue = result2.obj()["rbid"].Int();
- ASSERT_EQUALS(incrementedValue, initialValue + 1);
+ virtual void checkForInterrupt() const {
+ if (_interruptOp) {
+ uasserted(ErrorCodes::Interrupted, "operation was interrupted");
+ }
}
- TEST_F(ReplCoordTest, AwaitReplicationNoReplEnabled) {
- init("");
- OperationContextNoop txn;
- OpTime time(100, 1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 2;
-
- // Because we didn't set ReplSettings.replSet, it will think we're a standalone so
- // awaitReplication will always work.
- ReplicationCoordinator::StatusAndDuration statusAndDur =
- getReplCoord()->awaitReplication(&txn, time, writeConcern);
- ASSERT_OK(statusAndDur.status);
+ virtual Status checkForInterruptNoAssert() const {
+ if (_interruptOp) {
+ return Status(ErrorCodes::Interrupted, "operation was interrupted");
+ }
+ return Status::OK();
}
- TEST_F(ReplCoordTest, AwaitReplicationMasterSlaveMajorityBaseCase) {
- ReplSettings settings;
- settings.master = true;
- init(settings);
- OperationContextNoop txn;
- OpTime time(100, 1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 2;
-
-
- writeConcern.wNumNodes = 0;
- writeConcern.wMode = "majority";
- // w:majority always works on master/slave
- ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(
- &txn, time, writeConcern);
- ASSERT_OK(statusAndDur.status);
+ /**
+ * Can only be called before any multi-threaded access to this object has begun.
+ */
+ void setInterruptOp(bool interrupt) {
+ _interruptOp = interrupt;
}
- TEST_F(ReplCoordTest, AwaitReplicationReplSetBaseCases) {
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
-
- OperationContextNoop txn;
- OpTime time(100, 1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 0; // Waiting for 0 nodes always works
- writeConcern.wMode = "";
-
- // Should fail when not primary
- ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(
- &txn, time, writeConcern);
- ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
-
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time, writeConcern);
- ASSERT_OK(statusAndDur.status);
- }
+private:
+ unsigned int _opID;
+ bool _interruptOp;
+};
+
+TEST_F(ReplCoordTest, AwaitReplicationInterrupt) {
+ // Tests that a thread blocked in awaitReplication can be killed by a killOp operation
+ OperationContextNoopWithInterrupt txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1")
+ << BSON("_id" << 1 << "host"
+ << "node2") << BSON("_id" << 2 << "host"
+ << "node3"))),
+ HostAndPort("node1"));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 2;
+
+ unsigned int opID = 100;
+ txn.setOpID(opID);
+
+ // 2 nodes waiting for time2
+ awaiter.setOpTime(time2);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time1));
+
+ txn.setInterruptOp(true);
+ getReplCoord()->interrupt(opID);
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::Interrupted, statusAndDur.status);
+ awaiter.reset();
+}
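
Taken together, the awaitReplication tests above exercise three wTimeout
settings; their semantics, as inferred from the assertions rather than from
WriteConcernOptions itself, are:

    // WriteConcernOptions::kNoWaiting -> check once and return immediately; an
    //     unmet concern yields ErrorCodes::ExceededTimeLimit
    // WriteConcernOptions::kNoTimeout -> block until the concern is satisfied,
    //     or until shutdown, stepdown, or interrupt wakes the waiter
    // 50 (milliseconds)               -> block for up to 50ms, then return
    //     ErrorCodes::ExceededTimeLimit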
+
+class StepDownTest : public ReplCoordTest {
+protected:
+ OID myRid;
+ OID rid2;
+ OID rid3;
+
+private:
+ virtual void setUp() {
+ ReplCoordTest::setUp();
+ init("mySet/test1:1234,test2:1234,test3:1234");
- TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesNonBlocking) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3))),
- HostAndPort("node1", 12345));
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234")
+ << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test1", 1234));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OID client3 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ myRid = getReplCoord()->getMyRID();
+ rid2 = OID::gen();
+ rid3 = OID::gen();
HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+ handshake2.initialize(
+ BSON("handshake" << rid2 << "member" << 1 << "config" << BSON("_id" << 1 << "host"
+ << "test2:1234")));
HandshakeArgs handshake3;
- ASSERT_OK(handshake3.initialize(BSON("handshake" << client3 << "member" << 3)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 1;
-
- // 1 node waiting for time 1
- ReplicationCoordinator::StatusAndDuration statusAndDur =
- getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- getReplCoord()->setMyLastOptime(time1);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_OK(statusAndDur.status);
-
- // 2 nodes waiting for time1
- writeConcern.wNumNodes = 2;
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
- ASSERT_OK(statusAndDur.status);
-
- // 2 nodes waiting for time2
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- getReplCoord()->setMyLastOptime(time2);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client3, time2));
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_OK(statusAndDur.status);
-
- // 3 nodes waiting for time2
- writeConcern.wNumNodes = 3;
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time2));
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
- ASSERT_OK(statusAndDur.status);
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationNamedModesNonBlocking) {
+ handshake3.initialize(
+ BSON("handshake" << rid3 << "member" << 2 << "config" << BSON("_id" << 2 << "host"
+ << "test3:1234")));
OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "node0" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA1")) <<
- BSON("_id" << 1 <<
- "host" << "node1" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA2")) <<
- BSON("_id" << 2 <<
- "host" << "node2" <<
- "tags" << BSON("dc" << "NA" <<
- "rack" << "rackNA3")) <<
- BSON("_id" << 3 <<
- "host" << "node3" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU1")) <<
- BSON("_id" << 4 <<
- "host" << "node4" <<
- "tags" << BSON("dc" << "EU" <<
- "rack" << "rackEU2"))) <<
- "settings" << BSON("getLastErrorModes" <<
- BSON("multiDC" << BSON("dc" << 2) <<
- "multiDCAndRack" << BSON("dc" << 2 << "rack" << 3)))),
- HostAndPort("node0"));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- OID clientRID1 = OID::gen();
- OID clientRID2 = OID::gen();
- OID clientRID3 = OID::gen();
- OID clientRID4 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << clientRID1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << clientRID2 << "member" << 2)));
ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
- HandshakeArgs handshake3;
- ASSERT_OK(handshake3.initialize(BSON("handshake" << clientRID3 << "member" << 3)));
ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
- HandshakeArgs handshake4;
- ASSERT_OK(handshake4.initialize(BSON("handshake" << clientRID4 << "member" << 4)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake4));
-
- // Test invalid write concern
- WriteConcernOptions invalidWriteConcern;
- invalidWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- invalidWriteConcern.wMode = "fakemode";
-
- ReplicationCoordinator::StatusAndDuration statusAndDur =
- getReplCoord()->awaitReplication(&txn, time1, invalidWriteConcern);
- ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern, statusAndDur.status);
-
-
- // Set up valid write concerns for the rest of the test
- WriteConcernOptions majorityWriteConcern;
- majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = "majority";
-
- WriteConcernOptions multiDCWriteConcern;
- multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- multiDCWriteConcern.wMode = "multiDC";
-
- WriteConcernOptions multiRackWriteConcern;
- multiRackWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- multiRackWriteConcern.wMode = "multiDCAndRack";
-
-
- // Nothing satisfied
- getReplCoord()->setMyLastOptime(time1);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
-
- // Majority satisfied but not either custom mode
- getReplCoord()->setLastOptime_forTest(clientRID1, time1);
- getReplCoord()->setLastOptime_forTest(clientRID2, time1);
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
-
- // All modes satisfied
- getReplCoord()->setLastOptime_forTest(clientRID3, time1);
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiDCWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time1, multiRackWriteConcern);
- ASSERT_OK(statusAndDur.status);
-
- // multiDC satisfied but not majority or multiRack
- getReplCoord()->setMyLastOptime(time2);
- getReplCoord()->setLastOptime_forTest(clientRID3, time2);
-
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, majorityWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiDCWriteConcern);
- ASSERT_OK(statusAndDur.status);
- statusAndDur = getReplCoord()->awaitReplication(&txn, time2, multiRackWriteConcern);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
}
-
- /**
- * Used to wait for replication in a separate thread without blocking execution of the test.
- * To use, set the optime and write concern to be passed to awaitReplication and then call
- * start(), which will spawn a thread that calls awaitReplication. No calls may be made
- * on the ReplicationAwaiter instance between calling start and getResult(). After returning
- * from getResult(), you can call reset() to allow the awaiter to be reused for another
- * awaitReplication call.
- */
- class ReplicationAwaiter {
- public:
-
- ReplicationAwaiter(ReplicationCoordinatorImpl* replCoord, OperationContext* txn) :
- _replCoord(replCoord), _finished(false),
- _result(ReplicationCoordinator::StatusAndDuration(
- Status::OK(), ReplicationCoordinator::Milliseconds(0))) {}
-
- void setOpTime(const OpTime& ot) {
- _optime = ot;
- }
-
- void setWriteConcern(const WriteConcernOptions& wc) {
- _writeConcern = wc;
- }
-
- // may block
- ReplicationCoordinator::StatusAndDuration getResult() {
- _thread->join();
- ASSERT(_finished);
- return _result;
- }
-
- void start(OperationContext* txn) {
- ASSERT(!_finished);
- _thread.reset(new boost::thread(stdx::bind(&ReplicationAwaiter::_awaitReplication,
- this,
- txn)));
- }
-
- void reset() {
- ASSERT(_finished);
- _finished = false;
- _result = ReplicationCoordinator::StatusAndDuration(
- Status::OK(), ReplicationCoordinator::Milliseconds(0));
- }
-
- private:
-
- void _awaitReplication(OperationContext* txn) {
- _result = _replCoord->awaitReplication(txn, _optime, _writeConcern);
- _finished = true;
- }
-
- ReplicationCoordinatorImpl* _replCoord;
- bool _finished;
- OpTime _optime;
- WriteConcernOptions _writeConcern;
- ReplicationCoordinator::StatusAndDuration _result;
- boost::scoped_ptr<boost::thread> _thread;
- };
-
- TEST_F(ReplCoordTest, AwaitReplicationNumberOfNodesBlocking) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time1
- awaiter.setOpTime(time1);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time2));
- statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
-
- // 3 nodes waiting for time2
- writeConcern.wNumNodes = 3;
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time2));
- statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationTimeout) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OID client = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake;
- ASSERT_OK(handshake.initialize(BSON("handshake" << client << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = 50;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client, time1));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, statusAndDur.status);
- awaiter.reset();
+};
+
+TEST_F(StepDownTest, StepDownNotPrimary) {
+ OperationContextReplMock txn;
+ OpTime optime1(100, 1);
+ // All nodes are caught up
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
+
+ Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(0));
+ ASSERT_EQUALS(ErrorCodes::NotMaster, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(StepDownTest, StepDownTimeoutAcquiringGlobalLock) {
+ OperationContextReplMock txn;
+ OpTime optime1(100, 1);
+ // All nodes are caught up
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
+
+ simulateSuccessfulElection();
+
+ // Make sure stepDown cannot grab the global shared lock
+ Lock::GlobalWrite lk(txn.lockState());
+
+ Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
+TEST_F(StepDownTest, StepDownNoWaiting) {
+ OperationContextReplMock txn;
+ OpTime optime1(100, 1);
+ // All nodes are caught up
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
+
+ simulateSuccessfulElection();
+
+ enterNetwork();
+ getNet()->runUntil(getNet()->now() + 2000);
+ ASSERT(getNet()->hasReadyRequests());
+ NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ ReplicationExecutor::RemoteCommandRequest request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(hbArgs.getSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(hbArgs.getConfigVersion());
+ hbResp.setOpTime(optime1);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
}
-
- TEST_F(ReplCoordTest, AwaitReplicationShutdown) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time1));
- shutdown();
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, statusAndDur.status);
- awaiter.reset();
+ while (getNet()->hasReadyRequests()) {
+ getNet()->blackHole(getNet()->getNextReadyRequest());
}
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ ASSERT_OK(getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)));
+ enterNetwork(); // So we can safely inspect the topology coordinator
+ ASSERT_EQUALS(Date_t(getNet()->now().millis + 1000), getTopoCoord().getStepDownTime());
+ ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
+ exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(ReplCoordTest, StepDownAndBackUpSingleNode) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))),
+ HostAndPort("test1", 1234));
+ OperationContextReplMock txn;
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ ASSERT_OK(getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000)));
+ getNet()->enterNetwork(); // Must do this before inspecting the topocoord
+ Date_t stepdownUntil = Date_t(getNet()->now().millis + 1000);
+ ASSERT_EQUALS(stepdownUntil, getTopoCoord().getStepDownTime());
+ ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // Now run time forward and make sure that the node becomes primary again when the stepdown
+ // period ends.
+ getNet()->runUntil(stepdownUntil);
+ ASSERT_EQUALS(stepdownUntil, getNet()->now());
+ ASSERT_TRUE(getTopoCoord().getMemberState().primary());
+ getNet()->exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
- TEST_F(ReplCoordTest, AwaitReplicationStepDown) {
- // Test that a thread blocked in awaitReplication will be woken up and return NotMaster
- // if the node steps down while it is waiting.
- OperationContextReplMock txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time1));
- getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
- awaiter.reset();
+/**
+ * Used to wait for stepDown() to finish in a separate thread without blocking execution of
+ * the test. To use, set the values of "force", "waitTime", and "stepDownTime", which will be
+ * used as the arguments passed to stepDown, and then call
+ * start(), which will spawn a thread that calls stepDown. No calls may be made
+ * on the StepDownRunner instance between calling start and getResult(). After returning
+ * from getResult(), you can call reset() to allow the StepDownRunner to be reused for another
+ * stepDown call.
+ */
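
In other words, a minimal sketch of the intended call sequence (hypothetical:
setWaitTime() and setStepDownTime() are assumed to exist alongside the
setForce() setter defined below, per the comment above; txn is an
OperationContext from the surrounding test):

    StepDownRunner runner(getReplCoord());
    runner.setForce(false);
    runner.setWaitTime(Milliseconds(0));         // assumed setter
    runner.setStepDownTime(Milliseconds(1000));  // assumed setter
    runner.start(&txn);                  // spawns a thread that calls stepDown
    Status status = runner.getResult();  // joins and returns stepDown's Status
    runner.reset();                      // ready for another stepDown call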
+class StepDownRunner {
+public:
+ StepDownRunner(ReplicationCoordinatorImpl* replCoord)
+ : _replCoord(replCoord),
+ _finished(false),
+ _result(Status::OK()),
+ _force(false),
+ _waitTime(0),
+ _stepDownTime(0) {}
+
+ // may block
+ Status getResult() {
+ _thread->join();
+ ASSERT(_finished);
+ return _result;
}
- class OperationContextNoopWithInterrupt : public OperationContextReplMock {
- public:
-
- OperationContextNoopWithInterrupt() : _opID(0), _interruptOp(false) {}
-
- virtual unsigned int getOpID() const {
- return _opID;
- }
-
- /**
- * Can only be called before any multi-threaded access to this object has begun.
- */
- void setOpID(unsigned int opID) {
- _opID = opID;
- }
-
- virtual void checkForInterrupt() const {
- if (_interruptOp) {
- uasserted(ErrorCodes::Interrupted, "operation was interrupted");
- }
- }
-
- virtual Status checkForInterruptNoAssert() const {
- if (_interruptOp) {
- return Status(ErrorCodes::Interrupted, "operation was interrupted");
- }
- return Status::OK();
- }
-
- /**
- * Can only be called before any multi-threaded access to this object has begun.
- */
- void setInterruptOp(bool interrupt) {
- _interruptOp = interrupt;
- }
-
- private:
- unsigned int _opID;
- bool _interruptOp;
- };
-
- TEST_F(ReplCoordTest, AwaitReplicationInterrupt) {
- // Tests that a thread blocked in awaitReplication can be killed by a killOp operation
- OperationContextNoopWithInterrupt txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "node1") <<
- BSON("_id" << 1 << "host" << "node2") <<
- BSON("_id" << 2 << "host" << "node3"))),
- HostAndPort("node1"));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
-
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 2;
-
- unsigned int opID = 100;
- txn.setOpID(opID);
-
- // 2 nodes waiting for time2
- awaiter.setOpTime(time2);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time1));
-
- txn.setInterruptOp(true);
- getReplCoord()->interrupt(opID);
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::Interrupted, statusAndDur.status);
- awaiter.reset();
+ void start(OperationContext* txn) {
+ ASSERT(!_finished);
+ _thread.reset(new boost::thread(stdx::bind(&StepDownRunner::_stepDown, this, txn)));
}
- class StepDownTest : public ReplCoordTest {
- protected:
- OID myRid;
- OID rid2;
- OID rid3;
-
- private:
- virtual void setUp() {
- ReplCoordTest::setUp();
- init("mySet/test1:1234,test2:1234,test3:1234");
-
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test1", 1234));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- myRid = getReplCoord()->getMyRID();
- rid2 = OID::gen();
- rid3 = OID::gen();
- HandshakeArgs handshake2;
- handshake2.initialize(BSON("handshake" << rid2 <<
- "member" << 1 <<
- "config" << BSON("_id" << 1 << "host" << "test2:1234")));
- HandshakeArgs handshake3;
- handshake3.initialize(BSON("handshake" << rid3 <<
- "member" << 2 <<
- "config" << BSON("_id" << 2 << "host" << "test3:1234")));
- OperationContextNoop txn;
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
- }
- };
-
- TEST_F(StepDownTest, StepDownNotPrimary) {
- OperationContextReplMock txn;
- OpTime optime1(100, 1);
- // All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
-
- Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(0));
- ASSERT_EQUALS(ErrorCodes::NotMaster, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+ void reset() {
+ ASSERT(_finished);
+ _finished = false;
+ _result = Status(ErrorCodes::InternalError, "Result Status never set");
}
- TEST_F(StepDownTest, StepDownTimeoutAcquiringGlobalLock) {
- OperationContextReplMock txn;
- OpTime optime1(100, 1);
- // All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
-
- simulateSuccessfulElection();
-
- // Make sure stepDown cannot grab the global shared lock
- Lock::GlobalWrite lk(txn.lockState());
-
- Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ void setForce(bool force) {
+ _force = force;
}
- TEST_F(StepDownTest, StepDownNoWaiting) {
- OperationContextReplMock txn;
- OpTime optime1(100, 1);
- // All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
-
- simulateSuccessfulElection();
-
- enterNetwork();
- getNet()->runUntil(getNet()->now() + 2000);
- ASSERT(getNet()->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- ReplicationExecutor::RemoteCommandRequest request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(hbArgs.getSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime1);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
- }
- while (getNet()->hasReadyRequests()) {
- getNet()->blackHole(getNet()->getNextReadyRequest());
- }
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
-
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- ASSERT_OK(getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(1000)));
- enterNetwork(); // So we can safely inspect the topology coordinator
- ASSERT_EQUALS(Date_t(getNet()->now().millis + 1000), getTopoCoord().getStepDownTime());
- ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
- exitNetwork();
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+ void setWaitTime(const Milliseconds& waitTime) {
+ _waitTime = waitTime;
}
- TEST_F(ReplCoordTest, StepDownAndBackUpSingleNode) {
- init("mySet");
-
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234"))),
- HostAndPort("test1", 1234));
- OperationContextReplMock txn;
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
-
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- ASSERT_OK(getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000)));
- getNet()->enterNetwork(); // Must do this before inspecting the topocoord
- Date_t stepdownUntil = Date_t(getNet()->now().millis + 1000);
- ASSERT_EQUALS(stepdownUntil, getTopoCoord().getStepDownTime());
- ASSERT_TRUE(getTopoCoord().getMemberState().secondary());
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // Now run time forward and make sure that the node becomes primary again when the stepdown
- // period ends.
- getNet()->runUntil(stepdownUntil);
- ASSERT_EQUALS(stepdownUntil, getNet()->now());
- ASSERT_TRUE(getTopoCoord().getMemberState().primary());
- getNet()->exitNetwork();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ void setStepDownTime(const Milliseconds& stepDownTime) {
+ _stepDownTime = stepDownTime;
}
- /**
-     * Used to wait for stepDown() to finish in a separate thread without blocking execution of
- * the test. To use, set the values of "force", "waitTime", and "stepDownTime", which will be
- * used as the arguments passed to stepDown, and then call
- * start(), which will spawn a thread that calls stepDown. No calls may be made
- * on the StepDownRunner instance between calling start and getResult(). After returning
- * from getResult(), you can call reset() to allow the StepDownRunner to be reused for another
- * stepDown call.
- */
- class StepDownRunner {
- public:
-
- StepDownRunner(ReplicationCoordinatorImpl* replCoord) :
- _replCoord(replCoord), _finished(false), _result(Status::OK()), _force(false),
- _waitTime(0), _stepDownTime(0) {}
-
- // may block
- Status getResult() {
- _thread->join();
- ASSERT(_finished);
- return _result;
- }
-
- void start(OperationContext* txn) {
- ASSERT(!_finished);
- _thread.reset(new boost::thread(stdx::bind(&StepDownRunner::_stepDown,
- this,
- txn)));
- }
-
- void reset() {
- ASSERT(_finished);
- _finished = false;
- _result = Status(ErrorCodes::InternalError, "Result Status never set");
- }
-
- void setForce(bool force) {
- _force = force;
- }
-
- void setWaitTime(const Milliseconds& waitTime) {
- _waitTime = waitTime;
- }
-
- void setStepDownTime(const Milliseconds& stepDownTime) {
- _stepDownTime = stepDownTime;
- }
-
- private:
-
- void _stepDown(OperationContext* txn) {
- _result = _replCoord->stepDown(txn, _force, _waitTime, _stepDownTime);
- _finished = true;
- }
-
- ReplicationCoordinatorImpl* _replCoord;
- bool _finished;
- Status _result;
- boost::scoped_ptr<boost::thread> _thread;
- bool _force;
- Milliseconds _waitTime;
- Milliseconds _stepDownTime;
- };
-
- TEST_F(StepDownTest, StepDownNotCaughtUp) {
- OperationContextReplMock txn;
- OpTime optime1(100, 1);
- OpTime optime2(100, 2);
- // No secondary is caught up
- getReplCoord()->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
-
- // Try to stepDown but time out because no secondaries are caught up
- StepDownRunner runner(getReplCoord());
- runner.setForce(false);
- runner.setWaitTime(Milliseconds(0));
- runner.setStepDownTime(Milliseconds(1000));
-
- simulateSuccessfulElection();
-
- runner.start(&txn);
- Status status = runner.getResult();
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- // Now use "force" to force it to step down even though no one is caught up
- runner.reset();
- getNet()->enterNetwork();
- const Date_t startDate = getNet()->now();
-        while (getNet()->now() < startDate + 1000) {
- while (getNet()->hasReadyRequests()) {
- getNet()->blackHole(getNet()->getNextReadyRequest());
- }
- getNet()->runUntil(startDate + 1000);
- }
- getNet()->exitNetwork();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- runner.setForce(true);
- runner.start(&txn);
- status = runner.getResult();
- ASSERT_OK(status);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
+private:
+ void _stepDown(OperationContext* txn) {
+ _result = _replCoord->stepDown(txn, _force, _waitTime, _stepDownTime);
+ _finished = true;
}
- TEST_F(StepDownTest, StepDownCatchUp) {
- OperationContextReplMock txn;
- OpTime optime1(100, 1);
- OpTime optime2(100, 2);
- // No secondary is caught up
- getReplCoord()->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
-
- // stepDown where the secondary actually has to catch up before the stepDown can succeed
- StepDownRunner runner(getReplCoord());
- runner.setForce(false);
- runner.setWaitTime(Milliseconds(10000));
- runner.setStepDownTime(Milliseconds(60000));
-
- simulateSuccessfulElection();
-
- runner.start(&txn);
-
- // Make a secondary actually catch up
- enterNetwork();
- getNet()->runUntil(getNet()->now() + 2000);
- ASSERT(getNet()->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
- ReplicationExecutor::RemoteCommandRequest request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(hbArgs.getSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
- }
+ ReplicationCoordinatorImpl* _replCoord;
+ bool _finished;
+ Status _result;
+ boost::scoped_ptr<boost::thread> _thread;
+ bool _force;
+ Milliseconds _waitTime;
+ Milliseconds _stepDownTime;
+};
+
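+// A minimal usage sketch (not one of the tests below), assuming a fixture where
+// this node has already been elected primary; "txn" is the test's OperationContext:
+//
+//     StepDownRunner runner(getReplCoord());
+//     runner.setForce(false);
+//     runner.setWaitTime(Milliseconds(0));
+//     runner.setStepDownTime(Milliseconds(1000));
+//     runner.start(&txn);                  // spawns the thread that calls stepDown
+//     Status status = runner.getResult();  // joins the thread; may block
+//     runner.reset();                      // required before reusing the runner
+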
+TEST_F(StepDownTest, StepDownNotCaughtUp) {
+ OperationContextReplMock txn;
+ OpTime optime1(100, 1);
+ OpTime optime2(100, 2);
+ // No secondary is caught up
+ getReplCoord()->setMyLastOptime(optime2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
+
+ // Try to stepDown but time out because no secondaries are caught up
+ StepDownRunner runner(getReplCoord());
+ runner.setForce(false);
+ runner.setWaitTime(Milliseconds(0));
+ runner.setStepDownTime(Milliseconds(1000));
+
+ simulateSuccessfulElection();
+
+ runner.start(&txn);
+ Status status = runner.getResult();
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ // Now use "force" to force it to step down even though no one is caught up
+ runner.reset();
+ getNet()->enterNetwork();
+ const Date_t startDate = getNet()->now();
+    while (getNet()->now() < startDate + 1000) {
while (getNet()->hasReadyRequests()) {
getNet()->blackHole(getNet()->getNextReadyRequest());
}
- getNet()->runReadyNetworkOperations();
- exitNetwork();
-
- ASSERT_OK(runner.getResult());
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+ getNet()->runUntil(startDate + 1000);
}
-
- TEST_F(StepDownTest, InterruptStepDown) {
- OperationContextNoopWithInterrupt txn;
- OpTime optime1(100, 1);
- OpTime optime2(100, 2);
- // No secondary is caught up
- getReplCoord()->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
-
- // stepDown where the secondary actually has to catch up before the stepDown can succeed
- StepDownRunner runner(getReplCoord());
- runner.setForce(false);
- runner.setWaitTime(Milliseconds(10000));
- runner.setStepDownTime(Milliseconds(60000));
-
- simulateSuccessfulElection();
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- runner.start(&txn);
-
- unsigned int opID = 100;
- txn.setOpID(opID);
- txn.setInterruptOp(true);
- getReplCoord()->interrupt(opID);
-
- ASSERT_EQUALS(ErrorCodes::Interrupted, runner.getResult());
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- }
-
- TEST_F(ReplCoordTest, GetReplicationModeNone) {
- init();
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- TEST_F(ReplCoordTest, GetReplicationModeMaster) {
- // modeMasterSlave if master set
- ReplSettings settings;
- settings.master = true;
- init(settings);
- ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave,
- getReplCoord()->getReplicationMode());
- }
-
- TEST_F(ReplCoordTest, GetReplicationModeSlave) {
- // modeMasterSlave if the slave flag was set
- ReplSettings settings;
- settings.slave = SimpleSlave;
- init(settings);
- ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave,
- getReplCoord()->getReplicationMode());
+ getNet()->exitNetwork();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+ runner.setForce(true);
+ runner.start(&txn);
+ status = runner.getResult();
+ ASSERT_OK(status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
+TEST_F(StepDownTest, StepDownCatchUp) {
+ OperationContextReplMock txn;
+ OpTime optime1(100, 1);
+ OpTime optime2(100, 2);
+ // No secondary is caught up
+ getReplCoord()->setMyLastOptime(optime2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
+
+ // stepDown where the secondary actually has to catch up before the stepDown can succeed
+ StepDownRunner runner(getReplCoord());
+ runner.setForce(false);
+ runner.setWaitTime(Milliseconds(10000));
+ runner.setStepDownTime(Milliseconds(60000));
+
+ simulateSuccessfulElection();
+
+ runner.start(&txn);
+
+ // Make a secondary actually catch up
+ enterNetwork();
+ getNet()->runUntil(getNet()->now() + 2000);
+ ASSERT(getNet()->hasReadyRequests());
+ NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+ ReplicationExecutor::RemoteCommandRequest request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(hbArgs.getSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(hbArgs.getConfigVersion());
+ hbResp.setOpTime(optime2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(respObj.obj()));
}
-
- TEST_F(ReplCoordTest, GetReplicationModeRepl) {
- // modeReplSet if the set name was supplied.
- ReplSettings settings;
- settings.replSet = "mySet/node1:12345";
- init(settings);
- ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0 ))),
- HostAndPort("node1", 12345));
+ while (getNet()->hasReadyRequests()) {
+ getNet()->blackHole(getNet()->getNextReadyRequest());
}
-
- TEST_F(ReplCoordTest, TestPrepareReplSetUpdatePositionCommand) {
- OperationContextNoop txn;
- init("mySet/test1:1234,test2:1234,test3:1234");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test1", 1234));
- OID myRid = getReplCoord()->getMyRID();
- OID rid2 = OID::gen();
- OID rid3 = OID::gen();
- HandshakeArgs handshake2;
- handshake2.initialize(BSON("handshake" << rid2 <<
- "member" << 1 <<
- "config" << BSON("_id" << 1 << "host" << "test2:1234")));
- HandshakeArgs handshake3;
- handshake3.initialize(BSON("handshake" << rid3 <<
- "member" << 2 <<
- "config" << BSON("_id" << 2 << "host" << "test3:1234")));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
- OpTime optime1(100, 1);
- OpTime optime2(100, 2);
- OpTime optime3(2, 1);
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime2));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime3));
-
- // Check that the proper BSON is generated for the replSetUpdatePositionCommand
- BSONObjBuilder cmdBuilder;
- getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder);
- BSONObj cmd = cmdBuilder.done();
-
- ASSERT_EQUALS(2, cmd.nFields());
- ASSERT_EQUALS("replSetUpdatePosition", cmd.firstElement().fieldNameStringData());
-
- std::set<OID> rids;
- BSONForEach(entryElement, cmd["optimes"].Obj()) {
- BSONObj entry = entryElement.Obj();
- OID rid = entry["_id"].OID();
- rids.insert(rid);
- if (rid == myRid) {
- ASSERT_EQUALS(optime1, entry["optime"]._opTime());
- } else if (rid == rid2) {
- ASSERT_EQUALS(optime2, entry["optime"]._opTime());
- } else {
- ASSERT_EQUALS(rid3, rid);
- ASSERT_EQUALS(optime3, entry["optime"]._opTime());
- }
+ getNet()->runReadyNetworkOperations();
+ exitNetwork();
+
+ ASSERT_OK(runner.getResult());
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+}
+
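+// The mock-network exchange above follows the same pattern throughout these
+// tests; a condensed sketch using the fixture helpers seen in this file
+// ("response" stands for whatever BSON reply the test builds):
+//
+//     enterNetwork();                              // take control of the mock network
+//     getNet()->runUntil(getNet()->now() + 2000);  // advance the simulated clock
+//     NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
+//     getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(response));
+//     getNet()->runReadyNetworkOperations();       // deliver the scheduled response
+//     exitNetwork();
+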
+TEST_F(StepDownTest, InterruptStepDown) {
+ OperationContextNoopWithInterrupt txn;
+ OpTime optime1(100, 1);
+ OpTime optime2(100, 2);
+ // No secondary is caught up
+ getReplCoord()->setMyLastOptime(optime2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime1));
+
+ // stepDown where the secondary actually has to catch up before the stepDown can succeed
+ StepDownRunner runner(getReplCoord());
+ runner.setForce(false);
+ runner.setWaitTime(Milliseconds(10000));
+ runner.setStepDownTime(Milliseconds(60000));
+
+ simulateSuccessfulElection();
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ runner.start(&txn);
+
+ unsigned int opID = 100;
+ txn.setOpID(opID);
+ txn.setInterruptOp(true);
+ getReplCoord()->interrupt(opID);
+
+ ASSERT_EQUALS(ErrorCodes::Interrupted, runner.getResult());
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeNone) {
+ init();
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeMaster) {
+ // modeMasterSlave if master set
+ ReplSettings settings;
+ settings.master = true;
+ init(settings);
+ ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeSlave) {
+ // modeMasterSlave if the slave flag was set
+ ReplSettings settings;
+ settings.slave = SimpleSlave;
+ init(settings);
+ ASSERT_EQUALS(ReplicationCoordinator::modeMasterSlave, getReplCoord()->getReplicationMode());
+}
+
+TEST_F(ReplCoordTest, GetReplicationModeRepl) {
+ // modeReplSet if the set name was supplied.
+ ReplSettings settings;
+ settings.replSet = "mySet/node1:12345";
+ init(settings);
+ ASSERT_EQUALS(ReplicationCoordinator::modeReplSet, getReplCoord()->getReplicationMode());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0))),
+ HostAndPort("node1", 12345));
+}
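+
+// Taken together, the GetReplicationMode* tests above pin down this
+// settings-to-mode mapping (sketch; values as used in the tests):
+//
+//     ReplSettings settings;
+//     settings.master = true;                  // -> modeMasterSlave
+//     settings.slave = SimpleSlave;            // -> modeMasterSlave
+//     settings.replSet = "mySet/node1:12345";  // -> modeReplSet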
+
+TEST_F(ReplCoordTest, TestPrepareReplSetUpdatePositionCommand) {
+ OperationContextNoop txn;
+ init("mySet/test1:1234,test2:1234,test3:1234");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test1", 1234));
+ OID myRid = getReplCoord()->getMyRID();
+ OID rid2 = OID::gen();
+ OID rid3 = OID::gen();
+ HandshakeArgs handshake2;
+ handshake2.initialize(
+ BSON("handshake" << rid2 << "member" << 1 << "config" << BSON("_id" << 1 << "host"
+ << "test2:1234")));
+ HandshakeArgs handshake3;
+ handshake3.initialize(
+ BSON("handshake" << rid3 << "member" << 2 << "config" << BSON("_id" << 2 << "host"
+ << "test3:1234")));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
+ OpTime optime1(100, 1);
+ OpTime optime2(100, 2);
+ OpTime optime3(2, 1);
+ getReplCoord()->setMyLastOptime(optime1);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid2, optime2));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(rid3, optime3));
+
+ // Check that the proper BSON is generated for the replSetUpdatePositionCommand
+ BSONObjBuilder cmdBuilder;
+ getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder);
+ BSONObj cmd = cmdBuilder.done();
+
+ ASSERT_EQUALS(2, cmd.nFields());
+ ASSERT_EQUALS("replSetUpdatePosition", cmd.firstElement().fieldNameStringData());
+
+ std::set<OID> rids;
+ BSONForEach(entryElement, cmd["optimes"].Obj()) {
+ BSONObj entry = entryElement.Obj();
+ OID rid = entry["_id"].OID();
+ rids.insert(rid);
+ if (rid == myRid) {
+ ASSERT_EQUALS(optime1, entry["optime"]._opTime());
+ } else if (rid == rid2) {
+ ASSERT_EQUALS(optime2, entry["optime"]._opTime());
+ } else {
+ ASSERT_EQUALS(rid3, rid);
+ ASSERT_EQUALS(optime3, entry["optime"]._opTime());
}
- ASSERT_EQUALS(3U, rids.size()); // Make sure we saw all 3 nodes
}
-
- TEST_F(ReplCoordTest, TestHandshakes) {
- init("mySet/test1:1234,test2:1234,test3:1234");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test2", 1234));
- // Test generating basic handshake with no chaining
- std::vector<BSONObj> handshakes;
- OperationContextNoop txn;
- getReplCoord()->prepareReplSetUpdatePositionCommandHandshakes(&handshakes);
- ASSERT_EQUALS(1U, handshakes.size());
- BSONObj handshakeCmd = handshakes[0];
+ ASSERT_EQUALS(3U, rids.size()); // Make sure we saw all 3 nodes
+}
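+
+// For reference, the command asserted on above has this shape, in the builder
+// notation used throughout this file (rid and optime are illustrative
+// placeholders for the values tracked by the coordinator):
+//
+//     BSON("replSetUpdatePosition" << 1
+//          << "optimes" << BSON_ARRAY(BSON("_id" << rid << "optime" << optime)
+//                                     << ...))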
+
+TEST_F(ReplCoordTest, TestHandshakes) {
+ init("mySet/test1:1234,test2:1234,test3:1234");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test2", 1234));
+ // Test generating basic handshake with no chaining
+ std::vector<BSONObj> handshakes;
+ OperationContextNoop txn;
+ getReplCoord()->prepareReplSetUpdatePositionCommandHandshakes(&handshakes);
+ ASSERT_EQUALS(1U, handshakes.size());
+ BSONObj handshakeCmd = handshakes[0];
+ ASSERT_EQUALS(2, handshakeCmd.nFields());
+ ASSERT_EQUALS("replSetUpdatePosition", handshakeCmd.firstElement().fieldNameStringData());
+ BSONObj handshake = handshakeCmd["handshake"].Obj();
+ ASSERT_EQUALS(getReplCoord()->getMyRID(), handshake["handshake"].OID());
+ ASSERT_EQUALS(1, handshake["member"].Int());
+ handshakes.clear();
+
+ // Have other nodes handshake us and make sure we process it right.
+ OID slave1RID = OID::gen();
+ OID slave2RID = OID::gen();
+ HandshakeArgs slave1Handshake;
+ slave1Handshake.initialize(
+ BSON("handshake" << slave1RID << "member" << 0 << "config" << BSON("_id" << 0 << "host"
+ << "test1:1234")));
+ HandshakeArgs slave2Handshake;
+ slave2Handshake.initialize(
+ BSON("handshake" << slave2RID << "member" << 2 << "config" << BSON("_id" << 2 << "host"
+ << "test2:1234")));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, slave1Handshake));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, slave2Handshake));
+
+ getReplCoord()->prepareReplSetUpdatePositionCommandHandshakes(&handshakes);
+ ASSERT_EQUALS(3U, handshakes.size());
+ std::set<OID> rids;
+ for (std::vector<BSONObj>::iterator it = handshakes.begin(); it != handshakes.end(); ++it) {
+ BSONObj handshakeCmd = *it;
ASSERT_EQUALS(2, handshakeCmd.nFields());
ASSERT_EQUALS("replSetUpdatePosition", handshakeCmd.firstElement().fieldNameStringData());
- BSONObj handshake = handshakeCmd["handshake"].Obj();
- ASSERT_EQUALS(getReplCoord()->getMyRID(), handshake["handshake"].OID());
- ASSERT_EQUALS(1, handshake["member"].Int());
- handshakes.clear();
-
- // Have other nodes handshake us and make sure we process it right.
- OID slave1RID = OID::gen();
- OID slave2RID = OID::gen();
- HandshakeArgs slave1Handshake;
- slave1Handshake.initialize(BSON("handshake" << slave1RID <<
- "member" << 0 <<
- "config" << BSON("_id" << 0 << "host" << "test1:1234")));
- HandshakeArgs slave2Handshake;
- slave2Handshake.initialize(BSON("handshake" << slave2RID <<
- "member" << 2 <<
- "config" << BSON("_id" << 2 << "host" << "test2:1234")));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, slave1Handshake));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, slave2Handshake));
-
- getReplCoord()->prepareReplSetUpdatePositionCommandHandshakes(&handshakes);
- ASSERT_EQUALS(3U, handshakes.size());
- std::set<OID> rids;
- for (std::vector<BSONObj>::iterator it = handshakes.begin(); it != handshakes.end(); ++it) {
- BSONObj handshakeCmd = *it;
- ASSERT_EQUALS(2, handshakeCmd.nFields());
- ASSERT_EQUALS("replSetUpdatePosition",
- handshakeCmd.firstElement().fieldNameStringData());
-
- BSONObj handshake = handshakeCmd["handshake"].Obj();
- OID rid = handshake["handshake"].OID();
- rids.insert(rid);
- if (rid == getReplCoord()->getMyRID()) {
- ASSERT_EQUALS(1, handshake["member"].Int());
- } else if (rid == slave1RID) {
- ASSERT_EQUALS(0, handshake["member"].Int());
- } else {
- ASSERT_EQUALS(slave2RID, rid);
- ASSERT_EQUALS(2, handshake["member"].Int());
- }
- }
- ASSERT_EQUALS(3U, rids.size()); // Make sure we saw all 3 nodes
- }
-
- TEST_F(ReplCoordTest, SetMaintenanceMode) {
- init("mySet/test1:1234,test2:1234,test3:1234");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test1:1234") <<
- BSON("_id" << 1 << "host" << "test2:1234") <<
- BSON("_id" << 2 << "host" << "test3:1234"))),
- HostAndPort("test2", 1234));
- OperationContextNoop txn;
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
-
- // Can't unset maintenance mode if it was never set to begin with.
- Status status = getReplCoord()->setMaintenanceMode(false);
- ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // valid set
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
-
- // If we go into rollback while in maintenance mode, our state changes to RS_ROLLBACK.
- getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
-
- // When we go back to SECONDARY, we still observe RECOVERING because of maintenance mode.
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
-
- // Can set multiple times
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
-
-        // Need to unset as many times as it was set
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- status = getReplCoord()->setMaintenanceMode(false);
- // fourth one fails b/c we only set three times
- ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
- // Unsetting maintenance mode changes our state to secondary if maintenance mode was
-        // the only thing keeping us out of it.
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // From rollback, entering and exiting maintenance mode doesn't change perceived
- // state.
- getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
-
- // Rollback is sticky even if entered while in maintenance mode.
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
- getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- // Can't modify maintenance mode when PRIMARY
- simulateSuccessfulElection();
-
- status = getReplCoord()->setMaintenanceMode(true);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, status);
- ASSERT_TRUE(getReplCoord()->getMemberState().primary());
-
- simulateStepDownOnIsolation();
-
- status = getReplCoord()->setMaintenanceMode(false);
- ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
- ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
- ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
- }
-
- TEST_F(ReplCoordTest, GetHostsWrittenToReplSet) {
- HostAndPort myHost("node1:12345");
- HostAndPort client1Host("node2:12345");
- HostAndPort client2Host("node3:12345") ;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << myHost.toString()) <<
- BSON("_id" << 1 << "host" << client1Host.toString()) <<
- BSON("_id" << 2 << "host" << client2Host.toString()))),
- HostAndPort("node1", 12345));
- OperationContextNoop txn;
-
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
-
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
-
- std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(1U, caughtUpHosts.size());
- ASSERT_EQUALS(myHost, caughtUpHosts[0]);
-
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time2));
- caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(2U, caughtUpHosts.size());
- if (myHost == caughtUpHosts[0]) {
- ASSERT_EQUALS(client2Host, caughtUpHosts[1]);
- }
- else {
- ASSERT_EQUALS(client2Host, caughtUpHosts[0]);
- ASSERT_EQUALS(myHost, caughtUpHosts[1]);
- }
- }
- TEST_F(ReplCoordTest, GetHostsWrittenToMasterSlave) {
- ReplSettings settings;
- settings.master = true;
- init(settings);
- HostAndPort clientHost("node2:12345");
- OperationContextNoop txn;
-
- OID client = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
-
- getExternalState()->setClientHostAndPort(clientHost);
- HandshakeArgs handshake;
- ASSERT_OK(handshake.initialize(BSON("handshake" << client)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
-
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time1));
-
- std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(0U, caughtUpHosts.size()); // self doesn't get included in master-slave
-
- ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time2));
- caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
- ASSERT_EQUALS(1U, caughtUpHosts.size());
- ASSERT_EQUALS(clientHost, caughtUpHosts[0]);
- }
-
- TEST_F(ReplCoordTest, GetOtherNodesInReplSetNoConfig) {
- start();
- ASSERT_EQUALS(0U, getReplCoord()->getOtherNodesInReplSet().size());
- }
-
- TEST_F(ReplCoordTest, GetOtherNodesInReplSet) {
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "h1") <<
- BSON("_id" << 1 << "host" << "h2") <<
- BSON("_id" << 2 <<
- "host" << "h3" <<
- "priority" << 0 <<
- "hidden" << true))),
- HostAndPort("h1"));
-
- std::vector<HostAndPort> otherNodes = getReplCoord()->getOtherNodesInReplSet();
- ASSERT_EQUALS(2U, otherNodes.size());
- if (otherNodes[0] == HostAndPort("h2")) {
- ASSERT_EQUALS(HostAndPort("h3"), otherNodes[1]);
- }
- else {
- ASSERT_EQUALS(HostAndPort("h3"), otherNodes[0]);
- ASSERT_EQUALS(HostAndPort("h2"), otherNodes[0]);
- }
- }
-
- TEST_F(ReplCoordTest, IsMasterNoConfig) {
- start();
- IsMasterResponse response;
-
- getReplCoord()->fillIsMasterForReplSet(&response);
- ASSERT_FALSE(response.isConfigSet());
- BSONObj responseObj = response.toBSON();
- ASSERT_FALSE(responseObj["ismaster"].Bool());
- ASSERT_FALSE(responseObj["secondary"].Bool());
- ASSERT_TRUE(responseObj["isreplicaset"].Bool());
- ASSERT_EQUALS("Does not have a valid replica set config", responseObj["info"].String());
-
- IsMasterResponse roundTripped;
- ASSERT_OK(roundTripped.initialize(response.toBSON()));
- }
-
- TEST_F(ReplCoordTest, IsMaster) {
- HostAndPort h1("h1");
- HostAndPort h2("h2");
- HostAndPort h3("h3");
- HostAndPort h4("h4");
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << h1.toString()) <<
- BSON("_id" << 1 << "host" << h2.toString()) <<
- BSON("_id" << 2 <<
- "host" << h3.toString() <<
- "arbiterOnly" << true) <<
- BSON("_id" << 3 <<
- "host" << h4.toString() <<
- "priority" << 0 <<
- "tags" << BSON("key1" << "value1" <<
- "key2" << "value2")))),
- h4);
- getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
-
- IsMasterResponse response;
- getReplCoord()->fillIsMasterForReplSet(&response);
-
- ASSERT_EQUALS("mySet", response.getReplSetName());
- ASSERT_EQUALS(2, response.getReplSetVersion());
- ASSERT_FALSE(response.isMaster());
- ASSERT_TRUE(response.isSecondary());
- // TODO(spencer): test that response includes current primary when there is one.
- ASSERT_FALSE(response.isArbiterOnly());
- ASSERT_TRUE(response.isPassive());
- ASSERT_FALSE(response.isHidden());
- ASSERT_TRUE(response.shouldBuildIndexes());
- ASSERT_EQUALS(0, response.getSlaveDelay().total_seconds());
- ASSERT_EQUALS(h4, response.getMe());
-
- std::vector<HostAndPort> hosts = response.getHosts();
- ASSERT_EQUALS(2U, hosts.size());
- if (hosts[0] == h1) {
- ASSERT_EQUALS(h2, hosts[1]);
- }
- else {
- ASSERT_EQUALS(h2, hosts[0]);
- ASSERT_EQUALS(h1, hosts[1]);
+ BSONObj handshake = handshakeCmd["handshake"].Obj();
+ OID rid = handshake["handshake"].OID();
+ rids.insert(rid);
+ if (rid == getReplCoord()->getMyRID()) {
+ ASSERT_EQUALS(1, handshake["member"].Int());
+ } else if (rid == slave1RID) {
+ ASSERT_EQUALS(0, handshake["member"].Int());
+ } else {
+ ASSERT_EQUALS(slave2RID, rid);
+ ASSERT_EQUALS(2, handshake["member"].Int());
}
- std::vector<HostAndPort> passives = response.getPassives();
- ASSERT_EQUALS(1U, passives.size());
- ASSERT_EQUALS(h4, passives[0]);
- std::vector<HostAndPort> arbiters = response.getArbiters();
- ASSERT_EQUALS(1U, arbiters.size());
- ASSERT_EQUALS(h3, arbiters[0]);
-
- unordered_map<std::string, std::string> tags = response.getTags();
- ASSERT_EQUALS(2U, tags.size());
- ASSERT_EQUALS("value1", tags["key1"]);
- ASSERT_EQUALS("value2", tags["key2"]);
-
- IsMasterResponse roundTripped;
- ASSERT_OK(roundTripped.initialize(response.toBSON()));
- }
-
- TEST_F(ReplCoordTest, ShutDownBeforeStartUpFinished) {
- init();
- startCapturingLogMessages();
- getReplCoord()->shutdown();
- stopCapturingLogMessages();
- ASSERT_EQUALS(1,
- countLogLinesContaining("shutdown() called before startReplication() finished"));
- }
-
- TEST_F(ReplCoordTest, UpdatePositionWithRIDTest) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3) <<
- BSON("host" << "node5:12345" << "_id" << 4))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- OID selfRID = getReplCoord()->getMyRID();
- OID client1 = OID::gen();
- OID client2 = OID::gen();
- OID client3 = OID::gen();
- OID client4 = OID::gen();
- OpTime time1(100, 1);
- OpTime time2(100, 2);
- OpTime staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time2);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 2;
-
- // receive an updateposition for 3 members, with new enough time, but no handshakes yet
- UpdatePositionArgs args;
- ASSERT_OK(args.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("_id" << client1 << "optime" << time1) <<
- BSON("_id" << client2 << "optime" << time1) <<
- BSON("_id" << client3 << "optime" << time1)))));
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- getReplCoord()->processReplSetUpdatePosition(args));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
-
- // handshake for middle of three nodes, updatePosition should end early, not updating
- // any members, write concern 2 should still fail
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- getReplCoord()->processReplSetUpdatePosition(args));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
-
- // handshake for first of three nodes, updatePosition should end early, but the first two
- // should get through and writeconcern <=3 should pass, but 4 should fail
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- getReplCoord()->processReplSetUpdatePosition(args));
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
- writeConcern.wNumNodes = 3;
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
- writeConcern.wNumNodes = 4;
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
-
- // receive a stale value for ourself, should not cause progress to go backwards
- HandshakeArgs handshake3;
- ASSERT_OK(handshake3.initialize(BSON("handshake" << client3 << "member" << 3)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
- HandshakeArgs handshake4;
- ASSERT_OK(handshake4.initialize(BSON("handshake" << client4 << "member" << 4)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake4));
- UpdatePositionArgs args2;
- ASSERT_OK(args2.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("_id" << selfRID << "optime" << staleTime) <<
- BSON("_id" << client3 << "optime" << time2) <<
- BSON("_id" << client4 << "optime" << time2)))));
- ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args2));
- // all nodes should have through time1 and three should have through time2
- writeConcern.wNumNodes = 5;
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
- writeConcern.wNumNodes = 3;
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
- writeConcern.wNumNodes = 4;
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive a stale value for another, should not cause progress to go backwards
- UpdatePositionArgs args3;
- ASSERT_OK(args3.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("_id" << client1 << "optime" << time2) <<
- BSON("_id" << client2 << "optime" << time2) <<
- BSON("_id" << client3 << "optime" << staleTime)))));
- ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args3));
- // all nodes should have through time2
- writeConcern.wNumNodes = 5;
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
- }
-
- TEST_F(ReplCoordTest, UpdatePositionWithConfigVersionAndMemberIdTest) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 0));
- simulateSuccessfulElection();
-
- OpTime time1(100, 1);
- OpTime time2(100, 2);
- OpTime staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time1);
-
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern.wNumNodes = 1;
-
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive updatePosition containing ourself, should not process the update for self
- UpdatePositionArgs args;
- ASSERT_OK(args.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 2 <<
- "memberId" << 0 <<
- "optime" << time2)))));
-
- ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive updatePosition with incorrect config version
- UpdatePositionArgs args2;
- ASSERT_OK(args2.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 3 <<
- "memberId" << 1 <<
- "optime" << time2)))));
-
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
- getReplCoord()->processReplSetUpdatePosition(args2));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive updatePosition with nonexistent member id
- UpdatePositionArgs args3;
- ASSERT_OK(args3.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 2 <<
- "memberId" << 9 <<
- "optime" << time2)))));
-
- ASSERT_EQUALS(ErrorCodes::NodeNotFound,
- getReplCoord()->processReplSetUpdatePosition(args3));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- // receive a good update position
- getReplCoord()->setMyLastOptime(time2);
- UpdatePositionArgs args4;
- ASSERT_OK(args4.initialize(BSON("replSetUpdatePosition" << 1 <<
- "optimes" << BSON_ARRAY(
- BSON("cfgver" << 2 <<
- "memberId" << 1 <<
- "optime" << time2) <<
- BSON("cfgver" << 2 <<
- "memberId" << 2 <<
- "optime" << time2)))));
-
- ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args4));
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
-
- writeConcern.wNumNodes = 3;
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
- }
-
- void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 <<
- "host" << "node1:12345" <<
- "priority" << 3) <<
- BSON("_id" << 1 << "host" << "node2:12345") <<
- BSON("_id" << 2 << "host" << "node3:12345")));
- *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
}
-
- TEST_F(ReplCoordTest, AwaitReplicationReconfigSimple) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 2));
- simulateSuccessfulElection();
-
- OID selfRID = getReplCoord()->getMyRID();
- OID node2 = OID::gen();
- OID node3 = OID::gen();
- OpTime time(100, 2);
-
- HandshakeArgs handshake;
- ASSERT_OK(handshake.initialize(BSON("handshake" << node2 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
- ASSERT_OK(handshake.initialize(BSON("handshake" << node3 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
-
- // 3 nodes waiting for time
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 3;
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
- awaiter.setOpTime(time);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
-
- // reconfig
- Status status(ErrorCodes::InternalError, "Not Set");
- boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
-
- // satisfy write concern
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(selfRID, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(node2, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(node3, time));
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
+ ASSERT_EQUALS(3U, rids.size()); // Make sure we saw all 3 nodes
+}
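+
+// Each handshake command checked above is a two-field document; a sketch in
+// the same builder notation (rid and memberId are illustrative placeholders):
+//
+//     BSON("replSetUpdatePosition" << 1
+//          << "handshake" << BSON("handshake" << rid << "member" << memberId))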
+
+TEST_F(ReplCoordTest, SetMaintenanceMode) {
+ init("mySet/test1:1234,test2:1234,test3:1234");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234"))),
+ HostAndPort("test2", 1234));
+ OperationContextNoop txn;
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+
+ // Can't unset maintenance mode if it was never set to begin with.
+ Status status = getReplCoord()->setMaintenanceMode(false);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // valid set
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
+
+ // If we go into rollback while in maintenance mode, our state changes to RS_ROLLBACK.
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+
+ // When we go back to SECONDARY, we still observe RECOVERING because of maintenance mode.
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
+
+ // Can set multiple times
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+
+    // Need to unset as many times as it was set
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ status = getReplCoord()->setMaintenanceMode(false);
+ // fourth one fails b/c we only set three times
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ // Unsetting maintenance mode changes our state to secondary if maintenance mode was
+    // the only thing keeping us out of it.
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // From rollback, entering and exiting maintenance mode doesn't change perceived
+ // state.
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+
+ // Rollback is sticky even if entered while in maintenance mode.
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
+ getReplCoord()->setFollowerMode(MemberState::RS_ROLLBACK);
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+ ASSERT_TRUE(getReplCoord()->getMemberState().rollback());
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ // Can't modify maintenance mode when PRIMARY
+ simulateSuccessfulElection();
+
+ status = getReplCoord()->setMaintenanceMode(true);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, status);
+ ASSERT_TRUE(getReplCoord()->getMemberState().primary());
+
+ simulateStepDownOnIsolation();
+
+ status = getReplCoord()->setMaintenanceMode(false);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
+ ASSERT_OK(getReplCoord()->setMaintenanceMode(false));
+}
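+
+// Maintenance mode is counted rather than boolean, as exercised above; a
+// minimal sketch of the counting behavior for a SECONDARY node:
+//
+//     ASSERT_OK(getReplCoord()->setMaintenanceMode(true));   // count 0 -> 1, RECOVERING
+//     ASSERT_OK(getReplCoord()->setMaintenanceMode(true));   // count 1 -> 2
+//     ASSERT_OK(getReplCoord()->setMaintenanceMode(false));  // count 2 -> 1
+//     ASSERT_OK(getReplCoord()->setMaintenanceMode(false));  // count 1 -> 0, SECONDARY
+//     ASSERT_EQUALS(ErrorCodes::OperationFailed,
+//                   getReplCoord()->setMaintenanceMode(false));  // nothing left to unset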
+
+TEST_F(ReplCoordTest, GetHostsWrittenToReplSet) {
+ HostAndPort myHost("node1:12345");
+ HostAndPort client1Host("node2:12345");
+ HostAndPort client2Host("node3:12345");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host" << myHost.toString())
+ << BSON("_id" << 1 << "host" << client1Host.toString())
+ << BSON("_id" << 2 << "host" << client2Host.toString()))),
+ HostAndPort("node1", 12345));
+ OperationContextNoop txn;
+
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time1));
+
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(1U, caughtUpHosts.size());
+ ASSERT_EQUALS(myHost, caughtUpHosts[0]);
+
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time2));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(2U, caughtUpHosts.size());
+ if (myHost == caughtUpHosts[0]) {
+ ASSERT_EQUALS(client2Host, caughtUpHosts[1]);
+ } else {
+ ASSERT_EQUALS(client2Host, caughtUpHosts[0]);
+ ASSERT_EQUALS(myHost, caughtUpHosts[1]);
}
-
- void doReplSetReconfigToFewer(ReplicationCoordinatorImpl* replCoord, Status* status) {
- OperationContextNoop txn;
- BSONObjBuilder garbage;
- ReplSetReconfigArgs args;
- args.force = false;
- args.newConfigObj = BSON("_id" << "mySet" <<
- "version" << 3 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "node1:12345") <<
- BSON("_id" << 2 << "host" << "node3:12345")));
- *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, GetHostsWrittenToMasterSlave) {
+ ReplSettings settings;
+ settings.master = true;
+ init(settings);
+ HostAndPort clientHost("node2:12345");
+ OperationContextNoop txn;
+
+ OID client = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+
+ getExternalState()->setClientHostAndPort(clientHost);
+ HandshakeArgs handshake;
+ ASSERT_OK(handshake.initialize(BSON("handshake" << client)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+
+ getReplCoord()->setMyLastOptime(time2);
+ ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time1));
+
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(0U, caughtUpHosts.size()); // self doesn't get included in master-slave
+
+ ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time2));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_EQUALS(1U, caughtUpHosts.size());
+ ASSERT_EQUALS(clientHost, caughtUpHosts[0]);
+}
+
+TEST_F(ReplCoordTest, GetOtherNodesInReplSetNoConfig) {
+ start();
+ ASSERT_EQUALS(0U, getReplCoord()->getOtherNodesInReplSet().size());
+}
+
+TEST_F(ReplCoordTest, GetOtherNodesInReplSet) {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "h1")
+ << BSON("_id" << 1 << "host"
+ << "h2")
+ << BSON("_id" << 2 << "host"
+ << "h3"
+ << "priority" << 0 << "hidden" << true))),
+ HostAndPort("h1"));
+
+ std::vector<HostAndPort> otherNodes = getReplCoord()->getOtherNodesInReplSet();
+ ASSERT_EQUALS(2U, otherNodes.size());
+ if (otherNodes[0] == HostAndPort("h2")) {
+ ASSERT_EQUALS(HostAndPort("h3"), otherNodes[1]);
+ } else {
+ ASSERT_EQUALS(HostAndPort("h3"), otherNodes[0]);
+ ASSERT_EQUALS(HostAndPort("h2"), otherNodes[0]);
}
-
- TEST_F(ReplCoordTest, AwaitReplicationReconfigNodeCountExceedsNumberOfNodes) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 2));
- simulateSuccessfulElection();
-
- OID node2 = OID::gen();
- OID node3 = OID::gen();
- OpTime time(100, 2);
-
- HandshakeArgs handshake;
- ASSERT_OK(handshake.initialize(BSON("handshake" << node2 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
- ASSERT_OK(handshake.initialize(BSON("handshake" << node3 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
-
- // 3 nodes waiting for time
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wNumNodes = 3;
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
- awaiter.setOpTime(time);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
-
- // reconfig to fewer nodes
- Status status(ErrorCodes::InternalError, "Not Set");
- boost::thread reconfigThread(stdx::bind(doReplSetReconfigToFewer, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
- std::cout << "asdf" << std::endl;
-
-        // write concern feasibility should be reevaluated and an error should be returned
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDur.status);
- awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, IsMasterNoConfig) {
+ start();
+ IsMasterResponse response;
+
+ getReplCoord()->fillIsMasterForReplSet(&response);
+ ASSERT_FALSE(response.isConfigSet());
+ BSONObj responseObj = response.toBSON();
+ ASSERT_FALSE(responseObj["ismaster"].Bool());
+ ASSERT_FALSE(responseObj["secondary"].Bool());
+ ASSERT_TRUE(responseObj["isreplicaset"].Bool());
+ ASSERT_EQUALS("Does not have a valid replica set config", responseObj["info"].String());
+
+ IsMasterResponse roundTripped;
+ ASSERT_OK(roundTripped.initialize(response.toBSON()));
+}
+
+TEST_F(ReplCoordTest, IsMaster) {
+ HostAndPort h1("h1");
+ HostAndPort h2("h2");
+ HostAndPort h3("h3");
+ HostAndPort h4("h4");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host" << h1.toString())
+ << BSON("_id" << 1 << "host" << h2.toString())
+ << BSON("_id" << 2 << "host" << h3.toString() << "arbiterOnly" << true)
+ << BSON("_id" << 3 << "host" << h4.toString() << "priority" << 0
+ << "tags" << BSON("key1"
+ << "value1"
+ << "key2"
+ << "value2")))),
+ h4);
+ getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+
+ IsMasterResponse response;
+ getReplCoord()->fillIsMasterForReplSet(&response);
+
+ ASSERT_EQUALS("mySet", response.getReplSetName());
+ ASSERT_EQUALS(2, response.getReplSetVersion());
+ ASSERT_FALSE(response.isMaster());
+ ASSERT_TRUE(response.isSecondary());
+ // TODO(spencer): test that response includes current primary when there is one.
+ ASSERT_FALSE(response.isArbiterOnly());
+ ASSERT_TRUE(response.isPassive());
+ ASSERT_FALSE(response.isHidden());
+ ASSERT_TRUE(response.shouldBuildIndexes());
+ ASSERT_EQUALS(0, response.getSlaveDelay().total_seconds());
+ ASSERT_EQUALS(h4, response.getMe());
+
+ std::vector<HostAndPort> hosts = response.getHosts();
+ ASSERT_EQUALS(2U, hosts.size());
+ if (hosts[0] == h1) {
+ ASSERT_EQUALS(h2, hosts[1]);
+ } else {
+ ASSERT_EQUALS(h2, hosts[0]);
+ ASSERT_EQUALS(h1, hosts[1]);
}
-
- TEST_F(ReplCoordTest, AwaitReplicationReconfigToSmallerMajority) {
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" << "_id" << 3) <<
- BSON("host" << "node5:12345" << "_id" << 4))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(100, 1));
- simulateSuccessfulElection();
-
- OID node2 = OID::gen();
- OID node3 = OID::gen();
- OpTime time(100, 2);
-
- HandshakeArgs handshake;
- ASSERT_OK(handshake.initialize(BSON("handshake" << node2 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
- ASSERT_OK(handshake.initialize(BSON("handshake" << node3 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(node2, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(node3, time));
-
- // majority nodes waiting for time
- WriteConcernOptions writeConcern;
- writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wMode = "majority";
-
- ReplicationAwaiter awaiter(getReplCoord(), &txn);
- awaiter.setOpTime(time);
- awaiter.setWriteConcern(writeConcern);
- awaiter.start(&txn);
-
- // demonstrate that majority cannot currently be satisfied
- WriteConcernOptions writeConcern2;
- writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern2.wMode = "majority";
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time, writeConcern2).status);
-
- // reconfig to three nodes
- Status status(ErrorCodes::InternalError, "Not Set");
- boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
-
- NetworkInterfaceMock* net = getNet();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- repl::ReplSetHeartbeatArgs hbArgs;
- ASSERT_OK(hbArgs.initialize(request.cmdObj));
- repl::ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(2);
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
- reconfigThread.join();
- ASSERT_OK(status);
-
-        // write concern feasibility should be reevaluated and satisfied
- ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
- ASSERT_OK(statusAndDur.status);
- awaiter.reset();
- }
-
- TEST_F(ReplCoordTest, AwaitReplicationMajority) {
-        // Test that majority write concern can only be satisfied by voting,
-        // data-bearing members.
- OperationContextNoop txn;
- assertStartSuccess(
- BSON("_id" << "mySet" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(BSON("host" << "node1:12345" << "_id" << 0) <<
- BSON("host" << "node2:12345" << "_id" << 1) <<
- BSON("host" << "node3:12345" << "_id" << 2) <<
- BSON("host" << "node4:12345" <<
- "_id" << 3 <<
- "votes" << 0) <<
- BSON("host" << "node5:12345" <<
- "_id" << 4 <<
- "arbiterOnly" << true))),
- HostAndPort("node1", 12345));
- ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- OpTime time(100, 0);
- getReplCoord()->setMyLastOptime(time);
- simulateSuccessfulElection();
-
- WriteConcernOptions majorityWriteConcern;
- majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = "majority";
-
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
-
- OID client1 = OID::gen();
- HandshakeArgs handshake1;
- ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
-
- // this member does not vote and as a result should not count towards write concern
- OID client3 = OID::gen();
- HandshakeArgs handshake3;
- ASSERT_OK(handshake3.initialize(BSON("handshake" << client3 << "member" << 3)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client3, time));
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
- getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
-
- OID client2 = OID::gen();
- HandshakeArgs handshake2;
- ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
- ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time));
- ASSERT_OK(getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
- }
-
- // TODO(schwerin): Unit test election id updating
+ std::vector<HostAndPort> passives = response.getPassives();
+ ASSERT_EQUALS(1U, passives.size());
+ ASSERT_EQUALS(h4, passives[0]);
+ std::vector<HostAndPort> arbiters = response.getArbiters();
+ ASSERT_EQUALS(1U, arbiters.size());
+ ASSERT_EQUALS(h3, arbiters[0]);
+
+ unordered_map<std::string, std::string> tags = response.getTags();
+ ASSERT_EQUALS(2U, tags.size());
+ ASSERT_EQUALS("value1", tags["key1"]);
+ ASSERT_EQUALS("value2", tags["key2"]);
+
+ IsMasterResponse roundTripped;
+ ASSERT_OK(roundTripped.initialize(response.toBSON()));
+}
+
+TEST_F(ReplCoordTest, ShutDownBeforeStartUpFinished) {
+ init();
+ startCapturingLogMessages();
+ getReplCoord()->shutdown();
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countLogLinesContaining("shutdown() called before startReplication() finished"));
+}
+
+TEST_F(ReplCoordTest, UpdatePositionWithRIDTest) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3) << BSON("host"
+ << "node5:12345"
+ << "_id" << 4))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
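+    // This test exercises the RID-based replSetUpdatePosition format, which identifies
+    // members by replica-set RID; each RID must be registered through a handshake before
+    // its optime can be recorded, hence the NodeNotFound errors below.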
+ OID selfRID = getReplCoord()->getMyRID();
+ OID client1 = OID::gen();
+ OID client2 = OID::gen();
+ OID client3 = OID::gen();
+ OID client4 = OID::gen();
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+ OpTime staleTime(10, 0);
+ getReplCoord()->setMyLastOptime(time2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 2;
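+    // Note: wNumNodes counts this node itself, so w:2 is satisfied by self plus one
+    // other member having replicated through the target optime.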
+
+    // receive an updatePosition for 3 members with new-enough optimes, but no handshakes yet
+ UpdatePositionArgs args;
+ ASSERT_OK(args.initialize(BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("_id" << client1 << "optime" << time1)
+ << BSON("_id" << client2 << "optime" << time1)
+ << BSON("_id" << client3 << "optime" << time1)))));
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
+
+    // handshake for the middle of the three nodes; updatePosition should end early without
+    // updating any members, and write concern 2 should still fail
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
+
+    // handshake for the first of the three nodes; updatePosition still ends early, but the
+    // first two updates get through, so write concern <= 3 should pass while 4 should fail
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args));
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
+ writeConcern.wNumNodes = 3;
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
+ writeConcern.wNumNodes = 4;
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
+
+    // receiving a stale value for ourselves should not cause progress to go backwards
+ HandshakeArgs handshake3;
+ ASSERT_OK(handshake3.initialize(BSON("handshake" << client3 << "member" << 3)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
+ HandshakeArgs handshake4;
+ ASSERT_OK(handshake4.initialize(BSON("handshake" << client4 << "member" << 4)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake4));
+ UpdatePositionArgs args2;
+ ASSERT_OK(args2.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes" << BSON_ARRAY(BSON("_id" << selfRID << "optime" << staleTime)
+ << BSON("_id" << client3 << "optime" << time2)
+ << BSON("_id" << client4 << "optime" << time2)))));
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args2));
+ // all nodes should have through time1 and three should have through time2
+ writeConcern.wNumNodes = 5;
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time1, writeConcern).status);
+ writeConcern.wNumNodes = 3;
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+ writeConcern.wNumNodes = 4;
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+    // receiving a stale value for another node should not cause progress to go backwards
+ UpdatePositionArgs args3;
+ ASSERT_OK(args3.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes" << BSON_ARRAY(BSON("_id" << client1 << "optime" << time2)
+ << BSON("_id" << client2 << "optime" << time2)
+ << BSON("_id" << client3 << "optime" << staleTime)))));
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args3));
+ // all nodes should have through time2
+ writeConcern.wNumNodes = 5;
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+}
+
+TEST_F(ReplCoordTest, UpdatePositionWithConfigVersionAndMemberIdTest) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 0));
+ simulateSuccessfulElection();
+
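+    // This variant of replSetUpdatePosition identifies members by config version and
+    // member id rather than by RID, so no handshakes are needed.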
+ OpTime time1(100, 1);
+ OpTime time2(100, 2);
+ OpTime staleTime(10, 0);
+ getReplCoord()->setMyLastOptime(time1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+    // receive an updatePosition containing ourselves; the update for self should not be processed
+ UpdatePositionArgs args;
+ ASSERT_OK(args.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 0 << "optime" << time2)))));
+
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive updatePosition with incorrect config version
+ UpdatePositionArgs args2;
+ ASSERT_OK(args2.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 3 << "memberId" << 1 << "optime" << time2)))));
+
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetUpdatePosition(args2));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive updatePosition with nonexistent member id
+ UpdatePositionArgs args3;
+ ASSERT_OK(args3.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 9 << "optime" << time2)))));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args3));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive a good update position
+ getReplCoord()->setMyLastOptime(time2);
+ UpdatePositionArgs args4;
+ ASSERT_OK(args4.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime" << time2)
+ << BSON("cfgver" << 2 << "memberId" << 2 << "optime" << time2)))));
+
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args4));
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ writeConcern.wNumNodes = 3;
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+}
+
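+// Helper meant to run on its own thread: processReplSetReconfig blocks until the new
+// config is installed, which requires the test to drive heartbeat traffic on the mock
+// network in parallel.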
+void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345"
+ << "priority" << 3)
+ << BSON("_id" << 1 << "host"
+ << "node2:12345")
+ << BSON("_id" << 2 << "host"
+ << "node3:12345")));
+ *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReconfigSimple) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 2));
+ simulateSuccessfulElection();
+
+ OID selfRID = getReplCoord()->getMyRID();
+ OID node2 = OID::gen();
+ OID node3 = OID::gen();
+ OpTime time(100, 2);
+
+ HandshakeArgs handshake;
+ ASSERT_OK(handshake.initialize(BSON("handshake" << node2 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+ ASSERT_OK(handshake.initialize(BSON("handshake" << node3 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+
+ // 3 nodes waiting for time
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 3;
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+ awaiter.setOpTime(time);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+
+ // reconfig
+ Status status(ErrorCodes::InternalError, "Not Set");
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
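+    // The in-flight reconfig issues a heartbeat to another member of the new config;
+    // answer it with a matching set name and version so the reconfig can complete.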
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+
+ // satisfy write concern
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(selfRID, time));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(node2, time));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(node3, time));
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+}
+
+void doReplSetReconfigToFewer(ReplicationCoordinatorImpl* replCoord, Status* status) {
+ OperationContextNoop txn;
+ BSONObjBuilder garbage;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node3:12345")));
+ *status = replCoord->processReplSetReconfig(&txn, args, &garbage);
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReconfigNodeCountExceedsNumberOfNodes) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 2));
+ simulateSuccessfulElection();
+
+ OID node2 = OID::gen();
+ OID node3 = OID::gen();
+ OpTime time(100, 2);
+
+ HandshakeArgs handshake;
+ ASSERT_OK(handshake.initialize(BSON("handshake" << node2 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+ ASSERT_OK(handshake.initialize(BSON("handshake" << node3 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+
+ // 3 nodes waiting for time
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wNumNodes = 3;
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+ awaiter.setOpTime(time);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+
+ // reconfig to fewer nodes
+ Status status(ErrorCodes::InternalError, "Not Set");
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfigToFewer, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+ std::cout << "asdf" << std::endl;
+
+    // write concern feasibility should be reevaluated and an error should be returned
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationReconfigToSmallerMajority) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3) << BSON("host"
+ << "node5:12345"
+ << "_id" << 4))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastOptime(OpTime(100, 1));
+ simulateSuccessfulElection();
+
+ OID node2 = OID::gen();
+ OID node3 = OID::gen();
+ OpTime time(100, 2);
+
+ HandshakeArgs handshake;
+ ASSERT_OK(handshake.initialize(BSON("handshake" << node2 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+ ASSERT_OK(handshake.initialize(BSON("handshake" << node3 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(node2, time));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(node3, time));
+
+ // majority nodes waiting for time
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
+ writeConcern.wMode = "majority";
+
+ ReplicationAwaiter awaiter(getReplCoord(), &txn);
+ awaiter.setOpTime(time);
+ awaiter.setWriteConcern(writeConcern);
+ awaiter.start(&txn);
+
+ // demonstrate that majority cannot currently be satisfied
+ WriteConcernOptions writeConcern2;
+ writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern2.wMode = "majority";
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time, writeConcern2).status);
+
+ // reconfig to three nodes
+ Status status(ErrorCodes::InternalError, "Not Set");
+ boost::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
+
+ NetworkInterfaceMock* net = getNet();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ repl::ReplSetHeartbeatArgs hbArgs;
+ ASSERT_OK(hbArgs.initialize(request.cmdObj));
+ repl::ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(2);
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
+ reconfigThread.join();
+ ASSERT_OK(status);
+
+    // write concern feasibility should be reevaluated and satisfied
+ ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
+ ASSERT_OK(statusAndDur.status);
+ awaiter.reset();
+}
+
+TEST_F(ReplCoordTest, AwaitReplicationMajority) {
+    // Test that majority write concern can only be satisfied by voting,
+    // data-bearing members.
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2)
+ << BSON("host"
+ << "node4:12345"
+ << "_id" << 3 << "votes" << 0)
+ << BSON("host"
+ << "node5:12345"
+ << "_id" << 4 << "arbiterOnly" << true))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ OpTime time(100, 0);
+ getReplCoord()->setMyLastOptime(time);
+ simulateSuccessfulElection();
+
+ WriteConcernOptions majorityWriteConcern;
+ majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ majorityWriteConcern.wMode = "majority";
+
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+
+ OID client1 = OID::gen();
+ HandshakeArgs handshake1;
+ ASSERT_OK(handshake1.initialize(BSON("handshake" << client1 << "member" << 1)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake1));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client1, time));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+
+ // this member does not vote and as a result should not count towards write concern
+ OID client3 = OID::gen();
+ HandshakeArgs handshake3;
+ ASSERT_OK(handshake3.initialize(BSON("handshake" << client3 << "member" << 3)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake3));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client3, time));
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
+ getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+
+ OID client2 = OID::gen();
+ HandshakeArgs handshake2;
+ ASSERT_OK(handshake2.initialize(BSON("handshake" << client2 << "member" << 2)));
+ ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake2));
+ ASSERT_OK(getReplCoord()->setLastOptime_forTest(client2, time));
+ ASSERT_OK(getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
+}
+
+// TODO(schwerin): Unit test election id updating
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index c601a8d4f44..b05412174ef 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -37,263 +37,258 @@
namespace mongo {
namespace repl {
- using std::vector;
-
- ReplicationCoordinatorMock::ReplicationCoordinatorMock(const ReplSettings& settings) :
- _settings(settings) {}
- ReplicationCoordinatorMock::~ReplicationCoordinatorMock() {}
-
- void ReplicationCoordinatorMock::startReplication(OperationContext* txn) {
- // TODO
- }
-
- void ReplicationCoordinatorMock::shutdown() {
- // TODO
- }
-
- const ReplSettings& ReplicationCoordinatorMock::getSettings() const {
- return _settings;
- }
-
- bool ReplicationCoordinatorMock::isReplEnabled() const {
- return _settings.usingReplSets() || _settings.master || _settings.slave;
- }
-
- ReplicationCoordinator::Mode ReplicationCoordinatorMock::getReplicationMode() const {
- return modeNone;
- }
-
- MemberState ReplicationCoordinatorMock::getMemberState() const {
- // TODO
- invariant(false);
- }
-
- bool ReplicationCoordinatorMock::isInPrimaryOrSecondaryState() const {
- invariant(false);
- }
-
- Seconds ReplicationCoordinatorMock::getSlaveDelaySecs() const {
- return Seconds(0);
- }
-
- void ReplicationCoordinatorMock::clearSyncSourceBlacklist() {}
-
- ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorMock::awaitReplication(
- const OperationContext* txn,
- const OpTime& ts,
- const WriteConcernOptions& writeConcern) {
- // TODO
- return StatusAndDuration(Status::OK(), Milliseconds(0));
- }
-
- ReplicationCoordinator::StatusAndDuration
- ReplicationCoordinatorMock::awaitReplicationOfLastOpForClient(
- const OperationContext* txn,
- const WriteConcernOptions& writeConcern) {
- return StatusAndDuration(Status::OK(), Milliseconds(0));
- }
-
- Status ReplicationCoordinatorMock::stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::isMasterForReportingPurposes() {
- // TODO
- return true;
- }
-
- bool ReplicationCoordinatorMock::canAcceptWritesForDatabase(const StringData& dbName) {
- // TODO
- return true;
- }
-
- Status ReplicationCoordinatorMock::checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk) {
- // TODO
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
- // TODO
- return false;
- }
-
- Status ReplicationCoordinatorMock::setLastOptimeForSlave(const OID& rid, const OpTime& ts) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
- // TODO
- }
-
- void ReplicationCoordinatorMock::setMyLastOptime(const OpTime& ts) {}
-
- void ReplicationCoordinatorMock::resetMyLastOptime() {}
-
- OpTime ReplicationCoordinatorMock::getMyLastOptime() const {
- // TODO
- return OpTime();
- }
-
-
- OID ReplicationCoordinatorMock::getElectionId() {
- // TODO
- return OID();
- }
-
- OID ReplicationCoordinatorMock::getMyRID() const {
- return OID();
- }
-
- int ReplicationCoordinatorMock::getMyId() const {
- return 0;
- }
-
- bool ReplicationCoordinatorMock::setFollowerMode(const MemberState& newState) {
- return true;
- }
-
- bool ReplicationCoordinatorMock::isWaitingForApplierToDrain() {
- return false;
- }
-
- void ReplicationCoordinatorMock::signalDrainComplete(OperationContext*) {}
-
- void ReplicationCoordinatorMock::signalUpstreamUpdater() {}
-
- bool ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommand(
- BSONObjBuilder* cmdBuilder) {
- return true;
- }
-
- void ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommandHandshakes(
- std::vector<BSONObj>* handshakes) {}
-
- void ReplicationCoordinatorMock::processReplSetGetConfig(BSONObjBuilder* result) {
- // TODO
- }
-
- Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder* result) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::fillIsMasterForReplSet(IsMasterResponse* result) {}
-
- void ReplicationCoordinatorMock::appendSlaveInfoData(BSONObjBuilder* result) {}
-
- Status ReplicationCoordinatorMock::setMaintenanceMode(bool activate) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::getMaintenanceMode() {
- return false;
- }
-
- Status ReplicationCoordinatorMock::processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetGetRBID(BSONObjBuilder* resultObj) {
- return Status::OK();
- }
-
- void ReplicationCoordinatorMock::incrementRollbackID() {}
+using std::vector;
+
+ReplicationCoordinatorMock::ReplicationCoordinatorMock(const ReplSettings& settings)
+ : _settings(settings) {}
+ReplicationCoordinatorMock::~ReplicationCoordinatorMock() {}
+
+void ReplicationCoordinatorMock::startReplication(OperationContext* txn) {
+ // TODO
+}
+
+void ReplicationCoordinatorMock::shutdown() {
+ // TODO
+}
+
+const ReplSettings& ReplicationCoordinatorMock::getSettings() const {
+ return _settings;
+}
+
+bool ReplicationCoordinatorMock::isReplEnabled() const {
+ return _settings.usingReplSets() || _settings.master || _settings.slave;
+}
+
+ReplicationCoordinator::Mode ReplicationCoordinatorMock::getReplicationMode() const {
+ return modeNone;
+}
- Status ReplicationCoordinatorMock::processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj) {
- return Status::OK();
- }
+MemberState ReplicationCoordinatorMock::getMemberState() const {
+ // TODO
+ invariant(false);
+}
- Status ReplicationCoordinatorMock::processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* resultObj) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processReplSetUpdatePosition(
- const UpdatePositionArgs& updates) {
- // TODO
- return Status::OK();
- }
-
- Status ReplicationCoordinatorMock::processHandshake(OperationContext* txn,
- const HandshakeArgs& handshake) {
- return Status::OK();
- }
-
- bool ReplicationCoordinatorMock::buildsIndexes() {
- // TODO
- return true;
- }
-
- std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op) {
- return std::vector<HostAndPort>();
- }
-
- vector<HostAndPort> ReplicationCoordinatorMock::getOtherNodesInReplSet() const {
- return std::vector<HostAndPort>();
- }
-
- Status ReplicationCoordinatorMock::checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const {
- return Status::OK();
- }
-
- WriteConcernOptions ReplicationCoordinatorMock::getGetLastErrorDefault() {
- return WriteConcernOptions();
- }
-
- Status ReplicationCoordinatorMock::checkReplEnabledForCommand(BSONObjBuilder* result) {
- // TODO
- return Status::OK();
- }
-
- HostAndPort ReplicationCoordinatorMock::chooseNewSyncSource(const OpTime& lastOpTimeFetched) {
- invariant(false);
- return HostAndPort();
- }
-
- void ReplicationCoordinatorMock::blacklistSyncSource(const HostAndPort& host, Date_t until) {
- invariant(false);
- }
-
- void ReplicationCoordinatorMock::resetLastOpTimeFromOplog(OperationContext* txn) {
- invariant(false);
- }
-
- bool ReplicationCoordinatorMock::shouldChangeSyncSource(const HostAndPort& currentSource) {
- invariant(false);
- }
-
- void ReplicationCoordinatorMock::summarizeAsHtml(ReplSetHtmlSummary* output) {}
-
-} // namespace repl
-} // namespace mongo
+bool ReplicationCoordinatorMock::isInPrimaryOrSecondaryState() const {
+ invariant(false);
+}
+
+Seconds ReplicationCoordinatorMock::getSlaveDelaySecs() const {
+ return Seconds(0);
+}
+
+void ReplicationCoordinatorMock::clearSyncSourceBlacklist() {}
+
+ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorMock::awaitReplication(
+ const OperationContext* txn, const OpTime& ts, const WriteConcernOptions& writeConcern) {
+ // TODO
+ return StatusAndDuration(Status::OK(), Milliseconds(0));
+}
+
+ReplicationCoordinator::StatusAndDuration
+ReplicationCoordinatorMock::awaitReplicationOfLastOpForClient(
+ const OperationContext* txn, const WriteConcernOptions& writeConcern) {
+ return StatusAndDuration(Status::OK(), Milliseconds(0));
+}
+
+Status ReplicationCoordinatorMock::stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime) {
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::isMasterForReportingPurposes() {
+ // TODO
+ return true;
+}
+
+bool ReplicationCoordinatorMock::canAcceptWritesForDatabase(const StringData& dbName) {
+ // TODO
+ return true;
+}
+
+Status ReplicationCoordinatorMock::checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk) {
+ // TODO
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::shouldIgnoreUniqueIndex(const IndexDescriptor* idx) {
+ // TODO
+ return false;
+}
+
+Status ReplicationCoordinatorMock::setLastOptimeForSlave(const OID& rid, const OpTime& ts) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
+ // TODO
+}
+
+void ReplicationCoordinatorMock::setMyLastOptime(const OpTime& ts) {}
+
+void ReplicationCoordinatorMock::resetMyLastOptime() {}
+
+OpTime ReplicationCoordinatorMock::getMyLastOptime() const {
+ // TODO
+ return OpTime();
+}
+
+
+OID ReplicationCoordinatorMock::getElectionId() {
+ // TODO
+ return OID();
+}
+
+OID ReplicationCoordinatorMock::getMyRID() const {
+ return OID();
+}
+
+int ReplicationCoordinatorMock::getMyId() const {
+ return 0;
+}
+
+bool ReplicationCoordinatorMock::setFollowerMode(const MemberState& newState) {
+ return true;
+}
+
+bool ReplicationCoordinatorMock::isWaitingForApplierToDrain() {
+ return false;
+}
+
+void ReplicationCoordinatorMock::signalDrainComplete(OperationContext*) {}
+
+void ReplicationCoordinatorMock::signalUpstreamUpdater() {}
+
+bool ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
+ return true;
+}
+
+void ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommandHandshakes(
+ std::vector<BSONObj>* handshakes) {}
+
+void ReplicationCoordinatorMock::processReplSetGetConfig(BSONObjBuilder* result) {
+ // TODO
+}
+
+Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder* result) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::fillIsMasterForReplSet(IsMasterResponse* result) {}
+
+void ReplicationCoordinatorMock::appendSlaveInfoData(BSONObjBuilder* result) {}
+
+Status ReplicationCoordinatorMock::setMaintenanceMode(bool activate) {
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::getMaintenanceMode() {
+ return false;
+}
+
+Status ReplicationCoordinatorMock::processReplSetSyncFrom(const HostAndPort& target,
+ BSONObjBuilder* resultObj) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetFreeze(int secs, BSONObjBuilder* resultObj) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetGetRBID(BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+void ReplicationCoordinatorMock::incrementRollbackID() {}
+
+Status ReplicationCoordinatorMock::processReplSetFresh(const ReplSetFreshArgs& args,
+ BSONObjBuilder* resultObj) {
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetElect(const ReplSetElectArgs& args,
+ BSONObjBuilder* resultObj) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processReplSetUpdatePosition(const UpdatePositionArgs& updates) {
+ // TODO
+ return Status::OK();
+}
+
+Status ReplicationCoordinatorMock::processHandshake(OperationContext* txn,
+ const HandshakeArgs& handshake) {
+ return Status::OK();
+}
+
+bool ReplicationCoordinatorMock::buildsIndexes() {
+ // TODO
+ return true;
+}
+
+std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op) {
+ return std::vector<HostAndPort>();
+}
+
+vector<HostAndPort> ReplicationCoordinatorMock::getOtherNodesInReplSet() const {
+ return std::vector<HostAndPort>();
+}
+
+Status ReplicationCoordinatorMock::checkIfWriteConcernCanBeSatisfied(
+ const WriteConcernOptions& writeConcern) const {
+ return Status::OK();
+}
+
+WriteConcernOptions ReplicationCoordinatorMock::getGetLastErrorDefault() {
+ return WriteConcernOptions();
+}
+
+Status ReplicationCoordinatorMock::checkReplEnabledForCommand(BSONObjBuilder* result) {
+ // TODO
+ return Status::OK();
+}
+
+HostAndPort ReplicationCoordinatorMock::chooseNewSyncSource(const OpTime& lastOpTimeFetched) {
+ invariant(false);
+ return HostAndPort();
+}
+
+void ReplicationCoordinatorMock::blacklistSyncSource(const HostAndPort& host, Date_t until) {
+ invariant(false);
+}
+
+void ReplicationCoordinatorMock::resetLastOpTimeFromOplog(OperationContext* txn) {
+ invariant(false);
+}
+
+bool ReplicationCoordinatorMock::shouldChangeSyncSource(const HostAndPort& currentSource) {
+ invariant(false);
+}
+
+void ReplicationCoordinatorMock::summarizeAsHtml(ReplSetHtmlSummary* output) {}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 3f645554c7c..1e31629ee36 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -34,158 +34,148 @@
namespace mongo {
namespace repl {
- /**
- * A mock ReplicationCoordinator. Currently it is extremely simple and exists solely to link
- * into dbtests.
- */
- class ReplicationCoordinatorMock : public ReplicationCoordinator {
- MONGO_DISALLOW_COPYING(ReplicationCoordinatorMock);
-
- public:
-
- ReplicationCoordinatorMock(const ReplSettings& settings);
- virtual ~ReplicationCoordinatorMock();
+/**
+ * A mock ReplicationCoordinator. Currently it is extremely simple and exists solely to link
+ * into dbtests.
+ */
+class ReplicationCoordinatorMock : public ReplicationCoordinator {
+ MONGO_DISALLOW_COPYING(ReplicationCoordinatorMock);
- virtual void startReplication(OperationContext* txn);
+public:
+ ReplicationCoordinatorMock(const ReplSettings& settings);
+ virtual ~ReplicationCoordinatorMock();
- virtual void shutdown();
+ virtual void startReplication(OperationContext* txn);
- virtual const ReplSettings& getSettings() const;
+ virtual void shutdown();
- virtual bool isReplEnabled() const;
+ virtual const ReplSettings& getSettings() const;
- virtual Mode getReplicationMode() const;
+ virtual bool isReplEnabled() const;
- virtual MemberState getMemberState() const;
+ virtual Mode getReplicationMode() const;
- virtual bool isInPrimaryOrSecondaryState() const;
+ virtual MemberState getMemberState() const;
- virtual Seconds getSlaveDelaySecs() const;
+ virtual bool isInPrimaryOrSecondaryState() const;
- virtual void clearSyncSourceBlacklist();
+ virtual Seconds getSlaveDelaySecs() const;
- virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
- const OperationContext* txn,
- const OpTime& ts,
- const WriteConcernOptions& writeConcern);
+ virtual void clearSyncSourceBlacklist();
- virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
- const OperationContext* txn,
- const WriteConcernOptions& writeConcern);
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplication(
+ const OperationContext* txn, const OpTime& ts, const WriteConcernOptions& writeConcern);
- virtual Status stepDown(OperationContext* txn,
- bool force,
- const Milliseconds& waitTime,
- const Milliseconds& stepdownTime);
+ virtual ReplicationCoordinator::StatusAndDuration awaitReplicationOfLastOpForClient(
+ const OperationContext* txn, const WriteConcernOptions& writeConcern);
- virtual bool isMasterForReportingPurposes();
+ virtual Status stepDown(OperationContext* txn,
+ bool force,
+ const Milliseconds& waitTime,
+ const Milliseconds& stepdownTime);
- virtual bool canAcceptWritesForDatabase(const StringData& dbName);
+ virtual bool isMasterForReportingPurposes();
- virtual Status checkIfWriteConcernCanBeSatisfied(
- const WriteConcernOptions& writeConcern) const;
+ virtual bool canAcceptWritesForDatabase(const StringData& dbName);
- virtual Status checkCanServeReadsFor(OperationContext* txn,
- const NamespaceString& ns,
- bool slaveOk);
+ virtual Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const;
- virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
+ virtual Status checkCanServeReadsFor(OperationContext* txn,
+ const NamespaceString& ns,
+ bool slaveOk);
- virtual Status setLastOptimeForSlave(const OID& rid, const OpTime& ts);
+ virtual bool shouldIgnoreUniqueIndex(const IndexDescriptor* idx);
- virtual void setMyLastOptime(const OpTime& ts);
+ virtual Status setLastOptimeForSlave(const OID& rid, const OpTime& ts);
- virtual void resetMyLastOptime();
+ virtual void setMyLastOptime(const OpTime& ts);
- virtual void setMyHeartbeatMessage(const std::string& msg);
+ virtual void resetMyLastOptime();
- virtual OpTime getMyLastOptime() const;
+ virtual void setMyHeartbeatMessage(const std::string& msg);
- virtual OID getElectionId();
+ virtual OpTime getMyLastOptime() const;
- virtual OID getMyRID() const;
+ virtual OID getElectionId();
- virtual int getMyId() const;
+ virtual OID getMyRID() const;
- virtual bool setFollowerMode(const MemberState& newState);
+ virtual int getMyId() const;
- virtual bool isWaitingForApplierToDrain();
+ virtual bool setFollowerMode(const MemberState& newState);
- virtual void signalDrainComplete(OperationContext*);
+ virtual bool isWaitingForApplierToDrain();
- virtual void signalUpstreamUpdater();
+ virtual void signalDrainComplete(OperationContext*);
- virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
+ virtual void signalUpstreamUpdater();
- virtual void prepareReplSetUpdatePositionCommandHandshakes(
- std::vector<BSONObj>* handshakes);
+ virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
- virtual Status processReplSetGetStatus(BSONObjBuilder* result);
+ virtual void prepareReplSetUpdatePositionCommandHandshakes(std::vector<BSONObj>* handshakes);
- virtual void fillIsMasterForReplSet(IsMasterResponse* result);
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result);
- virtual void appendSlaveInfoData(BSONObjBuilder* result);
+ virtual void fillIsMasterForReplSet(IsMasterResponse* result);
- virtual void processReplSetGetConfig(BSONObjBuilder* result);
+ virtual void appendSlaveInfoData(BSONObjBuilder* result);
- virtual Status setMaintenanceMode(bool activate);
+ virtual void processReplSetGetConfig(BSONObjBuilder* result);
- virtual bool getMaintenanceMode();
+ virtual Status setMaintenanceMode(bool activate);
- virtual Status processReplSetSyncFrom(const HostAndPort& target,
- BSONObjBuilder* resultObj);
+ virtual bool getMaintenanceMode();
- virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj);
+ virtual Status processReplSetSyncFrom(const HostAndPort& target, BSONObjBuilder* resultObj);
- virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
- ReplSetHeartbeatResponse* response);
+ virtual Status processReplSetFreeze(int secs, BSONObjBuilder* resultObj);
- virtual Status processReplSetReconfig(OperationContext* txn,
- const ReplSetReconfigArgs& args,
- BSONObjBuilder* resultObj);
+ virtual Status processHeartbeat(const ReplSetHeartbeatArgs& args,
+ ReplSetHeartbeatResponse* response);
- virtual Status processReplSetInitiate(OperationContext* txn,
- const BSONObj& configObj,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetReconfig(OperationContext* txn,
+ const ReplSetReconfigArgs& args,
+ BSONObjBuilder* resultObj);
- virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj);
+ virtual Status processReplSetInitiate(OperationContext* txn,
+ const BSONObj& configObj,
+ BSONObjBuilder* resultObj);
- virtual void incrementRollbackID();
+ virtual Status processReplSetGetRBID(BSONObjBuilder* resultObj);
- virtual Status processReplSetFresh(const ReplSetFreshArgs& args,
- BSONObjBuilder* resultObj);
+ virtual void incrementRollbackID();
- virtual Status processReplSetElect(const ReplSetElectArgs& args,
- BSONObjBuilder* resultObj);
+ virtual Status processReplSetFresh(const ReplSetFreshArgs& args, BSONObjBuilder* resultObj);
- virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates);
+ virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* resultObj);
- virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake);
+ virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates);
- virtual bool buildsIndexes();
+ virtual Status processHandshake(OperationContext* txn, const HandshakeArgs& handshake);
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
+ virtual bool buildsIndexes();
- virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
- virtual WriteConcernOptions getGetLastErrorDefault();
+ virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
- virtual Status checkReplEnabledForCommand(BSONObjBuilder* result);
+ virtual WriteConcernOptions getGetLastErrorDefault();
- virtual HostAndPort chooseNewSyncSource(const OpTime& lastOpTimeFetched);
+ virtual Status checkReplEnabledForCommand(BSONObjBuilder* result);
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
+ virtual HostAndPort chooseNewSyncSource(const OpTime& lastOpTimeFetched);
- virtual void resetLastOpTimeFromOplog(OperationContext* txn);
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource);
+ virtual void resetLastOpTimeFromOplog(OperationContext* txn);
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource);
- private:
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
- const ReplSettings _settings;
- };
+private:
+ const ReplSettings _settings;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
index 2479c5d4724..e129a4693df 100644
--- a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
+++ b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
@@ -49,225 +49,208 @@ namespace mongo {
namespace repl {
namespace {
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
- }
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
} // namespace
- ReplicaSetConfig ReplCoordTest::assertMakeRSConfig(const BSONObj& configBson) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(configBson));
- ASSERT_OK(config.validate());
- return config;
- }
-
- ReplCoordTest::ReplCoordTest() : _callShutdown(false) {}
- ReplCoordTest::~ReplCoordTest() {}
-
- void ReplCoordTest::setUp() {
- _settings.replSet = "mySet/node1:12345,node2:54321";
- }
+ReplicaSetConfig ReplCoordTest::assertMakeRSConfig(const BSONObj& configBson) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(configBson));
+ ASSERT_OK(config.validate());
+ return config;
+}
- void ReplCoordTest::tearDown() {
- if (_externalState) {
- _externalState->setStoreLocalConfigDocumentToHang(false);
- }
- if (_callShutdown) {
- shutdown();
- }
- }
+ReplCoordTest::ReplCoordTest() : _callShutdown(false) {}
+ReplCoordTest::~ReplCoordTest() {}
- void ReplCoordTest::assertRunUntil(Date_t newTime) {
- this->_net->runUntil(newTime);
- ASSERT_EQUALS(newTime, getNet()->now());
- }
+void ReplCoordTest::setUp() {
+ _settings.replSet = "mySet/node1:12345,node2:54321";
+}
- void ReplCoordTest::enterNetwork() {
- getNet()->enterNetwork();
+void ReplCoordTest::tearDown() {
+ if (_externalState) {
+ _externalState->setStoreLocalConfigDocumentToHang(false);
}
-
- void ReplCoordTest::exitNetwork() {
- getNet()->exitNetwork();
+ if (_callShutdown) {
+ shutdown();
}
-
- void ReplCoordTest::addSelf(const HostAndPort& selfHost) {
- getExternalState()->addSelf(selfHost);
+}
+
+void ReplCoordTest::assertRunUntil(Date_t newTime) {
+ this->_net->runUntil(newTime);
+ ASSERT_EQUALS(newTime, getNet()->now());
+}
+
+void ReplCoordTest::enterNetwork() {
+ getNet()->enterNetwork();
+}
+
+void ReplCoordTest::exitNetwork() {
+ getNet()->exitNetwork();
+}
+
+void ReplCoordTest::addSelf(const HostAndPort& selfHost) {
+ getExternalState()->addSelf(selfHost);
+}
+
+void ReplCoordTest::init() {
+ invariant(!_repl);
+ invariant(!_callShutdown);
+
+ // PRNG seed for tests.
+ const int64_t seed = 0;
+
+ _topo = new TopologyCoordinatorImpl(Seconds(0));
+ _net = new NetworkInterfaceMock;
+ _externalState = new ReplicationCoordinatorExternalStateMock;
+ _repl.reset(new ReplicationCoordinatorImpl(_settings, _externalState, _net, _topo, seed));
+}
+
+void ReplCoordTest::init(const ReplSettings& settings) {
+ _settings = settings;
+ init();
+}
+
+void ReplCoordTest::init(const std::string& replSet) {
+ _settings.replSet = replSet;
+ init();
+}
+
+void ReplCoordTest::start() {
+ invariant(!_callShutdown);
+ // if we haven't initialized yet, do that first.
+ if (!_repl) {
+ init();
}
- void ReplCoordTest::init() {
- invariant(!_repl);
- invariant(!_callShutdown);
-
- // PRNG seed for tests.
- const int64_t seed = 0;
-
- _topo = new TopologyCoordinatorImpl(Seconds(0));
- _net = new NetworkInterfaceMock;
- _externalState = new ReplicationCoordinatorExternalStateMock;
- _repl.reset(new ReplicationCoordinatorImpl(_settings,
- _externalState,
- _net,
- _topo,
- seed));
- }
+ OperationContextNoop txn;
+ _repl->startReplication(&txn);
+ _repl->waitForStartUpComplete();
+ _callShutdown = true;
+}
- void ReplCoordTest::init(const ReplSettings& settings) {
- _settings = settings;
+void ReplCoordTest::start(const BSONObj& configDoc, const HostAndPort& selfHost) {
+ if (!_repl) {
init();
}
+ _externalState->setLocalConfigDocument(StatusWith<BSONObj>(configDoc));
+ _externalState->addSelf(selfHost);
+ start();
+}
- void ReplCoordTest::init(const std::string& replSet) {
- _settings.replSet = replSet;
+void ReplCoordTest::start(const HostAndPort& selfHost) {
+ if (!_repl) {
init();
}
-
- void ReplCoordTest::start() {
- invariant(!_callShutdown);
- // if we haven't initialized yet, do that first.
- if (!_repl) {
- init();
- }
-
- OperationContextNoop txn;
- _repl->startReplication(&txn);
- _repl->waitForStartUpComplete();
- _callShutdown = true;
- }
-
- void ReplCoordTest::start(const BSONObj& configDoc, const HostAndPort& selfHost) {
- if (!_repl) {
- init();
- }
- _externalState->setLocalConfigDocument(StatusWith<BSONObj>(configDoc));
- _externalState->addSelf(selfHost);
- start();
- }
-
- void ReplCoordTest::start(const HostAndPort& selfHost) {
- if (!_repl) {
- init();
- }
- _externalState->addSelf(selfHost);
- start();
- }
-
- void ReplCoordTest::assertStartSuccess(
- const BSONObj& configDoc,
- const HostAndPort& selfHost) {
- start(configDoc, selfHost);
- ASSERT_NE(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
- }
-
- ResponseStatus ReplCoordTest::makeResponseStatus(const BSONObj& doc, Milliseconds millis) {
- log() << "Responding with " << doc;
- return ResponseStatus(ReplicationExecutor::RemoteCommandResponse(doc, millis));
- }
-
- void ReplCoordTest::simulateSuccessfulElection() {
- OperationContextReplMock txn;
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- NetworkInterfaceMock* net = getNet();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- ASSERT(replCoord->getMemberState().secondary()) <<
- replCoord->getMemberState().toString();
- while (!replCoord->getMemberState().primary()) {
- log() << "Waiting on network in state " << replCoord->getMemberState();
- getNet()->enterNetwork();
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- ReplSetHeartbeatResponse hbResp;
- hbResp.setSetName(rsConfig.getReplSetName());
- hbResp.setState(MemberState::RS_SECONDARY);
- hbResp.setVersion(rsConfig.getConfigVersion());
- BSONObjBuilder respObj;
- respObj << "ok" << 1;
- hbResp.addToBSON(&respObj);
- net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "fresher" << false <<
- "opTime" << Date_t(OpTime(0, 0).asDate()) <<
- "veto" << false)));
- }
- else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") {
- net->scheduleResponse(noi, net->now(), makeResponseStatus(
- BSON("ok" << 1 <<
- "vote" << 1 <<
- "round" << request.cmdObj["round"].OID())));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
+ _externalState->addSelf(selfHost);
+ start();
+}
+
+void ReplCoordTest::assertStartSuccess(const BSONObj& configDoc, const HostAndPort& selfHost) {
+ start(configDoc, selfHost);
+ ASSERT_NE(MemberState::RS_STARTUP, getReplCoord()->getMemberState().s);
+}
+
+ResponseStatus ReplCoordTest::makeResponseStatus(const BSONObj& doc, Milliseconds millis) {
+ log() << "Responding with " << doc;
+ return ResponseStatus(ReplicationExecutor::RemoteCommandResponse(doc, millis));
+}
+
+void ReplCoordTest::simulateSuccessfulElection() {
+ OperationContextReplMock txn;
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ NetworkInterfaceMock* net = getNet();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString();
+ while (!replCoord->getMemberState().primary()) {
+ log() << "Waiting on network in state " << replCoord->getMemberState();
+ getNet()->enterNetwork();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_SECONDARY);
+ hbResp.setVersion(rsConfig.getConfigVersion());
+ BSONObjBuilder respObj;
+ respObj << "ok" << 1;
+ hbResp.addToBSON(&respObj);
+ net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
+ net->scheduleResponse(
+ noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime"
+ << Date_t(OpTime(0, 0).asDate()) << "veto" << false)));
+ } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") {
+ net->scheduleResponse(noi,
+ net->now(),
+ makeResponseStatus(BSON("ok" << 1 << "vote" << 1 << "round"
+ << request.cmdObj["round"].OID())));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
- ASSERT(replCoord->isWaitingForApplierToDrain());
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
-
- IsMasterResponse imResponse;
- replCoord->fillIsMasterForReplSet(&imResponse);
- ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
- replCoord->signalDrainComplete(&txn);
- replCoord->fillIsMasterForReplSet(&imResponse);
- ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
- ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
-
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
}
-
- void ReplCoordTest::simulateStepDownOnIsolation() {
- ReplicationCoordinatorImpl* replCoord = getReplCoord();
- NetworkInterfaceMock* net = getNet();
- ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
- ASSERT(replCoord->getMemberState().primary()) <<
- replCoord->getMemberState().toString();
- while (replCoord->getMemberState().primary()) {
- log() << "Waiting on network in state " << replCoord->getMemberState();
- getNet()->enterNetwork();
- net->runUntil(net->now() + 10000);
- const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
- log() << request.target.toString() << " processing " << request.cmdObj;
- ReplSetHeartbeatArgs hbArgs;
- if (hbArgs.initialize(request.cmdObj).isOK()) {
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::NetworkTimeout, "Nobody's home"));
- }
- else {
- error() << "Black holing unexpected request to " << request.target << ": " <<
- request.cmdObj;
- net->blackHole(noi);
- }
- net->runReadyNetworkOperations();
- getNet()->exitNetwork();
+ ASSERT(replCoord->isWaitingForApplierToDrain());
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+
+ IsMasterResponse imResponse;
+ replCoord->fillIsMasterForReplSet(&imResponse);
+ ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+ replCoord->signalDrainComplete(&txn);
+ replCoord->fillIsMasterForReplSet(&imResponse);
+ ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
+ ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+}
+
+void ReplCoordTest::simulateStepDownOnIsolation() {
+ ReplicationCoordinatorImpl* replCoord = getReplCoord();
+ NetworkInterfaceMock* net = getNet();
+ ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
+ ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
+ while (replCoord->getMemberState().primary()) {
+ log() << "Waiting on network in state " << replCoord->getMemberState();
+ getNet()->enterNetwork();
+ net->runUntil(net->now() + 10000);
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const ReplicationExecutor::RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ReplSetHeartbeatArgs hbArgs;
+ if (hbArgs.initialize(request.cmdObj).isOK()) {
+ net->scheduleResponse(
+ noi, net->now(), ResponseStatus(ErrorCodes::NetworkTimeout, "Nobody's home"));
+ } else {
+ error() << "Black holing unexpected request to " << request.target << ": "
+ << request.cmdObj;
+ net->blackHole(noi);
}
+ net->runReadyNetworkOperations();
+ getNet()->exitNetwork();
}
-
- void ReplCoordTest::shutdown() {
- invariant(_callShutdown);
- _net->exitNetwork();
- _repl->shutdown();
- _callShutdown = false;
- }
-
- int64_t ReplCoordTest::countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
+}
+
+void ReplCoordTest::shutdown() {
+ invariant(_callShutdown);
+ _net->exitNetwork();
+ _repl->shutdown();
+ _callShutdown = false;
+}
+
+int64_t ReplCoordTest::countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
+}
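
For reference, countLogLinesContaining() pairs with the log-capture hooks of the unittest base class (getCapturedLogMessages() above comes from the same machinery). A minimal sketch, with a hypothetical message string:

    startCapturingLogMessages();
    simulateStepDownOnIsolation();
    stopCapturingLogMessages();
    // Expect exactly one captured line mentioning the (hypothetical) step-down message.
    ASSERT_EQUALS(1, countLogLinesContaining("Stepping down"));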
} // namespace repl
} // namespace mongo
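
To show how the fixture methods above compose, here is a sketch of a test. The test name and config document are invented for illustration, and the setFollowerMode() call assumes the coordinator API used alongside this fixture:

    TEST_F(ReplCoordTest, HypotheticalElectionSmokeTest) {
        assertStartSuccess(BSON("_id" << "mySet" << "version" << 1 << "members"
                                      << BSON_ARRAY(BSON("_id" << 0 << "host" << "node1:12345")
                                                    << BSON("_id" << 1 << "host" << "node2:12345")
                                                    << BSON("_id" << 2 << "host" << "node3:12345"))),
                           HostAndPort("node1", 12345));
        ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
        simulateSuccessfulElection();
        ASSERT(getReplCoord()->getMemberState().primary());
    }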
diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.h b/src/mongo/db/repl/replication_coordinator_test_fixture.h
index 48f644abbe7..5e4094c466d 100644
--- a/src/mongo/db/repl/replication_coordinator_test_fixture.h
+++ b/src/mongo/db/repl/replication_coordinator_test_fixture.h
@@ -38,155 +38,163 @@
namespace mongo {
- class BSONObj;
- struct HostAndPort;
+class BSONObj;
+struct HostAndPort;
namespace repl {
- class NetworkInterfaceMock;
- class ReplicaSetConfig;
- class ReplicationCoordinatorExternalStateMock;
- class ReplicationCoordinatorImpl;
- class TopologyCoordinatorImpl;
-
- /**
- * Fixture for testing ReplicationCoordinatorImpl behaviors.
- */
- class ReplCoordTest : public mongo::unittest::Test {
- public:
- /**
- * Makes a ResponseStatus with the given "doc" response and optional elapsed time "millis".
- */
- static ResponseStatus makeResponseStatus(const BSONObj& doc,
- Milliseconds millis = Milliseconds(0));
-
- /**
- * Constructs a ReplicaSetConfig from the given BSON, or raises a test failure exception.
- */
- static ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBSON);
-
- ReplCoordTest();
- virtual ~ReplCoordTest();
-
- protected:
- virtual void setUp();
- virtual void tearDown();
-
- /**
- * Gets the network mock.
- */
- NetworkInterfaceMock* getNet() { return _net; }
-
- /**
- * Gets the replication coordinator under test.
- */
- ReplicationCoordinatorImpl* getReplCoord() { return _repl.get();}
-
- /**
- * Gets the topology coordinator used by the replication coordinator under test.
- */
- TopologyCoordinatorImpl& getTopoCoord() { return *_topo;}
-
- /**
- * Gets the external state used by the replication coordinator under test.
- */
- ReplicationCoordinatorExternalStateMock* getExternalState() { return _externalState; }
-
- /**
- * Adds "selfHost" to the list of hosts that identify as "this" host.
- */
- void addSelf(const HostAndPort& selfHost);
-
- /**
- * Moves time forward in the network until the new time, and asserts if now!=newTime after
- */
- void assertRunUntil(Date_t newTime);
-
- /**
- * Shorthand for getNet()->enterNetwork()
- */
- void enterNetwork();
-
- /**
- * Shorthand for getNet()->exitNetwork()
- */
- void exitNetwork();
-
- /**
- * Initializes the objects under test; this behavior is optional, in case you need to call
- * any methods on the network or coordinator objects before calling start.
- */
- void init();
-
- /**
- * Initializes the objects under test, using the given "settings".
- */
- void init(const ReplSettings& settings);
-
- /**
- * Initializes the objects under test, using "replSet" as the name of the replica set under
- * test.
- */
- void init(const std::string& replSet);
-
- /**
- * Starts the replication coordinator under test, with no local config document and
- * no notion of what host or hosts are represented by the network interface.
- */
- void start();
-
- /**
- * Starts the replication coordinator under test, with the given configuration in
- * local storage and the given host name.
- */
- void start(const BSONObj& configDoc, const HostAndPort& selfHost);
-
- /**
- * Starts the replication coordinator under test with the given host name.
- */
- void start(const HostAndPort& selfHost);
-
- /**
- * Brings the repl coord from SECONDARY to PRIMARY by simulating the messages required to
- * elect it.
- *
- * Behavior is unspecified if node does not have a clean config, is not in SECONDARY, etc.
- */
- void simulateSuccessfulElection();
-
- /**
- * Brings the repl coord from PRIMARY to SECONDARY by simulating a period of time in which
- * all heartbeats respond with an error condition, such as time out.
- */
- void simulateStepDownOnIsolation();
-
- /**
- * Asserts that calling start(configDoc, selfHost) successfully initiates the
- * ReplicationCoordinator under test.
- */
- void assertStartSuccess(const BSONObj& configDoc, const HostAndPort& selfHost);
-
- /**
- * Shuts down the objects under test.
- */
- void shutdown();
-
- /**
- * Returns the number of collected log lines containing "needle".
- */
- int64_t countLogLinesContaining(const std::string& needle);
-
- private:
- boost::scoped_ptr<ReplicationCoordinatorImpl> _repl;
- // Owned by ReplicationCoordinatorImpl
- TopologyCoordinatorImpl* _topo;
- // Owned by ReplicationCoordinatorImpl
- NetworkInterfaceMock* _net;
- // Owned by ReplicationCoordinatorImpl
- ReplicationCoordinatorExternalStateMock* _externalState;
- ReplSettings _settings;
- bool _callShutdown;
- };
+class NetworkInterfaceMock;
+class ReplicaSetConfig;
+class ReplicationCoordinatorExternalStateMock;
+class ReplicationCoordinatorImpl;
+class TopologyCoordinatorImpl;
+
+/**
+ * Fixture for testing ReplicationCoordinatorImpl behaviors.
+ */
+class ReplCoordTest : public mongo::unittest::Test {
+public:
+ /**
+ * Makes a ResponseStatus with the given "doc" response and optional elapsed time "millis".
+ */
+ static ResponseStatus makeResponseStatus(const BSONObj& doc,
+ Milliseconds millis = Milliseconds(0));
+
+ /**
+ * Constructs a ReplicaSetConfig from the given BSON, or raises a test failure exception.
+ */
+ static ReplicaSetConfig assertMakeRSConfig(const BSONObj& configBSON);
+
+ ReplCoordTest();
+ virtual ~ReplCoordTest();
+
+protected:
+ virtual void setUp();
+ virtual void tearDown();
+
+ /**
+ * Gets the network mock.
+ */
+ NetworkInterfaceMock* getNet() {
+ return _net;
+ }
+
+ /**
+ * Gets the replication coordinator under test.
+ */
+ ReplicationCoordinatorImpl* getReplCoord() {
+ return _repl.get();
+ }
+
+ /**
+ * Gets the topology coordinator used by the replication coordinator under test.
+ */
+ TopologyCoordinatorImpl& getTopoCoord() {
+ return *_topo;
+ }
+
+ /**
+ * Gets the external state used by the replication coordinator under test.
+ */
+ ReplicationCoordinatorExternalStateMock* getExternalState() {
+ return _externalState;
+ }
+
+ /**
+ * Adds "selfHost" to the list of hosts that identify as "this" host.
+ */
+ void addSelf(const HostAndPort& selfHost);
+
+ /**
+     * Moves time forward in the network until the new time, and asserts if now != newTime afterward.
+ */
+ void assertRunUntil(Date_t newTime);
+
+ /**
+ * Shorthand for getNet()->enterNetwork()
+ */
+ void enterNetwork();
+
+ /**
+ * Shorthand for getNet()->exitNetwork()
+ */
+ void exitNetwork();
+
+ /**
+ * Initializes the objects under test; this behavior is optional, in case you need to call
+ * any methods on the network or coordinator objects before calling start.
+ */
+ void init();
+
+ /**
+ * Initializes the objects under test, using the given "settings".
+ */
+ void init(const ReplSettings& settings);
+
+ /**
+ * Initializes the objects under test, using "replSet" as the name of the replica set under
+ * test.
+ */
+ void init(const std::string& replSet);
+
+ /**
+ * Starts the replication coordinator under test, with no local config document and
+ * no notion of what host or hosts are represented by the network interface.
+ */
+ void start();
+
+ /**
+ * Starts the replication coordinator under test, with the given configuration in
+ * local storage and the given host name.
+ */
+ void start(const BSONObj& configDoc, const HostAndPort& selfHost);
+
+ /**
+ * Starts the replication coordinator under test with the given host name.
+ */
+ void start(const HostAndPort& selfHost);
+
+ /**
+ * Brings the repl coord from SECONDARY to PRIMARY by simulating the messages required to
+ * elect it.
+ *
+ * Behavior is unspecified if node does not have a clean config, is not in SECONDARY, etc.
+ */
+ void simulateSuccessfulElection();
+
+ /**
+ * Brings the repl coord from PRIMARY to SECONDARY by simulating a period of time in which
+ * all heartbeats respond with an error condition, such as time out.
+ */
+ void simulateStepDownOnIsolation();
+
+ /**
+ * Asserts that calling start(configDoc, selfHost) successfully initiates the
+ * ReplicationCoordinator under test.
+ */
+ void assertStartSuccess(const BSONObj& configDoc, const HostAndPort& selfHost);
+
+ /**
+ * Shuts down the objects under test.
+ */
+ void shutdown();
+
+ /**
+ * Returns the number of collected log lines containing "needle".
+ */
+ int64_t countLogLinesContaining(const std::string& needle);
+
+private:
+ boost::scoped_ptr<ReplicationCoordinatorImpl> _repl;
+ // Owned by ReplicationCoordinatorImpl
+ TopologyCoordinatorImpl* _topo;
+ // Owned by ReplicationCoordinatorImpl
+ NetworkInterfaceMock* _net;
+ // Owned by ReplicationCoordinatorImpl
+ ReplicationCoordinatorExternalStateMock* _externalState;
+ ReplSettings _settings;
+ bool _callShutdown;
+};
} // namespace repl
} // namespace mongo
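
As a usage note, the enterNetwork()/exitNetwork() shorthands bracket any direct manipulation of the mock network, mirroring what simulateSuccessfulElection() does internally. A sketch of answering one queued request, with an illustrative response document:

    enterNetwork();
    NetworkInterfaceMock::NetworkOperationIterator noi = getNet()->getNextReadyRequest();
    getNet()->scheduleResponse(noi, getNet()->now(), makeResponseStatus(BSON("ok" << 1)));
    getNet()->runReadyNetworkOperations();
    exitNetwork();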
diff --git a/src/mongo/db/repl/replication_executor.cpp b/src/mongo/db/repl/replication_executor.cpp
index ad24282b404..c7cb4c9cb9c 100644
--- a/src/mongo/db/repl/replication_executor.cpp
+++ b/src/mongo/db/repl/replication_executor.cpp
@@ -39,519 +39,452 @@ namespace mongo {
namespace repl {
namespace {
- stdx::function<void ()> makeNoExcept(const stdx::function<void ()> &fn);
+stdx::function<void()> makeNoExcept(const stdx::function<void()>& fn);
} // namespace
- const ReplicationExecutor::Milliseconds ReplicationExecutor::kNoTimeout(-1);
- const Date_t ReplicationExecutor::kNoExpirationDate(-1);
-
- ReplicationExecutor::ReplicationExecutor(NetworkInterface* netInterface, int64_t prngSeed) :
- _random(prngSeed),
- _networkInterface(netInterface),
- _totalEventWaiters(0),
- _inShutdown(false),
- _dblockWorkers(threadpool::ThreadPool::DoNotStartThreadsTag(),
- 1,
- "replCallbackWithGlobalLock-"),
- _nextId(0) {
- }
-
- ReplicationExecutor::~ReplicationExecutor() {}
-
- std::string ReplicationExecutor::getDiagnosticString() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- return _getDiagnosticString_inlock();
- }
-
- std::string ReplicationExecutor::_getDiagnosticString_inlock() const {
- str::stream output;
- output << "ReplicationExecutor";
- output << " networkInProgress:" << _networkInProgressQueue.size();
- output << " exclusiveInProgress:" << _exclusiveLockInProgressQueue.size();
- output << " sleeperQueue:" << _sleepersQueue.size();
- output << " ready:" << _readyQueue.size();
- output << " free:" << _freeQueue.size();
- output << " unsignaledEvents:" << _unsignaledEvents.size();
- output << " eventWaiters:" << _totalEventWaiters;
- output << " shuttingDown:" << _inShutdown;
- output << " networkInterface:" << _networkInterface->getDiagnosticString();
- return output;
- }
-
- Date_t ReplicationExecutor::now() {
- return _networkInterface->now();
- }
-
- void ReplicationExecutor::run() {
- setThreadName("ReplicationExecutor");
- _networkInterface->startup();
- _dblockWorkers.startThreads();
- std::pair<WorkItem, CallbackHandle> work;
- while ((work = getWork()).first.callback) {
- {
- boost::lock_guard<boost::mutex> lk(_terribleExLockSyncMutex);
- const Status inStatus = work.first.isCanceled ?
- Status(ErrorCodes::CallbackCanceled, "Callback canceled") :
- Status::OK();
- makeNoExcept(stdx::bind(work.first.callback,
- CallbackData(this, work.second, inStatus)))();
- }
- signalEvent(work.first.finishedEvent);
- }
- finishShutdown();
- _networkInterface->shutdown();
- }
-
- void ReplicationExecutor::shutdown() {
- // Correct shutdown needs to:
- // * Disable future work queueing.
- // * drain all of the unsignaled events, sleepers, and ready queue, by running those
- // callbacks with a "shutdown" or "canceled" status.
- // * Signal all threads blocked in waitForEvent, and wait for them to return from that method.
- boost::lock_guard<boost::mutex> lk(_mutex);
- _inShutdown = true;
-
- _readyQueue.splice(_readyQueue.end(), _exclusiveLockInProgressQueue);
- _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue);
- _readyQueue.splice(_readyQueue.end(), _sleepersQueue);
- for (EventList::iterator event = _unsignaledEvents.begin();
- event != _unsignaledEvents.end();
- ++event) {
-
- _readyQueue.splice(_readyQueue.end(), event->waiters);
- }
- for (WorkQueue::iterator readyWork = _readyQueue.begin();
- readyWork != _readyQueue.end();
- ++readyWork) {
-
- readyWork->isCanceled = true;
+const ReplicationExecutor::Milliseconds ReplicationExecutor::kNoTimeout(-1);
+const Date_t ReplicationExecutor::kNoExpirationDate(-1);
+
+ReplicationExecutor::ReplicationExecutor(NetworkInterface* netInterface, int64_t prngSeed)
+ : _random(prngSeed),
+ _networkInterface(netInterface),
+ _totalEventWaiters(0),
+ _inShutdown(false),
+ _dblockWorkers(
+ threadpool::ThreadPool::DoNotStartThreadsTag(), 1, "replCallbackWithGlobalLock-"),
+ _nextId(0) {}
+
+ReplicationExecutor::~ReplicationExecutor() {}
+
+std::string ReplicationExecutor::getDiagnosticString() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return _getDiagnosticString_inlock();
+}
+
+std::string ReplicationExecutor::_getDiagnosticString_inlock() const {
+ str::stream output;
+ output << "ReplicationExecutor";
+ output << " networkInProgress:" << _networkInProgressQueue.size();
+ output << " exclusiveInProgress:" << _exclusiveLockInProgressQueue.size();
+ output << " sleeperQueue:" << _sleepersQueue.size();
+ output << " ready:" << _readyQueue.size();
+ output << " free:" << _freeQueue.size();
+ output << " unsignaledEvents:" << _unsignaledEvents.size();
+ output << " eventWaiters:" << _totalEventWaiters;
+ output << " shuttingDown:" << _inShutdown;
+ output << " networkInterface:" << _networkInterface->getDiagnosticString();
+ return output;
+}
+
+Date_t ReplicationExecutor::now() {
+ return _networkInterface->now();
+}
+
+void ReplicationExecutor::run() {
+ setThreadName("ReplicationExecutor");
+ _networkInterface->startup();
+ _dblockWorkers.startThreads();
+ std::pair<WorkItem, CallbackHandle> work;
+ while ((work = getWork()).first.callback) {
+ {
+ boost::lock_guard<boost::mutex> lk(_terribleExLockSyncMutex);
+ const Status inStatus = work.first.isCanceled
+ ? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
+ : Status::OK();
+ makeNoExcept(
+ stdx::bind(work.first.callback, CallbackData(this, work.second, inStatus)))();
}
+ signalEvent(work.first.finishedEvent);
+ }
+ finishShutdown();
+ _networkInterface->shutdown();
+}
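
The run loop is typically driven from a dedicated thread, matching the "instantiate, schedule, run" usage described in the accompanying header. A minimal lifecycle sketch, where doWork is a hypothetical callback and NetworkInterfaceMock stands in for a production interface (the executor takes ownership of it):

    void doWork(const ReplicationExecutor::CallbackData& cbData) {
        if (!cbData.status.isOK())
            return;  // canceled or shut down before execution
        // ... perform the scheduled work ...
    }

    ReplicationExecutor executor(new NetworkInterfaceMock, 0 /* prngSeed */);
    boost::thread executorThread(stdx::bind(&ReplicationExecutor::run, &executor));
    StatusWith<ReplicationExecutor::CallbackHandle> cbh = executor.scheduleWork(doWork);
    if (cbh.isOK())
        executor.wait(cbh.getValue());
    executor.shutdown();
    executorThread.join();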
+
+void ReplicationExecutor::shutdown() {
+ // Correct shutdown needs to:
+ // * Disable future work queueing.
+    // * Drain all of the unsignaled events, sleepers, and ready queue by running those
+ // callbacks with a "shutdown" or "canceled" status.
+ // * Signal all threads blocked in waitForEvent, and wait for them to return from that method.
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _inShutdown = true;
+
+ _readyQueue.splice(_readyQueue.end(), _exclusiveLockInProgressQueue);
+ _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue);
+ _readyQueue.splice(_readyQueue.end(), _sleepersQueue);
+ for (EventList::iterator event = _unsignaledEvents.begin(); event != _unsignaledEvents.end();
+ ++event) {
+ _readyQueue.splice(_readyQueue.end(), event->waiters);
+ }
+ for (WorkQueue::iterator readyWork = _readyQueue.begin(); readyWork != _readyQueue.end();
+ ++readyWork) {
+ readyWork->isCanceled = true;
+ }
+ _networkInterface->signalWorkAvailable();
+}
+
+void ReplicationExecutor::finishShutdown() {
+ _dblockWorkers.join();
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(_inShutdown);
+ invariant(_exclusiveLockInProgressQueue.empty());
+ invariant(_readyQueue.empty());
+ invariant(_sleepersQueue.empty());
+
+ while (!_unsignaledEvents.empty()) {
+ EventList::iterator event = _unsignaledEvents.begin();
+ invariant(event->waiters.empty());
+ signalEvent_inlock(EventHandle(event, ++_nextId));
+ }
+
+ while (_totalEventWaiters > 0)
+ _noMoreWaitingThreads.wait(lk);
+
+ invariant(_exclusiveLockInProgressQueue.empty());
+ invariant(_readyQueue.empty());
+ invariant(_sleepersQueue.empty());
+ invariant(_unsignaledEvents.empty());
+}
+
+void ReplicationExecutor::maybeNotifyShutdownComplete_inlock() {
+ if (_totalEventWaiters == 0)
+ _noMoreWaitingThreads.notify_all();
+}
+
+StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent() {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ return makeEvent_inlock();
+}
+
+StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent_inlock() {
+ if (_inShutdown)
+ return StatusWith<EventHandle>(ErrorCodes::ShutdownInProgress, "Shutdown in progress");
+
+ if (_signaledEvents.empty())
+ _signaledEvents.push_back(Event());
+ const EventList::iterator iter = _signaledEvents.begin();
+ invariant(iter->waiters.empty());
+ iter->generation++;
+ iter->isSignaled = false;
+ _unsignaledEvents.splice(_unsignaledEvents.end(), _signaledEvents, iter);
+ return StatusWith<EventHandle>(EventHandle(iter, ++_nextId));
+}
+
+void ReplicationExecutor::signalEvent(const EventHandle& event) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ signalEvent_inlock(event);
+}
+
+void ReplicationExecutor::signalEvent_inlock(const EventHandle& event) {
+ invariant(!event._iter->isSignaled);
+ invariant(event._iter->generation == event._generation);
+ event._iter->isSignaled = true;
+ _signaledEvents.splice(_signaledEvents.end(), _unsignaledEvents, event._iter);
+ if (!event._iter->waiters.empty()) {
+ _readyQueue.splice(_readyQueue.end(), event._iter->waiters);
_networkInterface->signalWorkAvailable();
}
-
- void ReplicationExecutor::finishShutdown() {
- _dblockWorkers.join();
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(_inShutdown);
- invariant(_exclusiveLockInProgressQueue.empty());
- invariant(_readyQueue.empty());
- invariant(_sleepersQueue.empty());
-
- while (!_unsignaledEvents.empty()) {
- EventList::iterator event = _unsignaledEvents.begin();
- invariant(event->waiters.empty());
- signalEvent_inlock(EventHandle(event, ++_nextId));
- }
-
- while (_totalEventWaiters > 0)
- _noMoreWaitingThreads.wait(lk);
-
- invariant(_exclusiveLockInProgressQueue.empty());
- invariant(_readyQueue.empty());
- invariant(_sleepersQueue.empty());
- invariant(_unsignaledEvents.empty());
- }
-
- void ReplicationExecutor::maybeNotifyShutdownComplete_inlock() {
- if (_totalEventWaiters == 0)
- _noMoreWaitingThreads.notify_all();
- }
-
- StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent() {
- boost::lock_guard<boost::mutex> lk(_mutex);
- return makeEvent_inlock();
- }
-
- StatusWith<ReplicationExecutor::EventHandle> ReplicationExecutor::makeEvent_inlock() {
- if (_inShutdown)
- return StatusWith<EventHandle>(ErrorCodes::ShutdownInProgress, "Shutdown in progress");
-
- if (_signaledEvents.empty())
- _signaledEvents.push_back(Event());
- const EventList::iterator iter = _signaledEvents.begin();
- invariant(iter->waiters.empty());
- iter->generation++;
- iter->isSignaled = false;
- _unsignaledEvents.splice(_unsignaledEvents.end(), _signaledEvents, iter);
- return StatusWith<EventHandle>(EventHandle(iter, ++_nextId));
- }
-
- void ReplicationExecutor::signalEvent(const EventHandle& event) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- signalEvent_inlock(event);
- }
-
- void ReplicationExecutor::signalEvent_inlock(const EventHandle& event) {
- invariant(!event._iter->isSignaled);
- invariant(event._iter->generation == event._generation);
- event._iter->isSignaled = true;
- _signaledEvents.splice(_signaledEvents.end(), _unsignaledEvents, event._iter);
- if (!event._iter->waiters.empty()) {
- _readyQueue.splice(_readyQueue.end(), event._iter->waiters);
- _networkInterface->signalWorkAvailable();
- }
- event._iter->isSignaledCondition->notify_all();
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::onEvent(
- const EventHandle& event,
- const CallbackFn& work) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- invariant(event.isValid());
- invariant(event._generation <= event._iter->generation);
- WorkQueue* queue = &_readyQueue;
- if (event._generation == event._iter->generation && !event._iter->isSignaled) {
- queue = &event._iter->waiters;
- }
- else {
- queue = &_readyQueue;
- }
- return enqueueWork_inlock(queue, work);
- }
-
- void ReplicationExecutor::waitForEvent(const EventHandle& event) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- invariant(event.isValid());
- ++_totalEventWaiters;
- while ((event._generation == event._iter->generation) && !event._iter->isSignaled) {
- event._iter->isSignaledCondition->wait(lk);
- }
- --_totalEventWaiters;
- maybeNotifyShutdownComplete_inlock();
- }
-
- static void remoteCommandFinished(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicationExecutor::RemoteCommandCallbackFn& cb,
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) {
-
- if (cbData.status.isOK()) {
- cb(ReplicationExecutor::RemoteCommandCallbackData(
- cbData.executor, cbData.myHandle, request, response));
- }
- else {
- cb(ReplicationExecutor::RemoteCommandCallbackData(
- cbData.executor,
- cbData.myHandle,
- request,
- ResponseStatus(cbData.status)));
- }
- }
-
- static void remoteCommandFailedEarly(
- const ReplicationExecutor::CallbackData& cbData,
- const ReplicationExecutor::RemoteCommandCallbackFn& cb,
- const ReplicationExecutor::RemoteCommandRequest& request) {
-
- invariant(!cbData.status.isOK());
+ event._iter->isSignaledCondition->notify_all();
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::onEvent(
+ const EventHandle& event, const CallbackFn& work) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ invariant(event.isValid());
+ invariant(event._generation <= event._iter->generation);
+ WorkQueue* queue = &_readyQueue;
+ if (event._generation == event._iter->generation && !event._iter->isSignaled) {
+ queue = &event._iter->waiters;
+ } else {
+ queue = &_readyQueue;
+ }
+ return enqueueWork_inlock(queue, work);
+}
+
+void ReplicationExecutor::waitForEvent(const EventHandle& event) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ invariant(event.isValid());
+ ++_totalEventWaiters;
+ while ((event._generation == event._iter->generation) && !event._iter->isSignaled) {
+ event._iter->isSignaledCondition->wait(lk);
+ }
+ --_totalEventWaiters;
+ maybeNotifyShutdownComplete_inlock();
+}
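
A sketch of the event primitives above working together; signalStep and onFinished are hypothetical callbacks:

    void signalStep(const ReplicationExecutor::CallbackData& cbData,
                    const ReplicationExecutor::EventHandle& evh) {
        cbData.executor->signalEvent(evh);  // wakes waiters, schedules onEvent() callbacks
    }

    StatusWith<ReplicationExecutor::EventHandle> evh = executor.makeEvent();
    invariant(evh.isOK());
    executor.onEvent(evh.getValue(), onFinished);  // runs once the event is signaled
    executor.scheduleWork(stdx::bind(signalStep, stdx::placeholders::_1, evh.getValue()));
    executor.waitForEvent(evh.getValue());  // client threads only; never from a callback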
+
+static void remoteCommandFinished(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicationExecutor::RemoteCommandCallbackFn& cb,
+ const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ if (cbData.status.isOK()) {
cb(ReplicationExecutor::RemoteCommandCallbackData(
- cbData.executor,
- cbData.myHandle,
- request,
- ResponseStatus(cbData.status)));
- }
-
- void ReplicationExecutor::_finishRemoteCommand(
- const RemoteCommandRequest& request,
- const ResponseStatus& response,
- const CallbackHandle& cbHandle,
- const uint64_t expectedHandleGeneration,
- const RemoteCommandCallbackFn& cb) {
-
- const WorkQueue::iterator iter = cbHandle._iter;
- boost::lock_guard<boost::mutex> lk(_mutex);
- if (_inShutdown) {
- return;
- }
- if (expectedHandleGeneration != iter->generation) {
- return;
- }
- iter->callback = stdx::bind(remoteCommandFinished,
- stdx::placeholders::_1,
- cb,
- request,
- response);
- _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue, iter);
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleRemoteCommand(
- const RemoteCommandRequest& request,
- const RemoteCommandCallbackFn& cb) {
- RemoteCommandRequest scheduledRequest = request;
- if (request.timeout == kNoTimeout) {
- scheduledRequest.expirationDate = kNoExpirationDate;
- }
- else {
- scheduledRequest.expirationDate =
- _networkInterface->now() + scheduledRequest.timeout.total_milliseconds();
- }
- boost::lock_guard<boost::mutex> lk(_mutex);
- StatusWith<CallbackHandle> handle = enqueueWork_inlock(
- &_networkInProgressQueue,
- stdx::bind(remoteCommandFailedEarly,
- stdx::placeholders::_1,
- cb,
- scheduledRequest));
- if (handle.isOK()) {
- handle.getValue()._iter->isNetworkOperation = true;
- _networkInterface->startCommand(
- handle.getValue(),
- scheduledRequest,
- stdx::bind(&ReplicationExecutor::_finishRemoteCommand,
- this,
- scheduledRequest,
- stdx::placeholders::_1,
- handle.getValue(),
- handle.getValue()._iter->generation,
- cb));
- }
- return handle;
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWork(
- const CallbackFn& work) {
- boost::lock_guard<boost::mutex> lk(_mutex);
- _networkInterface->signalWorkAvailable();
- return enqueueWork_inlock(&_readyQueue, work);
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWorkAt(
- Date_t when,
- const CallbackFn& work) {
-
- boost::lock_guard<boost::mutex> lk(_mutex);
- WorkQueue temp;
- StatusWith<CallbackHandle> cbHandle = enqueueWork_inlock(&temp, work);
- if (!cbHandle.isOK())
- return cbHandle;
- cbHandle.getValue()._iter->readyDate = when;
- WorkQueue::iterator insertBefore = _sleepersQueue.begin();
- while (insertBefore != _sleepersQueue.end() && insertBefore->readyDate <= when)
- ++insertBefore;
- _sleepersQueue.splice(insertBefore, temp, temp.begin());
+ cbData.executor, cbData.myHandle, request, response));
+ } else {
+ cb(ReplicationExecutor::RemoteCommandCallbackData(
+ cbData.executor, cbData.myHandle, request, ResponseStatus(cbData.status)));
+ }
+}
+
+static void remoteCommandFailedEarly(const ReplicationExecutor::CallbackData& cbData,
+ const ReplicationExecutor::RemoteCommandCallbackFn& cb,
+ const ReplicationExecutor::RemoteCommandRequest& request) {
+ invariant(!cbData.status.isOK());
+ cb(ReplicationExecutor::RemoteCommandCallbackData(
+ cbData.executor, cbData.myHandle, request, ResponseStatus(cbData.status)));
+}
+
+void ReplicationExecutor::_finishRemoteCommand(const RemoteCommandRequest& request,
+ const ResponseStatus& response,
+ const CallbackHandle& cbHandle,
+ const uint64_t expectedHandleGeneration,
+ const RemoteCommandCallbackFn& cb) {
+ const WorkQueue::iterator iter = cbHandle._iter;
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ if (_inShutdown) {
+ return;
+ }
+ if (expectedHandleGeneration != iter->generation) {
+ return;
+ }
+ iter->callback =
+ stdx::bind(remoteCommandFinished, stdx::placeholders::_1, cb, request, response);
+ _readyQueue.splice(_readyQueue.end(), _networkInProgressQueue, iter);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleRemoteCommand(
+ const RemoteCommandRequest& request, const RemoteCommandCallbackFn& cb) {
+ RemoteCommandRequest scheduledRequest = request;
+ if (request.timeout == kNoTimeout) {
+ scheduledRequest.expirationDate = kNoExpirationDate;
+ } else {
+ scheduledRequest.expirationDate =
+ _networkInterface->now() + scheduledRequest.timeout.total_milliseconds();
+ }
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ StatusWith<CallbackHandle> handle = enqueueWork_inlock(
+ &_networkInProgressQueue,
+ stdx::bind(remoteCommandFailedEarly, stdx::placeholders::_1, cb, scheduledRequest));
+ if (handle.isOK()) {
+ handle.getValue()._iter->isNetworkOperation = true;
+ _networkInterface->startCommand(handle.getValue(),
+ scheduledRequest,
+ stdx::bind(&ReplicationExecutor::_finishRemoteCommand,
+ this,
+ scheduledRequest,
+ stdx::placeholders::_1,
+ handle.getValue(),
+ handle.getValue()._iter->generation,
+ cb));
+ }
+ return handle;
+}
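
A sketch of issuing a remote command through the path above; the target, database, command document, and onCommandDone callback are illustrative:

    void onCommandDone(const ReplicationExecutor::RemoteCommandCallbackData& cbData) {
        if (!cbData.response.isOK())
            return;  // canceled, timed out, or the command never completed
        // cbData.response.getValue() carries the reply document and elapsed time.
    }

    ReplicationExecutor::RemoteCommandRequest request(HostAndPort("node2", 12345),
                                                      "admin",
                                                      BSON("ping" << 1),
                                                      ReplicationExecutor::Milliseconds(5000));
    executor.scheduleRemoteCommand(request, onCommandDone);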
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWork(
+ const CallbackFn& work) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ _networkInterface->signalWorkAvailable();
+ return enqueueWork_inlock(&_readyQueue, work);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::scheduleWorkAt(
+ Date_t when, const CallbackFn& work) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ WorkQueue temp;
+ StatusWith<CallbackHandle> cbHandle = enqueueWork_inlock(&temp, work);
+ if (!cbHandle.isOK())
return cbHandle;
- }
-
- void ReplicationExecutor::doOperationWithGlobalExclusiveLock(
- OperationContext* txn,
- const CallbackHandle& cbHandle) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- if (_inShutdown)
- return;
- const WorkQueue::iterator iter = cbHandle._iter;
- const uint64_t generation = iter->generation;
- invariant(generation == cbHandle._generation);
- WorkItem work = *iter;
- iter->callback = CallbackFn();
- _freeQueue.splice(_freeQueue.begin(), _exclusiveLockInProgressQueue, iter);
+ cbHandle.getValue()._iter->readyDate = when;
+ WorkQueue::iterator insertBefore = _sleepersQueue.begin();
+ while (insertBefore != _sleepersQueue.end() && insertBefore->readyDate <= when)
+ ++insertBefore;
+ _sleepersQueue.splice(insertBefore, temp, temp.begin());
+ return cbHandle;
+}
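
Deferred work follows the same pattern with a ready date; Date_t arithmetic is in milliseconds, as in the expirationDate computation above (retryCallback is hypothetical):

    // Run retryCallback no sooner than five seconds from the executor's current time.
    StatusWith<ReplicationExecutor::CallbackHandle> cbh =
        executor.scheduleWorkAt(executor.now() + 5000, retryCallback);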
+
+void ReplicationExecutor::doOperationWithGlobalExclusiveLock(OperationContext* txn,
+ const CallbackHandle& cbHandle) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ if (_inShutdown)
+ return;
+ const WorkQueue::iterator iter = cbHandle._iter;
+ const uint64_t generation = iter->generation;
+ invariant(generation == cbHandle._generation);
+ WorkItem work = *iter;
+ iter->callback = CallbackFn();
+ _freeQueue.splice(_freeQueue.begin(), _exclusiveLockInProgressQueue, iter);
+ lk.unlock();
+ {
+ boost::lock_guard<boost::mutex> terribleLock(_terribleExLockSyncMutex);
+ work.callback(CallbackData(this,
+ cbHandle,
+ (work.isCanceled
+ ? Status(ErrorCodes::CallbackCanceled, "Callback canceled")
+ : Status::OK()),
+ txn));
+ }
+ lk.lock();
+ signalEvent_inlock(work.finishedEvent);
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle>
+ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(const CallbackFn& work) {
+ boost::lock_guard<boost::mutex> lk(_mutex);
+ StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_exclusiveLockInProgressQueue, work);
+ if (handle.isOK()) {
+ const stdx::function<void(OperationContext*)> doOp =
+ stdx::bind(&ReplicationExecutor::doOperationWithGlobalExclusiveLock,
+ this,
+ stdx::placeholders::_1,
+ handle.getValue());
+ _dblockWorkers.schedule(makeNoExcept(stdx::bind(
+ &NetworkInterface::runCallbackWithGlobalExclusiveLock, _networkInterface.get(), doOp)));
+ }
+ return handle;
+}
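
A sketch of a callback scheduled under the global exclusive lock; note that CallbackData::txn is populated only on this path, by doOperationWithGlobalExclusiveLock() above (writeUnderGlobalLock and doLocalWrite are hypothetical):

    void writeUnderGlobalLock(const ReplicationExecutor::CallbackData& cbData) {
        if (!cbData.status.isOK())
            return;  // canceled or shutting down
        doLocalWrite(cbData.txn);  // txn is the lock-holding thread's OperationContext
    }

    executor.scheduleWorkWithGlobalExclusiveLock(writeUnderGlobalLock);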
+
+void ReplicationExecutor::cancel(const CallbackHandle& cbHandle) {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ if (cbHandle._iter->generation != cbHandle._generation) {
+ return;
+ }
+ cbHandle._iter->isCanceled = true;
+ if (cbHandle._iter->isNetworkOperation) {
lk.unlock();
- {
- boost::lock_guard<boost::mutex> terribleLock(_terribleExLockSyncMutex);
- work.callback(CallbackData(this,
- cbHandle,
- (work.isCanceled ?
- Status(ErrorCodes::CallbackCanceled, "Callback canceled") :
- Status::OK()),
- txn));
- }
- lk.lock();
- signalEvent_inlock(work.finishedEvent);
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle>
- ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(
- const CallbackFn& work) {
-
- boost::lock_guard<boost::mutex> lk(_mutex);
- StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_exclusiveLockInProgressQueue,
- work);
- if (handle.isOK()) {
- const stdx::function<void (OperationContext*)> doOp = stdx::bind(
- &ReplicationExecutor::doOperationWithGlobalExclusiveLock,
- this,
- stdx::placeholders::_1,
- handle.getValue());
- _dblockWorkers.schedule(
- makeNoExcept(stdx::bind(
- &NetworkInterface::runCallbackWithGlobalExclusiveLock,
- _networkInterface.get(),
- doOp)));
- }
- return handle;
- }
-
- void ReplicationExecutor::cancel(const CallbackHandle& cbHandle) {
- boost::unique_lock<boost::mutex> lk(_mutex);
- if (cbHandle._iter->generation != cbHandle._generation) {
- return;
- }
- cbHandle._iter->isCanceled = true;
- if (cbHandle._iter->isNetworkOperation) {
- lk.unlock();
- _networkInterface->cancelCommand(cbHandle);
- }
- }
-
- void ReplicationExecutor::wait(const CallbackHandle& cbHandle) {
- waitForEvent(cbHandle._finishedEvent);
- }
-
- std::pair<ReplicationExecutor::WorkItem, ReplicationExecutor::CallbackHandle>
- ReplicationExecutor::getWork() {
- boost::unique_lock<boost::mutex> lk(_mutex);
- while (true) {
- const Date_t now = _networkInterface->now();
- Date_t nextWakeupDate = scheduleReadySleepers_inlock(now);
- if (!_readyQueue.empty()) {
- break;
- }
- else if (_inShutdown) {
- return std::make_pair(WorkItem(), CallbackHandle());
- }
- lk.unlock();
- if (nextWakeupDate == Date_t(~0ULL)) {
- _networkInterface->waitForWork();
- }
- else {
- _networkInterface->waitForWorkUntil(nextWakeupDate);
- }
- lk.lock();
- }
- const CallbackHandle cbHandle(_readyQueue.begin());
- const WorkItem work = *cbHandle._iter;
- _readyQueue.begin()->callback = CallbackFn();
- _freeQueue.splice(_freeQueue.begin(), _readyQueue, _readyQueue.begin());
- return std::make_pair(work, cbHandle);
- }
-
- int64_t ReplicationExecutor::nextRandomInt64(int64_t limit) {
- return _random.nextInt64(limit);
- }
-
- Date_t ReplicationExecutor::scheduleReadySleepers_inlock(const Date_t now) {
- WorkQueue::iterator iter = _sleepersQueue.begin();
- while ((iter != _sleepersQueue.end()) && (iter->readyDate <= now)) {
- ++iter;
- }
- _readyQueue.splice(_readyQueue.end(), _sleepersQueue, _sleepersQueue.begin(), iter);
- if (iter == _sleepersQueue.end()) {
- // indicate no sleeper to wait for
- return Date_t(~0ULL);
+ _networkInterface->cancelCommand(cbHandle);
+ }
+}
+
+void ReplicationExecutor::wait(const CallbackHandle& cbHandle) {
+ waitForEvent(cbHandle._finishedEvent);
+}
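
Cancellation composes with wait(): a canceled callback still runs, just with a CallbackCanceled status, so wait() always returns because the callback ran. A sketch with a hypothetical someCallback:

    StatusWith<ReplicationExecutor::CallbackHandle> cbh = executor.scheduleWork(someCallback);
    if (cbh.isOK()) {
        executor.cancel(cbh.getValue());  // the callback still runs, with CallbackCanceled
        executor.wait(cbh.getValue());    // returns once the callback has run
    }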
+
+std::pair<ReplicationExecutor::WorkItem, ReplicationExecutor::CallbackHandle>
+ReplicationExecutor::getWork() {
+ boost::unique_lock<boost::mutex> lk(_mutex);
+ while (true) {
+ const Date_t now = _networkInterface->now();
+ Date_t nextWakeupDate = scheduleReadySleepers_inlock(now);
+ if (!_readyQueue.empty()) {
+ break;
+ } else if (_inShutdown) {
+ return std::make_pair(WorkItem(), CallbackHandle());
}
- return iter->readyDate;
- }
-
- StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::enqueueWork_inlock(
- WorkQueue* queue, const CallbackFn& callback) {
-
- invariant(callback);
- StatusWith<EventHandle> event = makeEvent_inlock();
- if (!event.isOK())
- return StatusWith<CallbackHandle>(event.getStatus());
-
- if (_freeQueue.empty())
- _freeQueue.push_front(WorkItem());
- const WorkQueue::iterator iter = _freeQueue.begin();
- iter->generation++;
- iter->callback = callback;
- iter->finishedEvent = event.getValue();
- iter->readyDate = Date_t();
- iter->isCanceled = false;
- queue->splice(queue->end(), _freeQueue, iter);
- return StatusWith<CallbackHandle>(CallbackHandle(iter));
- }
-
- ReplicationExecutor::EventHandle::EventHandle(const EventList::iterator& iter, uint64_t id) :
- _iter(iter),
- _generation(iter->generation),
- _id(id) {
- }
-
- ReplicationExecutor::CallbackHandle::CallbackHandle(const WorkQueue::iterator& iter) :
- _iter(iter),
- _generation(iter->generation),
- _finishedEvent(iter->finishedEvent) {
- }
-
- ReplicationExecutor::CallbackData::CallbackData(ReplicationExecutor* theExecutor,
- const CallbackHandle& theHandle,
- const Status& theStatus,
- OperationContext* theTxn) :
- executor(theExecutor),
- myHandle(theHandle),
- status(theStatus),
- txn(theTxn) {
- }
-
- ReplicationExecutor::RemoteCommandRequest::RemoteCommandRequest() :
- timeout(kNoTimeout),
- expirationDate(kNoExpirationDate) {
- }
-
- ReplicationExecutor::RemoteCommandRequest::RemoteCommandRequest(
- const HostAndPort& theTarget,
- const std::string& theDbName,
- const BSONObj& theCmdObj,
- const Milliseconds timeoutMillis) :
- target(theTarget),
- dbname(theDbName),
- cmdObj(theCmdObj),
- timeout(timeoutMillis) {
- if (timeoutMillis == kNoTimeout) {
- expirationDate = kNoExpirationDate;
+ lk.unlock();
+ if (nextWakeupDate == Date_t(~0ULL)) {
+ _networkInterface->waitForWork();
+ } else {
+ _networkInterface->waitForWorkUntil(nextWakeupDate);
}
+ lk.lock();
}
-
- std::string ReplicationExecutor::RemoteCommandRequest::getDiagnosticString() {
- str::stream out;
- out << "RemoteCommand -- target:" << target.toString() << " db:" << dbname;
-
- if (expirationDate != kNoExpirationDate)
- out << " expDate:" << expirationDate.toString();
-
- out << " cmd:" << cmdObj.getOwned().toString();
- return out;
- }
-
- ReplicationExecutor::RemoteCommandCallbackData::RemoteCommandCallbackData(
- ReplicationExecutor* theExecutor,
- const CallbackHandle& theHandle,
- const RemoteCommandRequest& theRequest,
- const ResponseStatus& theResponse) :
- executor(theExecutor),
- myHandle(theHandle),
- request(theRequest),
- response(theResponse) {
- }
-
- ReplicationExecutor::WorkItem::WorkItem() : generation(0U),
- isNetworkOperation(false),
- isCanceled(false) {}
-
- ReplicationExecutor::Event::Event() :
- generation(0),
- isSignaled(false),
- isSignaledCondition(new boost::condition_variable) {
- }
-
- ReplicationExecutor::NetworkInterface::NetworkInterface() {}
- ReplicationExecutor::NetworkInterface::~NetworkInterface() {}
+ const CallbackHandle cbHandle(_readyQueue.begin());
+ const WorkItem work = *cbHandle._iter;
+ _readyQueue.begin()->callback = CallbackFn();
+ _freeQueue.splice(_freeQueue.begin(), _readyQueue, _readyQueue.begin());
+ return std::make_pair(work, cbHandle);
+}
+
+int64_t ReplicationExecutor::nextRandomInt64(int64_t limit) {
+ return _random.nextInt64(limit);
+}
+
+Date_t ReplicationExecutor::scheduleReadySleepers_inlock(const Date_t now) {
+ WorkQueue::iterator iter = _sleepersQueue.begin();
+ while ((iter != _sleepersQueue.end()) && (iter->readyDate <= now)) {
+ ++iter;
+ }
+ _readyQueue.splice(_readyQueue.end(), _sleepersQueue, _sleepersQueue.begin(), iter);
+ if (iter == _sleepersQueue.end()) {
+ // indicate no sleeper to wait for
+ return Date_t(~0ULL);
+ }
+ return iter->readyDate;
+}
+
+StatusWith<ReplicationExecutor::CallbackHandle> ReplicationExecutor::enqueueWork_inlock(
+ WorkQueue* queue, const CallbackFn& callback) {
+ invariant(callback);
+ StatusWith<EventHandle> event = makeEvent_inlock();
+ if (!event.isOK())
+ return StatusWith<CallbackHandle>(event.getStatus());
+
+ if (_freeQueue.empty())
+ _freeQueue.push_front(WorkItem());
+ const WorkQueue::iterator iter = _freeQueue.begin();
+ iter->generation++;
+ iter->callback = callback;
+ iter->finishedEvent = event.getValue();
+ iter->readyDate = Date_t();
+ iter->isCanceled = false;
+ queue->splice(queue->end(), _freeQueue, iter);
+ return StatusWith<CallbackHandle>(CallbackHandle(iter));
+}
+
+ReplicationExecutor::EventHandle::EventHandle(const EventList::iterator& iter, uint64_t id)
+ : _iter(iter), _generation(iter->generation), _id(id) {}
+
+ReplicationExecutor::CallbackHandle::CallbackHandle(const WorkQueue::iterator& iter)
+ : _iter(iter), _generation(iter->generation), _finishedEvent(iter->finishedEvent) {}
+
+ReplicationExecutor::CallbackData::CallbackData(ReplicationExecutor* theExecutor,
+ const CallbackHandle& theHandle,
+ const Status& theStatus,
+ OperationContext* theTxn)
+ : executor(theExecutor), myHandle(theHandle), status(theStatus), txn(theTxn) {}
+
+ReplicationExecutor::RemoteCommandRequest::RemoteCommandRequest()
+ : timeout(kNoTimeout), expirationDate(kNoExpirationDate) {}
+
+ReplicationExecutor::RemoteCommandRequest::RemoteCommandRequest(const HostAndPort& theTarget,
+ const std::string& theDbName,
+ const BSONObj& theCmdObj,
+ const Milliseconds timeoutMillis)
+ : target(theTarget), dbname(theDbName), cmdObj(theCmdObj), timeout(timeoutMillis) {
+ if (timeoutMillis == kNoTimeout) {
+ expirationDate = kNoExpirationDate;
+ }
+}
+
+std::string ReplicationExecutor::RemoteCommandRequest::getDiagnosticString() {
+ str::stream out;
+ out << "RemoteCommand -- target:" << target.toString() << " db:" << dbname;
+
+ if (expirationDate != kNoExpirationDate)
+ out << " expDate:" << expirationDate.toString();
+
+ out << " cmd:" << cmdObj.getOwned().toString();
+ return out;
+}
+
+ReplicationExecutor::RemoteCommandCallbackData::RemoteCommandCallbackData(
+ ReplicationExecutor* theExecutor,
+ const CallbackHandle& theHandle,
+ const RemoteCommandRequest& theRequest,
+ const ResponseStatus& theResponse)
+ : executor(theExecutor), myHandle(theHandle), request(theRequest), response(theResponse) {}
+
+ReplicationExecutor::WorkItem::WorkItem()
+ : generation(0U), isNetworkOperation(false), isCanceled(false) {}
+
+ReplicationExecutor::Event::Event()
+ : generation(0), isSignaled(false), isSignaledCondition(new boost::condition_variable) {}
+
+ReplicationExecutor::NetworkInterface::NetworkInterface() {}
+ReplicationExecutor::NetworkInterface::~NetworkInterface() {}
namespace {
- void callNoExcept(const stdx::function<void ()>& fn) {
- try {
- fn();
- }
- catch (...) {
- std::terminate();
- }
+void callNoExcept(const stdx::function<void()>& fn) {
+ try {
+ fn();
+ } catch (...) {
+ std::terminate();
}
+}
- stdx::function<void ()> makeNoExcept(const stdx::function<void ()> &fn) {
- return stdx::bind(callNoExcept, fn);
- }
+stdx::function<void()> makeNoExcept(const stdx::function<void()>& fn) {
+ return stdx::bind(callNoExcept, fn);
+}
} // namespace
diff --git a/src/mongo/db/repl/replication_executor.h b/src/mongo/db/repl/replication_executor.h
index fcac23469b4..1708b1ec87d 100644
--- a/src/mongo/db/repl/replication_executor.h
+++ b/src/mongo/db/repl/replication_executor.h
@@ -49,576 +49,578 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
+/**
+ * Event loop for driving state machines in replication.
+ *
+ * The event loop has notions of events and callbacks.
+ *
+ * Callbacks are function objects representing work to be performed in some sequential order by
+ * the executor. They may be scheduled by client threads or by other callbacks. Methods that
+ * schedule callbacks return a CallbackHandle if they are able to enqueue the callback in the
+ * appropriate work queue. Every CallbackHandle represents an invocation of a function that
+ * will happen before the executor returns from run(). Calling cancel(CallbackHandle) schedules
+ * the specified callback to run with a flag indicating that it is "canceled," but it will run.
+ * Client threads may block waiting for a callback to execute by calling wait(CallbackHandle).
+ *
+ * Events are level-triggered and may only be signaled one time. Client threads and callbacks
+ * may schedule callbacks to be run by the executor after the event is signaled, and client
+ * threads may ask the executor to block them until after the event is signaled.
+ *
+ * If an event is unsignaled when shutdown is called, the executor will ensure that any threads
+ * blocked in waitForEvent() eventually return.
+ *
+ * Logically, Callbacks and Events exist for the life of the executor. That means that while
+ * the executor is in scope, no CallbackHandle or EventHandle is stale.
+ *
+ * Usage: Instantiate an executor, schedule a work item, call run().
+ *
+ * Implementation details:
+ *
+ * The executor is composed of several WorkQueues, which are queues of WorkItems. WorkItems
+ * describe units of work -- a callback and state needed to track its lifecycle. The iterators
+ * pointing to WorkItems are spliced between the WorkQueues, rather than copying WorkItems
+ * themselves. Further, those WorkQueue::iterators are never invalidated during the life of an
+ * executor. They may be recycled to represent new work items, but when that happens, a counter
+ * on the WorkItem is incremented, to disambiguate. Handles referencing WorkQueue::iterators,
+ * called CallbackHandles, are thus valid for the life of the executor, simplifying lifecycle
+ * management.
+ *
+ * All work executed by the run() method of the executor is popped off the front of the
+ * _readyQueue. Remote commands blocked on the network can be found in the
+ * _networkInProgressQueue. Callbacks waiting for a timer to expire are in the _sleepersQueue.
+ * When the network returns or the timer expires, items from these two queues are transferred to
+ * the back of the _readyQueue.
+ *
+ * The _exclusiveLockInProgressQueue, which represents work items to execute while holding the
+ * GlobalWrite lock, is exceptional. WorkItems in that queue execute in unspecified order with
+ * respect to work in the _readyQueue or other WorkItems in the _exclusiveLockInProgressQueue,
+ * but they are executed in a single serial order with respect to those other WorkItems. The
+ * _terribleExLockSyncMutex is used to provide this serialization, until such time as the global
+ * lock may be passed from one thread to another.
+ *
+ * Events work similarly to WorkItems, and EventList is akin to WorkQueue.
+ */
+class ReplicationExecutor {
+ MONGO_DISALLOW_COPYING(ReplicationExecutor);
+
+public:
+ typedef boost::posix_time::milliseconds Milliseconds;
+ struct CallbackData;
+ class CallbackHandle;
+ class EventHandle;
+ class NetworkInterface;
+ struct RemoteCommandCallbackData;
+ struct RemoteCommandRequest;
+ struct RemoteCommandResponse;
+ typedef StatusWith<RemoteCommandResponse> ResponseStatus;
+
+ static const Milliseconds kNoTimeout;
+ static const Date_t kNoExpirationDate;
+
+ /**
+ * Type of a regular callback function.
+ *
+ * The status argument passed at invocation will have code ErrorCodes::CallbackCanceled if
+ * the callback was canceled for any reason (including shutdown). Otherwise, it should have
+ * Status::OK().
+ */
+ typedef stdx::function<void(const CallbackData&)> CallbackFn;
+
+ /**
+ * Type of a callback from a request to run a command on a remote MongoDB node.
+ *
+ * The StatusWith<const BSONObj> will have ErrorCodes::CallbackCanceled if the callback was
+ * canceled. Otherwise, its status will represent any failure to execute the command.
+ * If the command executed and a response came back, then the status object will contain
+ * the BSONObj returned by the command, with the "ok" field indicating the success of the
+ * command in the usual way.
+ */
+ typedef stdx::function<void(const RemoteCommandCallbackData&)> RemoteCommandCallbackFn;
+
+ /**
+ * Constructs a new executor.
+ *
+ * Takes ownership of the passed NetworkInterface object.
+ */
+    explicit ReplicationExecutor(NetworkInterface* netInterface, int64_t prngSeed);
+
+ /**
+ * Destroys an executor.
+ */
+ ~ReplicationExecutor();
+
+ /**
+ * Returns diagnostic information.
+ */
+ std::string getDiagnosticString();
+
+ /**
+ * Gets the current time as reported by the network interface.
+ */
+ Date_t now();
+
+ /**
+ * Executes the run loop. May be called up to one time.
+ *
+ * Returns after the executor has been shutdown and is safe to delete.
+ */
+ void run();
+
+ /**
+ * Signals to the executor that it should shut down. The only reliable indication
+ * that shutdown has completed is that the run() method returns.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ void shutdown();
+
+ /**
+ * Creates a new event. Returns a handle to the event, or ErrorCodes::ShutdownInProgress if
+ * makeEvent() fails because the executor is shutting down.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<EventHandle> makeEvent();
+
+ /**
+ * Signals the event, making waiting client threads and callbacks runnable.
+ *
+ * May be called up to one time per event.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ void signalEvent(const EventHandle&);
+
+ /**
+ * Schedules a callback, "work", to run after "event" is signaled. If "event"
+ * has already been signaled, marks "work" as immediately runnable.
+ *
+ * If "event" has yet to be signaled when "shutdown()" is called, "work" will
+ * be scheduled with a status of ErrorCodes::CallbackCanceled.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> onEvent(const EventHandle& event, const CallbackFn& work);
+
+ /**
+ * Blocks the calling thread until after "event" is signaled. Also returns
+ * if the event is never signaled but shutdown() is called on the executor.
+ *
+ * NOTE: Do not call from a callback running in the executor.
+ *
+ * TODO(schwerin): Change return type so that the caller can know which of the two reasons
+ * led to this method returning.
+ */
+ void waitForEvent(const EventHandle& event);
+
+ /**
+ * Schedules "work" to be run by the executor ASAP.
+ *
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> scheduleWork(const CallbackFn& work);
+
+ /**
+ * Schedules "work" to be run by the executor no sooner than "when".
+ *
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> scheduleWorkAt(Date_t when, const CallbackFn& work);
+
+ /**
+ * Schedules "work" to be run by the executor while holding the global exclusive lock.
+ *
+ * The "work" will run exclusively, as though it were executed by the main
+ * run loop, but there are no ordering guarantees provided with respect to
+ * any other work item.
+ *
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> scheduleWorkWithGlobalExclusiveLock(const CallbackFn& work);
+
+ /**
+ * Schedules "cb" to be run by the executor with the result of executing the remote command
+ * described by "request".
+ *
+ * Returns a handle for waiting on or canceling the callback, or
+ * ErrorCodes::ShutdownInProgress.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ StatusWith<CallbackHandle> scheduleRemoteCommand(const RemoteCommandRequest& request,
+ const RemoteCommandCallbackFn& cb);
+
+ /**
+ * If the callback referenced by "cbHandle" hasn't already executed, marks it as
+ * canceled and runnable.
+ *
+ * May be called by client threads or callbacks running in the executor.
+ */
+ void cancel(const CallbackHandle& cbHandle);
+
+ /**
+ * Blocks until the executor finishes running the callback referenced by "cbHandle".
+ *
+     * Because callbacks all run during shutdown if they were not run beforehand, there is no
+     * need to indicate the reason for returning from wait(CallbackHandle); by the time this
+     * method returns, the callback has always run.
+ *
+ * NOTE: Do not call from a callback running in the executor.
+ */
+ void wait(const CallbackHandle& cbHandle);
+
+ /**
+     * Returns an int64_t generated by the PRNG, with a maximum value of "limit".
+ */
+ int64_t nextRandomInt64(int64_t limit);
+
+private:
+ struct Event;
+ struct WorkItem;
+
+ /**
+ * A linked list of WorkItem objects.
+ *
+ * WorkItems get moved among lists by splicing iterators of work lists together,
+ * not by copying underlying WorkItem objects.
+ */
+ typedef stdx::list<WorkItem> WorkQueue;
+
+ /**
+ * A linked list of Event objects, like WorkQueue, above.
+ */
+ typedef stdx::list<Event> EventList;
+
+ /**
+     * Returns diagnostic info.
+ */
+ std::string _getDiagnosticString_inlock() const;
+ /**
+ * Implementation of makeEvent() for use when _mutex is already held.
+ */
+ StatusWith<EventHandle> makeEvent_inlock();
+
+ /**
+ * Gets a single piece of work to execute.
+ *
+ * If the "callback" member of the returned WorkItem is falsey, that is a signal
+ * to the run loop to wait for shutdown.
+ */
+ std::pair<WorkItem, CallbackHandle> getWork();
+
+ /**
+ * Marks as runnable any sleepers whose ready date has passed as of "now".
+ * Returns the date when the next sleeper will be ready, or Date_t(~0ULL) if there are no
+ * remaining sleepers.
+ */
+ Date_t scheduleReadySleepers_inlock(Date_t now);
+
+ /**
+ * Enqueues "callback" into "queue".
+ *
+     * Assumes that "queue" is sorted by readyDate, and performs an insertion sort, starting
+     * at the back of the "queue" and working toward the front.
+ *
+ * Use Date_t(0) for readyDate to mean "ready now".
+ */
+ StatusWith<CallbackHandle> enqueueWork_inlock(WorkQueue* queue, const CallbackFn& callback);
+
+ /**
+ * Implementation of signalEvent() that assumes the caller owns _mutex.
+ */
+ void signalEvent_inlock(const EventHandle& event);
+
+ /**
+ * Notifies interested parties that shutdown has completed, if it has.
+ */
+ void maybeNotifyShutdownComplete_inlock();
+
+ /**
+ * Completes the shutdown process. Called by run().
+ */
+ void finishShutdown();
+
+ void _finishRemoteCommand(const RemoteCommandRequest& request,
+ const StatusWith<RemoteCommandResponse>& response,
+ const CallbackHandle& cbHandle,
+ const uint64_t expectedHandleGeneration,
+ const RemoteCommandCallbackFn& cb);
+
+ /**
+ * Executes the callback referenced by "cbHandle", and moves the underlying
+ * WorkQueue::iterator into the _freeQueue. "txn" is a pointer to the OperationContext
+ * owning the global exclusive lock.
+ *
+ * Serializes execution of "cbHandle" with the execution of other callbacks.
+ */
+ void doOperationWithGlobalExclusiveLock(OperationContext* txn, const CallbackHandle& cbHandle);
+
+ // PRNG; seeded at class construction time.
+ PseudoRandom _random;
+
+ boost::scoped_ptr<NetworkInterface> _networkInterface;
+ boost::mutex _mutex;
+ boost::mutex _terribleExLockSyncMutex;
+ boost::condition_variable _noMoreWaitingThreads;
+ WorkQueue _freeQueue;
+ WorkQueue _readyQueue;
+ WorkQueue _exclusiveLockInProgressQueue;
+ WorkQueue _networkInProgressQueue;
+ WorkQueue _sleepersQueue;
+ EventList _unsignaledEvents;
+ EventList _signaledEvents;
+ int64_t _totalEventWaiters;
+ bool _inShutdown;
+ threadpool::ThreadPool _dblockWorkers;
+ uint64_t _nextId;
+};
+
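A minimal usage sketch of the class declared above, following the instantiate-schedule-run
pattern its API implies. It assumes the NetworkInterfaceMock test double from
network_interface_mock.h and a hypothetical callback named reportStatus (header paths are
assumed as well); illustrative only, not part of this patch.

    #include "mongo/db/repl/network_interface_mock.h"
    #include "mongo/db/repl/replication_executor.h"

    using namespace mongo;
    using namespace mongo::repl;

    // Runs inside the executor. A canceled callback (including cancellation at
    // shutdown) sees ErrorCodes::CallbackCanceled in cbData.status.
    void reportStatus(const ReplicationExecutor::CallbackData& cbData) {
        if (!cbData.status.isOK())
            return;
        // ... work that must be serialized with other callbacks goes here ...
        cbData.executor->shutdown();  // Ask the run loop to exit.
    }

    int main() {
        // The executor takes ownership of the NetworkInterface.
        ReplicationExecutor executor(new NetworkInterfaceMock, 1 /* prngSeed */);
        executor.scheduleWork(reportStatus);  // Returns StatusWith<CallbackHandle>.
        executor.run();  // Returns only after shutdown() has completed.
        return 0;
    }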
+/**
+ * Reference to an event object in the executor.
+ */
+class ReplicationExecutor::EventHandle {
+ friend class ReplicationExecutor;
+
+public:
+ EventHandle() : _generation(0), _id(0) {}
+
+ /**
+     * Returns true if the handle is valid, meaning that it identifies an event in the executor.
+ */
+ bool isValid() const {
+ return _id != 0;
+ }
+
+ bool operator==(const EventHandle& other) const {
+ return (_id == other._id);
+ }
+
+ bool operator!=(const EventHandle& other) const {
+ return !(*this == other);
+ }
+
+private:
+ EventHandle(const EventList::iterator& iter, const uint64_t id);
+
+ EventList::iterator _iter;
+ uint64_t _generation;
+ uint64_t _id;
+};
+
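A short, hedged sketch of the event API: create an event, chain a callback onto it, signal
it, and wait. An executor instance and a CallbackFn named someCallback are assumed to be in
scope.

    StatusWith<ReplicationExecutor::EventHandle> ev = executor.makeEvent();
    if (ev.isOK()) {
        executor.onEvent(ev.getValue(), someCallback);  // Runs after the signal.
        executor.signalEvent(ev.getValue());            // At most once per event.
        executor.waitForEvent(ev.getValue());           // Never from inside a callback.
    }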
+/**
+ * Reference to a scheduled callback.
+ */
+class ReplicationExecutor::CallbackHandle {
+ friend class ReplicationExecutor;
+
+public:
+ CallbackHandle() : _generation(0) {}
+
+ bool isValid() const {
+ return _finishedEvent.isValid();
+ }
+
+ bool operator==(const CallbackHandle& other) const {
+ return (_finishedEvent == other._finishedEvent);
+ }
+
+ bool operator!=(const CallbackHandle& other) const {
+ return !(*this == other);
+ }
+
+private:
+ explicit CallbackHandle(const WorkQueue::iterator& iter);
+
+ WorkQueue::iterator _iter;
+ uint64_t _generation;
+ EventHandle _finishedEvent;
+};
+
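A companion sketch for callback handles (same assumptions as above): cancel() does not
suppress the callback, it only changes the status the callback runs with, so wait() still
returns normally.

    StatusWith<ReplicationExecutor::CallbackHandle> cbh = executor.scheduleWork(someCallback);
    if (cbh.isOK()) {
        executor.cancel(cbh.getValue());  // Callback will see CallbackCanceled.
        executor.wait(cbh.getValue());    // Returns once the callback has run.
    }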
+struct ReplicationExecutor::CallbackData {
+ CallbackData(ReplicationExecutor* theExecutor,
+ const CallbackHandle& theHandle,
+ const Status& theStatus,
+ OperationContext* txn = NULL);
+
+ ReplicationExecutor* executor;
+ CallbackHandle myHandle;
+ Status status;
+ OperationContext* txn;
+};
+
+/**
+ * Type of object describing a command to execute against a remote MongoDB node.
+ */
+struct ReplicationExecutor::RemoteCommandRequest {
+ RemoteCommandRequest();
+ RemoteCommandRequest(const HostAndPort& theTarget,
+ const std::string& theDbName,
+ const BSONObj& theCmdObj,
+ const Milliseconds timeoutMillis = kNoTimeout);
+
+ // Returns diagnostic info.
+ std::string getDiagnosticString();
+
+ HostAndPort target;
+ std::string dbname;
+ BSONObj cmdObj;
+ Milliseconds timeout;
+ Date_t expirationDate; // Set by scheduleRemoteCommand.
+};
+
+struct ReplicationExecutor::RemoteCommandResponse {
+ RemoteCommandResponse() : data(), elapsedMillis(Milliseconds(0)) {}
+ RemoteCommandResponse(BSONObj obj, Milliseconds millis) : data(obj), elapsedMillis(millis) {}
+
+ BSONObj data;
+ Milliseconds elapsedMillis;
+};
+
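A sketch of filling in these two structs via scheduleRemoteCommand (onCommandFinished is a
hypothetical RemoteCommandCallbackFn; the target, command, and timeout are arbitrary):

    const ReplicationExecutor::RemoteCommandRequest request(
        HostAndPort("localhost", 27017),
        "admin",
        BSON("ping" << 1),
        ReplicationExecutor::Milliseconds(5000));
    StatusWith<ReplicationExecutor::CallbackHandle> handle =
        executor.scheduleRemoteCommand(request, onCommandFinished);
    // handle.getStatus() is ShutdownInProgress if the executor is shutting down;
    // otherwise onCommandFinished eventually runs with the command's result.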
+/**
+ * Interface to networking and lock manager.
+ */
+class ReplicationExecutor::NetworkInterface {
+ MONGO_DISALLOW_COPYING(NetworkInterface);
+
+public:
+ typedef RemoteCommandResponse Response;
+ typedef stdx::function<void(const ResponseStatus&)> RemoteCommandCompletionFn;
+
+ virtual ~NetworkInterface();
+
+ /**
+ * Returns diagnostic info.
+ */
+ virtual std::string getDiagnosticString() = 0;
+
+ /**
+ * Starts up the network interface.
+ *
+ * It is valid to call all methods except shutdown() before this method completes. That is,
+ * implementations may not assume that startup() completes before startCommand() first
+ * executes.
+ *
+ * Called by the owning ReplicationExecutor inside its run() method.
+ */
+ virtual void startup() = 0;
+
+ /**
+ * Shuts down the network interface. Must be called before this instance gets deleted,
+     * if startup() was called.
+ *
+ * Called by the owning ReplicationExecutor inside its run() method.
+ */
+ virtual void shutdown() = 0;
+
+ /**
+ * Blocks the current thread (presumably the executor thread) until the network interface
+ * knows of work for the executor to perform.
+ */
+ virtual void waitForWork() = 0;
+
+ /**
+ * Similar to waitForWork, but only blocks until "when".
+ */
+ virtual void waitForWorkUntil(Date_t when) = 0;
+
/**
- * Event loop for driving state machines in replication.
- *
- * The event loop has notions of events and callbacks.
- *
- * Callbacks are function objects representing work to be performed in some sequential order by
- * the executor. They may be scheduled by client threads or by other callbacks. Methods that
- * schedule callbacks return a CallbackHandle if they are able to enqueue the callback in the
- * appropriate work queue. Every CallbackHandle represents an invocation of a function that
- * will happen before the executor returns from run(). Calling cancel(CallbackHandle) schedules
- * the specified callback to run with a flag indicating that it is "canceled," but it will run.
- * Client threads may block waiting for a callback to execute by calling wait(CallbackHandle).
- *
- * Events are level-triggered and may only be signaled one time. Client threads and callbacks
- * may schedule callbacks to be run by the executor after the event is signaled, and client
- * threads may ask the executor to block them until after the event is signaled.
- *
- * If an event is unsignaled when shutdown is called, the executor will ensure that any threads
- * blocked in waitForEvent() eventually return.
- *
- * Logically, Callbacks and Events exist for the life of the executor. That means that while
- * the executor is in scope, no CallbackHandle or EventHandle is stale.
- *
- * Usage: Instantiate an executor, schedule a work item, call run().
- *
- * Implementation details:
- *
- * The executor is composed of several WorkQueues, which are queues of WorkItems. WorkItems
- * describe units of work -- a callback and state needed to track its lifecycle. The iterators
- * pointing to WorkItems are spliced between the WorkQueues, rather than copying WorkItems
- * themselves. Further, those WorkQueue::iterators are never invalidated during the life of an
- * executor. They may be recycled to represent new work items, but when that happens, a counter
- * on the WorkItem is incremented, to disambiguate. Handles referencing WorkQueue::iterators,
- * called CallbackHandles, are thus valid for the life of the executor, simplifying lifecycle
- * management.
- *
- * All work executed by the run() method of the executor is popped off the front of the
- * _readyQueue. Remote commands blocked on the network can be found in the
- * _networkInProgressQueue. Callbacks waiting for a timer to expire are in the _sleepersQueue.
- * When the network returns or the timer expires, items from these two queues are transferred to
- * the back of the _readyQueue.
- *
- * The _exclusiveLockInProgressQueue, which represents work items to execute while holding the
- * GlobalWrite lock, is exceptional. WorkItems in that queue execute in unspecified order with
- * respect to work in the _readyQueue or other WorkItems in the _exclusiveLockInProgressQueue,
- * but they are executed in a single serial order with respect to those other WorkItems. The
- * _terribleExLockSyncMutex is used to provide this serialization, until such time as the global
- * lock may be passed from one thread to another.
- *
- * Events work similiarly to WorkItems, and EventList is akin to WorkQueue.
- */
- class ReplicationExecutor {
- MONGO_DISALLOW_COPYING(ReplicationExecutor);
- public:
- typedef boost::posix_time::milliseconds Milliseconds;
- struct CallbackData;
- class CallbackHandle;
- class EventHandle;
- class NetworkInterface;
- struct RemoteCommandCallbackData;
- struct RemoteCommandRequest;
- struct RemoteCommandResponse;
- typedef StatusWith<RemoteCommandResponse> ResponseStatus;
-
- static const Milliseconds kNoTimeout;
- static const Date_t kNoExpirationDate;
-
- /**
- * Type of a regular callback function.
- *
- * The status argument passed at invocation will have code ErrorCodes::CallbackCanceled if
- * the callback was canceled for any reason (including shutdown). Otherwise, it should have
- * Status::OK().
- */
- typedef stdx::function<void (const CallbackData&)> CallbackFn;
-
- /**
- * Type of a callback from a request to run a command on a remote MongoDB node.
- *
- * The StatusWith<const BSONObj> will have ErrorCodes::CallbackCanceled if the callback was
- * canceled. Otherwise, its status will represent any failure to execute the command.
- * If the command executed and a response came back, then the status object will contain
- * the BSONObj returned by the command, with the "ok" field indicating the success of the
- * command in the usual way.
- */
- typedef stdx::function<void (const RemoteCommandCallbackData&)> RemoteCommandCallbackFn;
-
- /**
- * Constructs a new executor.
- *
- * Takes ownership of the passed NetworkInterface object.
- */
- explicit ReplicationExecutor(NetworkInterface* netInterface, int64_t pnrgSeed);
-
- /**
- * Destroys an executor.
- */
- ~ReplicationExecutor();
-
- /**
- * Returns diagnostic information.
- */
- std::string getDiagnosticString();
-
- /**
- * Gets the current time as reported by the network interface.
- */
- Date_t now();
-
- /**
- * Executes the run loop. May be called up to one time.
- *
- * Returns after the executor has been shutdown and is safe to delete.
- */
- void run();
-
- /**
- * Signals to the executor that it should shut down. The only reliable indication
- * that shutdown has completed is that the run() method returns.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- void shutdown();
-
- /**
- * Creates a new event. Returns a handle to the event, or ErrorCodes::ShutdownInProgress if
- * makeEvent() fails because the executor is shutting down.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<EventHandle> makeEvent();
-
- /**
- * Signals the event, making waiting client threads and callbacks runnable.
- *
- * May be called up to one time per event.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- void signalEvent(const EventHandle&);
-
- /**
- * Schedules a callback, "work", to run after "event" is signaled. If "event"
- * has already been signaled, marks "work" as immediately runnable.
- *
- * If "event" has yet to be signaled when "shutdown()" is called, "work" will
- * be scheduled with a status of ErrorCodes::CallbackCanceled.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> onEvent(const EventHandle& event, const CallbackFn& work);
-
- /**
- * Blocks the calling thread until after "event" is signaled. Also returns
- * if the event is never signaled but shutdown() is called on the executor.
- *
- * NOTE: Do not call from a callback running in the executor.
- *
- * TODO(schwerin): Change return type so that the caller can know which of the two reasons
- * led to this method returning.
- */
- void waitForEvent(const EventHandle& event);
-
- /**
- * Schedules "work" to be run by the executor ASAP.
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleWork(const CallbackFn& work);
-
- /**
- * Schedules "work" to be run by the executor no sooner than "when".
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleWorkAt(Date_t when, const CallbackFn& work);
-
- /**
- * Schedules "work" to be run by the executor while holding the global exclusive lock.
- *
- * The "work" will run exclusively, as though it were executed by the main
- * run loop, but there are no ordering guarantees provided with respect to
- * any other work item.
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleWorkWithGlobalExclusiveLock(
- const CallbackFn& work);
-
- /**
- * Schedules "cb" to be run by the executor with the result of executing the remote command
- * described by "request".
- *
- * Returns a handle for waiting on or canceling the callback, or
- * ErrorCodes::ShutdownInProgress.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- StatusWith<CallbackHandle> scheduleRemoteCommand(
- const RemoteCommandRequest& request,
- const RemoteCommandCallbackFn& cb);
-
- /**
- * If the callback referenced by "cbHandle" hasn't already executed, marks it as
- * canceled and runnable.
- *
- * May be called by client threads or callbacks running in the executor.
- */
- void cancel(const CallbackHandle& cbHandle);
-
- /**
- * Blocks until the executor finishes running the callback referenced by "cbHandle".
- *
- * Becaue callbacks all run during shutdown if they weren't run beforehand, there is no need
- * to indicate the reason for returning from wait(CallbackHandle). It is always that the
- * callback ran.
- *
- * NOTE: Do not call from a callback running in the executor.
- */
- void wait(const CallbackHandle& cbHandle);
-
- /**
- * Returns an int64_t generated by the prng with a max value of "limit".
- */
- int64_t nextRandomInt64(int64_t limit);
-
- private:
- struct Event;
- struct WorkItem;
-
- /**
- * A linked list of WorkItem objects.
- *
- * WorkItems get moved among lists by splicing iterators of work lists together,
- * not by copying underlying WorkItem objects.
- */
- typedef stdx::list<WorkItem> WorkQueue;
-
- /**
- * A linked list of Event objects, like WorkQueue, above.
- */
- typedef stdx::list<Event> EventList;
-
- /**
- * Returns diagnostic info
- */
- std::string _getDiagnosticString_inlock() const;
- /**
- * Implementation of makeEvent() for use when _mutex is already held.
- */
- StatusWith<EventHandle> makeEvent_inlock();
-
- /**
- * Gets a single piece of work to execute.
- *
- * If the "callback" member of the returned WorkItem is falsey, that is a signal
- * to the run loop to wait for shutdown.
- */
- std::pair<WorkItem, CallbackHandle> getWork();
-
- /**
- * Marks as runnable any sleepers whose ready date has passed as of "now".
- * Returns the date when the next sleeper will be ready, or Date_t(~0ULL) if there are no
- * remaining sleepers.
- */
- Date_t scheduleReadySleepers_inlock(Date_t now);
-
- /**
- * Enqueues "callback" into "queue".
- *
- * Assumes that "queue" is sorted by readyDate, and performs insertion sort, starting
- * at the back of the "queue" working toward the front.
- *
- * Use Date_t(0) for readyDate to mean "ready now".
- */
- StatusWith<CallbackHandle> enqueueWork_inlock(WorkQueue* queue, const CallbackFn& callback);
-
- /**
- * Implementation of signalEvent() that assumes the caller owns _mutex.
- */
- void signalEvent_inlock(const EventHandle& event);
-
- /**
- * Notifies interested parties that shutdown has completed, if it has.
- */
- void maybeNotifyShutdownComplete_inlock();
-
- /**
- * Completes the shutdown process. Called by run().
- */
- void finishShutdown();
-
- void _finishRemoteCommand(
- const RemoteCommandRequest& request,
- const StatusWith<RemoteCommandResponse>& response,
- const CallbackHandle& cbHandle,
- const uint64_t expectedHandleGeneration,
- const RemoteCommandCallbackFn& cb);
-
- /**
- * Executes the callback referenced by "cbHandle", and moves the underlying
- * WorkQueue::iterator into the _freeQueue. "txn" is a pointer to the OperationContext
- * owning the global exclusive lock.
- *
- * Serializes execution of "cbHandle" with the execution of other callbacks.
- */
- void doOperationWithGlobalExclusiveLock(OperationContext* txn,
- const CallbackHandle& cbHandle);
-
- // PRNG; seeded at class construction time.
- PseudoRandom _random;
-
- boost::scoped_ptr<NetworkInterface> _networkInterface;
- boost::mutex _mutex;
- boost::mutex _terribleExLockSyncMutex;
- boost::condition_variable _noMoreWaitingThreads;
- WorkQueue _freeQueue;
- WorkQueue _readyQueue;
- WorkQueue _exclusiveLockInProgressQueue;
- WorkQueue _networkInProgressQueue;
- WorkQueue _sleepersQueue;
- EventList _unsignaledEvents;
- EventList _signaledEvents;
- int64_t _totalEventWaiters;
- bool _inShutdown;
- threadpool::ThreadPool _dblockWorkers;
- uint64_t _nextId;
- };
-
- /**
- * Reference to an event object in the executor.
- */
- class ReplicationExecutor::EventHandle {
- friend class ReplicationExecutor;
- public:
- EventHandle() : _generation(0), _id(0) {}
-
- /**
- * Returns true if the handle is valid, meaning that it identifies
- */
- bool isValid() const { return _id != 0; }
-
- bool operator==(const EventHandle &other) const {
- return (_id == other._id);
- }
-
- bool operator!=(const EventHandle &other) const {
- return !(*this == other);
- }
-
- private:
- EventHandle(const EventList::iterator& iter, const uint64_t id);
-
- EventList::iterator _iter;
- uint64_t _generation;
- uint64_t _id;
- };
-
- /**
- * Reference to a scheduled callback.
- */
- class ReplicationExecutor::CallbackHandle {
- friend class ReplicationExecutor;
- public:
- CallbackHandle() : _generation(0) {}
-
- bool isValid() const { return _finishedEvent.isValid(); }
-
- bool operator==(const CallbackHandle &other) const {
- return (_finishedEvent == other._finishedEvent);
- }
-
- bool operator!=(const CallbackHandle &other) const {
- return !(*this == other);
- }
-
- private:
- explicit CallbackHandle(const WorkQueue::iterator& iter);
-
- WorkQueue::iterator _iter;
- uint64_t _generation;
- EventHandle _finishedEvent;
- };
-
- struct ReplicationExecutor::CallbackData {
- CallbackData(ReplicationExecutor* theExecutor,
- const CallbackHandle& theHandle,
- const Status& theStatus,
- OperationContext* txn = NULL);
-
- ReplicationExecutor* executor;
- CallbackHandle myHandle;
- Status status;
- OperationContext* txn;
- };
-
- /**
- * Type of object describing a command to execute against a remote MongoDB node.
- */
- struct ReplicationExecutor::RemoteCommandRequest {
- RemoteCommandRequest();
- RemoteCommandRequest(const HostAndPort& theTarget,
- const std::string& theDbName,
- const BSONObj& theCmdObj,
- const Milliseconds timeoutMillis = kNoTimeout);
-
- // Returns diagnostic info.
- std::string getDiagnosticString();
-
- HostAndPort target;
- std::string dbname;
- BSONObj cmdObj;
- Milliseconds timeout;
- Date_t expirationDate; // Set by scheduleRemoteCommand.
- };
-
- struct ReplicationExecutor::RemoteCommandResponse {
- RemoteCommandResponse() : data(), elapsedMillis(Milliseconds(0)) {}
- RemoteCommandResponse(BSONObj obj, Milliseconds millis)
- : data(obj),
- elapsedMillis(millis) {}
-
- BSONObj data;
- Milliseconds elapsedMillis;
- };
-
- /**
- * Interface to networking and lock manager.
- */
- class ReplicationExecutor::NetworkInterface {
- MONGO_DISALLOW_COPYING(NetworkInterface);
- public:
- typedef RemoteCommandResponse Response;
- typedef stdx::function<void (const ResponseStatus&)> RemoteCommandCompletionFn;
-
- virtual ~NetworkInterface();
-
- /**
- * Returns diagnostic info.
- */
- virtual std::string getDiagnosticString() = 0;
-
- /**
- * Starts up the network interface.
- *
- * It is valid to call all methods except shutdown() before this method completes. That is,
- * implementations may not assume that startup() completes before startCommand() first
- * executes.
- *
- * Called by the owning ReplicationExecutor inside its run() method.
- */
- virtual void startup() = 0;
-
- /**
- * Shuts down the network interface. Must be called before this instance gets deleted,
- * if startup() is called.
- *
- * Called by the owning ReplicationExecutor inside its run() method.
- */
- virtual void shutdown() = 0;
-
- /**
- * Blocks the current thread (presumably the executor thread) until the network interface
- * knows of work for the executor to perform.
- */
- virtual void waitForWork() = 0;
-
- /**
- * Similar to waitForWork, but only blocks until "when".
- */
- virtual void waitForWorkUntil(Date_t when) = 0;
-
- /**
- * Signals to the network interface that there is new work (such as a signaled event) for
- * the executor to process. Wakes the executor from waitForWork() and friends.
- */
- virtual void signalWorkAvailable() = 0;
-
- /**
- * Returns the current time.
- */
- virtual Date_t now() = 0;
-
- /**
- * Starts asynchronous execution of the command described by "request".
- */
- virtual void startCommand(const CallbackHandle& cbHandle,
- const RemoteCommandRequest& request,
- const RemoteCommandCompletionFn& onFinish) = 0;
-
- /**
- * Requests cancelation of the network activity associated with "cbHandle" if it has not yet
- * completed.
- */
- virtual void cancelCommand(const CallbackHandle& cbHandle) = 0;
-
- /**
- * Runs the given callback while holding the global exclusive lock.
- */
- virtual void runCallbackWithGlobalExclusiveLock(
- const stdx::function<void (OperationContext*)>& callback) = 0;
-
- protected:
- NetworkInterface();
- };
-
- typedef ReplicationExecutor::ResponseStatus ResponseStatus;
-
- // Must be after NetworkInterface class
- struct ReplicationExecutor::RemoteCommandCallbackData {
- RemoteCommandCallbackData(ReplicationExecutor* theExecutor,
- const CallbackHandle& theHandle,
- const RemoteCommandRequest& theRequest,
- const StatusWith<RemoteCommandResponse>& theResponse);
-
- ReplicationExecutor* executor;
- CallbackHandle myHandle;
- RemoteCommandRequest request;
- StatusWith<RemoteCommandResponse> response;
- };
-
- /**
- * Description of a scheduled but not-yet-run work item.
- *
- * Once created, WorkItem objects remain in scope until the executor is destroyed.
- * However, over their lifetime, they may represent many different work items. This
- * divorces the lifetime of CallbackHandles from the lifetime of WorkItem objects, but
- * requires a unique generation identifier in CallbackHandles and WorkItem objects.
- *
- * WorkItem is copyable so that it may be stored in a list. However, in practice they
- * should only be copied by getWork() and when allocating new entries into a WorkQueue (not
- * when moving entries between work lists).
- */
- struct ReplicationExecutor::WorkItem {
- WorkItem();
- uint64_t generation;
- CallbackFn callback;
- EventHandle finishedEvent;
- Date_t readyDate;
- bool isNetworkOperation;
- bool isCanceled;
- };
-
- /**
- * Description of an unsignaled event.
- *
- * Like WorkItem, above, but for events. On signaling, the executor bumps the
- * generation, marks all waiters as runnable, and moves the event from the "unsignaled"
- * EventList to the "signaled" EventList, the latter being a free list of events.
- */
- struct ReplicationExecutor::Event {
- Event();
- uint64_t generation;
- bool isSignaled;
- WorkQueue waiters;
- boost::shared_ptr<boost::condition_variable> isSignaledCondition;
- };
+ * Signals to the network interface that there is new work (such as a signaled event) for
+ * the executor to process. Wakes the executor from waitForWork() and friends.
+ */
+ virtual void signalWorkAvailable() = 0;
+
+ /**
+ * Returns the current time.
+ */
+ virtual Date_t now() = 0;
+
+ /**
+ * Starts asynchronous execution of the command described by "request".
+ */
+ virtual void startCommand(const CallbackHandle& cbHandle,
+ const RemoteCommandRequest& request,
+ const RemoteCommandCompletionFn& onFinish) = 0;
+
+ /**
+     * Requests cancellation of the network activity associated with "cbHandle" if it has not yet
+ * completed.
+ */
+ virtual void cancelCommand(const CallbackHandle& cbHandle) = 0;
+
+ /**
+ * Runs the given callback while holding the global exclusive lock.
+ */
+ virtual void runCallbackWithGlobalExclusiveLock(
+ const stdx::function<void(OperationContext*)>& callback) = 0;
+
+protected:
+ NetworkInterface();
+};
+
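For illustration, a minimal (hypothetical) implementation of this interface might look like
the sketch below; it satisfies the pure virtuals but performs no real I/O. The real test
double is NetworkInterfaceMock in network_interface_mock.h.

    class NoopNetworkInterface : public ReplicationExecutor::NetworkInterface {
    public:
        virtual std::string getDiagnosticString() {
            return "NoopNetworkInterface";
        }
        virtual void startup() {}
        virtual void shutdown() {}
        virtual void waitForWork() {}  // A real implementation blocks here.
        virtual void waitForWorkUntil(Date_t when) {}
        virtual void signalWorkAvailable() {}
        virtual Date_t now() {
            return Date_t(0);
        }
        virtual void startCommand(const ReplicationExecutor::CallbackHandle& cbHandle,
                                  const ReplicationExecutor::RemoteCommandRequest& request,
                                  const RemoteCommandCompletionFn& onFinish) {
            // Fail every command immediately; a real interface performs the I/O.
            onFinish(ReplicationExecutor::ResponseStatus(ErrorCodes::HostUnreachable,
                                                         "no network"));
        }
        virtual void cancelCommand(const ReplicationExecutor::CallbackHandle& cbHandle) {}
        virtual void runCallbackWithGlobalExclusiveLock(
            const stdx::function<void(OperationContext*)>& callback) {
            callback(NULL);  // Sketch only; a real interface supplies a locked OperationContext.
        }
    };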
+typedef ReplicationExecutor::ResponseStatus ResponseStatus;
+
+// Must be after NetworkInterface class
+struct ReplicationExecutor::RemoteCommandCallbackData {
+ RemoteCommandCallbackData(ReplicationExecutor* theExecutor,
+ const CallbackHandle& theHandle,
+ const RemoteCommandRequest& theRequest,
+ const StatusWith<RemoteCommandResponse>& theResponse);
+
+ ReplicationExecutor* executor;
+ CallbackHandle myHandle;
+ RemoteCommandRequest request;
+ StatusWith<RemoteCommandResponse> response;
+};
+
+/**
+ * Description of a scheduled but not-yet-run work item.
+ *
+ * Once created, WorkItem objects remain in scope until the executor is destroyed.
+ * However, over their lifetime, they may represent many different work items. This
+ * divorces the lifetime of CallbackHandles from the lifetime of WorkItem objects, but
+ * requires a unique generation identifier in CallbackHandles and WorkItem objects.
+ *
+ * WorkItem is copyable so that it may be stored in a list. However, in practice they
+ * should only be copied by getWork() and when allocating new entries into a WorkQueue (not
+ * when moving entries between work lists).
+ */
+struct ReplicationExecutor::WorkItem {
+ WorkItem();
+ uint64_t generation;
+ CallbackFn callback;
+ EventHandle finishedEvent;
+ Date_t readyDate;
+ bool isNetworkOperation;
+ bool isCanceled;
+};
+
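The generation scheme described here can be illustrated with a generic, self-contained
sketch (not the executor's actual code):

    #include <stdint.h>

    // A slot may represent many logical items over its lifetime; each reuse
    // bumps the generation, invalidating handles minted for earlier tenants.
    struct Slot {
        uint64_t generation;
    };

    struct Handle {
        const Slot* slot;
        uint64_t generation;  // Generation observed when the handle was created.

        bool refersToLiveItem() const {
            return generation == slot->generation;
        }
    };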
+/**
+ * Description of an unsignaled event.
+ *
+ * Like WorkItem, above, but for events. On signaling, the executor bumps the
+ * generation, marks all waiters as runnable, and moves the event from the "unsignaled"
+ * EventList to the "signaled" EventList, the latter being a free list of events.
+ */
+struct ReplicationExecutor::Event {
+ Event();
+ uint64_t generation;
+ bool isSignaled;
+ WorkQueue waiters;
+ boost::shared_ptr<boost::condition_variable> isSignaledCondition;
+};
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_executor_test.cpp b/src/mongo/db/repl/replication_executor_test.cpp
index 24f1109ac33..a6305056a7e 100644
--- a/src/mongo/db/repl/replication_executor_test.cpp
+++ b/src/mongo/db/repl/replication_executor_test.cpp
@@ -44,477 +44,434 @@ namespace repl {
namespace {
- bool operator==(const ReplicationExecutor::RemoteCommandRequest lhs,
- const ReplicationExecutor::RemoteCommandRequest rhs) {
- return lhs.target == rhs.target &&
- lhs.dbname == rhs.dbname &&
- lhs.cmdObj == rhs.cmdObj;
+bool operator==(const ReplicationExecutor::RemoteCommandRequest lhs,
+ const ReplicationExecutor::RemoteCommandRequest rhs) {
+ return lhs.target == rhs.target && lhs.dbname == rhs.dbname && lhs.cmdObj == rhs.cmdObj;
+}
+
+bool operator!=(const ReplicationExecutor::RemoteCommandRequest lhs,
+ const ReplicationExecutor::RemoteCommandRequest rhs) {
+ return !(lhs == rhs);
+}
+
+void setStatus(const ReplicationExecutor::CallbackData& cbData, Status* target) {
+ *target = cbData.status;
+}
+
+void setStatusAndShutdown(const ReplicationExecutor::CallbackData& cbData, Status* target) {
+ setStatus(cbData, target);
+ if (cbData.status != ErrorCodes::CallbackCanceled)
+ cbData.executor->shutdown();
+}
+
+void setStatusAndTriggerEvent(const ReplicationExecutor::CallbackData& cbData,
+ Status* outStatus,
+ ReplicationExecutor::EventHandle event) {
+ *outStatus = cbData.status;
+ if (!cbData.status.isOK())
+ return;
+ cbData.executor->signalEvent(event);
+}
+
+void scheduleSetStatusAndShutdown(const ReplicationExecutor::CallbackData& cbData,
+ Status* outStatus1,
+ Status* outStatus2) {
+ if (!cbData.status.isOK()) {
+ *outStatus1 = cbData.status;
+ return;
}
+ *outStatus1 = cbData.executor->scheduleWork(stdx::bind(setStatusAndShutdown,
+ stdx::placeholders::_1,
+ outStatus2)).getStatus();
+}
- bool operator!=(const ReplicationExecutor::RemoteCommandRequest lhs,
- const ReplicationExecutor::RemoteCommandRequest rhs) {
- return !(lhs == rhs);
- }
-
- void setStatus(const ReplicationExecutor::CallbackData& cbData, Status* target) {
- *target = cbData.status;
- }
-
- void setStatusAndShutdown(const ReplicationExecutor::CallbackData& cbData,
- Status* target) {
- setStatus(cbData, target);
- if (cbData.status != ErrorCodes::CallbackCanceled)
- cbData.executor->shutdown();
- }
-
- void setStatusAndTriggerEvent(const ReplicationExecutor::CallbackData& cbData,
- Status* outStatus,
- ReplicationExecutor::EventHandle event) {
- *outStatus = cbData.status;
- if (!cbData.status.isOK())
- return;
- cbData.executor->signalEvent(event);
- }
+const int64_t prngSeed = 1;
- void scheduleSetStatusAndShutdown(const ReplicationExecutor::CallbackData& cbData,
- Status* outStatus1,
- Status* outStatus2) {
- if (!cbData.status.isOK()) {
- *outStatus1 = cbData.status;
- return;
- }
- *outStatus1= cbData.executor->scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- outStatus2)).getStatus();
+class ReplicationExecutorTest : public unittest::Test {
+protected:
+ NetworkInterfaceMock* getNet() {
+ return _net;
}
-
- const int64_t prngSeed = 1;
-
- class ReplicationExecutorTest : public unittest::Test {
- protected:
- NetworkInterfaceMock* getNet() { return _net; }
- ReplicationExecutor& getExecutor() { return *_executor; }
-
- void launchExecutorThread();
- void joinExecutorThread();
-
- virtual void setUp();
- virtual void tearDown();
-
- private:
- NetworkInterfaceMock* _net;
- boost::scoped_ptr<ReplicationExecutor> _executor;
- boost::scoped_ptr<boost::thread> _executorThread;
- };
-
- void ReplicationExecutorTest::launchExecutorThread() {
- ASSERT(!_executorThread);
- _executorThread.reset(
- new boost::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
- _net->enterNetwork();
- }
-
- void ReplicationExecutorTest::joinExecutorThread() {
- ASSERT(_executorThread);
- _net->exitNetwork();
- _executorThread->join();
- _executorThread.reset();
- }
-
- void ReplicationExecutorTest::setUp() {
- _net = new NetworkInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, prngSeed));
- }
-
- void ReplicationExecutorTest::tearDown() {
- if (_executorThread) {
- _executor->shutdown();
- joinExecutorThread();
- }
- }
-
- TEST_F(ReplicationExecutorTest, RunOne) {
- ReplicationExecutor& executor = getExecutor();
- Status status(ErrorCodes::InternalError, "Not mutated");
- ASSERT_OK(executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status)).getStatus());
- executor.run();
- ASSERT_OK(status);
- }
-
- TEST_F(ReplicationExecutorTest, Schedule1ButShutdown) {
- ReplicationExecutor& executor = getExecutor();
- Status status(ErrorCodes::InternalError, "Not mutated");
- ASSERT_OK(executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status)).getStatus());
- executor.shutdown();
- executor.run();
- ASSERT_EQUALS(status, ErrorCodes::CallbackCanceled);
+ ReplicationExecutor& getExecutor() {
+ return *_executor;
}
- TEST_F(ReplicationExecutorTest, Schedule2Cancel1) {
- ReplicationExecutor& executor = getExecutor();
- Status status1(ErrorCodes::InternalError, "Not mutated");
- Status status2(ErrorCodes::InternalError, "Not mutated");
- ReplicationExecutor::CallbackHandle cb = unittest::assertGet(
- executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status1)));
- executor.cancel(cb);
- ASSERT_OK(executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status2)).getStatus());
- executor.run();
- ASSERT_EQUALS(status1, ErrorCodes::CallbackCanceled);
- ASSERT_OK(status2);
- }
-
- TEST_F(ReplicationExecutorTest, OneSchedulesAnother) {
- ReplicationExecutor& executor = getExecutor();
- Status status1(ErrorCodes::InternalError, "Not mutated");
- Status status2(ErrorCodes::InternalError, "Not mutated");
- ASSERT_OK(executor.scheduleWork(stdx::bind(scheduleSetStatusAndShutdown,
- stdx::placeholders::_1,
- &status1,
- &status2)).getStatus());
- executor.run();
- ASSERT_OK(status1);
- ASSERT_OK(status2);
- }
-
- class EventChainAndWaitingTest {
- MONGO_DISALLOW_COPYING(EventChainAndWaitingTest);
- public:
- EventChainAndWaitingTest();
- void run();
- private:
- void onGo(const ReplicationExecutor::CallbackData& cbData);
- void onGoAfterTriggered(const ReplicationExecutor::CallbackData& cbData);
-
- NetworkInterfaceMock* net;
- ReplicationExecutor executor;
- boost::thread executorThread;
- const ReplicationExecutor::EventHandle goEvent;
- const ReplicationExecutor::EventHandle event2;
- const ReplicationExecutor::EventHandle event3;
- ReplicationExecutor::EventHandle triggerEvent;
- ReplicationExecutor::CallbackFn triggered2;
- ReplicationExecutor::CallbackFn triggered3;
- Status status1;
- Status status2;
- Status status3;
- Status status4;
- Status status5;
- };
-
- TEST(ReplicationExecutorTest, EventChainAndWaiting) {
- EventChainAndWaitingTest().run();
- }
-
- EventChainAndWaitingTest::EventChainAndWaitingTest() :
- net(new NetworkInterfaceMock),
- executor(net, prngSeed),
- executorThread(stdx::bind(&ReplicationExecutor::run, &executor)),
- goEvent(unittest::assertGet(executor.makeEvent())),
- event2(unittest::assertGet(executor.makeEvent())),
- event3(unittest::assertGet(executor.makeEvent())),
- status1(ErrorCodes::InternalError, "Not mutated"),
- status2(ErrorCodes::InternalError, "Not mutated"),
- status3(ErrorCodes::InternalError, "Not mutated"),
- status4(ErrorCodes::InternalError, "Not mutated"),
- status5(ErrorCodes::InternalError, "Not mutated") {
-
- triggered2 = stdx::bind(setStatusAndTriggerEvent,
- stdx::placeholders::_1,
- &status2,
- event2);
- triggered3 = stdx::bind(setStatusAndTriggerEvent,
- stdx::placeholders::_1,
- &status3,
- event3);
- }
-
- void EventChainAndWaitingTest::run() {
- executor.onEvent(goEvent,
- stdx::bind(&EventChainAndWaitingTest::onGo,
- this,
- stdx::placeholders::_1));
- executor.signalEvent(goEvent);
- executor.waitForEvent(goEvent);
- executor.waitForEvent(event2);
- executor.waitForEvent(event3);
-
- ReplicationExecutor::EventHandle neverSignaledEvent =
- unittest::assertGet(executor.makeEvent());
- boost::thread neverSignaledWaiter(stdx::bind(&ReplicationExecutor::waitForEvent,
- &executor,
- neverSignaledEvent));
- ReplicationExecutor::CallbackHandle shutdownCallback = unittest::assertGet(
- executor.scheduleWork(stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status5)));
- executor.wait(shutdownCallback);
- neverSignaledWaiter.join();
- executorThread.join();
- ASSERT_OK(status1);
- ASSERT_OK(status2);
- ASSERT_OK(status3);
- ASSERT_OK(status4);
- ASSERT_OK(status5);
- }
-
- void EventChainAndWaitingTest::onGo(const ReplicationExecutor::CallbackData& cbData) {
- if (!cbData.status.isOK()) {
- status1 = cbData.status;
- return;
- }
- ReplicationExecutor* executor = cbData.executor;
- StatusWith<ReplicationExecutor::EventHandle> errorOrTriggerEvent = executor->makeEvent();
- if (!errorOrTriggerEvent.isOK()) {
- status1 = errorOrTriggerEvent.getStatus();
- executor->shutdown();
- return;
- }
- triggerEvent = errorOrTriggerEvent.getValue();
- StatusWith<ReplicationExecutor::CallbackHandle> cbHandle = executor->onEvent(
- triggerEvent, triggered2);
- if (!cbHandle.isOK()) {
- status1 = cbHandle.getStatus();
- executor->shutdown();
- return;
- }
- cbHandle = executor->onEvent(triggerEvent, triggered3);
- if (!cbHandle.isOK()) {
- status1 = cbHandle.getStatus();
- executor->shutdown();
- return;
- }
-
- cbHandle = executor->onEvent(
- goEvent,
- stdx::bind(&EventChainAndWaitingTest::onGoAfterTriggered,
- this,
- stdx::placeholders::_1));
- if (!cbHandle.isOK()) {
- status1 = cbHandle.getStatus();
- executor->shutdown();
- return;
- }
- status1 = Status::OK();
- }
-
- void EventChainAndWaitingTest::onGoAfterTriggered(
- const ReplicationExecutor::CallbackData& cbData) {
- status4 = cbData.status;
- if (!cbData.status.isOK()) {
- return;
- }
- cbData.executor->signalEvent(triggerEvent);
- }
-
- TEST_F(ReplicationExecutorTest, ScheduleWorkAt) {
- NetworkInterfaceMock* net = getNet();
- ReplicationExecutor& executor = getExecutor();
- launchExecutorThread();
- Status status1(ErrorCodes::InternalError, "Not mutated");
- Status status2(ErrorCodes::InternalError, "Not mutated");
- Status status3(ErrorCodes::InternalError, "Not mutated");
- const Date_t now = net->now();
- const ReplicationExecutor::CallbackHandle cb1 =
- unittest::assertGet(executor.scheduleWorkAt(Date_t(now.millis + 100),
- stdx::bind(setStatus,
- stdx::placeholders::_1,
- &status1)));
- unittest::assertGet(executor.scheduleWorkAt(Date_t(now.millis + 5000),
- stdx::bind(setStatus,
- stdx::placeholders::_1,
- &status3)));
- const ReplicationExecutor::CallbackHandle cb2 =
- unittest::assertGet(executor.scheduleWorkAt(Date_t(now.millis + 200),
- stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status2)));
- const Date_t startTime = net->now();
- net->runUntil(startTime + 200 /*ms*/);
- ASSERT_EQUALS(startTime + 200, net->now());
- executor.wait(cb1);
- executor.wait(cb2);
- ASSERT_OK(status1);
- ASSERT_OK(status2);
- executor.shutdown();
+ void launchExecutorThread();
+ void joinExecutorThread();
+
+ virtual void setUp();
+ virtual void tearDown();
+
+private:
+ NetworkInterfaceMock* _net;
+ boost::scoped_ptr<ReplicationExecutor> _executor;
+ boost::scoped_ptr<boost::thread> _executorThread;
+};
+
+void ReplicationExecutorTest::launchExecutorThread() {
+ ASSERT(!_executorThread);
+ _executorThread.reset(
+ new boost::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+ _net->enterNetwork();
+}
+
+void ReplicationExecutorTest::joinExecutorThread() {
+ ASSERT(_executorThread);
+ _net->exitNetwork();
+ _executorThread->join();
+ _executorThread.reset();
+}
+
+void ReplicationExecutorTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, prngSeed));
+}
+
+void ReplicationExecutorTest::tearDown() {
+ if (_executorThread) {
+ _executor->shutdown();
joinExecutorThread();
- ASSERT_EQUALS(status3, ErrorCodes::CallbackCanceled);
}
-
- std::string getRequestDescription(const ReplicationExecutor::RemoteCommandRequest& request) {
- return mongoutils::str::stream() << "Request(" << request.target.toString() << ", " <<
- request.dbname << ", " << request.cmdObj << ')';
+}
+
+TEST_F(ReplicationExecutorTest, RunOne) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status(ErrorCodes::InternalError, "Not mutated");
+ ASSERT_OK(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status))
+ .getStatus());
+ executor.run();
+ ASSERT_OK(status);
+}
+
+TEST_F(ReplicationExecutorTest, Schedule1ButShutdown) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status(ErrorCodes::InternalError, "Not mutated");
+ ASSERT_OK(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status))
+ .getStatus());
+ executor.shutdown();
+ executor.run();
+ ASSERT_EQUALS(status, ErrorCodes::CallbackCanceled);
+}
+
+TEST_F(ReplicationExecutorTest, Schedule2Cancel1) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1(ErrorCodes::InternalError, "Not mutated");
+ Status status2(ErrorCodes::InternalError, "Not mutated");
+ ReplicationExecutor::CallbackHandle cb = unittest::assertGet(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status1)));
+ executor.cancel(cb);
+ ASSERT_OK(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status2))
+ .getStatus());
+ executor.run();
+ ASSERT_EQUALS(status1, ErrorCodes::CallbackCanceled);
+ ASSERT_OK(status2);
+}
+
+TEST_F(ReplicationExecutorTest, OneSchedulesAnother) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1(ErrorCodes::InternalError, "Not mutated");
+ Status status2(ErrorCodes::InternalError, "Not mutated");
+ ASSERT_OK(executor.scheduleWork(stdx::bind(scheduleSetStatusAndShutdown,
+ stdx::placeholders::_1,
+ &status1,
+ &status2)).getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+ ASSERT_OK(status2);
+}
+
+class EventChainAndWaitingTest {
+ MONGO_DISALLOW_COPYING(EventChainAndWaitingTest);
+
+public:
+ EventChainAndWaitingTest();
+ void run();
+
+private:
+ void onGo(const ReplicationExecutor::CallbackData& cbData);
+ void onGoAfterTriggered(const ReplicationExecutor::CallbackData& cbData);
+
+ NetworkInterfaceMock* net;
+ ReplicationExecutor executor;
+ boost::thread executorThread;
+ const ReplicationExecutor::EventHandle goEvent;
+ const ReplicationExecutor::EventHandle event2;
+ const ReplicationExecutor::EventHandle event3;
+ ReplicationExecutor::EventHandle triggerEvent;
+ ReplicationExecutor::CallbackFn triggered2;
+ ReplicationExecutor::CallbackFn triggered3;
+ Status status1;
+ Status status2;
+ Status status3;
+ Status status4;
+ Status status5;
+};
+
+TEST(ReplicationExecutorTest, EventChainAndWaiting) {
+ EventChainAndWaitingTest().run();
+}
+
+EventChainAndWaitingTest::EventChainAndWaitingTest()
+ : net(new NetworkInterfaceMock),
+ executor(net, prngSeed),
+ executorThread(stdx::bind(&ReplicationExecutor::run, &executor)),
+ goEvent(unittest::assertGet(executor.makeEvent())),
+ event2(unittest::assertGet(executor.makeEvent())),
+ event3(unittest::assertGet(executor.makeEvent())),
+ status1(ErrorCodes::InternalError, "Not mutated"),
+ status2(ErrorCodes::InternalError, "Not mutated"),
+ status3(ErrorCodes::InternalError, "Not mutated"),
+ status4(ErrorCodes::InternalError, "Not mutated"),
+ status5(ErrorCodes::InternalError, "Not mutated") {
+ triggered2 = stdx::bind(setStatusAndTriggerEvent, stdx::placeholders::_1, &status2, event2);
+ triggered3 = stdx::bind(setStatusAndTriggerEvent, stdx::placeholders::_1, &status3, event3);
+}
+
+void EventChainAndWaitingTest::run() {
+ executor.onEvent(goEvent,
+ stdx::bind(&EventChainAndWaitingTest::onGo, this, stdx::placeholders::_1));
+ executor.signalEvent(goEvent);
+ executor.waitForEvent(goEvent);
+ executor.waitForEvent(event2);
+ executor.waitForEvent(event3);
+
+ ReplicationExecutor::EventHandle neverSignaledEvent = unittest::assertGet(executor.makeEvent());
+ boost::thread neverSignaledWaiter(
+ stdx::bind(&ReplicationExecutor::waitForEvent, &executor, neverSignaledEvent));
+ ReplicationExecutor::CallbackHandle shutdownCallback = unittest::assertGet(
+ executor.scheduleWork(stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status5)));
+ executor.wait(shutdownCallback);
+ neverSignaledWaiter.join();
+ executorThread.join();
+ ASSERT_OK(status1);
+ ASSERT_OK(status2);
+ ASSERT_OK(status3);
+ ASSERT_OK(status4);
+ ASSERT_OK(status5);
+}
+
+void EventChainAndWaitingTest::onGo(const ReplicationExecutor::CallbackData& cbData) {
+ if (!cbData.status.isOK()) {
+ status1 = cbData.status;
+ return;
}
-
- static void setStatusOnRemoteCommandCompletion(
- const ReplicationExecutor::RemoteCommandCallbackData& cbData,
- const ReplicationExecutor::RemoteCommandRequest& expectedRequest,
- Status* outStatus) {
-
- if (cbData.request != expectedRequest) {
- *outStatus = Status(
- ErrorCodes::BadValue,
- mongoutils::str::stream() << "Actual request: " <<
- getRequestDescription(cbData.request) << "; expected: " <<
- getRequestDescription(expectedRequest));
- return;
- }
- *outStatus = cbData.response.getStatus();
+ ReplicationExecutor* executor = cbData.executor;
+ StatusWith<ReplicationExecutor::EventHandle> errorOrTriggerEvent = executor->makeEvent();
+ if (!errorOrTriggerEvent.isOK()) {
+ status1 = errorOrTriggerEvent.getStatus();
+ executor->shutdown();
+ return;
}
-
- TEST_F(ReplicationExecutorTest, ScheduleRemoteCommand) {
- NetworkInterfaceMock* net = getNet();
- ReplicationExecutor& executor = getExecutor();
- launchExecutorThread();
- Status status1(ErrorCodes::InternalError, "Not mutated");
- const ReplicationExecutor::RemoteCommandRequest request(
- HostAndPort("localhost", 27017),
- "mydb",
- BSON("whatsUp" << "doc"));
- ReplicationExecutor::CallbackHandle cbHandle = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status1)));
- ASSERT(net->hasReadyRequests());
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ErrorCodes::NoSuchKey, "I'm missing"));
- net->runReadyNetworkOperations();
- ASSERT(!net->hasReadyRequests());
- executor.wait(cbHandle);
- executor.shutdown();
- joinExecutorThread();
- ASSERT_EQUALS(ErrorCodes::NoSuchKey, status1);
+ triggerEvent = errorOrTriggerEvent.getValue();
+ StatusWith<ReplicationExecutor::CallbackHandle> cbHandle =
+ executor->onEvent(triggerEvent, triggered2);
+ if (!cbHandle.isOK()) {
+ status1 = cbHandle.getStatus();
+ executor->shutdown();
+ return;
}
-
- TEST_F(ReplicationExecutorTest, ScheduleAndCancelRemoteCommand) {
- ReplicationExecutor& executor = getExecutor();
- Status status1(ErrorCodes::InternalError, "Not mutated");
- const ReplicationExecutor::RemoteCommandRequest request(
- HostAndPort("localhost", 27017),
- "mydb",
- BSON("whatsUp" << "doc"));
- ReplicationExecutor::CallbackHandle cbHandle = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status1)));
- executor.cancel(cbHandle);
- launchExecutorThread();
- getNet()->runReadyNetworkOperations();
- executor.wait(cbHandle);
- executor.shutdown();
- joinExecutorThread();
- ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status1);
+ cbHandle = executor->onEvent(triggerEvent, triggered3);
+ if (!cbHandle.isOK()) {
+ status1 = cbHandle.getStatus();
+ executor->shutdown();
+ return;
}
- TEST_F(ReplicationExecutorTest, ScheduleExclusiveLockOperation) {
- ReplicationExecutor& executor = getExecutor();
- Status status1(ErrorCodes::InternalError, "Not mutated");
- ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock(
- stdx::bind(setStatusAndShutdown,
- stdx::placeholders::_1,
- &status1)).getStatus());
- executor.run();
- ASSERT_OK(status1);
+ cbHandle = executor->onEvent(
+ goEvent,
+ stdx::bind(&EventChainAndWaitingTest::onGoAfterTriggered, this, stdx::placeholders::_1));
+ if (!cbHandle.isOK()) {
+ status1 = cbHandle.getStatus();
+ executor->shutdown();
+ return;
}
+ status1 = Status::OK();
+}
- TEST_F(ReplicationExecutorTest, RemoteCommandWithTimeout) {
- NetworkInterfaceMock* net = getNet();
- ReplicationExecutor& executor = getExecutor();
- Status status(ErrorCodes::InternalError, "");
- launchExecutorThread();
- const ReplicationExecutor::RemoteCommandRequest request(
- HostAndPort("lazy", 27017),
- "admin",
- BSON("sleep" << 1),
- ReplicationExecutor::Milliseconds(1));
- ReplicationExecutor::CallbackHandle cbHandle = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status)));
- ASSERT(net->hasReadyRequests());
- const Date_t startTime = net->now();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- startTime + 2,
- ResponseStatus(ErrorCodes::ExceededTimeLimit, "I took too long"));
- net->runUntil(startTime + 2);
- ASSERT_EQUALS(startTime + 2, net->now());
- executor.wait(cbHandle);
- ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+void EventChainAndWaitingTest::onGoAfterTriggered(const ReplicationExecutor::CallbackData& cbData) {
+ status4 = cbData.status;
+ if (!cbData.status.isOK()) {
+ return;
}
-
- TEST_F(ReplicationExecutorTest, CallbackHandleComparison) {
- ReplicationExecutor& executor = getExecutor();
- Status status(ErrorCodes::InternalError, "");
- const ReplicationExecutor::RemoteCommandRequest request(
- HostAndPort("lazy", 27017),
- "admin",
- BSON("cmd" << 1));
- ReplicationExecutor::CallbackHandle cbHandle1 = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status)));
- ReplicationExecutor::CallbackHandle cbHandle2 = unittest::assertGet(
- executor.scheduleRemoteCommand(
- request,
- stdx::bind(setStatusOnRemoteCommandCompletion,
- stdx::placeholders::_1,
- request,
- &status)));
-
- // test equality
- ASSERT_TRUE(cbHandle1 == cbHandle1);
- ASSERT_TRUE(cbHandle2 == cbHandle2);
- ASSERT_FALSE(cbHandle1 != cbHandle1);
- ASSERT_FALSE(cbHandle2 != cbHandle2);
-
- // test inequality
- ASSERT_TRUE(cbHandle1 != cbHandle2);
- ASSERT_TRUE(cbHandle2 != cbHandle1);
- ASSERT_FALSE(cbHandle1 == cbHandle2);
- ASSERT_FALSE(cbHandle2 == cbHandle1);
-
- ReplicationExecutor::CallbackHandle cbHandle1Copy = cbHandle1;
- ASSERT_TRUE(cbHandle1 == cbHandle1Copy);
- ASSERT_TRUE(cbHandle1Copy == cbHandle1);
- ASSERT_FALSE(cbHandle1Copy != cbHandle1);
- ASSERT_FALSE(cbHandle1 != cbHandle1Copy);
-
- std::vector<ReplicationExecutor::CallbackHandle> cbs;
- cbs.push_back(cbHandle1);
- cbs.push_back(cbHandle2);
- ASSERT(cbHandle1 != cbHandle2);
- std::vector<ReplicationExecutor::CallbackHandle>::iterator foundHandle =
- std::find(cbs.begin(),
- cbs.end(),
- cbHandle1);
- ASSERT_TRUE(cbs.end() != foundHandle);
- ASSERT_TRUE(cbHandle1 == *foundHandle);
- launchExecutorThread();
- executor.shutdown();
- joinExecutorThread();
+ cbData.executor->signalEvent(triggerEvent);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleWorkAt) {
+ NetworkInterfaceMock* net = getNet();
+ ReplicationExecutor& executor = getExecutor();
+ launchExecutorThread();
+ Status status1(ErrorCodes::InternalError, "Not mutated");
+ Status status2(ErrorCodes::InternalError, "Not mutated");
+ Status status3(ErrorCodes::InternalError, "Not mutated");
+ const Date_t now = net->now();
+ const ReplicationExecutor::CallbackHandle cb1 = unittest::assertGet(executor.scheduleWorkAt(
+ Date_t(now.millis + 100), stdx::bind(setStatus, stdx::placeholders::_1, &status1)));
+ unittest::assertGet(executor.scheduleWorkAt(
+ Date_t(now.millis + 5000), stdx::bind(setStatus, stdx::placeholders::_1, &status3)));
+ const ReplicationExecutor::CallbackHandle cb2 = unittest::assertGet(executor.scheduleWorkAt(
+ Date_t(now.millis + 200),
+ stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status2)));
+ const Date_t startTime = net->now();
+ net->runUntil(startTime + 200 /*ms*/);
+ ASSERT_EQUALS(startTime + 200, net->now());
+ executor.wait(cb1);
+ executor.wait(cb2);
+ ASSERT_OK(status1);
+ ASSERT_OK(status2);
+ executor.shutdown();
+ joinExecutorThread();
+ ASSERT_EQUALS(status3, ErrorCodes::CallbackCanceled);
+}
+
+std::string getRequestDescription(const ReplicationExecutor::RemoteCommandRequest& request) {
+ return mongoutils::str::stream() << "Request(" << request.target.toString() << ", "
+ << request.dbname << ", " << request.cmdObj << ')';
+}
+
+static void setStatusOnRemoteCommandCompletion(
+ const ReplicationExecutor::RemoteCommandCallbackData& cbData,
+ const ReplicationExecutor::RemoteCommandRequest& expectedRequest,
+ Status* outStatus) {
+ if (cbData.request != expectedRequest) {
+ *outStatus = Status(ErrorCodes::BadValue,
+ mongoutils::str::stream()
+ << "Actual request: " << getRequestDescription(cbData.request)
+ << "; expected: " << getRequestDescription(expectedRequest));
+ return;
}
+ *outStatus = cbData.response.getStatus();
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleRemoteCommand) {
+ NetworkInterfaceMock* net = getNet();
+ ReplicationExecutor& executor = getExecutor();
+ launchExecutorThread();
+ Status status1(ErrorCodes::InternalError, "Not mutated");
+ const ReplicationExecutor::RemoteCommandRequest request(HostAndPort("localhost", 27017),
+ "mydb",
+ BSON("whatsUp"
+ << "doc"));
+ ReplicationExecutor::CallbackHandle cbHandle =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status1)));
+ ASSERT(net->hasReadyRequests());
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi, net->now(), ResponseStatus(ErrorCodes::NoSuchKey, "I'm missing"));
+ net->runReadyNetworkOperations();
+ ASSERT(!net->hasReadyRequests());
+ executor.wait(cbHandle);
+ executor.shutdown();
+ joinExecutorThread();
+ ASSERT_EQUALS(ErrorCodes::NoSuchKey, status1);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleAndCancelRemoteCommand) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1(ErrorCodes::InternalError, "Not mutated");
+ const ReplicationExecutor::RemoteCommandRequest request(HostAndPort("localhost", 27017),
+ "mydb",
+ BSON("whatsUp"
+ << "doc"));
+ ReplicationExecutor::CallbackHandle cbHandle =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status1)));
+ executor.cancel(cbHandle);
+ launchExecutorThread();
+ getNet()->runReadyNetworkOperations();
+ executor.wait(cbHandle);
+ executor.shutdown();
+ joinExecutorThread();
+ ASSERT_EQUALS(ErrorCodes::CallbackCanceled, status1);
+}
+
+TEST_F(ReplicationExecutorTest, ScheduleExclusiveLockOperation) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status1(ErrorCodes::InternalError, "Not mutated");
+ ASSERT_OK(executor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(setStatusAndShutdown, stdx::placeholders::_1, &status1))
+ .getStatus());
+ executor.run();
+ ASSERT_OK(status1);
+}
+
+TEST_F(ReplicationExecutorTest, RemoteCommandWithTimeout) {
+ NetworkInterfaceMock* net = getNet();
+ ReplicationExecutor& executor = getExecutor();
+ Status status(ErrorCodes::InternalError, "");
+ launchExecutorThread();
+ const ReplicationExecutor::RemoteCommandRequest request(HostAndPort("lazy", 27017),
+ "admin",
+ BSON("sleep" << 1),
+ ReplicationExecutor::Milliseconds(1));
+ ReplicationExecutor::CallbackHandle cbHandle =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status)));
+ ASSERT(net->hasReadyRequests());
+ const Date_t startTime = net->now();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(
+ noi, startTime + 2, ResponseStatus(ErrorCodes::ExceededTimeLimit, "I took too long"));
+ net->runUntil(startTime + 2);
+ ASSERT_EQUALS(startTime + 2, net->now());
+ executor.wait(cbHandle);
+ ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, status);
+}
+
+TEST_F(ReplicationExecutorTest, CallbackHandleComparison) {
+ ReplicationExecutor& executor = getExecutor();
+ Status status(ErrorCodes::InternalError, "");
+ const ReplicationExecutor::RemoteCommandRequest request(
+ HostAndPort("lazy", 27017), "admin", BSON("cmd" << 1));
+ ReplicationExecutor::CallbackHandle cbHandle1 =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status)));
+ ReplicationExecutor::CallbackHandle cbHandle2 =
+ unittest::assertGet(executor.scheduleRemoteCommand(
+ request,
+ stdx::bind(
+ setStatusOnRemoteCommandCompletion, stdx::placeholders::_1, request, &status)));
+
+ // test equality
+ ASSERT_TRUE(cbHandle1 == cbHandle1);
+ ASSERT_TRUE(cbHandle2 == cbHandle2);
+ ASSERT_FALSE(cbHandle1 != cbHandle1);
+ ASSERT_FALSE(cbHandle2 != cbHandle2);
+
+ // test inequality
+ ASSERT_TRUE(cbHandle1 != cbHandle2);
+ ASSERT_TRUE(cbHandle2 != cbHandle1);
+ ASSERT_FALSE(cbHandle1 == cbHandle2);
+ ASSERT_FALSE(cbHandle2 == cbHandle1);
+
+ ReplicationExecutor::CallbackHandle cbHandle1Copy = cbHandle1;
+ ASSERT_TRUE(cbHandle1 == cbHandle1Copy);
+ ASSERT_TRUE(cbHandle1Copy == cbHandle1);
+ ASSERT_FALSE(cbHandle1Copy != cbHandle1);
+ ASSERT_FALSE(cbHandle1 != cbHandle1Copy);
+
+ std::vector<ReplicationExecutor::CallbackHandle> cbs;
+ cbs.push_back(cbHandle1);
+ cbs.push_back(cbHandle2);
+ ASSERT(cbHandle1 != cbHandle2);
+ std::vector<ReplicationExecutor::CallbackHandle>::iterator foundHandle =
+ std::find(cbs.begin(), cbs.end(), cbHandle1);
+ ASSERT_TRUE(cbs.end() != foundHandle);
+ ASSERT_TRUE(cbHandle1 == *foundHandle);
+ launchExecutorThread();
+ executor.shutdown();
+ joinExecutorThread();
+}
} // namespace
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_info.cpp b/src/mongo/db/repl/replication_info.cpp
index cf7ff0faabb..72449c7c6a5 100644
--- a/src/mongo/db/repl/replication_info.cpp
+++ b/src/mongo/db/repl/replication_info.cpp
@@ -49,182 +49,193 @@
namespace mongo {
- using std::auto_ptr;
- using std::list;
- using std::string;
- using std::stringstream;
+using std::auto_ptr;
+using std::list;
+using std::string;
+using std::stringstream;
namespace repl {
- void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getSettings().usingReplSets()) {
- IsMasterResponse isMasterResponse;
- replCoord->fillIsMasterForReplSet(&isMasterResponse);
- result.appendElements(isMasterResponse.toBSON());
- if (level) {
- replCoord->appendSlaveInfoData(&result);
- }
- return;
- }
-
- // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
- if (replAllDead) {
- result.append("ismaster", 0);
- string s = string("dead: ") + replAllDead;
- result.append("info", s);
+void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getSettings().usingReplSets()) {
+ IsMasterResponse isMasterResponse;
+ replCoord->fillIsMasterForReplSet(&isMasterResponse);
+ result.appendElements(isMasterResponse.toBSON());
+ if (level) {
+ replCoord->appendSlaveInfoData(&result);
}
- else {
- result.appendBool("ismaster",
- getGlobalReplicationCoordinator()->isMasterForReportingPurposes());
+ return;
+ }
+
+ // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
+ if (replAllDead) {
+ result.append("ismaster", 0);
+ string s = string("dead: ") + replAllDead;
+ result.append("info", s);
+ } else {
+ result.appendBool("ismaster",
+ getGlobalReplicationCoordinator()->isMasterForReportingPurposes());
+ }
+
+ if (level) {
+ BSONObjBuilder sources(result.subarrayStart("sources"));
+
+ int n = 0;
+ list<BSONObj> src;
+ {
+ const char* localSources = "local.sources";
+ AutoGetCollectionForRead ctx(txn, localSources);
+ auto_ptr<PlanExecutor> exec(
+ InternalPlanner::collectionScan(txn, localSources, ctx.getCollection()));
+ BSONObj obj;
+ PlanExecutor::ExecState state;
+ while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
+ src.push_back(obj);
+ }
}
-
- if (level) {
- BSONObjBuilder sources( result.subarrayStart( "sources" ) );
-
- int n = 0;
- list<BSONObj> src;
+
+ for (list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++) {
+ BSONObj s = *i;
+ BSONObjBuilder bb;
+ bb.append(s["host"]);
+ string sourcename = s["source"].valuestr();
+ if (sourcename != "main")
+ bb.append(s["source"]);
{
- const char* localSources = "local.sources";
- AutoGetCollectionForRead ctx(txn, localSources);
- auto_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn, localSources, ctx.getCollection()));
- BSONObj obj;
- PlanExecutor::ExecState state;
- while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
- src.push_back(obj);
- }
+ BSONElement e = s["syncedTo"];
+ BSONObjBuilder t(bb.subobjStart("syncedTo"));
+ t.appendDate("time", e.timestampTime());
+ t.append("inc", e.timestampInc());
+ t.done();
}
-
- for( list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++ ) {
- BSONObj s = *i;
- BSONObjBuilder bb;
- bb.append( s["host"] );
- string sourcename = s["source"].valuestr();
- if ( sourcename != "main" )
- bb.append( s["source"] );
- {
- BSONElement e = s["syncedTo"];
- BSONObjBuilder t( bb.subobjStart( "syncedTo" ) );
- t.appendDate( "time" , e.timestampTime() );
- t.append( "inc" , e.timestampInc() );
- t.done();
- }
-
- if ( level > 1 ) {
- wassert(!txn->lockState()->isLocked());
- // note: there is no so-style timeout on this connection; perhaps we should have one.
- ScopedDbConnection conn(s["host"].valuestr());
-
- DBClientConnection *cliConn = dynamic_cast< DBClientConnection* >( &conn.conn() );
- if ( cliConn && replAuthenticate(cliConn) ) {
- BSONObj first = conn->findOne( (string)"local.oplog.$" + sourcename,
- Query().sort( BSON( "$natural" << 1 ) ) );
- BSONObj last = conn->findOne( (string)"local.oplog.$" + sourcename,
- Query().sort( BSON( "$natural" << -1 ) ) );
- bb.appendDate( "masterFirst" , first["ts"].timestampTime() );
- bb.appendDate( "masterLast" , last["ts"].timestampTime() );
- double lag = (double) (last["ts"].timestampTime() - s["syncedTo"].timestampTime());
- bb.append( "lagSeconds" , lag / 1000 );
- }
- conn.done();
+
+ if (level > 1) {
+ wassert(!txn->lockState()->isLocked());
+ // note: there is no so_timeout (socket-level timeout) on this connection; perhaps we
+ // should have one.
+ ScopedDbConnection conn(s["host"].valuestr());
+
+ DBClientConnection* cliConn = dynamic_cast<DBClientConnection*>(&conn.conn());
+ if (cliConn && replAuthenticate(cliConn)) {
+ BSONObj first = conn->findOne((string) "local.oplog.$" + sourcename,
+ Query().sort(BSON("$natural" << 1)));
+ BSONObj last = conn->findOne((string) "local.oplog.$" + sourcename,
+ Query().sort(BSON("$natural" << -1)));
+ bb.appendDate("masterFirst", first["ts"].timestampTime());
+ bb.appendDate("masterLast", last["ts"].timestampTime());
+ double lag =
+ (double)(last["ts"].timestampTime() - s["syncedTo"].timestampTime());
+ bb.append("lagSeconds", lag / 1000);
}
-
- sources.append( BSONObjBuilder::numStr( n++ ) , bb.obj() );
+ conn.done();
}
-
- sources.done();
- replCoord->appendSlaveInfoData(&result);
+ sources.append(BSONObjBuilder::numStr(n++), bb.obj());
}
+
+ sources.done();
+
+ replCoord->appendSlaveInfoData(&result);
}
-
- class ReplicationInfoServerStatus : public ServerStatusSection {
- public:
- ReplicationInfoServerStatus() : ServerStatusSection( "repl" ){}
- bool includeByDefault() const { return true; }
-
- BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
-
- if (!getGlobalReplicationCoordinator()->isReplEnabled()) {
- return BSONObj();
- }
-
- int level = configElement.numberInt();
-
- BSONObjBuilder result;
- appendReplicationInfo(txn, result, level);
- getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
-
- return result.obj();
+}
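+
+// Level semantics for appendReplicationInfo above (summary): level 0 appends only
+// the basic ismaster fields; level >= 1 also walks local.sources and reports
+// per-source sync state; level > 1 additionally dials each source to compute
+// lagSeconds from its oplog. Callers pass 0 for a plain isMaster, e.g.:
+//
+//     BSONObjBuilder result;
+//     appendReplicationInfo(txn, result, 0);  // as CmdIsMaster::run() does below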
+
+class ReplicationInfoServerStatus : public ServerStatusSection {
+public:
+ ReplicationInfoServerStatus() : ServerStatusSection("repl") {}
+ bool includeByDefault() const {
+ return true;
+ }
+
+ BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
+ if (!getGlobalReplicationCoordinator()->isReplEnabled()) {
+ return BSONObj();
}
- } replicationInfoServerStatus;
+ int level = configElement.numberInt();
- class OplogInfoServerStatus : public ServerStatusSection {
- public:
- OplogInfoServerStatus() : ServerStatusSection( "oplog" ){}
- bool includeByDefault() const { return false; }
+ BSONObjBuilder result;
+ appendReplicationInfo(txn, result, level);
+ getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
- BSONObj generateSection(OperationContext* txn,
- const BSONElement& configElement) const {
+ return result.obj();
+ }
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (!replCoord->isReplEnabled()) {
- return BSONObj();
- }
+} replicationInfoServerStatus;
- BSONObjBuilder result;
- result.append("latestOptime", replCoord->getMyLastOptime());
-
- const char* oplogNS =
- replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet ?
- rsoplog : "local.oplog.$main";
- BSONObj o;
- uassert(17347,
- "Problem reading earliest entry from oplog",
- Helpers::getSingleton(txn, oplogNS, o));
- result.append("earliestOptime", o["ts"]._opTime());
- return result.obj();
- }
- } oplogInfoServerStatus;
+class OplogInfoServerStatus : public ServerStatusSection {
+public:
+ OplogInfoServerStatus() : ServerStatusSection("oplog") {}
+ bool includeByDefault() const {
+ return false;
+ }
- class CmdIsMaster : public Command {
- public:
- virtual bool requiresAuth() { return false; }
- virtual bool slaveOk() const {
- return true;
- }
- virtual void help( stringstream &help ) const {
- help << "Check if this server is primary for a replica pair/set; also if it is --master or --slave in simple master/slave setups.\n";
- help << "{ isMaster : 1 }";
- }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {} // No auth required
- CmdIsMaster() : Command("isMaster", true, "ismaster") { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
- /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not
- authenticated.
- */
- if ( cmdObj["forShell"].trueValue() )
- lastError.disableForCommand();
-
- appendReplicationInfo(txn, result, 0);
-
- result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
- result.appendNumber("maxMessageSizeBytes", MaxMessageSizeBytes);
- result.appendNumber("maxWriteBatchSize", BatchedCommandRequest::kMaxWriteBatchSize);
- result.appendDate("localTime", jsTime());
- result.append("maxWireVersion", maxWireVersion);
- result.append("minWireVersion", minWireVersion);
- return true;
+ BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (!replCoord->isReplEnabled()) {
+ return BSONObj();
}
- } cmdismaster;
- OpCounterServerStatusSection replOpCounterServerStatusSection( "opcountersRepl", &replOpCounters );
+ BSONObjBuilder result;
+ result.append("latestOptime", replCoord->getMyLastOptime());
+
+ const char* oplogNS = replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet
+ ? rsoplog
+ : "local.oplog.$main";
+ BSONObj o;
+ uassert(17347,
+ "Problem reading earliest entry from oplog",
+ Helpers::getSingleton(txn, oplogNS, o));
+ result.append("earliestOptime", o["ts"]._opTime());
+ return result.obj();
+ }
+} oplogInfoServerStatus;
+
+class CmdIsMaster : public Command {
+public:
+ virtual bool requiresAuth() {
+ return false;
+ }
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual void help(stringstream& help) const {
+ help << "Check if this server is primary for a replica pair/set; also if it is --master or "
+ "--slave in simple master/slave setups.\n";
+ help << "{ isMaster : 1 }";
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {} // No auth required
+ CmdIsMaster() : Command("isMaster", true, "ismaster") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool /*fromRepl*/) {
+ /* currently a request to an arbiter is (somewhat arbitrarily) an ismaster request that
+ is not authenticated.
+ */
+ if (cmdObj["forShell"].trueValue())
+ lastError.disableForCommand();
+
+ appendReplicationInfo(txn, result, 0);
+
+ result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
+ result.appendNumber("maxMessageSizeBytes", MaxMessageSizeBytes);
+ result.appendNumber("maxWriteBatchSize", BatchedCommandRequest::kMaxWriteBatchSize);
+ result.appendDate("localTime", jsTime());
+ result.append("maxWireVersion", maxWireVersion);
+ result.append("minWireVersion", minWireVersion);
+ return true;
+ }
+} cmdismaster;
+
+OpCounterServerStatusSection replOpCounterServerStatusSection("opcountersRepl", &replOpCounters);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 5e03fa43766..20c40774837 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -57,671 +57,736 @@
namespace mongo {
namespace repl {
- using std::string;
- using std::stringstream;
- using std::vector;
+using std::string;
+using std::stringstream;
+using std::vector;
- unsigned replSetForceInitialSyncFailure = 0;
+unsigned replSetForceInitialSyncFailure = 0;
- // Testing only, enabled via command-line.
- class CmdReplSetTest : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Just for regression tests.\n";
+// Testing only, enabled via command-line.
+class CmdReplSetTest : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Just for regression tests.\n";
+ }
+ // No auth needed because it only works when enabled via command line.
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {}
+ CmdReplSetTest() : ReplSetCommand("replSetTest") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ log() << "replSet replSetTest command received: " << cmdObj.toString();
+
+ if (cmdObj.hasElement("forceInitialSyncFailure")) {
+ replSetForceInitialSyncFailure = (unsigned)cmdObj["forceInitialSyncFailure"].Number();
+ return true;
}
- // No auth needed because it only works when enabled via command line.
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {}
- CmdReplSetTest() : ReplSetCommand("replSetTest") { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- log() << "replSet replSetTest command received: " << cmdObj.toString();
-
- if( cmdObj.hasElement("forceInitialSyncFailure") ) {
- replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number();
- return true;
- }
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- return false;
- }
- };
- MONGO_INITIALIZER(RegisterReplSetTestCmd)(InitializerContext* context) {
- if (Command::testCommandsEnabled) {
- // Leaked intentionally: a Command registers itself when constructed.
- new CmdReplSetTest();
- }
- return Status::OK();
+ return false;
}
+};
+MONGO_INITIALIZER(RegisterReplSetTestCmd)(InitializerContext* context) {
+ if (Command::testCommandsEnabled) {
+ // Leaked intentionally: a Command registers itself when constructed.
+ new CmdReplSetTest();
+ }
+ return Status::OK();
+}
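+
+// CmdReplSetTest is only registered when test commands are enabled at startup
+// (illustrative invocation; enableTestCommands is the standard server parameter
+// that sets Command::testCommandsEnabled):
+//
+//     mongod --replSet rs0 --setParameter enableTestCommands=1
+//
+// after which { replSetTest: 1, forceInitialSyncFailure: <n> } sets the counter
+// consumed by initial sync to inject failures.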
- /** get rollback id. used to check if a rollback happened during some interval of time.
- as consumed, the rollback id is not in any particular order, it simply changes on each rollback.
- @see incRBID()
- */
- class CmdReplSetGetRBID : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "internal";
- }
- CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") {}
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
- status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
+/** Get the rollback id, used to check whether a rollback happened during some interval of time.
+ The rollback id carries no particular ordering; it simply changes on each rollback.
+ @see incRBID()
+*/
+class CmdReplSetGetRBID : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "internal";
+ }
+ CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") {}
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::internal);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetRBID;
-
- class CmdReplSetGetStatus : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Report status of a replica set from the POV of this server\n";
- help << "{ replSetGetStatus : 1 }";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetGetStatus);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- if ( cmdObj["forShell"].trueValue() )
- lastError.disableForCommand();
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ status = getGlobalReplicationCoordinator()->processReplSetGetRBID(&result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetRBID;
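+
+// Illustrative use of the rollback id (a sketch, not part of this change; the
+// helper names are hypothetical): sample the RBID before and after an interval
+// to detect whether a rollback happened somewhere inside it.
+//
+//     int rbidBefore = fetchRBID(conn);  // { replSetGetRBID: 1 } -> reply "rbid"
+//     syncSomeOperations(conn);
+//     if (fetchRBID(conn) != rbidBefore) {
+//         // a rollback occurred during the interval; the sync source must be
+//         // re-evaluated before trusting anything read from it
+//     }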
+
+class CmdReplSetGetStatus : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Report status of a replica set from the POV of this server\n";
+ help << "{ replSetGetStatus : 1 }";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetGetStatus);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ if (cmdObj["forShell"].trueValue())
+ lastError.disableForCommand();
+
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- status = getGlobalReplicationCoordinator()->processReplSetGetStatus(&result);
+ status = getGlobalReplicationCoordinator()->processReplSetGetStatus(&result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetGetStatus;
+
+class CmdReplSetGetConfig : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "Returns the current replica set configuration";
+ help << "{ replSetGetConfig : 1 }";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetGetConfig);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetGetConfig() : ReplSetCommand("replSetGetConfig", true) {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetGetStatus;
-
- class CmdReplSetGetConfig : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "Returns the current replica set configuration";
- help << "{ replSetGetConfig : 1 }";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetGetConfig);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetGetConfig() : ReplSetCommand("replSetGetConfig", true) { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj,
- int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
- getGlobalReplicationCoordinator()->processReplSetGetConfig(&result);
- return true;
- }
- } cmdReplSetGetConfig;
+ getGlobalReplicationCoordinator()->processReplSetGetConfig(&result);
+ return true;
+ }
+} cmdReplSetGetConfig;
namespace {
- HostAndPort someHostAndPortForMe() {
- const char* ips = serverGlobalParams.bind_ip.c_str();
- while (*ips) {
- std::string ip;
- const char* comma = strchr(ips, ',');
- if (comma) {
- ip = std::string(ips, comma - ips);
- ips = comma + 1;
- }
- else {
- ip = std::string(ips);
- ips = "";
- }
- HostAndPort h = HostAndPort(ip, serverGlobalParams.port);
- if (!h.isLocalHost()) {
- return h;
- }
+HostAndPort someHostAndPortForMe() {
+ const char* ips = serverGlobalParams.bind_ip.c_str();
+ while (*ips) {
+ std::string ip;
+ const char* comma = strchr(ips, ',');
+ if (comma) {
+ ip = std::string(ips, comma - ips);
+ ips = comma + 1;
+ } else {
+ ip = std::string(ips);
+ ips = "";
+ }
+ HostAndPort h = HostAndPort(ip, serverGlobalParams.port);
+ if (!h.isLocalHost()) {
+ return h;
}
-
- std::string h = getHostName();
- verify(!h.empty());
- verify(h != "localhost");
- return HostAndPort(h, serverGlobalParams.port);
}
-} // namespace
- class CmdReplSetInitiate : public ReplSetCommand {
- public:
- virtual bool isWriteCommandForConfigServer() const { return false; }
- CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") { }
- virtual void help(stringstream& h) const {
- h << "Initiate/christen a replica set.";
- h << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetConfigure);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- virtual bool run(OperationContext* txn,
- const string& ,
- BSONObj& cmdObj,
- int, string& errmsg,
- BSONObjBuilder& result,
- bool fromRepl) {
-
- BSONObj configObj;
- if( cmdObj["replSetInitiate"].type() == Object ) {
- configObj = cmdObj["replSetInitiate"].Obj();
- }
+ std::string h = getHostName();
+ verify(!h.empty());
+ verify(h != "localhost");
+ return HostAndPort(h, serverGlobalParams.port);
+}
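+
+// Worked example for someHostAndPortForMe() (a sketch): with
+// serverGlobalParams.bind_ip == "127.0.0.1,10.1.2.3" and port 27017, the loop
+// skips the localhost entry and returns HostAndPort("10.1.2.3", 27017); with no
+// non-local bind address it falls back to getHostName() plus the server port.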
+} // namespace
- if (configObj.isEmpty()) {
- result.append("info2", "no configuration explicitly specified -- making one");
- log() << "replSet info initiate : no configuration specified. "
- "Using a default configuration for the set";
-
- ReplicationCoordinatorExternalStateImpl externalState;
- std::string name;
- std::vector<HostAndPort> seeds;
- std::set<HostAndPort> seedSet;
- parseReplSetSeedList(
- &externalState,
- getGlobalReplicationCoordinator()->getSettings().replSet,
- name,
- seeds,
- seedSet); // may throw...
-
- BSONObjBuilder b;
- b.append("_id", name);
- b.append("version", 1);
- BSONObjBuilder members;
- HostAndPort me = someHostAndPortForMe();
- members.append("0", BSON( "_id" << 0 << "host" << me.toString() ));
- result.append("me", me.toString());
- for( unsigned i = 0; i < seeds.size(); i++ ) {
- members.append(BSONObjBuilder::numStr(i+1),
- BSON( "_id" << i+1 << "host" << seeds[i].toString()));
- }
- b.appendArray("members", members.obj());
- configObj = b.obj();
- log() << "replSet created this configuration for initiation : " <<
- configObj.toString();
+class CmdReplSetInitiate : public ReplSetCommand {
+public:
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") {}
+ virtual void help(stringstream& h) const {
+ h << "Initiate/christen a replica set.";
+ h << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetConfigure);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ BSONObj configObj;
+ if (cmdObj["replSetInitiate"].type() == Object) {
+ configObj = cmdObj["replSetInitiate"].Obj();
+ }
+
+ if (configObj.isEmpty()) {
+ result.append("info2", "no configuration explicitly specified -- making one");
+ log() << "replSet info initiate : no configuration specified. "
+ "Using a default configuration for the set";
+
+ ReplicationCoordinatorExternalStateImpl externalState;
+ std::string name;
+ std::vector<HostAndPort> seeds;
+ std::set<HostAndPort> seedSet;
+ parseReplSetSeedList(&externalState,
+ getGlobalReplicationCoordinator()->getSettings().replSet,
+ name,
+ seeds,
+ seedSet); // may throw...
+
+ BSONObjBuilder b;
+ b.append("_id", name);
+ b.append("version", 1);
+ BSONObjBuilder members;
+ HostAndPort me = someHostAndPortForMe();
+ members.append("0", BSON("_id" << 0 << "host" << me.toString()));
+ result.append("me", me.toString());
+ for (unsigned i = 0; i < seeds.size(); i++) {
+ members.append(BSONObjBuilder::numStr(i + 1),
+ BSON("_id" << i + 1 << "host" << seeds[i].toString()));
}
+ b.appendArray("members", members.obj());
+ configObj = b.obj();
+ log() << "replSet created this configuration for initiation : " << configObj.toString();
+ }
- if (configObj.getField("version").eoo()) {
- // Missing version field defaults to version 1.
- BSONObjBuilder builder;
- builder.appendElements(configObj);
- builder.append("version", 1);
- configObj = builder.obj();
- }
+ if (configObj.getField("version").eoo()) {
+ // Missing version field defaults to version 1.
+ BSONObjBuilder builder;
+ builder.appendElements(configObj);
+ builder.append("version", 1);
+ configObj = builder.obj();
+ }
+
+ Status status =
+ getGlobalReplicationCoordinator()->processReplSetInitiate(txn, configObj, &result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetInitiate;
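+
+// Shape of the auto-generated config built above when replSetInitiate is given
+// no config object (illustrative values; the name comes from --replSet and the
+// hosts from the parsed seed list):
+//
+//     {
+//         _id: "rs0",
+//         version: 1,
+//         members: [
+//             { _id: 0, host: "thisHost:27017" },
+//             { _id: 1, host: "seedHost:27017" }
+//         ]
+//     }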
+
+class CmdReplSetReconfig : public ReplSetCommand {
+ RWLock mutex; /* we don't need rw but we wanted try capability. :-( */
+public:
+ virtual void help(stringstream& help) const {
+ help << "Adjust configuration of a replica set\n";
+ help << "{ replSetReconfig : config_object }";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetConfigure);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") {}
+ virtual bool run(OperationContext* txn,
+ const string& a,
+ BSONObj& b,
+ int e,
+ string& errmsg,
+ BSONObjBuilder& c,
+ bool d) {
+ try {
+ rwlock_try_write lk(mutex);
+ return _run(txn, a, b, e, errmsg, c, d);
+ } catch (rwlock_try_write::exception&) {
+ }
+ errmsg = "a replSetReconfig is already in progress";
+ return false;
+ }
- Status status = getGlobalReplicationCoordinator()->processReplSetInitiate(txn,
- configObj,
- &result);
+private:
+ bool _run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK()) {
return appendCommandStatus(result, status);
}
- } cmdReplSetInitiate;
-
- class CmdReplSetReconfig : public ReplSetCommand {
- RWLock mutex; /* we don't need rw but we wanted try capability. :-( */
- public:
- virtual void help( stringstream &help ) const {
- help << "Adjust configuration of a replica set\n";
- help << "{ replSetReconfig : config_object }";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetConfigure);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { }
- virtual bool run(OperationContext* txn, const string& a, BSONObj& b, int e, string& errmsg, BSONObjBuilder& c, bool d) {
- try {
- rwlock_try_write lk(mutex);
- return _run(txn, a,b,e,errmsg,c,d);
- }
- catch(rwlock_try_write::exception&) { }
- errmsg = "a replSetReconfig is already in progress";
+
+ if (cmdObj["replSetReconfig"].type() != Object) {
+ errmsg = "no configuration specified";
return false;
}
- private:
- bool _run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
- if( cmdObj["replSetReconfig"].type() != Object ) {
- errmsg = "no configuration specified";
- return false;
- }
+ ReplicationCoordinator::ReplSetReconfigArgs parsedArgs;
+ parsedArgs.newConfigObj = cmdObj["replSetReconfig"].Obj();
+ parsedArgs.force = cmdObj.hasField("force") && cmdObj["force"].trueValue();
+ status =
+ getGlobalReplicationCoordinator()->processReplSetReconfig(txn, parsedArgs, &result);
- ReplicationCoordinator::ReplSetReconfigArgs parsedArgs;
- parsedArgs.newConfigObj = cmdObj["replSetReconfig"].Obj();
- parsedArgs.force = cmdObj.hasField("force") && cmdObj["force"].trueValue();
- status = getGlobalReplicationCoordinator()->processReplSetReconfig(txn,
- parsedArgs,
- &result);
+ ScopedTransaction scopedXact(txn, MODE_X);
+ Lock::GlobalWrite globalWrite(txn->lockState());
- ScopedTransaction scopedXact(txn, MODE_X);
- Lock::GlobalWrite globalWrite(txn->lockState());
+ WriteUnitOfWork wuow(txn);
+ if (status.isOK() && !parsedArgs.force) {
+ logOpInitiate(txn,
+ BSON("msg"
+ << "Reconfig set"
+ << "version" << parsedArgs.newConfigObj["version"]));
+ }
+ wuow.commit();
- WriteUnitOfWork wuow(txn);
- if (status.isOK() && !parsedArgs.force) {
- logOpInitiate(txn, BSON("msg" << "Reconfig set" <<
- "version" << parsedArgs.newConfigObj["version"]));
- }
- wuow.commit();
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetReconfig;
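+
+// "Try capability" in CmdReplSetReconfig means a non-blocking acquire: a second
+// concurrent reconfig fails fast instead of queueing. A modern equivalent of
+// that guard (sketch only; not what this code uses) would be:
+//
+//     std::mutex reconfigMutex;
+//     std::unique_lock<std::mutex> lk(reconfigMutex, std::try_to_lock);
+//     if (!lk.owns_lock()) {
+//         errmsg = "a replSetReconfig is already in progress";
+//         return false;
+//     }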
+
+class CmdReplSetFreeze : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetFreeze : <seconds> }";
+ help << "'freeze' state of member to the extent we can do that. What this really means is "
+ "that\n";
+ help << "this node will not attempt to become primary until the time period specified "
+ "expires.\n";
+ help << "You can call again with {replSetFreeze:0} to unfreeze sooner.\n";
+ help << "A process restart unfreezes the member also.\n";
+ help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
+ int secs = (int)cmdObj.firstElement().numberInt();
+ return appendCommandStatus(
+ result, getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result));
+ }
+} cmdReplSetFreeze;
+
+class CmdReplSetStepDown : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetStepDown : <seconds> }\n";
+ help << "Step down as primary. Will not try to reelect self for the specified time period "
+ "(1 minute if no numeric secs value specified).\n";
+ help << "(If another member with same priority takes over in the meantime, it will stay "
+ "primary.)\n";
+ help << "http://dochub.mongodb.org/core/replicasetcommands";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetReconfig;
-
- class CmdReplSetFreeze : public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetFreeze : <seconds> }";
- help << "'freeze' state of member to the extent we can do that. What this really means is that\n";
- help << "this node will not attempt to become primary until the time period specified expires.\n";
- help << "You can call again with {replSetFreeze:0} to unfreeze sooner.\n";
- help << "A process restart unfreezes the member also.\n";
- help << "\nhttp://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
- int secs = (int) cmdObj.firstElement().numberInt();
- return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->processReplSetFreeze(secs, &result));
- }
- } cmdReplSetFreeze;
-
- class CmdReplSetStepDown: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetStepDown : <seconds> }\n";
- help << "Step down as primary. Will not try to reelect self for the specified time period (1 minute if no numeric secs value specified).\n";
- help << "(If another member with same priority takes over in the meantime, it will stay primary.)\n";
- help << "http://dochub.mongodb.org/core/replicasetcommands";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ const bool force = cmdObj["force"].trueValue();
- const bool force = cmdObj["force"].trueValue();
+ long long stepDownForSecs = cmdObj.firstElement().numberLong();
+ if (stepDownForSecs == 0) {
+ stepDownForSecs = 60;
+ } else if (stepDownForSecs < 0) {
+ status = Status(ErrorCodes::BadValue, "stepdown period must be a positive integer");
+ return appendCommandStatus(result, status);
+ }
- long long stepDownForSecs = cmdObj.firstElement().numberLong();
- if (stepDownForSecs == 0) {
- stepDownForSecs = 60;
- }
- else if (stepDownForSecs < 0) {
- status = Status(ErrorCodes::BadValue,
- "stepdown period must be a positive integer");
- return appendCommandStatus(result, status);
+ long long secondaryCatchUpPeriodSecs;
+ status = bsonExtractIntegerField(
+ cmdObj, "secondaryCatchUpPeriodSecs", &secondaryCatchUpPeriodSecs);
+ if (status.code() == ErrorCodes::NoSuchKey) {
+ // if the field is absent, apply the default values
+ if (force) {
+ secondaryCatchUpPeriodSecs = 0;
+ } else {
+ secondaryCatchUpPeriodSecs = 10;
}
+ } else if (!status.isOK()) {
+ return appendCommandStatus(result, status);
+ }
- long long secondaryCatchUpPeriodSecs;
- status = bsonExtractIntegerField(cmdObj,
- "secondaryCatchUpPeriodSecs",
- &secondaryCatchUpPeriodSecs);
- if (status.code() == ErrorCodes::NoSuchKey) {
- // if field is absent, default values
- if (force) {
- secondaryCatchUpPeriodSecs = 0;
- }
- else {
- secondaryCatchUpPeriodSecs = 10;
- }
- }
- else if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
+ if (secondaryCatchUpPeriodSecs < 0) {
+ status = Status(ErrorCodes::BadValue,
+ "secondaryCatchUpPeriodSecs period must be a positive or absent");
+ return appendCommandStatus(result, status);
+ }
- if (secondaryCatchUpPeriodSecs < 0) {
- status = Status(ErrorCodes::BadValue,
- "secondaryCatchUpPeriodSecs period must be a positive or absent");
- return appendCommandStatus(result, status);
- }
+ if (stepDownForSecs < secondaryCatchUpPeriodSecs) {
+ status = Status(ErrorCodes::BadValue,
+ "stepdown period must be longer than secondaryCatchUpPeriodSecs");
+ return appendCommandStatus(result, status);
+ }
- if (stepDownForSecs < secondaryCatchUpPeriodSecs) {
- status = Status(ErrorCodes::BadValue,
- "stepdown period must be longer than secondaryCatchUpPeriodSecs");
- return appendCommandStatus(result, status);
- }
+ log() << "Attempting to step down in response to replSetStepDown command";
- log() << "Attempting to step down in response to replSetStepDown command";
+ status = getGlobalReplicationCoordinator()->stepDown(
+ txn,
+ force,
+ ReplicationCoordinator::Milliseconds(secondaryCatchUpPeriodSecs * 1000),
+ ReplicationCoordinator::Milliseconds(stepDownForSecs * 1000));
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetStepDown;
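+
+// The argument rules enforced by CmdReplSetStepDown::run() above, restated as a
+// standalone sketch (hypothetical helper, not called anywhere; assumes the zero
+// value has already been defaulted to 60 seconds):
+//
+//     Status validateStepDownArgs(long long stepDownForSecs,
+//                                 long long secondaryCatchUpPeriodSecs) {
+//         if (stepDownForSecs < 0)
+//             return Status(ErrorCodes::BadValue,
+//                           "stepdown period must be a positive integer");
+//         if (secondaryCatchUpPeriodSecs < 0)
+//             return Status(ErrorCodes::BadValue,
+//                           "secondaryCatchUpPeriodSecs must be a nonnegative "
+//                           "integer or absent");
+//         if (stepDownForSecs < secondaryCatchUpPeriodSecs)
+//             return Status(ErrorCodes::BadValue,
+//                           "stepdown period must be longer than "
+//                           "secondaryCatchUpPeriodSecs");
+//         return Status::OK();
+//     }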
- status = getGlobalReplicationCoordinator()->stepDown(
- txn,
- force,
- ReplicationCoordinator::Milliseconds(secondaryCatchUpPeriodSecs * 1000),
- ReplicationCoordinator::Milliseconds(stepDownForSecs * 1000));
+class CmdReplSetMaintenance : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetMaintenance : bool }\n";
+ help << "Enable or disable maintenance mode.";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetMaintenance() : ReplSetCommand("replSetMaintenance") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetStepDown;
- class CmdReplSetMaintenance: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetMaintenance : bool }\n";
- help << "Enable or disable maintenance mode.";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetMaintenance() : ReplSetCommand("replSetMaintenance") { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ return appendCommandStatus(result,
+ getGlobalReplicationCoordinator()->setMaintenanceMode(
+ cmdObj["replSetMaintenance"].trueValue()));
+ }
+} cmdReplSetMaintenance;
- return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->setMaintenanceMode(
- cmdObj["replSetMaintenance"].trueValue()));
- }
- } cmdReplSetMaintenance;
+class CmdReplSetSyncFrom : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "{ replSetSyncFrom : \"host:port\" }\n";
+ help << "Change who this member is syncing from.";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::replSetStateChange);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetSyncFrom() : ReplSetCommand("replSetSyncFrom") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- class CmdReplSetSyncFrom: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "{ replSetSyncFrom : \"host:port\" }\n";
- help << "Change who this member is syncing from.";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::replSetStateChange);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetSyncFrom() : ReplSetCommand("replSetSyncFrom") { }
- virtual bool run(OperationContext* txn, const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result,
- bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ HostAndPort targetHostAndPort;
+ status = targetHostAndPort.initialize(cmdObj["replSetSyncFrom"].valuestrsafe());
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- HostAndPort targetHostAndPort;
- status = targetHostAndPort.initialize(cmdObj["replSetSyncFrom"].valuestrsafe());
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ return appendCommandStatus(
+ result,
+ getGlobalReplicationCoordinator()->processReplSetSyncFrom(targetHostAndPort, &result));
+ }
+} cmdReplSetSyncFrom;
- return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->processReplSetSyncFrom(targetHostAndPort,
- &result));
- }
- } cmdReplSetSyncFrom;
+class CmdReplSetUpdatePosition : public ReplSetCommand {
+public:
+ virtual void help(stringstream& help) const {
+ help << "internal";
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::internal);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ CmdReplSetUpdatePosition() : ReplSetCommand("replSetUpdatePosition") {}
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- class CmdReplSetUpdatePosition: public ReplSetCommand {
- public:
- virtual void help( stringstream &help ) const {
- help << "internal";
- }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- CmdReplSetUpdatePosition() : ReplSetCommand("replSetUpdatePosition") { }
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg,
- BSONObjBuilder& result, bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (cmdObj.hasField("handshake")) {
+ // we have received a handshake, not an update message
+ // handshakes are done here to ensure the receiving end supports the update command
+
+ HandshakeArgs handshake;
+ status = handshake.initialize(cmdObj["handshake"].embeddedObject());
if (!status.isOK())
return appendCommandStatus(result, status);
- if (cmdObj.hasField("handshake")) {
- // we have received a handshake, not an update message
- // handshakes are done here to ensure the receiving end supports the update command
-
- HandshakeArgs handshake;
- status = handshake.initialize(cmdObj["handshake"].embeddedObject());
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
- if (!handshake.hasMemberId()) {
- return appendCommandStatus(
- result,
- Status(ErrorCodes::NoSuchKey,
- "replSetUpdatePosition handshake was missing 'member' field"));
- }
-
+ if (!handshake.hasMemberId()) {
return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->processHandshake(txn, handshake));
+ result,
+ Status(ErrorCodes::NoSuchKey,
+ "replSetUpdatePosition handshake was missing 'member' field"));
}
- UpdatePositionArgs args;
- status = args.initialize(cmdObj);
- if (!status.isOK())
- return appendCommandStatus(result, status);
-
return appendCommandStatus(
- result,
- getGlobalReplicationCoordinator()->processReplSetUpdatePosition(args));
-
+ result, getGlobalReplicationCoordinator()->processHandshake(txn, handshake));
}
- } cmdReplSetUpdatePosition;
+
+ UpdatePositionArgs args;
+ status = args.initialize(cmdObj);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
+
+ return appendCommandStatus(
+ result, getGlobalReplicationCoordinator()->processReplSetUpdatePosition(args));
+ }
+} cmdReplSetUpdatePosition;
namespace {
- /**
- * Returns true if there is no data on this server. Useful when starting replication.
- * The "local" database does NOT count except for "rs.oplog" collection.
- * Used to set the hasData field on replset heartbeat command response.
- */
- bool replHasDatabases(OperationContext* txn) {
- vector<string> names;
- StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
- storageEngine->listDatabases(&names);
-
- if( names.size() >= 2 ) return true;
- if( names.size() == 1 ) {
- if( names[0] != "local" )
- return true;
-
- // we have a local database. return true if oplog isn't empty
- BSONObj o;
- if (Helpers::getSingleton(txn, repl::rsoplog, o)) {
- return true;
- }
+/**
+ * Returns true if there is data on this server. Useful when starting replication.
+ * The "local" database does NOT count except for the "rs.oplog" collection.
+ * Used to set the hasData field on the replset heartbeat command response.
+ */
+bool replHasDatabases(OperationContext* txn) {
+ vector<string> names;
+ StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
+ storageEngine->listDatabases(&names);
+
+ if (names.size() >= 2)
+ return true;
+ if (names.size() == 1) {
+ if (names[0] != "local")
+ return true;
+
+ // we have a local database. return true if oplog isn't empty
+ BSONObj o;
+ if (Helpers::getSingleton(txn, repl::rsoplog, o)) {
+ return true;
}
- return false;
}
+ return false;
+}
-} // namespace
+} // namespace
- MONGO_FP_DECLARE(rsDelayHeartbeatResponse);
+MONGO_FP_DECLARE(rsDelayHeartbeatResponse);
- /* { replSetHeartbeat : <setname> } */
- class CmdReplSetHeartbeat : public ReplSetCommand {
- public:
- void help(stringstream& h) const { h << "internal"; }
- CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+/* { replSetHeartbeat : <setname> } */
+class CmdReplSetHeartbeat : public ReplSetCommand {
+public:
+ void help(stringstream& h) const {
+ h << "internal";
+ }
+ CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") {}
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::internal);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ MONGO_FAIL_POINT_BLOCK(rsDelayHeartbeatResponse, delay) {
+ const BSONObj& data = delay.getData();
+ sleepsecs(data["delay"].numberInt());
+ }
+
+ Status status = Status(ErrorCodes::InternalError, "status not set in heartbeat code");
+ /* we don't call ReplSetCommand::check() here because heartbeat
+ checks many things that are pre-initialization. */
+ if (!getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
+ status = Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
+ return appendCommandStatus(result, status);
}
- virtual bool run(OperationContext* txn, const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- MONGO_FAIL_POINT_BLOCK(rsDelayHeartbeatResponse, delay) {
- const BSONObj& data = delay.getData();
- sleepsecs(data["delay"].numberInt());
- }
+ /* we want to keep heartbeat connections open when relinquishing primary.
+ tag them here. */
+ {
+ AbstractMessagingPort* mp = txn->getClient()->port();
+ if (mp)
+ mp->tag |= ScopedConn::keepOpen;
+ }
- Status status = Status(ErrorCodes::InternalError, "status not set in heartbeat code");
- /* we don't call ReplSetCommand::check() here because heartbeat
- checks many things that are pre-initialization. */
- if (!getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
- status = Status(ErrorCodes::NoReplicationEnabled, "not running with --replSet");
- return appendCommandStatus(result, status);
- }
+ ReplSetHeartbeatArgs args;
+ status = args.initialize(cmdObj);
+ if (!status.isOK()) {
+ return appendCommandStatus(result, status);
+ }
- /* we want to keep heartbeat connections open when relinquishing primary.
- tag them here. */
- {
- AbstractMessagingPort *mp = txn->getClient()->port();
- if( mp )
- mp->tag |= ScopedConn::keepOpen;
- }
+ // ugh.
+ if (args.getCheckEmpty()) {
+ result.append("hasData", replHasDatabases(txn));
+ }
- ReplSetHeartbeatArgs args;
- status = args.initialize(cmdObj);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
- }
+ ReplSetHeartbeatResponse response;
+ status = getGlobalReplicationCoordinator()->processHeartbeat(args, &response);
+ if (status.isOK())
+ response.addToBSON(&result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetHeartbeat;
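+
+// How rsDelayHeartbeatResponse is typically activated from C++ test code (a
+// sketch; assumes the global fail point registry from fail_point_service.h):
+//
+//     FailPoint* fp =
+//         getGlobalFailPointRegistry()->getFailPoint("rsDelayHeartbeatResponse");
+//     fp->setMode(FailPoint::alwaysOn, 0, BSON("delay" << 3));  // 3s per heartbeat
+//     // ... exercise heartbeats ...
+//     fp->setMode(FailPoint::off);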
- // ugh.
- if (args.getCheckEmpty()) {
- result.append("hasData", replHasDatabases(txn));
- }
+/** The first command called by a node seeking election; it is a basic sanity
+ test: do any of the nodes it can reach know that it can't be the primary?
+ */
+class CmdReplSetFresh : public ReplSetCommand {
+public:
+ void help(stringstream& h) const {
+ h << "internal";
+ }
+ CmdReplSetFresh() : ReplSetCommand("replSetFresh") {}
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::internal);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
- ReplSetHeartbeatResponse response;
- status = getGlobalReplicationCoordinator()->processHeartbeat(args, &response);
- if (status.isOK())
- response.addToBSON(&result);
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetHeartbeat;
-
- /** the first cmd called by a node seeking election and it's a basic sanity
- test: do any of the nodes it can reach know that it can't be the primary?
- */
- class CmdReplSetFresh : public ReplSetCommand {
- public:
- void help(stringstream& h) const { h << "internal"; }
- CmdReplSetFresh() : ReplSetCommand("replSetFresh") { }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result,
- bool fromRepl) {
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+ ReplicationCoordinator::ReplSetFreshArgs parsedArgs;
+ parsedArgs.id = cmdObj["id"].Int();
+ parsedArgs.setName = cmdObj["set"].checkAndGetStringData();
+ parsedArgs.who = HostAndPort(cmdObj["who"].String());
+ BSONElement cfgverElement = cmdObj["cfgver"];
+ uassert(28525,
+ str::stream() << "Expected cfgver argument to replSetFresh command to have "
+ "numeric type, but found " << typeName(cfgverElement.type()),
+ cfgverElement.isNumber());
+ parsedArgs.cfgver = cfgverElement.safeNumberLong();
+ parsedArgs.opTime = OpTime(cmdObj["opTime"].Date());
+
+ status = getGlobalReplicationCoordinator()->processReplSetFresh(parsedArgs, &result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetFresh;
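Putting the parsing above together, a replSetFresh request is shaped roughly like
this sketch (all values hypothetical; "opTime" must be a date, since run() reads it
with .Date()):

    // Hypothetical replSetFresh request matching the fields run() parses:
    // "set" (string), "who" (host:port), "id" (int), "cfgver" (number),
    // "opTime" (date).
    BSONObj freshCmd = BSON("replSetFresh" << 1
                                           << "set" << "rs0"
                                           << "who" << "node2.example.net:27017"
                                           << "id" << 1
                                           << "cfgver" << 3
                                           << "opTime" << Date_t(0));  // placeholder date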
- ReplicationCoordinator::ReplSetFreshArgs parsedArgs;
- parsedArgs.id = cmdObj["id"].Int();
- parsedArgs.setName = cmdObj["set"].checkAndGetStringData();
- parsedArgs.who = HostAndPort(cmdObj["who"].String());
- BSONElement cfgverElement = cmdObj["cfgver"];
- uassert(28525,
- str::stream() << "Expected cfgver argument to replSetFresh command to have "
- "numeric type, but found " << typeName(cfgverElement.type()),
- cfgverElement.isNumber());
- parsedArgs.cfgver = cfgverElement.safeNumberLong();
- parsedArgs.opTime = OpTime(cmdObj["opTime"].Date());
-
- status = getGlobalReplicationCoordinator()->processReplSetFresh(parsedArgs, &result);
- return appendCommandStatus(result, status);
- }
- } cmdReplSetFresh;
-
- class CmdReplSetElect : public ReplSetCommand {
- public:
- void help(stringstream& h) const { h << "internal"; }
- CmdReplSetElect() : ReplSetCommand("replSetElect") { }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
- private:
- virtual bool run(OperationContext* txn,
- const string&,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result,
- bool fromRepl) {
- DEV log() << "replSet received elect msg " << cmdObj.toString();
- else LOG(2) << "replSet received elect msg " << cmdObj.toString();
-
- Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
- if (!status.isOK())
- return appendCommandStatus(result, status);
+class CmdReplSetElect : public ReplSetCommand {
+public:
+ void help(stringstream& h) const {
+ h << "internal";
+ }
+ CmdReplSetElect() : ReplSetCommand("replSetElect") {}
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::internal);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
- ReplicationCoordinator::ReplSetElectArgs parsedArgs;
- parsedArgs.set = cmdObj["set"].checkAndGetStringData();
- parsedArgs.whoid = cmdObj["whoid"].Int();
- BSONElement cfgverElement = cmdObj["cfgver"];
- uassert(28526,
- str::stream() << "Expected cfgver argument to replSetElect command to have "
- "numeric type, but found " << typeName(cfgverElement.type()),
- cfgverElement.isNumber());
- parsedArgs.cfgver = cfgverElement.safeNumberLong();
- parsedArgs.round = cmdObj["round"].OID();
-
- status = getGlobalReplicationCoordinator()->processReplSetElect(parsedArgs, &result);
+private:
+ virtual bool run(OperationContext* txn,
+ const string&,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ DEV log() << "replSet received elect msg " << cmdObj.toString();
+ else LOG(2) << "replSet received elect msg " << cmdObj.toString();
+
+ Status status = getGlobalReplicationCoordinator()->checkReplEnabledForCommand(&result);
+ if (!status.isOK())
return appendCommandStatus(result, status);
- }
- } cmdReplSetElect;
-} // namespace repl
-} // namespace mongo
+ ReplicationCoordinator::ReplSetElectArgs parsedArgs;
+ parsedArgs.set = cmdObj["set"].checkAndGetStringData();
+ parsedArgs.whoid = cmdObj["whoid"].Int();
+ BSONElement cfgverElement = cmdObj["cfgver"];
+ uassert(28526,
+ str::stream() << "Expected cfgver argument to replSetElect command to have "
+ "numeric type, but found " << typeName(cfgverElement.type()),
+ cfgverElement.isNumber());
+ parsedArgs.cfgver = cfgverElement.safeNumberLong();
+ parsedArgs.round = cmdObj["round"].OID();
+
+ status = getGlobalReplicationCoordinator()->processReplSetElect(parsedArgs, &result);
+ return appendCommandStatus(result, status);
+ }
+} cmdReplSetElect;
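The corresponding replSetElect request, again assembled only from the fields run()
parses above (values hypothetical; the "round" OID identifies one election attempt):

    // Hypothetical replSetElect request matching the fields run() parses.
    BSONObj electCmd = BSON("replSetElect" << 1
                                           << "set" << "rs0"
                                           << "whoid" << 1
                                           << "cfgver" << 3
                                           << "round" << OID::gen());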
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replset_commands.h b/src/mongo/db/repl/replset_commands.h
index ff39e063da4..50e764ac691 100644
--- a/src/mongo/db/repl/replset_commands.h
+++ b/src/mongo/db/repl/replset_commands.h
@@ -36,20 +36,28 @@
namespace mongo {
namespace repl {
- extern unsigned replSetForceInitialSyncFailure;
+extern unsigned replSetForceInitialSyncFailure;
- /**
- * Base class for repl set commands. Checks basic things such if we're in
- * rs mode before the command does its real work.
- */
- class ReplSetCommand : public Command {
- protected:
- ReplSetCommand(const char * s, bool show=false) : Command(s, show) { }
- virtual bool slaveOk() const { return true; }
- virtual bool adminOnly() const { return true; }
- virtual bool isWriteCommandForConfigServer() const { return false; }
- virtual void help( std::stringstream &help ) const { help << "internal"; }
- };
+/**
+ * Base class for repl set commands. Checks basic things, such as whether we're
+ * in rs mode, before the command does its real work.
+ */
+class ReplSetCommand : public Command {
+protected:
+ ReplSetCommand(const char* s, bool show = false) : Command(s, show) {}
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual bool adminOnly() const {
+ return true;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return false;
+ }
+ virtual void help(std::stringstream& help) const {
+ help << "internal";
+ }
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/replset_web_handler.cpp b/src/mongo/db/repl/replset_web_handler.cpp
index 12fe6ab8bd0..6ec53363396 100644
--- a/src/mongo/db/repl/replset_web_handler.cpp
+++ b/src/mongo/db/repl/replset_web_handler.cpp
@@ -41,56 +41,55 @@
namespace mongo {
namespace repl {
- using namespace html;
+using namespace html;
- class ReplSetHandler : public DbWebHandler {
- public:
- ReplSetHandler() : DbWebHandler( "_replSet" , 1 , false ) {}
+class ReplSetHandler : public DbWebHandler {
+public:
+ ReplSetHandler() : DbWebHandler("_replSet", 1, false) {}
- virtual bool handles( const std::string& url ) const {
- return str::startsWith( url , "/_replSet" );
- }
-
- virtual void handle( OperationContext* txn,
- const char *rq,
- const std::string& url,
- BSONObj params,
- std::string& responseMsg,
- int& responseCode,
- std::vector<std::string>& headers,
- const SockAddr &from ) {
- responseMsg = _replSet(txn);
- responseCode = 200;
- }
+ virtual bool handles(const std::string& url) const {
+ return str::startsWith(url, "/_replSet");
+ }
- /* /_replSet show replica set status in html format */
- std::string _replSet(OperationContext* txn) {
- std::stringstream s;
- s << start("Replica Set Status " + prettyHostName());
- s << p( a("/", "back", "Home") + " | " +
- a("/local/system.replset/?html=1", "", "View Replset Config") + " | " +
- a("/replSetGetStatus?text=1", "", "replSetGetStatus") + " | " +
- a("http://dochub.mongodb.org/core/replicasets", "", "Docs")
- );
+ virtual void handle(OperationContext* txn,
+ const char* rq,
+ const std::string& url,
+ BSONObj params,
+ std::string& responseMsg,
+ int& responseCode,
+ std::vector<std::string>& headers,
+ const SockAddr& from) {
+ responseMsg = _replSet(txn);
+ responseCode = 200;
+ }
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet) {
- s << p("Not using --replSet");
- s << _end();
- return s.str();
- }
+ /* /_replSet show replica set status in html format */
+ std::string _replSet(OperationContext* txn) {
+ std::stringstream s;
+ s << start("Replica Set Status " + prettyHostName());
+ s << p(a("/", "back", "Home") + " | " +
+ a("/local/system.replset/?html=1", "", "View Replset Config") + " | " +
+ a("/replSetGetStatus?text=1", "", "replSetGetStatus") + " | " +
+ a("http://dochub.mongodb.org/core/replicasets", "", "Docs"));
- ReplSetHtmlSummary summary;
- replCoord->summarizeAsHtml(&summary);
- s << summary.toHtmlString();
-
- s << p("Recent replset log activity:");
- fillRsLog(&s);
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet) {
+ s << p("Not using --replSet");
s << _end();
return s.str();
}
- } replSetHandler;
+ ReplSetHtmlSummary summary;
+ replCoord->summarizeAsHtml(&summary);
+ s << summary.toHtmlString();
+
+ s << p("Recent replset log activity:");
+ fillRsLog(&s);
+ s << _end();
+ return s.str();
+ }
+
+} replSetHandler;
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/resync.cpp b/src/mongo/db/repl/resync.cpp
index 16e385c9b48..7581c572cfb 100644
--- a/src/mongo/db/repl/resync.cpp
+++ b/src/mongo/db/repl/resync.cpp
@@ -35,100 +35,100 @@
namespace mongo {
- using std::string;
- using std::stringstream;
+using std::string;
+using std::stringstream;
namespace repl {
- // operator requested resynchronization of replication (on a slave or secondary). {resync: 1}
- class CmdResync : public Command {
- public:
- virtual bool slaveOk() const {
- return true;
- }
- virtual bool adminOnly() const {
- return true;
- }
- virtual bool isWriteCommandForConfigServer() const { return true; }
- virtual void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) {
- ActionSet actions;
- actions.addAction(ActionType::resync);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
+// operator requested resynchronization of replication (on a slave or secondary). {resync: 1}
+class CmdResync : public Command {
+public:
+ virtual bool slaveOk() const {
+ return true;
+ }
+ virtual bool adminOnly() const {
+ return true;
+ }
+ virtual bool isWriteCommandForConfigServer() const {
+ return true;
+ }
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::resync);
+ out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
+ }
- void help(stringstream& h) const {
- h << "resync (from scratch) a stale slave or replica set secondary node.\n";
- }
+ void help(stringstream& h) const {
+ h << "resync (from scratch) a stale slave or replica set secondary node.\n";
+ }
- CmdResync() : Command("resync") { }
- virtual bool run(OperationContext* txn,
- const string& dbname,
- BSONObj& cmdObj,
- int,
- string& errmsg,
- BSONObjBuilder& result,
- bool fromRepl) {
+ CmdResync() : Command("resync") {}
+ virtual bool run(OperationContext* txn,
+ const string& dbname,
+ BSONObj& cmdObj,
+ int,
+ string& errmsg,
+ BSONObjBuilder& result,
+ bool fromRepl) {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite globalWriteLock(txn->lockState());
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite globalWriteLock(txn->lockState());
-
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
- const MemberState memberState = replCoord->getMemberState();
- if (memberState.startup()) {
- return appendCommandStatus(result, Status(ErrorCodes::NotYetInitialized,
- "no replication yet active"));
- }
- if (memberState.primary() ||
- !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
- return appendCommandStatus(result, Status(ErrorCodes::NotSecondary,
- "primaries cannot resync"));
- }
- BackgroundSync::get()->setInitialSyncRequestedFlag(true);
- return true;
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
+ const MemberState memberState = replCoord->getMemberState();
+ if (memberState.startup()) {
+ return appendCommandStatus(
+ result, Status(ErrorCodes::NotYetInitialized, "no replication yet active"));
}
-
- // below this comment pertains only to master/slave replication
- if ( cmdObj.getBoolField( "force" ) ) {
- if ( !waitForSyncToFinish(txn, errmsg ) )
- return false;
- replAllDead = "resync forced";
+ if (memberState.primary() || !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
+ return appendCommandStatus(
+ result, Status(ErrorCodes::NotSecondary, "primaries cannot resync"));
}
- // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
- if (!replAllDead) {
- errmsg = "not dead, no need to resync";
- return false;
- }
- if ( !waitForSyncToFinish(txn, errmsg ) )
- return false;
-
- ReplSource::forceResyncDead( txn, "client" );
- result.append( "info", "triggered resync for all sources" );
-
+ BackgroundSync::get()->setInitialSyncRequestedFlag(true);
return true;
}
- bool waitForSyncToFinish(OperationContext* txn, string &errmsg) const {
- // Wait for slave thread to finish syncing, so sources will be be
- // reloaded with new saved state on next pass.
- Timer t;
- while ( 1 ) {
- if ( syncing == 0 || t.millis() > 30000 )
- break;
- {
- Lock::TempRelease t(txn->lockState());
- relinquishSyncingSome = 1;
- sleepmillis(1);
- }
- }
- if ( syncing ) {
- errmsg = "timeout waiting for sync() to finish";
+ // below this comment pertains only to master/slave replication
+ if (cmdObj.getBoolField("force")) {
+ if (!waitForSyncToFinish(txn, errmsg))
return false;
+ replAllDead = "resync forced";
+ }
+ // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
+ if (!replAllDead) {
+ errmsg = "not dead, no need to resync";
+ return false;
+ }
+ if (!waitForSyncToFinish(txn, errmsg))
+ return false;
+
+ ReplSource::forceResyncDead(txn, "client");
+ result.append("info", "triggered resync for all sources");
+
+ return true;
+ }
+
+ bool waitForSyncToFinish(OperationContext* txn, string& errmsg) const {
+ // Wait for slave thread to finish syncing, so sources will be
+ // reloaded with new saved state on next pass.
+ Timer t;
+ while (1) {
+ if (syncing == 0 || t.millis() > 30000)
+ break;
+ {
+ Lock::TempRelease t(txn->lockState());
+ relinquishSyncingSome = 1;
+ sleepmillis(1);
}
- return true;
}
- } cmdResync;
-} // namespace repl
-} // namespace mongo
+ if (syncing) {
+ errmsg = "timeout waiting for sync() to finish";
+ return false;
+ }
+ return true;
+ }
+} cmdResync;
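As the comment above notes, resync is operator-requested; a sketch of the command
document run() receives ({resync: 1} per the comment, with "force" consulted only
on the master/slave path via cmdObj.getBoolField("force")):

    // Operator-issued resync request (sketch).
    BSONObj resyncCmd = BSON("resync" << 1 << "force" << true);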
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_base.h b/src/mongo/db/repl/rs_base.h
index 1ba1838ce2b..99f7d188eac 100644
--- a/src/mongo/db/repl/rs_base.h
+++ b/src/mongo/db/repl/rs_base.h
@@ -36,55 +36,61 @@
namespace mongo {
namespace repl {
- /**
- * most operations on a ReplSet object should be done while locked. that
- * logic implemented here.
- *
- * Order of locking: lock the replica set, then take a rwlock.
- */
- class RSBase : boost::noncopyable {
- private:
- mongo::mutex m;
- int _locked;
- ThreadLocalValue<bool> _lockedByMe;
- protected:
- RSBase() : m("RSBase"), _locked(0) { }
- ~RSBase() { }
+/**
+ * most operations on a ReplSet object should be done while locked. that
+ * logic is implemented here.
+ *
+ * Order of locking: lock the replica set, then take a rwlock.
+ */
+class RSBase : boost::noncopyable {
+private:
+ mongo::mutex m;
+ int _locked;
+ ThreadLocalValue<bool> _lockedByMe;
+
+protected:
+ RSBase() : m("RSBase"), _locked(0) {}
+ ~RSBase() {}
+
+public:
+ class lock {
+ RSBase& rsbase;
+ std::auto_ptr<scoped_lock> sl;
public:
- class lock {
- RSBase& rsbase;
- std::auto_ptr<scoped_lock> sl;
- public:
- lock(RSBase* b) : rsbase(*b) {
- if( rsbase._lockedByMe.get() )
- return; // recursive is ok...
+ lock(RSBase* b) : rsbase(*b) {
+ if (rsbase._lockedByMe.get())
+ return; // recursive is ok...
- sl.reset( new scoped_lock(rsbase.m) );
- DEV verify(rsbase._locked == 0);
- rsbase._locked++;
- rsbase._lockedByMe.set(true);
+ sl.reset(new scoped_lock(rsbase.m));
+ DEV verify(rsbase._locked == 0);
+ rsbase._locked++;
+ rsbase._lockedByMe.set(true);
+ }
+ ~lock() {
+ if (sl.get()) {
+ verify(rsbase._lockedByMe.get());
+ DEV verify(rsbase._locked == 1);
+ rsbase._lockedByMe.set(false);
+ rsbase._locked--;
}
- ~lock() {
- if( sl.get() ) {
- verify( rsbase._lockedByMe.get() );
- DEV verify(rsbase._locked == 1);
- rsbase._lockedByMe.set(false);
- rsbase._locked--;
- }
- }
- };
+ }
+ };
- /* for asserts */
- bool locked() const { return _locked != 0; }
+ /* for asserts */
+ bool locked() const {
+ return _locked != 0;
+ }
- /** if true, is locked, and was locked by this thread. note if false, it could be in the
- * lock or not for another just for asserts & such so we can make the contracts clear on
- * who locks what when. we don't use these locks that frequently, so the little bit of
- * overhead is fine.
- */
- bool lockedByMe() { return _lockedByMe.get(); }
- };
+ /** if true, is locked, and was locked by this thread. note if false, it could still be
+ * locked (or not) by another thread; use for asserts & such so we can make the contracts
+ * clear on who locks what when. we don't use these locks that frequently, so the little
+ * bit of overhead is fine.
+ */
+ bool lockedByMe() {
+ return _lockedByMe.get();
+ }
+};
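A minimal usage sketch for RSBase::lock (the free function and its argument are
hypothetical; note that a second acquisition on the same thread is a no-op thanks
to the _lockedByMe check):

    // Hypothetical caller; follows the documented order: replica set
    // lock first, then any rwlock.
    void mutateSetState(RSBase* rs) {
        RSBase::lock lk(rs);   // recursive acquire on this thread is a no-op
        verify(rs->locked());  // locked()/lockedByMe() exist for such asserts
        // ... safe to take a rwlock and mutate replica set state here ...
    }                          // outermost lock released by ~lock()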
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_exception.h b/src/mongo/db/repl/rs_exception.h
index fcbf2c7d502..862226750d3 100644
--- a/src/mongo/db/repl/rs_exception.h
+++ b/src/mongo/db/repl/rs_exception.h
@@ -33,15 +33,19 @@
namespace mongo {
namespace repl {
- class VoteException : public std::exception {
- public:
- const char * what() const throw () { return "VoteException"; }
- };
+class VoteException : public std::exception {
+public:
+ const char* what() const throw() {
+ return "VoteException";
+ }
+};
- class RetryAfterSleepException : public std::exception {
- public:
- const char * what() const throw () { return "RetryAfterSleepException"; }
- };
+class RetryAfterSleepException : public std::exception {
+public:
+ const char* what() const throw() {
+ return "RetryAfterSleepException";
+ }
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp
index 2514a88953d..0e2fa1a5a9a 100644
--- a/src/mongo/db/repl/rs_initialsync.cpp
+++ b/src/mongo/db/repl/rs_initialsync.cpp
@@ -58,526 +58,513 @@ namespace mongo {
namespace repl {
namespace {
- using std::list;
- using std::string;
-
- // Failpoint which fails initial sync and leaves on oplog entry in the buffer.
- MONGO_FP_DECLARE(failInitSyncWithBufferedEntriesLeft);
-
- /**
- * Truncates the oplog (removes any documents) and resets internal variables that were
- * originally initialized or affected by using values from the oplog at startup time. These
- * include the last applied optime, the last fetched optime, and the sync source blacklist.
- * Also resets the bgsync thread so that it reconnects its sync source after the oplog has been
- * truncated.
- */
- void truncateAndResetOplog(OperationContext* txn,
- ReplicationCoordinator* replCoord,
- BackgroundSync* bgsync) {
- // Clear minvalid
- setMinValid(txn, OpTime());
-
- AutoGetDb autoDb(txn, "local", MODE_X);
- massert(28585, "no local database found", autoDb.getDb());
- invariant(txn->lockState()->isCollectionLockedForMode(rsoplog, MODE_X));
- // Note: the following order is important.
- // The bgsync thread uses an empty optime as a sentinel to know to wait
- // for initial sync; thus, we must
- // ensure the lastAppliedOptime is empty before restarting the bgsync thread
- // via stop().
- // We must clear the sync source blacklist after calling stop()
- // because the bgsync thread, while running, may update the blacklist.
- replCoord->resetMyLastOptime();
- bgsync->stop();
- bgsync->setLastAppliedHash(0);
- bgsync->clearBuffer();
-
- replCoord->clearSyncSourceBlacklist();
-
- // Truncate the oplog in case there was a prior initial sync that failed.
- Collection* collection = autoDb.getDb()->getCollection(rsoplog);
- fassert(28565, collection);
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- WriteUnitOfWork wunit(txn);
- Status status = collection->truncate(txn);
- fassert(28564, status);
- wunit.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "truncate", collection->ns().ns());
+using std::list;
+using std::string;
+
+// Failpoint which fails initial sync and leaves one oplog entry in the buffer.
+MONGO_FP_DECLARE(failInitSyncWithBufferedEntriesLeft);
+
+/**
+ * Truncates the oplog (removes any documents) and resets internal variables that were
+ * originally initialized or affected by using values from the oplog at startup time. These
+ * include the last applied optime, the last fetched optime, and the sync source blacklist.
+ * Also resets the bgsync thread so that it reconnects its sync source after the oplog has been
+ * truncated.
+ */
+void truncateAndResetOplog(OperationContext* txn,
+ ReplicationCoordinator* replCoord,
+ BackgroundSync* bgsync) {
+ // Clear minvalid
+ setMinValid(txn, OpTime());
+
+ AutoGetDb autoDb(txn, "local", MODE_X);
+ massert(28585, "no local database found", autoDb.getDb());
+ invariant(txn->lockState()->isCollectionLockedForMode(rsoplog, MODE_X));
+ // Note: the following order is important.
+ // The bgsync thread uses an empty optime as a sentinel to know to wait
+ // for initial sync; thus, we must
+ // ensure the lastAppliedOptime is empty before restarting the bgsync thread
+ // via stop().
+ // We must clear the sync source blacklist after calling stop()
+ // because the bgsync thread, while running, may update the blacklist.
+ replCoord->resetMyLastOptime();
+ bgsync->stop();
+ bgsync->setLastAppliedHash(0);
+ bgsync->clearBuffer();
+
+ replCoord->clearSyncSourceBlacklist();
+
+ // Truncate the oplog in case there was a prior initial sync that failed.
+ Collection* collection = autoDb.getDb()->getCollection(rsoplog);
+ fassert(28565, collection);
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork wunit(txn);
+ Status status = collection->truncate(txn);
+ fassert(28564, status);
+ wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "truncate", collection->ns().ns());
+}
- /**
- * Confirms that the "admin" database contains a supported version of the auth
- * data schema. Terminates the process if the "admin" contains clearly incompatible
- * auth data.
- */
- void checkAdminDatabasePostClone(OperationContext* txn, Database* adminDb) {
- // Assumes txn holds MODE_X or MODE_S lock on "admin" database.
- if (!adminDb) {
+/**
+ * Confirms that the "admin" database contains a supported version of the auth
+ * data schema. Terminates the process if the "admin" database contains clearly
+ * incompatible auth data.
+ */
+void checkAdminDatabasePostClone(OperationContext* txn, Database* adminDb) {
+ // Assumes txn holds MODE_X or MODE_S lock on "admin" database.
+ if (!adminDb) {
+ return;
+ }
+ Collection* const usersCollection =
+ adminDb->getCollection(AuthorizationManager::usersCollectionNamespace);
+ const bool hasUsers =
+ usersCollection && !Helpers::findOne(txn, usersCollection, BSONObj(), false).isNull();
+ Collection* const adminVersionCollection =
+ adminDb->getCollection(AuthorizationManager::versionCollectionNamespace);
+ BSONObj authSchemaVersionDocument;
+ if (!adminVersionCollection ||
+ !Helpers::findOne(txn,
+ adminVersionCollection,
+ AuthorizationManager::versionDocumentQuery,
+ authSchemaVersionDocument)) {
+ if (!hasUsers) {
+ // It's OK to have no auth version document if there are no user documents.
return;
}
- Collection* const usersCollection =
- adminDb->getCollection(AuthorizationManager::usersCollectionNamespace);
- const bool hasUsers = usersCollection &&
- !Helpers::findOne(txn, usersCollection, BSONObj(), false).isNull();
- Collection* const adminVersionCollection =
- adminDb->getCollection(AuthorizationManager::versionCollectionNamespace);
- BSONObj authSchemaVersionDocument;
- if (!adminVersionCollection || !Helpers::findOne(txn,
- adminVersionCollection,
- AuthorizationManager::versionDocumentQuery,
- authSchemaVersionDocument)) {
- if (!hasUsers) {
- // It's OK to have no auth version document if there are no user documents.
- return;
- }
- severe() << "During initial sync, found documents in " <<
- AuthorizationManager::usersCollectionNamespace <<
- " but could not find an auth schema version document in " <<
- AuthorizationManager::versionCollectionNamespace;
- severe() << "This indicates that the primary of this replica set was not successfully "
- "upgraded to schema version " << AuthorizationManager::schemaVersion26Final <<
- ", which is the minimum supported schema version in this version of MongoDB";
- fassertFailedNoTrace(28620);
- }
- long long foundSchemaVersion;
- Status status = bsonExtractIntegerField(authSchemaVersionDocument,
- AuthorizationManager::schemaVersionFieldName,
- &foundSchemaVersion);
- if (!status.isOK()) {
- severe() << "During initial sync, found malformed auth schema version document: " <<
- status << "; document: " << authSchemaVersionDocument;
- fassertFailedNoTrace(28618);
- }
- if ((foundSchemaVersion != AuthorizationManager::schemaVersion26Final) &&
- (foundSchemaVersion != AuthorizationManager::schemaVersion28SCRAM)) {
- severe() << "During initial sync, found auth schema version " << foundSchemaVersion <<
- ", but this version of MongoDB only supports schema versions " <<
- AuthorizationManager::schemaVersion26Final << " and " <<
- AuthorizationManager::schemaVersion28SCRAM;
- fassertFailedNoTrace(28619);
- }
+ severe() << "During initial sync, found documents in "
+ << AuthorizationManager::usersCollectionNamespace
+ << " but could not find an auth schema version document in "
+ << AuthorizationManager::versionCollectionNamespace;
+ severe() << "This indicates that the primary of this replica set was not successfully "
+ "upgraded to schema version " << AuthorizationManager::schemaVersion26Final
+ << ", which is the minimum supported schema version in this version of MongoDB";
+ fassertFailedNoTrace(28620);
}
-
- bool _initialSyncClone(OperationContext* txn,
- Cloner& cloner,
- const std::string& host,
- const list<string>& dbs,
- bool dataPass) {
-
- for( list<string>::const_iterator i = dbs.begin(); i != dbs.end(); i++ ) {
- const string db = *i;
- if ( db == "local" )
- continue;
-
- if ( dataPass )
- log() << "initial sync cloning db: " << db;
- else
- log() << "initial sync cloning indexes for : " << db;
-
- string err;
- int errCode;
- CloneOptions options;
- options.fromDB = db;
- options.logForRepl = false;
- options.slaveOk = true;
- options.useReplAuth = true;
- options.snapshot = false;
- options.mayYield = true;
- options.mayBeInterrupted = false;
- options.syncData = dataPass;
- options.syncIndexes = ! dataPass;
-
- // Make database stable
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dbWrite(txn->lockState(), db, MODE_X);
-
- if (!cloner.go(txn, db, host, options, NULL, err, &errCode)) {
- log() << "initial sync: error while "
- << (dataPass ? "cloning " : "indexing ") << db
- << ". " << (err.empty() ? "" : err + ". ");
- return false;
- }
-
- if (db == "admin") {
- checkAdminDatabasePostClone(txn, dbHolder().get(txn, db));
- }
- }
-
- return true;
+ long long foundSchemaVersion;
+ Status status = bsonExtractIntegerField(authSchemaVersionDocument,
+ AuthorizationManager::schemaVersionFieldName,
+ &foundSchemaVersion);
+ if (!status.isOK()) {
+ severe() << "During initial sync, found malformed auth schema version document: " << status
+ << "; document: " << authSchemaVersionDocument;
+ fassertFailedNoTrace(28618);
}
-
- /**
- * Replays the sync target's oplog from lastOp to the latest op on the sync target.
- *
- * @param syncer either initial sync (can reclone missing docs) or "normal" sync (no recloning)
- * @param r the oplog reader
- * @return if applying the oplog succeeded
- */
- bool _initialSyncApplyOplog( OperationContext* ctx,
- repl::SyncTail& syncer,
- OplogReader* r) {
- const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastOptime();
- BSONObj lastOp;
-
- // If the fail point is set, exit failing.
- if (MONGO_FAIL_POINT(failInitSyncWithBufferedEntriesLeft)) {
- log() << "adding fake oplog entry to buffer.";
- BackgroundSync::get()->pushTestOpToBuffer(
- BSON("ts" << startOpTime << "v" << 1 << "op" << "n"));
+ if ((foundSchemaVersion != AuthorizationManager::schemaVersion26Final) &&
+ (foundSchemaVersion != AuthorizationManager::schemaVersion28SCRAM)) {
+ severe() << "During initial sync, found auth schema version " << foundSchemaVersion
+ << ", but this version of MongoDB only supports schema versions "
+ << AuthorizationManager::schemaVersion26Final << " and "
+ << AuthorizationManager::schemaVersion28SCRAM;
+ fassertFailedNoTrace(28619);
+ }
+}
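For context, a passing auth schema version document would look roughly like the
sketch below. The "_id" value and the "currentVersion" field name are assumptions
mirroring AuthorizationManager::versionDocumentQuery and
AuthorizationManager::schemaVersionFieldName; only the two schema version constants
named in the code above are accepted.

    // Hypothetical shape of an acceptable version document in
    // admin.system.version (field names assumed, as noted above).
    BSONObj versionDoc = BSON("_id" << "authSchema"
                              << "currentVersion"
                              << AuthorizationManager::schemaVersion26Final);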
+
+bool _initialSyncClone(OperationContext* txn,
+ Cloner& cloner,
+ const std::string& host,
+ const list<string>& dbs,
+ bool dataPass) {
+ for (list<string>::const_iterator i = dbs.begin(); i != dbs.end(); i++) {
+ const string db = *i;
+ if (db == "local")
+ continue;
+
+ if (dataPass)
+ log() << "initial sync cloning db: " << db;
+ else
+ log() << "initial sync cloning indexes for : " << db;
+
+ string err;
+ int errCode;
+ CloneOptions options;
+ options.fromDB = db;
+ options.logForRepl = false;
+ options.slaveOk = true;
+ options.useReplAuth = true;
+ options.snapshot = false;
+ options.mayYield = true;
+ options.mayBeInterrupted = false;
+ options.syncData = dataPass;
+ options.syncIndexes = !dataPass;
+
+ // Make database stable
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dbWrite(txn->lockState(), db, MODE_X);
+
+ if (!cloner.go(txn, db, host, options, NULL, err, &errCode)) {
+ log() << "initial sync: error while " << (dataPass ? "cloning " : "indexing ") << db
+ << ". " << (err.empty() ? "" : err + ". ");
return false;
}
- try {
- // It may have been a long time since we last used this connection to
- // query the oplog, depending on the size of the databases we needed to clone.
- // A common problem is that TCP keepalives are set too infrequent, and thus
- // our connection here is terminated by a firewall due to inactivity.
- // Solution is to increase the TCP keepalive frequency.
- lastOp = r->getLastOp(rsoplog);
- } catch ( SocketException & ) {
- HostAndPort host = r->getHost();
- log() << "connection lost to " << host.toString() <<
- "; is your tcp keepalive interval set appropriately?";
- if ( !r->connect(host) ) {
- error() << "initial sync couldn't connect to " << host.toString();
- throw;
- }
- // retry
- lastOp = r->getLastOp(rsoplog);
+ if (db == "admin") {
+ checkAdminDatabasePostClone(txn, dbHolder().get(txn, db));
}
+ }
- if (lastOp.isEmpty()) {
- error() << "initial sync lastOp is empty";
- sleepsecs(1);
- return false;
+ return true;
+}
+
+/**
+ * Replays the sync target's oplog from lastOp to the latest op on the sync target.
+ *
+ * @param syncer either initial sync (can reclone missing docs) or "normal" sync (no recloning)
+ * @param r the oplog reader
+ * @return if applying the oplog succeeded
+ */
+bool _initialSyncApplyOplog(OperationContext* ctx, repl::SyncTail& syncer, OplogReader* r) {
+ const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastOptime();
+ BSONObj lastOp;
+
+ // If the fail point is set, exit failing.
+ if (MONGO_FAIL_POINT(failInitSyncWithBufferedEntriesLeft)) {
+ log() << "adding fake oplog entry to buffer.";
+ BackgroundSync::get()->pushTestOpToBuffer(BSON("ts" << startOpTime << "v" << 1 << "op"
+ << "n"));
+ return false;
+ }
+
+ try {
+ // It may have been a long time since we last used this connection to
+ // query the oplog, depending on the size of the databases we needed to clone.
+ // A common problem is that TCP keepalives are set too infrequently, and thus
+ // our connection here is terminated by a firewall due to inactivity.
+ // The solution is to increase the TCP keepalive frequency.
+ lastOp = r->getLastOp(rsoplog);
+ } catch (SocketException&) {
+ HostAndPort host = r->getHost();
+ log() << "connection lost to " << host.toString()
+ << "; is your tcp keepalive interval set appropriately?";
+ if (!r->connect(host)) {
+ error() << "initial sync couldn't connect to " << host.toString();
+ throw;
}
+ // retry
+ lastOp = r->getLastOp(rsoplog);
+ }
- OpTime stopOpTime = lastOp["ts"]._opTime();
+ if (lastOp.isEmpty()) {
+ error() << "initial sync lastOp is empty";
+ sleepsecs(1);
+ return false;
+ }
- // If we already have what we need then return.
- if (stopOpTime == startOpTime)
- return true;
+ OpTime stopOpTime = lastOp["ts"]._opTime();
- verify( !stopOpTime.isNull() );
- verify( stopOpTime > startOpTime );
+ // If we already have what we need then return.
+ if (stopOpTime == startOpTime)
+ return true;
- // apply till stopOpTime
- try {
- LOG(2) << "Applying oplog entries from " << startOpTime.toStringPretty()
- << " until " << stopOpTime.toStringPretty();
- syncer.oplogApplication(ctx, stopOpTime);
+ verify(!stopOpTime.isNull());
+ verify(stopOpTime > startOpTime);
- if (inShutdown()) {
- return false;
- }
- }
- catch (const DBException&) {
- getGlobalReplicationCoordinator()->resetMyLastOptime();
- BackgroundSync::get()->setLastAppliedHash(0);
- warning() << "initial sync failed during oplog application phase, and will retry";
+ // apply till stopOpTime
+ try {
+ LOG(2) << "Applying oplog entries from " << startOpTime.toStringPretty() << " until "
+ << stopOpTime.toStringPretty();
+ syncer.oplogApplication(ctx, stopOpTime);
- sleepsecs(5);
+ if (inShutdown()) {
return false;
}
-
- return true;
+ } catch (const DBException&) {
+ getGlobalReplicationCoordinator()->resetMyLastOptime();
+ BackgroundSync::get()->setLastAppliedHash(0);
+ warning() << "initial sync failed during oplog application phase, and will retry";
+
+ sleepsecs(5);
+ return false;
}
- void _tryToApplyOpWithRetry(OperationContext* txn, InitialSync* init, const BSONObj& op) {
- try {
- if (!init->syncApply(txn, op)) {
- bool retry;
- {
- ScopedTransaction transaction(txn, MODE_X);
- Lock::GlobalWrite lk(txn->lockState());
- retry = init->shouldRetry(txn, op);
- }
+ return true;
+}
+
+void _tryToApplyOpWithRetry(OperationContext* txn, InitialSync* init, const BSONObj& op) {
+ try {
+ if (!init->syncApply(txn, op)) {
+ bool retry;
+ {
+ ScopedTransaction transaction(txn, MODE_X);
+ Lock::GlobalWrite lk(txn->lockState());
+ retry = init->shouldRetry(txn, op);
+ }
- if (retry) {
- // retry
- if (!init->syncApply(txn, op)) {
- uasserted(28542,
- str::stream() << "During initial sync, failed to apply op: "
- << op);
- }
+ if (retry) {
+ // retry
+ if (!init->syncApply(txn, op)) {
+ uasserted(28542,
+ str::stream() << "During initial sync, failed to apply op: " << op);
}
- // If shouldRetry() returns false, fall through.
- // This can happen if the document that was moved and missed by Cloner
- // subsequently got deleted and no longer exists on the Sync Target at all
}
+ // If shouldRetry() returns false, fall through.
+ // This can happen if the document that was moved and missed by Cloner
+ // subsequently got deleted and no longer exists on the Sync Target at all
}
- catch (const DBException& e) {
- error() << "exception: " << causedBy(e) << " on: " << op.toString();
- uasserted(28541,
- str::stream() << "During initial sync, failed to apply op: "
- << op);
- }
+ } catch (const DBException& e) {
+ error() << "exception: " << causedBy(e) << " on: " << op.toString();
+ uasserted(28541, str::stream() << "During initial sync, failed to apply op: " << op);
}
+}
- /**
- * Do the initial sync for this member. There are several steps to this process:
- *
- * 0. Add _initialSyncFlag to minValid collection to tell us to restart initial sync if we
- * crash in the middle of this procedure
- * 1. Record start time.
- * 2. Clone.
- * 3. Set minValid1 to sync target's latest op time.
- * 4. Apply ops from start to minValid1, fetching missing docs as needed.
- * 5. Set minValid2 to sync target's latest op time.
- * 6. Apply ops from minValid1 to minValid2.
- * 7. Build indexes.
- * 8. Set minValid3 to sync target's latest op time.
- * 9. Apply ops from minValid2 to minValid3.
- 10. Cleanup minValid collection: remove _initialSyncFlag field, set ts to minValid3 OpTime
- *
- * At that point, initial sync is finished. Note that the oplog from the sync target is applied
- * three times: step 4, 6, and 8. 4 may involve refetching, 6 should not. By the end of 6,
- * this member should have consistent data. 8 is "cosmetic," it is only to get this member
- * closer to the latest op time before it can transition out of startup state
- *
- * Returns a Status with ErrorCode::ShutdownInProgress if the node enters shutdown,
- * ErrorCode::InitialSyncOplogSourceMissing if the node fails to find an sync source, Status::OK
- * if everything worked, and ErrorCode::InitialSyncFailure for all other error cases.
- */
- Status _initialSync() {
-
- log() << "initial sync pending";
-
- BackgroundSync* bgsync(BackgroundSync::get());
- OperationContextImpl txn;
- ReplicationCoordinator* replCoord(getGlobalReplicationCoordinator());
-
- // reset state for initial sync
- truncateAndResetOplog(&txn, replCoord, bgsync);
-
- OplogReader r;
- OpTime nullOpTime(0, 0);
-
- while (r.getHost().empty()) {
- // We must prime the sync source selector so that it considers all candidates regardless
- // of oplog position, by passing in "nullOpTime" as the last op fetched time.
- r.connectToSyncSource(&txn, nullOpTime, replCoord);
- if (r.getHost().empty()) {
- std::string msg =
- "no valid sync sources found in current replset to do an initial sync";
- log() << msg;
- return Status(ErrorCodes::InitialSyncOplogSourceMissing, msg);
- }
+/**
+ * Do the initial sync for this member. There are several steps to this process:
+ *
+ * 0. Add _initialSyncFlag to minValid collection to tell us to restart initial sync if we
+ * crash in the middle of this procedure
+ * 1. Record start time.
+ * 2. Clone.
+ * 3. Set minValid1 to sync target's latest op time.
+ * 4. Apply ops from start to minValid1, fetching missing docs as needed.
+ * 5. Set minValid2 to sync target's latest op time.
+ * 6. Apply ops from minValid1 to minValid2.
+ * 7. Build indexes.
+ * 8. Set minValid3 to sync target's latest op time.
+ * 9. Apply ops from minValid2 to minValid3.
+ * 10. Cleanup minValid collection: remove _initialSyncFlag field, set ts to minValid3 OpTime
+ *
+ * At that point, initial sync is finished. Note that the oplog from the sync target is applied
+ * three times: steps 4, 6, and 8. Step 4 may involve refetching; step 6 should not. By the
+ * end of step 6, this member should have consistent data. Step 8 is "cosmetic": it only gets
+ * this member closer to the latest op time before it can transition out of startup state.
+ *
+ * Returns a Status with ErrorCodes::ShutdownInProgress if the node enters shutdown,
+ * ErrorCodes::InitialSyncOplogSourceMissing if the node fails to find a sync source, Status::OK
+ * if everything worked, and ErrorCodes::InitialSyncFailure for all other error cases.
+ */
+Status _initialSync() {
+ log() << "initial sync pending";
+
+ BackgroundSync* bgsync(BackgroundSync::get());
+ OperationContextImpl txn;
+ ReplicationCoordinator* replCoord(getGlobalReplicationCoordinator());
+
+ // reset state for initial sync
+ truncateAndResetOplog(&txn, replCoord, bgsync);
+
+ OplogReader r;
+ OpTime nullOpTime(0, 0);
+
+ while (r.getHost().empty()) {
+ // We must prime the sync source selector so that it considers all candidates regardless
+ // of oplog position, by passing in "nullOpTime" as the last op fetched time.
+ r.connectToSyncSource(&txn, nullOpTime, replCoord);
+ if (r.getHost().empty()) {
+ std::string msg =
+ "no valid sync sources found in current replset to do an initial sync";
+ log() << msg;
+ return Status(ErrorCodes::InitialSyncOplogSourceMissing, msg);
+ }
- if (inShutdown()) {
- return Status(ErrorCodes::ShutdownInProgress, "shutting down");
- }
+ if (inShutdown()) {
+ return Status(ErrorCodes::ShutdownInProgress, "shutting down");
}
+ }
- InitialSync init(bgsync);
- init.setHostname(r.getHost().toString());
+ InitialSync init(bgsync);
+ init.setHostname(r.getHost().toString());
- BSONObj lastOp = r.getLastOp(rsoplog);
- if ( lastOp.isEmpty() ) {
- std::string msg = "initial sync couldn't read remote oplog";
- log() << msg;
- sleepsecs(15);
- return Status(ErrorCodes::InitialSyncFailure, msg);
- }
+ BSONObj lastOp = r.getLastOp(rsoplog);
+ if (lastOp.isEmpty()) {
+ std::string msg = "initial sync couldn't read remote oplog";
+ log() << msg;
+ sleepsecs(15);
+ return Status(ErrorCodes::InitialSyncFailure, msg);
+ }
- if (getGlobalReplicationCoordinator()->getSettings().fastsync) {
- log() << "fastsync: skipping database clone";
+ if (getGlobalReplicationCoordinator()->getSettings().fastsync) {
+ log() << "fastsync: skipping database clone";
- // prime oplog
- try {
- _tryToApplyOpWithRetry(&txn, &init, lastOp);
- std::deque<BSONObj> ops;
- ops.push_back(lastOp);
- writeOpsToOplog(&txn, ops);
- return Status::OK();
- } catch (DBException& e) {
- // Return if in shutdown
- if (inShutdown()) {
- return Status(ErrorCodes::ShutdownInProgress, "shutdown in progress");
- }
- throw;
+ // prime oplog
+ try {
+ _tryToApplyOpWithRetry(&txn, &init, lastOp);
+ std::deque<BSONObj> ops;
+ ops.push_back(lastOp);
+ writeOpsToOplog(&txn, ops);
+ return Status::OK();
+ } catch (DBException& e) {
+ // Return if in shutdown
+ if (inShutdown()) {
+ return Status(ErrorCodes::ShutdownInProgress, "shutdown in progress");
}
+ throw;
}
+ }
- // Add field to minvalid document to tell us to restart initial sync if we crash
- setInitialSyncFlag(&txn);
+ // Add field to minvalid document to tell us to restart initial sync if we crash
+ setInitialSyncFlag(&txn);
- log() << "initial sync drop all databases";
- dropAllDatabasesExceptLocal(&txn);
+ log() << "initial sync drop all databases";
+ dropAllDatabasesExceptLocal(&txn);
- log() << "initial sync clone all databases";
+ log() << "initial sync clone all databases";
- list<string> dbs = r.conn()->getDatabaseNames();
- {
- // Clone admin database first, to catch schema errors.
- list<string>::iterator admin = std::find(dbs.begin(), dbs.end(), "admin");
- if (admin != dbs.end()) {
- dbs.splice(dbs.begin(), dbs, admin);
- }
+ list<string> dbs = r.conn()->getDatabaseNames();
+ {
+ // Clone admin database first, to catch schema errors.
+ list<string>::iterator admin = std::find(dbs.begin(), dbs.end(), "admin");
+ if (admin != dbs.end()) {
+ dbs.splice(dbs.begin(), dbs, admin);
}
+ }
- Cloner cloner;
- if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, true)) {
- return Status(ErrorCodes::InitialSyncFailure, "initial sync failed data cloning");
- }
+ Cloner cloner;
+ if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, true)) {
+ return Status(ErrorCodes::InitialSyncFailure, "initial sync failed data cloning");
+ }
- log() << "initial sync data copy, starting syncup";
+ log() << "initial sync data copy, starting syncup";
- // prime oplog
- _tryToApplyOpWithRetry(&txn, &init, lastOp);
- std::deque<BSONObj> ops;
- ops.push_back(lastOp);
- writeOpsToOplog(&txn, ops);
+ // prime oplog
+ _tryToApplyOpWithRetry(&txn, &init, lastOp);
+ std::deque<BSONObj> ops;
+ ops.push_back(lastOp);
+ writeOpsToOplog(&txn, ops);
- std::string msg = "oplog sync 1 of 3";
- log() << msg;
- if (!_initialSyncApplyOplog(&txn, init, &r)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
+ std::string msg = "oplog sync 1 of 3";
+ log() << msg;
+ if (!_initialSyncApplyOplog(&txn, init, &r)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
- // Now we sync to the latest op on the sync target _again_, as we may have recloned ops
- // that were "from the future" compared with minValid. During this second application,
- // nothing should need to be recloned.
- msg = "oplog sync 2 of 3";
- log() << msg;
- if (!_initialSyncApplyOplog(&txn, init, &r)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
- // data should now be consistent
+ // Now we sync to the latest op on the sync target _again_, as we may have recloned ops
+ // that were "from the future" compared with minValid. During this second application,
+ // nothing should need to be recloned.
+ msg = "oplog sync 2 of 3";
+ log() << msg;
+ if (!_initialSyncApplyOplog(&txn, init, &r)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
+ // data should now be consistent
- msg = "initial sync building indexes";
- log() << msg;
- if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, false)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
+ msg = "initial sync building indexes";
+ log() << msg;
+ if (!_initialSyncClone(&txn, cloner, r.conn()->getServerAddress(), dbs, false)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
- msg = "oplog sync 3 of 3";
- log() << msg;
+ msg = "oplog sync 3 of 3";
+ log() << msg;
- SyncTail tail(bgsync, multiSyncApply);
- if (!_initialSyncApplyOplog(&txn, tail, &r)) {
- return Status(ErrorCodes::InitialSyncFailure,
- str::stream() << "initial sync failed: " << msg);
- }
-
- // ---------
+ SyncTail tail(bgsync, multiSyncApply);
+ if (!_initialSyncApplyOplog(&txn, tail, &r)) {
+ return Status(ErrorCodes::InitialSyncFailure,
+ str::stream() << "initial sync failed: " << msg);
+ }
- Status status = getGlobalAuthorizationManager()->initialize(&txn);
- if (!status.isOK()) {
- warning() << "Failed to reinitialize auth data after initial sync. " << status;
- return status;
- }
+ // ---------
- log() << "initial sync finishing up";
+ Status status = getGlobalAuthorizationManager()->initialize(&txn);
+ if (!status.isOK()) {
+ warning() << "Failed to reinitialize auth data after initial sync. " << status;
+ return status;
+ }
- {
- ScopedTransaction scopedXact(&txn, MODE_IX);
- AutoGetDb autodb(&txn, "local", MODE_X);
- OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastOptime());
- log() << "replSet set minValid=" << lastOpTimeWritten;
+ log() << "initial sync finishing up";
- // Initial sync is now complete. Flag this by setting minValid to the last thing
- // we synced.
- setMinValid(&txn, lastOpTimeWritten);
+ {
+ ScopedTransaction scopedXact(&txn, MODE_IX);
+ AutoGetDb autodb(&txn, "local", MODE_X);
+ OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastOptime());
+ log() << "replSet set minValid=" << lastOpTimeWritten;
- // Clear the initial sync flag.
- clearInitialSyncFlag(&txn);
- BackgroundSync::get()->setInitialSyncRequestedFlag(false);
- }
+ // Initial sync is now complete. Flag this by setting minValid to the last thing
+ // we synced.
+ setMinValid(&txn, lastOpTimeWritten);
- // If we just cloned & there were no ops applied, we still want the primary to know where
- // we're up to
- bgsync->notify(&txn);
-
- log() << "initial sync done";
- std::vector<BSONObj> handshakeObjs;
- replCoord->prepareReplSetUpdatePositionCommandHandshakes(&handshakeObjs);
- for (std::vector<BSONObj>::iterator it = handshakeObjs.begin();
- it != handshakeObjs.end();
- ++it) {
- BSONObj res;
- try {
- if (!r.conn()->runCommand("admin", *it, res)) {
- warning() << "InitialSync error reporting sync progress during handshake";
- return Status::OK();
- }
- }
- catch (const DBException& e) {
- warning() << "InitialSync error reporting sync progress during handshake: "
- << e.what();
- return Status::OK();
- }
- }
+ // Clear the initial sync flag.
+ clearInitialSyncFlag(&txn);
+ BackgroundSync::get()->setInitialSyncRequestedFlag(false);
+ }
+
+ // If we just cloned & there were no ops applied, we still want the primary to know where
+ // we're up to
+ bgsync->notify(&txn);
- BSONObjBuilder updateCmd;
+ log() << "initial sync done";
+ std::vector<BSONObj> handshakeObjs;
+ replCoord->prepareReplSetUpdatePositionCommandHandshakes(&handshakeObjs);
+ for (std::vector<BSONObj>::iterator it = handshakeObjs.begin(); it != handshakeObjs.end();
+ ++it) {
BSONObj res;
- if (!replCoord->prepareReplSetUpdatePositionCommand(&updateCmd)) {
- warning() << "InitialSync couldn't generate updatePosition command";
- return Status::OK();
- }
try {
- if (!r.conn()->runCommand("admin", updateCmd.obj(), res)) {
- warning() << "InitialSync error reporting sync progress during updatePosition";
+ if (!r.conn()->runCommand("admin", *it, res)) {
+ warning() << "InitialSync error reporting sync progress during handshake";
return Status::OK();
}
- }
- catch (const DBException& e) {
- warning() << "InitialSync error reporting sync progress during updatePosition: "
- << e.what();
+ } catch (const DBException& e) {
+ warning() << "InitialSync error reporting sync progress during handshake: " << e.what();
return Status::OK();
}
+ }
+ BSONObjBuilder updateCmd;
+ BSONObj res;
+ if (!replCoord->prepareReplSetUpdatePositionCommand(&updateCmd)) {
+ warning() << "InitialSync couldn't generate updatePosition command";
+ return Status::OK();
+ }
+ try {
+ if (!r.conn()->runCommand("admin", updateCmd.obj(), res)) {
+ warning() << "InitialSync error reporting sync progress during updatePosition";
+ return Status::OK();
+ }
+ } catch (const DBException& e) {
+ warning() << "InitialSync error reporting sync progress during updatePosition: "
+ << e.what();
return Status::OK();
}
-} // namespace
- void syncDoInitialSync() {
- static const int maxFailedAttempts = 10;
+ return Status::OK();
+}
+} // namespace
- {
- OperationContextImpl txn;
- createOplog(&txn);
- }
+void syncDoInitialSync() {
+ static const int maxFailedAttempts = 10;
- int failedAttempts = 0;
- while ( failedAttempts < maxFailedAttempts ) {
- try {
- // leave loop when successful
- Status status = _initialSync();
- if (status.isOK()) {
- break;
- }
- if (status == ErrorCodes::InitialSyncOplogSourceMissing) {
- sleepsecs(1);
- return;
- }
+ {
+ OperationContextImpl txn;
+ createOplog(&txn);
+ }
+
+ int failedAttempts = 0;
+ while (failedAttempts < maxFailedAttempts) {
+ try {
+ // leave loop when successful
+ Status status = _initialSync();
+ if (status.isOK()) {
+ break;
}
- catch(const DBException& e) {
- error() << e ;
- // Return if in shutdown
- if (inShutdown()) {
- return;
- }
+ if (status == ErrorCodes::InitialSyncOplogSourceMissing) {
+ sleepsecs(1);
+ return;
}
-
+ } catch (const DBException& e) {
+ error() << e;
+ // Return if in shutdown
if (inShutdown()) {
return;
}
-
- error() << "initial sync attempt failed, "
- << (maxFailedAttempts - ++failedAttempts) << " attempts remaining";
- sleepsecs(5);
}
- // No need to print a stack
- if (failedAttempts >= maxFailedAttempts) {
- severe() << "The maximum number of retries have been exhausted for initial sync.";
- fassertFailedNoTrace(16233);
+ if (inShutdown()) {
+ return;
}
+
+ error() << "initial sync attempt failed, " << (maxFailedAttempts - ++failedAttempts)
+ << " attempts remaining";
+ sleepsecs(5);
+ }
+
+ // No need to print a stack
+ if (failedAttempts >= maxFailedAttempts) {
+ severe() << "The maximum number of retries have been exhausted for initial sync.";
+ fassertFailedNoTrace(16233);
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rs_initialsync.h b/src/mongo/db/repl/rs_initialsync.h
index 659bb5ad577..7add22b9a37 100644
--- a/src/mongo/db/repl/rs_initialsync.h
+++ b/src/mongo/db/repl/rs_initialsync.h
@@ -30,10 +30,10 @@
namespace mongo {
namespace repl {
- /**
- * Begins an initial sync of a node. This drops all data, chooses a sync source,
- * and runs the cloner from that sync source. The node's state is not changed.
- */
- void syncDoInitialSync();
+/**
+ * Begins an initial sync of a node. This drops all data, chooses a sync source,
+ * and runs the cloner from that sync source. The node's state is not changed.
+ */
+void syncDoInitialSync();
}
}
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index 93740b37827..e1260054e40 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -98,796 +98,753 @@
namespace mongo {
- using boost::shared_ptr;
- using std::auto_ptr;
- using std::endl;
- using std::list;
- using std::map;
- using std::set;
- using std::string;
- using std::pair;
+using boost::shared_ptr;
+using std::auto_ptr;
+using std::endl;
+using std::list;
+using std::map;
+using std::set;
+using std::string;
+using std::pair;
namespace repl {
namespace {
- class RSFatalException : public std::exception {
- public:
- RSFatalException(std::string m = "replica set fatal exception")
- : msg(m) {}
- virtual ~RSFatalException() throw() {};
- virtual const char* what() const throw() {
- return msg.c_str();
- }
- private:
- std::string msg;
- };
-
- struct DocID {
- // ns and _id both point into ownedObj's buffer
- BSONObj ownedObj;
- const char* ns;
- BSONElement _id;
- bool operator<(const DocID& other) const {
- int comp = strcmp(ns, other.ns);
- if (comp < 0)
- return true;
- if (comp > 0)
- return false;
- return _id < other._id;
- }
- };
+class RSFatalException : public std::exception {
+public:
+ RSFatalException(std::string m = "replica set fatal exception") : msg(m) {}
+ virtual ~RSFatalException() throw(){};
+ virtual const char* what() const throw() {
+ return msg.c_str();
+ }
- struct FixUpInfo {
- // note this is a set -- if there are many $inc's on a single document we need to rollback,
- // we only need to refetch it once.
- set<DocID> toRefetch;
+private:
+ std::string msg;
+};
+
+struct DocID {
+ // ns and _id both point into ownedObj's buffer
+ BSONObj ownedObj;
+ const char* ns;
+ BSONElement _id;
+ bool operator<(const DocID& other) const {
+ int comp = strcmp(ns, other.ns);
+ if (comp < 0)
+ return true;
+ if (comp > 0)
+ return false;
+ return _id < other._id;
+ }
+};
- // collections to drop
- set<string> toDrop;
+struct FixUpInfo {
+ // note this is a set -- if there are many $inc's on a single document we need to roll back,
+ // we only need to refetch it once.
+ set<DocID> toRefetch;
- set<string> collectionsToResync;
+ // collections to drop
+ set<string> toDrop;
- OpTime commonPoint;
- RecordId commonPointOurDiskloc;
+ set<string> collectionsToResync;
- int rbid; // remote server's current rollback sequence #
- };
+ OpTime commonPoint;
+ RecordId commonPointOurDiskloc;
+ int rbid; // remote server's current rollback sequence #
+};
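A short sketch of the dedup behavior the toRefetch comment relies on (document
contents are made up; in real use ownedObj is the whole oplog entry):

    // Two rollback ops touching the same (ns, _id) collapse to one entry,
    // because DocID::operator< compares ns first and _id second.
    std::set<DocID> toRefetch;
    DocID d;
    d.ownedObj = BSON("ns" << "test.coll" << "_id" << 1).getOwned();
    d.ns = d.ownedObj.getStringField("ns");
    d._id = d.ownedObj["_id"];
    toRefetch.insert(d);
    toRefetch.insert(d);  // duplicate op on the same doc: size stays 1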
- /** helper to get rollback id from another server. */
- int getRBID(DBClientConnection *c) {
- bo info;
- c->simpleCommand("admin", &info, "replSetGetRBID");
- return info["rbid"].numberInt();
- }
+/** helper to get rollback id from another server. */
+int getRBID(DBClientConnection* c) {
+ bo info;
+ c->simpleCommand("admin", &info, "replSetGetRBID");
+ return info["rbid"].numberInt();
+}
- void refetch(FixUpInfo& fixUpInfo, const BSONObj& ourObj) {
- const char* op = ourObj.getStringField("op");
- if (*op == 'n')
- return;
- if (ourObj.objsize() > 512 * 1024 * 1024)
- throw RSFatalException("rollback too large");
+void refetch(FixUpInfo& fixUpInfo, const BSONObj& ourObj) {
+ const char* op = ourObj.getStringField("op");
+ if (*op == 'n')
+ return;
+
+ if (ourObj.objsize() > 512 * 1024 * 1024)
+ throw RSFatalException("rollback too large");
- DocID doc;
- doc.ownedObj = ourObj.getOwned();
- doc.ns = doc.ownedObj.getStringField("ns");
- if (*doc.ns == '\0') {
- warning() << "replSet WARNING ignoring op on rollback no ns TODO : "
+ DocID doc;
+ doc.ownedObj = ourObj.getOwned();
+ doc.ns = doc.ownedObj.getStringField("ns");
+ if (*doc.ns == '\0') {
+ warning() << "replSet WARNING ignoring op on rollback no ns TODO : "
<< doc.ownedObj.toString();
- return;
- }
+ return;
+ }
- BSONObj obj = doc.ownedObj.getObjectField(*op=='u' ? "o2" : "o");
- if (obj.isEmpty()) {
- warning() << "replSet warning ignoring op on rollback : " << doc.ownedObj.toString();
- return;
- }
+ BSONObj obj = doc.ownedObj.getObjectField(*op == 'u' ? "o2" : "o");
+ if (obj.isEmpty()) {
+ warning() << "replSet warning ignoring op on rollback : " << doc.ownedObj.toString();
+ return;
+ }
- if (*op == 'c') {
- BSONElement first = obj.firstElement();
- NamespaceString nss(doc.ns); // foo.$cmd
- string cmdname = first.fieldName();
- Command* cmd = Command::findCommand(cmdname.c_str());
- if (cmd == NULL) {
- severe() << "replSet warning rollback no such command " << first.fieldName();
- fassertFailedNoTrace(18751);
- }
- if (cmdname == "create") {
- // Create collection operation
- // { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } }
- string ns = nss.db().toString() + '.' + obj["create"].String(); // -> foo.abc
- fixUpInfo.toDrop.insert(ns);
- return;
- }
- else if (cmdname == "drop") {
- string ns = nss.db().toString() + '.' + first.valuestr();
- fixUpInfo.collectionsToResync.insert(ns);
- return;
- }
- else if (cmdname == "dropIndexes" || cmdname == "deleteIndexes") {
- // TODO: this is bad. we simply full resync the collection here,
- // which could be very slow.
- warning() << "replSet info rollback of dropIndexes is slow in this version of "
- << "mongod";
- string ns = nss.db().toString() + '.' + first.valuestr();
- fixUpInfo.collectionsToResync.insert(ns);
- return;
- }
- else if (cmdname == "renameCollection") {
- // TODO: slow.
- warning() << "replSet info rollback of renameCollection is slow in this version of "
- << "mongod";
- string from = first.valuestr();
- string to = obj["to"].String();
- fixUpInfo.collectionsToResync.insert(from);
- fixUpInfo.collectionsToResync.insert(to);
- return;
- }
- else if (cmdname == "dropDatabase") {
- severe() << "replSet error rollback : can't rollback drop database full resync "
- << "will be required";
- log() << "replSet " << obj.toString();
- throw RSFatalException();
- }
- else if (cmdname == "collMod") {
- if (obj.nFields() == 2 && obj["usePowerOf2Sizes"].type() == Bool) {
- log() << "replSet not rolling back change of usePowerOf2Sizes: " << obj;
- }
- else {
- severe() << "replSet error cannot rollback a collMod command: " << obj;
- throw RSFatalException();
- }
- }
- else {
- severe() << "replSet error can't rollback this command yet: "
- << obj.toString();
- log() << "replSet cmdname=" << cmdname;
- throw RSFatalException();
- }
+ if (*op == 'c') {
+ BSONElement first = obj.firstElement();
+ NamespaceString nss(doc.ns); // foo.$cmd
+ string cmdname = first.fieldName();
+ Command* cmd = Command::findCommand(cmdname.c_str());
+ if (cmd == NULL) {
+ severe() << "replSet warning rollback no such command " << first.fieldName();
+ fassertFailedNoTrace(18751);
}
-
- doc._id = obj["_id"];
- if (doc._id.eoo()) {
- warning() << "replSet WARNING ignoring op on rollback no _id TODO : " << doc.ns << ' '
- << doc.ownedObj.toString();
+ if (cmdname == "create") {
+ // Create collection operation
+ // { ts: ..., h: ..., op: "c", ns: "foo.$cmd", o: { create: "abc", ... } }
+ string ns = nss.db().toString() + '.' + obj["create"].String(); // -> foo.abc
+ fixUpInfo.toDrop.insert(ns);
+ return;
+ } else if (cmdname == "drop") {
+ string ns = nss.db().toString() + '.' + first.valuestr();
+ fixUpInfo.collectionsToResync.insert(ns);
+ return;
+ } else if (cmdname == "dropIndexes" || cmdname == "deleteIndexes") {
+ // TODO: this is bad. we simply full resync the collection here,
+ // which could be very slow.
+ warning() << "replSet info rollback of dropIndexes is slow in this version of "
+ << "mongod";
+ string ns = nss.db().toString() + '.' + first.valuestr();
+ fixUpInfo.collectionsToResync.insert(ns);
return;
+ } else if (cmdname == "renameCollection") {
+ // TODO: slow.
+ warning() << "replSet info rollback of renameCollection is slow in this version of "
+ << "mongod";
+ string from = first.valuestr();
+ string to = obj["to"].String();
+ fixUpInfo.collectionsToResync.insert(from);
+ fixUpInfo.collectionsToResync.insert(to);
+ return;
+ } else if (cmdname == "dropDatabase") {
+ severe() << "replSet error rollback : can't rollback drop database full resync "
+ << "will be required";
+ log() << "replSet " << obj.toString();
+ throw RSFatalException();
+ } else if (cmdname == "collMod") {
+ if (obj.nFields() == 2 && obj["usePowerOf2Sizes"].type() == Bool) {
+ log() << "replSet not rolling back change of usePowerOf2Sizes: " << obj;
+ } else {
+ severe() << "replSet error cannot rollback a collMod command: " << obj;
+ throw RSFatalException();
+ }
+ } else {
+ severe() << "replSet error can't rollback this command yet: " << obj.toString();
+ log() << "replSet cmdname=" << cmdname;
+ throw RSFatalException();
}
+ }
- fixUpInfo.toRefetch.insert(doc);
+ doc._id = obj["_id"];
+ if (doc._id.eoo()) {
+ warning() << "replSet WARNING ignoring op on rollback no _id TODO : " << doc.ns << ' '
+ << doc.ownedObj.toString();
+ return;
}
- StatusWith<FixUpInfo> syncRollbackFindCommonPoint(OperationContext* txn,
- DBClientConnection* them) {
- Client::Context ctx(txn, rsoplog);
- FixUpInfo fixUpInfo;
+ fixUpInfo.toRefetch.insert(doc);
+}
- boost::scoped_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- rsoplog,
- ctx.db()->getCollection(rsoplog),
- InternalPlanner::BACKWARD));
+StatusWith<FixUpInfo> syncRollbackFindCommonPoint(OperationContext* txn, DBClientConnection* them) {
+ Client::Context ctx(txn, rsoplog);
+ FixUpInfo fixUpInfo;
- BSONObj ourObj;
- RecordId ourLoc;
+ boost::scoped_ptr<PlanExecutor> exec(InternalPlanner::collectionScan(
+ txn, rsoplog, ctx.db()->getCollection(rsoplog), InternalPlanner::BACKWARD));
- if (PlanExecutor::ADVANCED != exec->getNext(&ourObj, &ourLoc)) {
- return StatusWith<FixUpInfo>(ErrorCodes::OplogStartMissing, "no oplog during initsync");
- }
+ BSONObj ourObj;
+ RecordId ourLoc;
- const Query query = Query().sort(reverseNaturalObj);
- const BSONObj fields = BSON("ts" << 1 << "h" << 1);
+ if (PlanExecutor::ADVANCED != exec->getNext(&ourObj, &ourLoc)) {
+ return StatusWith<FixUpInfo>(ErrorCodes::OplogStartMissing, "no oplog during initsync");
+ }
- //auto_ptr<DBClientCursor> u = us->query(rsoplog, query, 0, 0, &fields, 0, 0);
+ const Query query = Query().sort(reverseNaturalObj);
+ const BSONObj fields = BSON("ts" << 1 << "h" << 1);
- fixUpInfo.rbid = getRBID(them);
- auto_ptr<DBClientCursor> oplogCursor = them->query(rsoplog, query, 0, 0, &fields, 0, 0);
+ // auto_ptr<DBClientCursor> u = us->query(rsoplog, query, 0, 0, &fields, 0, 0);
- if (oplogCursor.get() == NULL || !oplogCursor->more())
- throw RSFatalException("remote oplog empty or unreadable");
+ fixUpInfo.rbid = getRBID(them);
+ auto_ptr<DBClientCursor> oplogCursor = them->query(rsoplog, query, 0, 0, &fields, 0, 0);
- OpTime ourTime = ourObj["ts"]._opTime();
- BSONObj theirObj = oplogCursor->nextSafe();
- OpTime theirTime = theirObj["ts"]._opTime();
+ if (oplogCursor.get() == NULL || !oplogCursor->more())
+ throw RSFatalException("remote oplog empty or unreadable");
- long long diff = static_cast<long long>(ourTime.getSecs())
- - static_cast<long long>(theirTime.getSecs());
- // diff could be positive, negative, or zero
- log() << "replSet info rollback our last optime: " << ourTime.toStringPretty();
- log() << "replSet info rollback their last optime: " << theirTime.toStringPretty();
- log() << "replSet info rollback diff in end of log times: " << diff << " seconds";
- if (diff > 1800) {
- log() << "replSet rollback too long a time period for a rollback.";
- throw RSFatalException("rollback error: not willing to roll back "
- "more than 30 minutes of data");
- }
+ OpTime ourTime = ourObj["ts"]._opTime();
+ BSONObj theirObj = oplogCursor->nextSafe();
+ OpTime theirTime = theirObj["ts"]._opTime();
- unsigned long long scanned = 0;
- while (1) {
- scanned++;
- // todo add code to assure no excessive scanning for too long
- if (ourTime == theirTime) {
- if (ourObj["h"].Long() == theirObj["h"].Long()) {
- // found the point back in time where we match.
- // todo : check a few more just to be careful about hash collisions.
- log() << "replSet rollback found matching events at "
- << ourTime.toStringPretty();
- log() << "replSet rollback findcommonpoint scanned : " << scanned;
- fixUpInfo.commonPoint = ourTime;
- fixUpInfo.commonPointOurDiskloc = ourLoc;
- break;
- }
+ long long diff =
+ static_cast<long long>(ourTime.getSecs()) - static_cast<long long>(theirTime.getSecs());
+ // diff could be positive, negative, or zero
+ log() << "replSet info rollback our last optime: " << ourTime.toStringPretty();
+ log() << "replSet info rollback their last optime: " << theirTime.toStringPretty();
+ log() << "replSet info rollback diff in end of log times: " << diff << " seconds";
+ if (diff > 1800) {
+ log() << "replSet rollback too long a time period for a rollback.";
+ throw RSFatalException(
+ "rollback error: not willing to roll back "
+ "more than 30 minutes of data");
+ }
- refetch(fixUpInfo, ourObj);
+ unsigned long long scanned = 0;
+ while (1) {
+ scanned++;
+ // todo add code to assure no excessive scanning for too long
+ if (ourTime == theirTime) {
+ if (ourObj["h"].Long() == theirObj["h"].Long()) {
+ // found the point back in time where we match.
+ // todo : check a few more just to be careful about hash collisions.
+ log() << "replSet rollback found matching events at " << ourTime.toStringPretty();
+ log() << "replSet rollback findcommonpoint scanned : " << scanned;
+ fixUpInfo.commonPoint = ourTime;
+ fixUpInfo.commonPointOurDiskloc = ourLoc;
+ break;
+ }
- if (!oplogCursor->more()) {
- log() << "replSet rollback error RS100 reached beginning of remote oplog";
- log() << "replSet them: " << them->toString() << " scanned: " << scanned;
- log() << "replSet theirTime: " << theirTime.toStringLong();
- log() << "replSet ourTime: " << ourTime.toStringLong();
- throw RSFatalException("RS100 reached beginning of remote oplog [2]");
- }
- theirObj = oplogCursor->nextSafe();
- theirTime = theirObj["ts"]._opTime();
-
- if (PlanExecutor::ADVANCED != exec->getNext(&ourObj, &ourLoc)) {
- log() << "replSet rollback error RS101 reached beginning of local oplog";
- log() << "replSet them: " << them->toString() << " scanned: " << scanned;
- log() << "replSet theirTime: " << theirTime.toStringLong();
- log() << "replSet ourTime: " << ourTime.toStringLong();
- throw RSFatalException("RS101 reached beginning of local oplog [1]");
- }
- ourTime = ourObj["ts"]._opTime();
+ refetch(fixUpInfo, ourObj);
+
+ if (!oplogCursor->more()) {
+ log() << "replSet rollback error RS100 reached beginning of remote oplog";
+ log() << "replSet them: " << them->toString() << " scanned: " << scanned;
+ log() << "replSet theirTime: " << theirTime.toStringLong();
+ log() << "replSet ourTime: " << ourTime.toStringLong();
+ throw RSFatalException("RS100 reached beginning of remote oplog [2]");
}
- else if (theirTime > ourTime) {
- if (!oplogCursor->more()) {
- log() << "replSet rollback error RS100 reached beginning of remote oplog";
- log() << "replSet them: " << them->toString() << " scanned: "
- << scanned;
- log() << "replSet theirTime: " << theirTime.toStringLong();
- log() << "replSet ourTime: " << ourTime.toStringLong();
- throw RSFatalException("RS100 reached beginning of remote oplog [1]");
- }
- theirObj = oplogCursor->nextSafe();
- theirTime = theirObj["ts"]._opTime();
+ theirObj = oplogCursor->nextSafe();
+ theirTime = theirObj["ts"]._opTime();
+
+ if (PlanExecutor::ADVANCED != exec->getNext(&ourObj, &ourLoc)) {
+ log() << "replSet rollback error RS101 reached beginning of local oplog";
+ log() << "replSet them: " << them->toString() << " scanned: " << scanned;
+ log() << "replSet theirTime: " << theirTime.toStringLong();
+ log() << "replSet ourTime: " << ourTime.toStringLong();
+ throw RSFatalException("RS101 reached beginning of local oplog [1]");
}
- else {
- // theirTime < ourTime
- refetch(fixUpInfo, ourObj);
- if (PlanExecutor::ADVANCED != exec->getNext(&ourObj, &ourLoc)) {
- log() << "replSet rollback error RS101 reached beginning of local oplog";
- log() << "replSet them: " << them->toString() << " scanned: " << scanned;
- log() << "replSet theirTime: " << theirTime.toStringLong();
- log() << "replSet ourTime: " << ourTime.toStringLong();
- throw RSFatalException("RS101 reached beginning of local oplog [2]");
- }
- ourTime = ourObj["ts"]._opTime();
+ ourTime = ourObj["ts"]._opTime();
+ } else if (theirTime > ourTime) {
+ if (!oplogCursor->more()) {
+ log() << "replSet rollback error RS100 reached beginning of remote oplog";
+ log() << "replSet them: " << them->toString() << " scanned: " << scanned;
+ log() << "replSet theirTime: " << theirTime.toStringLong();
+ log() << "replSet ourTime: " << ourTime.toStringLong();
+ throw RSFatalException("RS100 reached beginning of remote oplog [1]");
+ }
+ theirObj = oplogCursor->nextSafe();
+ theirTime = theirObj["ts"]._opTime();
+ } else {
+ // theirTime < ourTime
+ refetch(fixUpInfo, ourObj);
+ if (PlanExecutor::ADVANCED != exec->getNext(&ourObj, &ourLoc)) {
+ log() << "replSet rollback error RS101 reached beginning of local oplog";
+ log() << "replSet them: " << them->toString() << " scanned: " << scanned;
+ log() << "replSet theirTime: " << theirTime.toStringLong();
+ log() << "replSet ourTime: " << ourTime.toStringLong();
+ throw RSFatalException("RS101 reached beginning of local oplog [2]");
}
+ ourTime = ourObj["ts"]._opTime();
}
-
- return StatusWith<FixUpInfo>(fixUpInfo);
}
- bool copyCollectionFromRemote(OperationContext* txn,
- const string& host,
- const string& ns,
- string& errmsg) {
- Cloner cloner;
+ return StatusWith<FixUpInfo>(fixUpInfo);
+}
- DBClientConnection *tmpConn = new DBClientConnection();
- // cloner owns _conn in auto_ptr
- cloner.setConnection(tmpConn);
- uassert(15908, errmsg,
- tmpConn->connect(HostAndPort(host), errmsg) && replAuthenticate(tmpConn));
+bool copyCollectionFromRemote(OperationContext* txn,
+ const string& host,
+ const string& ns,
+ string& errmsg) {
+ Cloner cloner;
- return cloner.copyCollection(txn, ns, BSONObj(), errmsg, true, false, true, false);
- }
+ DBClientConnection* tmpConn = new DBClientConnection();
+ // cloner owns _conn in auto_ptr
+ cloner.setConnection(tmpConn);
+ uassert(
+ 15908, errmsg, tmpConn->connect(HostAndPort(host), errmsg) && replAuthenticate(tmpConn));
- void syncFixUp(OperationContext* txn,
- FixUpInfo& fixUpInfo,
- OplogReader* oplogreader,
- ReplicationCoordinator* replCoord) {
- DBClientConnection* them = oplogreader->conn();
+ return cloner.copyCollection(txn, ns, BSONObj(), errmsg, true, false, true, false);
+}
- // fetch all first so we needn't handle interruption in a fancy way
+void syncFixUp(OperationContext* txn,
+ FixUpInfo& fixUpInfo,
+ OplogReader* oplogreader,
+ ReplicationCoordinator* replCoord) {
+ DBClientConnection* them = oplogreader->conn();
- unsigned long long totalSize = 0;
+ // fetch all first so we needn't handle interruption in a fancy way
- list< pair<DocID, BSONObj> > goodVersions;
+ unsigned long long totalSize = 0;
- BSONObj newMinValid;
+ list<pair<DocID, BSONObj>> goodVersions;
- // fetch all the goodVersions of each document from current primary
- DocID doc;
- unsigned long long numFetched = 0;
- try {
- for (set<DocID>::iterator it = fixUpInfo.toRefetch.begin();
- it != fixUpInfo.toRefetch.end();
- it++) {
- doc = *it;
-
- verify(!doc._id.eoo());
-
- {
- // TODO : slow. lots of round trips.
- numFetched++;
- BSONObj good = them->findOne(doc.ns, doc._id.wrap(),
- NULL, QueryOption_SlaveOk).getOwned();
- totalSize += good.objsize();
- uassert(13410, "replSet too much data to roll back",
- totalSize < 300 * 1024 * 1024);
-
- // note good might be eoo, indicating we should delete it
- goodVersions.push_back(pair<DocID, BSONObj>(doc,good));
- }
- }
- newMinValid = oplogreader->getLastOp(rsoplog);
- if (newMinValid.isEmpty()) {
- error() << "rollback error newMinValid empty?";
- return;
+ BSONObj newMinValid;
+
+ // fetch all the goodVersions of each document from current primary
+ DocID doc;
+ unsigned long long numFetched = 0;
+ try {
+ for (set<DocID>::iterator it = fixUpInfo.toRefetch.begin(); it != fixUpInfo.toRefetch.end();
+ it++) {
+ doc = *it;
+
+ verify(!doc._id.eoo());
+
+ {
+ // TODO : slow. lots of round trips.
+ numFetched++;
+ BSONObj good =
+ them->findOne(doc.ns, doc._id.wrap(), NULL, QueryOption_SlaveOk).getOwned();
+ totalSize += good.objsize();
+ uassert(13410, "replSet too much data to roll back", totalSize < 300 * 1024 * 1024);
+
+ // note good might be eoo, indicating we should delete it
+ goodVersions.push_back(pair<DocID, BSONObj>(doc, good));
}
}
- catch (DBException& e) {
- LOG(1) << "rollback re-get objects: " << e.toString();
- error() << "rollback couldn't re-get ns:" << doc.ns << " _id:" << doc._id << ' '
- << numFetched << '/' << fixUpInfo.toRefetch.size();
- throw e;
- }
-
- log() << "rollback 3.5";
- if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
- // our source rolled back itself. so the data we received isn't necessarily consistent.
- warning() << "rollback rbid on source changed during rollback, cancelling this attempt";
+ newMinValid = oplogreader->getLastOp(rsoplog);
+ if (newMinValid.isEmpty()) {
+ error() << "rollback error newMinValid empty?";
return;
}
+ } catch (DBException& e) {
+ LOG(1) << "rollback re-get objects: " << e.toString();
+ error() << "rollback couldn't re-get ns:" << doc.ns << " _id:" << doc._id << ' '
+ << numFetched << '/' << fixUpInfo.toRefetch.size();
+ throw e;
+ }
- // update them
- log() << "rollback 4 n:" << goodVersions.size();
-
- bool warn = false;
+ log() << "rollback 3.5";
+ if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
+ // our source rolled back itself. so the data we received isn't necessarily consistent.
+ warning() << "rollback rbid on source changed during rollback, cancelling this attempt";
+ return;
+ }
- invariant(!fixUpInfo.commonPointOurDiskloc.isNull());
- invariant(txn->lockState()->isW());
+ // update them
+ log() << "rollback 4 n:" << goodVersions.size();
- // we have items we are writing that aren't from a point-in-time. thus best not to come
- // online until we get to that point in freshness.
- OpTime minValid = newMinValid["ts"]._opTime();
- log() << "replSet minvalid=" << minValid.toStringLong();
- setMinValid(txn, minValid);
+ bool warn = false;
- // any full collection resyncs required?
- if (!fixUpInfo.collectionsToResync.empty()) {
- for (set<string>::iterator it = fixUpInfo.collectionsToResync.begin();
- it != fixUpInfo.collectionsToResync.end();
- it++) {
- string ns = *it;
- log() << "rollback 4.1 coll resync " << ns;
+ invariant(!fixUpInfo.commonPointOurDiskloc.isNull());
+ invariant(txn->lockState()->isW());
- const NamespaceString nss(ns);
+ // we have items we are writing that aren't from a point-in-time. thus best not to come
+ // online until we get to that point in freshness.
+ OpTime minValid = newMinValid["ts"]._opTime();
+ log() << "replSet minvalid=" << minValid.toStringLong();
+ setMinValid(txn, minValid);
- Database* db = dbHolder().openDb(txn, nss.db().toString());
- invariant(db);
+ // any full collection resyncs required?
+ if (!fixUpInfo.collectionsToResync.empty()) {
+ for (set<string>::iterator it = fixUpInfo.collectionsToResync.begin();
+ it != fixUpInfo.collectionsToResync.end();
+ it++) {
+ string ns = *it;
+ log() << "rollback 4.1 coll resync " << ns;
- {
- WriteUnitOfWork wunit(txn);
- db->dropCollection(txn, ns);
- wunit.commit();
- }
+ const NamespaceString nss(ns);
- {
- string errmsg;
+ Database* db = dbHolder().openDb(txn, nss.db().toString());
+ invariant(db);
- // This comes as a GlobalWrite lock, so there is no DB to be acquired after
- // resume, so we can skip the DB stability checks. Also
- // copyCollectionFromRemote will acquire its own database pointer, under the
- // appropriate locks, so just releasing and acquiring the lock is safe.
- invariant(txn->lockState()->isW());
- Lock::TempRelease release(txn->lockState());
-
- bool ok = copyCollectionFromRemote(txn, them->getServerAddress(), ns, errmsg);
- uassert(15909, str::stream() << "replSet rollback error resyncing collection "
- << ns << ' ' << errmsg, ok);
- }
+ {
+ WriteUnitOfWork wunit(txn);
+ db->dropCollection(txn, ns);
+ wunit.commit();
}
- // we did more reading from primary, so check it again for a rollback (which would mess
- // us up), and make minValid newer.
- log() << "rollback 4.2";
+ {
+ string errmsg;
- string err;
- try {
- newMinValid = oplogreader->getLastOp(rsoplog);
- if (newMinValid.isEmpty()) {
- err = "can't get minvalid from sync source";
- }
- else {
- OpTime minValid = newMinValid["ts"]._opTime();
- log() << "replSet minvalid=" << minValid.toStringLong();
- setMinValid(txn, minValid);
- }
- }
- catch (DBException& e) {
- err = "can't get/set minvalid: ";
- err += e.what();
- }
- if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
- // our source rolled back itself. so the data we received isn't necessarily
- // consistent. however, we've now done writes. thus we have a problem.
- err += "rbid at primary changed during resync/rollback";
- }
- if (!err.empty()) {
- error() << "replSet error rolling back : " << err
- << ". A full resync will be necessary.";
- // TODO: reset minvalid so that we are permanently in fatal state
- // TODO: don't be fatal, but rather, get all the data first.
- throw RSFatalException();
+ // This comes as a GlobalWrite lock, so there is no DB to be acquired after
+ // resume, so we can skip the DB stability checks. Also
+ // copyCollectionFromRemote will acquire its own database pointer, under the
+ // appropriate locks, so just releasing and acquiring the lock is safe.
+ invariant(txn->lockState()->isW());
+ Lock::TempRelease release(txn->lockState());
+
+ bool ok = copyCollectionFromRemote(txn, them->getServerAddress(), ns, errmsg);
+ uassert(15909,
+ str::stream() << "replSet rollback error resyncing collection " << ns << ' '
+ << errmsg,
+ ok);
}
- log() << "rollback 4.3";
}
- map<string,shared_ptr<Helpers::RemoveSaver> > removeSavers;
+ // we did more reading from primary, so check it again for a rollback (which would mess
+ // us up), and make minValid newer.
+ log() << "rollback 4.2";
- log() << "rollback 4.6";
- // drop collections to drop before doing individual fixups - that might make things faster
- // below actually if there were subsequent inserts to rollback
- for (set<string>::iterator it = fixUpInfo.toDrop.begin();
- it != fixUpInfo.toDrop.end();
- it++) {
- log() << "replSet rollback drop: " << *it;
-
- Database* db = dbHolder().get(txn, nsToDatabaseSubstring(*it));
- if (db) {
- WriteUnitOfWork wunit(txn);
-
- shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[*it];
- if (!removeSaver)
- removeSaver.reset(new Helpers::RemoveSaver("rollback", "", *it));
-
- // perform a collection scan and write all documents in the collection to disk
- boost::scoped_ptr<PlanExecutor> exec(
- InternalPlanner::collectionScan(txn,
- *it,
- db->getCollection(*it)));
- BSONObj curObj;
- PlanExecutor::ExecState execState;
- while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) {
- removeSaver->goingToDelete(curObj);
- }
- if (execState != PlanExecutor::IS_EOF) {
- if (execState == PlanExecutor::FAILURE &&
- WorkingSetCommon::isValidStatusMemberObject(curObj)) {
- Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj);
- severe() << "rolling back createCollection on " << *it
- << " failed with " << errorStatus
- << ". A full resync is necessary.";
- }
- else {
- severe() << "rolling back createCollection on " << *it
- << " failed. A full resync is necessary.";
- }
-
- throw RSFatalException();
- }
-
- db->dropCollection(txn, *it);
- wunit.commit();
+ string err;
+ try {
+ newMinValid = oplogreader->getLastOp(rsoplog);
+ if (newMinValid.isEmpty()) {
+ err = "can't get minvalid from sync source";
+ } else {
+ OpTime minValid = newMinValid["ts"]._opTime();
+ log() << "replSet minvalid=" << minValid.toStringLong();
+ setMinValid(txn, minValid);
}
+ } catch (DBException& e) {
+ err = "can't get/set minvalid: ";
+ err += e.what();
+ }
+ if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
+ // our source rolled back itself. so the data we received isn't necessarily
+ // consistent. however, we've now done writes. thus we have a problem.
+ err += "rbid at primary changed during resync/rollback";
}
+ if (!err.empty()) {
+ error() << "replSet error rolling back : " << err
+ << ". A full resync will be necessary.";
+ // TODO: reset minvalid so that we are permanently in fatal state
+ // TODO: don't be fatal, but rather, get all the data first.
+ throw RSFatalException();
+ }
+ log() << "rollback 4.3";
+ }
- log() << "rollback 4.7";
- Client::Context ctx(txn, rsoplog);
- Collection* oplogCollection = ctx.db()->getCollection(rsoplog);
- uassert(13423,
- str::stream() << "replSet error in rollback can't find " << rsoplog,
- oplogCollection);
-
- unsigned deletes = 0, updates = 0;
- time_t lastProgressUpdate = time(0);
- time_t progressUpdateGap = 10;
- for (list<pair<DocID, BSONObj> >::iterator it = goodVersions.begin();
- it != goodVersions.end();
- it++) {
- time_t now = time(0);
- if (now - lastProgressUpdate > progressUpdateGap) {
- log() << "replSet " << deletes << " delete and "
- << updates << " update operations processed out of "
- << goodVersions.size() << " total operations";
- lastProgressUpdate = now;
+ map<string, shared_ptr<Helpers::RemoveSaver>> removeSavers;
+
+ log() << "rollback 4.6";
+ // drop collections to drop before doing individual fixups - that might make things faster
+ // below actually if there were subsequent inserts to rollback
+ for (set<string>::iterator it = fixUpInfo.toDrop.begin(); it != fixUpInfo.toDrop.end(); it++) {
+ log() << "replSet rollback drop: " << *it;
+
+ Database* db = dbHolder().get(txn, nsToDatabaseSubstring(*it));
+ if (db) {
+ WriteUnitOfWork wunit(txn);
+
+ shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[*it];
+ if (!removeSaver)
+ removeSaver.reset(new Helpers::RemoveSaver("rollback", "", *it));
+
+ // perform a collection scan and write all documents in the collection to disk
+ boost::scoped_ptr<PlanExecutor> exec(
+ InternalPlanner::collectionScan(txn, *it, db->getCollection(*it)));
+ BSONObj curObj;
+ PlanExecutor::ExecState execState;
+ while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) {
+ removeSaver->goingToDelete(curObj);
}
- const DocID& doc = it->first;
- BSONObj pattern = doc._id.wrap(); // { _id : ... }
- try {
- verify(doc.ns && *doc.ns);
- if (fixUpInfo.collectionsToResync.count(doc.ns)) {
- // we just synced this entire collection
- continue;
+ if (execState != PlanExecutor::IS_EOF) {
+ if (execState == PlanExecutor::FAILURE &&
+ WorkingSetCommon::isValidStatusMemberObject(curObj)) {
+ Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj);
+ severe() << "rolling back createCollection on " << *it << " failed with "
+ << errorStatus << ". A full resync is necessary.";
+ } else {
+ severe() << "rolling back createCollection on " << *it
+ << " failed. A full resync is necessary.";
}
- // keep an archive of items rolled back
- shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[doc.ns];
- if (!removeSaver)
- removeSaver.reset(new Helpers::RemoveSaver("rollback", "", doc.ns));
-
- // todo: lots of overhead in context, this can be faster
- Client::Context ctx(txn, doc.ns);
+ throw RSFatalException();
+ }
- // Add the doc to our rollback file
- BSONObj obj;
+ db->dropCollection(txn, *it);
+ wunit.commit();
+ }
+ }
- Collection* collection = ctx.db()->getCollection(doc.ns);
+ log() << "rollback 4.7";
+ Client::Context ctx(txn, rsoplog);
+ Collection* oplogCollection = ctx.db()->getCollection(rsoplog);
+ uassert(13423,
+ str::stream() << "replSet error in rollback can't find " << rsoplog,
+ oplogCollection);
+
+ unsigned deletes = 0, updates = 0;
+ time_t lastProgressUpdate = time(0);
+ time_t progressUpdateGap = 10;
+ for (list<pair<DocID, BSONObj>>::iterator it = goodVersions.begin(); it != goodVersions.end();
+ it++) {
+ time_t now = time(0);
+ if (now - lastProgressUpdate > progressUpdateGap) {
+ log() << "replSet " << deletes << " delete and " << updates
+ << " update operations processed out of " << goodVersions.size()
+ << " total operations";
+ lastProgressUpdate = now;
+ }
+ const DocID& doc = it->first;
+ BSONObj pattern = doc._id.wrap(); // { _id : ... }
+ try {
+ verify(doc.ns && *doc.ns);
+ if (fixUpInfo.collectionsToResync.count(doc.ns)) {
+ // we just synced this entire collection
+ continue;
+ }
- // Do not log an error when undoing an insert on a no longer existent collection.
- // It is likely that the collection was dropped as part of rolling back a
- // createCollection command and regardless, the document no longer exists.
- if (collection) {
- bool found = Helpers::findOne(txn, collection, pattern, obj, false);
- if (found) {
- removeSaver->goingToDelete(obj);
- }
- else {
- error() << "rollback cannot find object: " << pattern
- << " in namespace " << doc.ns;
- }
+ // keep an archive of items rolled back
+ shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[doc.ns];
+ if (!removeSaver)
+ removeSaver.reset(new Helpers::RemoveSaver("rollback", "", doc.ns));
+
+ // todo: lots of overhead in context, this can be faster
+ Client::Context ctx(txn, doc.ns);
+
+ // Add the doc to our rollback file
+ BSONObj obj;
+
+ Collection* collection = ctx.db()->getCollection(doc.ns);
+
+ // Do not log an error when undoing an insert on a no longer existent collection.
+ // It is likely that the collection was dropped as part of rolling back a
+ // createCollection command and regardless, the document no longer exists.
+ if (collection) {
+ bool found = Helpers::findOne(txn, collection, pattern, obj, false);
+ if (found) {
+ removeSaver->goingToDelete(obj);
+ } else {
+ error() << "rollback cannot find object: " << pattern << " in namespace "
+ << doc.ns;
}
+ }
- if (it->second.isEmpty()) {
- // wasn't on the primary; delete.
- // TODO 1.6 : can't delete from a capped collection. need to handle that here.
- deletes++;
-
- if (collection) {
- if (collection->isCapped()) {
- // can't delete from a capped collection - so we truncate instead. if
- // this item must go, so must all successors!!!
- try {
- // TODO: IIRC cappedTruncateAfter does not handle completely empty.
- // this will crazy slow if no _id index.
- long long start = Listener::getElapsedTimeMillis();
- RecordId loc = Helpers::findOne(txn, collection, pattern, false);
- if (Listener::getElapsedTimeMillis() - start > 200)
- log() << "replSet warning roll back slow no _id index for "
- << doc.ns << " perhaps?";
- // would be faster but requires index:
- // RecordId loc = Helpers::findById(nsd, pattern);
- if (!loc.isNull()) {
- try {
- collection->temp_cappedTruncateAfter(txn, loc, true);
- }
- catch (DBException& e) {
- if (e.getCode() == 13415) {
- // hack: need to just make cappedTruncate do this...
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- WriteUnitOfWork wunit(txn);
- uassertStatusOK(collection->truncate(txn));
- wunit.commit();
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
- txn,
- "truncate",
- collection->ns().ns());
- }
- else {
- throw e;
+ if (it->second.isEmpty()) {
+ // wasn't on the primary; delete.
+ // TODO 1.6 : can't delete from a capped collection. need to handle that here.
+ deletes++;
+
+ if (collection) {
+ if (collection->isCapped()) {
+ // can't delete from a capped collection - so we truncate instead. if
+ // this item must go, so must all successors!!!
+ try {
+ // TODO: IIRC cappedTruncateAfter does not handle completely empty.
+                        // this will be crazy slow if there is no _id index.
+ long long start = Listener::getElapsedTimeMillis();
+ RecordId loc = Helpers::findOne(txn, collection, pattern, false);
+ if (Listener::getElapsedTimeMillis() - start > 200)
+ log() << "replSet warning roll back slow no _id index for "
+ << doc.ns << " perhaps?";
+ // would be faster but requires index:
+ // RecordId loc = Helpers::findById(nsd, pattern);
+ if (!loc.isNull()) {
+ try {
+ collection->temp_cappedTruncateAfter(txn, loc, true);
+ } catch (DBException& e) {
+ if (e.getCode() == 13415) {
+ // hack: need to just make cappedTruncate do this...
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork wunit(txn);
+ uassertStatusOK(collection->truncate(txn));
+ wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
+ txn, "truncate", collection->ns().ns());
+ } else {
+ throw e;
}
}
}
- catch (DBException& e) {
- log() << "replSet error rolling back capped collection rec "
- << doc.ns << ' ' << e.toString();
- }
- }
- else {
- deleteObjects(txn,
- ctx.db(),
- doc.ns,
- pattern,
- PlanExecutor::YIELD_MANUAL,
- true, // justone
- false, // logop
- true); // god
+ } catch (DBException& e) {
+ log() << "replSet error rolling back capped collection rec " << doc.ns
+ << ' ' << e.toString();
}
- // did we just empty the collection? if so let's check if it even
- // exists on the source.
- if (collection->numRecords(txn) == 0) {
- try {
- std::list<BSONObj> lst =
- them->getCollectionInfos( ctx.db()->name(),
- BSON( "name" << nsToCollectionSubstring( doc.ns ) ) );
- if (lst.empty()) {
- // we should drop
- WriteUnitOfWork wunit(txn);
- ctx.db()->dropCollection(txn, doc.ns);
- wunit.commit();
- }
- }
- catch (DBException&) {
- // this isn't *that* big a deal, but is bad.
- log() << "replSet warning rollback error querying for existence of "
- << doc.ns << " at the primary, ignoring";
+ } else {
+ deleteObjects(txn,
+ ctx.db(),
+ doc.ns,
+ pattern,
+ PlanExecutor::YIELD_MANUAL,
+ true, // justone
+ false, // logop
+ true); // god
+ }
+ // did we just empty the collection? if so let's check if it even
+ // exists on the source.
+ if (collection->numRecords(txn) == 0) {
+ try {
+ std::list<BSONObj> lst = them->getCollectionInfos(
+ ctx.db()->name(), BSON("name" << nsToCollectionSubstring(doc.ns)));
+ if (lst.empty()) {
+ // we should drop
+ WriteUnitOfWork wunit(txn);
+ ctx.db()->dropCollection(txn, doc.ns);
+ wunit.commit();
}
+ } catch (DBException&) {
+ // this isn't *that* big a deal, but is bad.
+ log() << "replSet warning rollback error querying for existence of "
+ << doc.ns << " at the primary, ignoring";
}
}
}
- else {
- // TODO faster...
- OpDebug debug;
- updates++;
-
- const NamespaceString requestNs(doc.ns);
- UpdateRequest request(requestNs);
-
- request.setQuery(pattern);
- request.setUpdates(it->second);
- request.setGod();
- request.setUpsert();
- UpdateLifecycleImpl updateLifecycle(true, requestNs);
- request.setLifecycle(&updateLifecycle);
-
- update(txn, ctx.db(), request, &debug);
-
- }
- }
- catch (DBException& e) {
- log() << "replSet exception in rollback ns:" << doc.ns << ' ' << pattern.toString()
- << ' ' << e.toString() << " ndeletes:" << deletes;
- warn = true;
+ } else {
+ // TODO faster...
+ OpDebug debug;
+ updates++;
+
+ const NamespaceString requestNs(doc.ns);
+ UpdateRequest request(requestNs);
+
+ request.setQuery(pattern);
+ request.setUpdates(it->second);
+ request.setGod();
+ request.setUpsert();
+ UpdateLifecycleImpl updateLifecycle(true, requestNs);
+ request.setLifecycle(&updateLifecycle);
+
+ update(txn, ctx.db(), request, &debug);
}
- }
-
- removeSavers.clear(); // this effectively closes all of them
- log() << "rollback 5 d:" << deletes << " u:" << updates;
- log() << "rollback 6";
-
- // clean up oplog
- LOG(2) << "replSet rollback truncate oplog after " <<
- fixUpInfo.commonPoint.toStringPretty();
- // TODO: fatal error if this throws?
- oplogCollection->temp_cappedTruncateAfter(txn, fixUpInfo.commonPointOurDiskloc, false);
-
- Status status = getGlobalAuthorizationManager()->initialize(txn);
- if (!status.isOK()) {
- warning() << "Failed to reinitialize auth data after rollback: " << status;
+ } catch (DBException& e) {
+ log() << "replSet exception in rollback ns:" << doc.ns << ' ' << pattern.toString()
+ << ' ' << e.toString() << " ndeletes:" << deletes;
warn = true;
}
+ }
- // Reload the lastOpTimeApplied value in the replcoord and the lastAppliedHash value in
- // bgsync to reflect our new last op.
- replCoord->resetLastOpTimeFromOplog(txn);
- BackgroundSync::get()->loadLastAppliedHash(txn);
+ removeSavers.clear(); // this effectively closes all of them
+ log() << "rollback 5 d:" << deletes << " u:" << updates;
+ log() << "rollback 6";
- // done
- if (warn)
- warning() << "issues during syncRollback, see log";
- else
- log() << "rollback done";
- }
+ // clean up oplog
+ LOG(2) << "replSet rollback truncate oplog after " << fixUpInfo.commonPoint.toStringPretty();
+ // TODO: fatal error if this throws?
+ oplogCollection->temp_cappedTruncateAfter(txn, fixUpInfo.commonPointOurDiskloc, false);
- unsigned _syncRollback(OperationContext* txn,
- OplogReader* oplogreader,
- ReplicationCoordinator* replCoord) {
- invariant(!txn->lockState()->isLocked());
+ Status status = getGlobalAuthorizationManager()->initialize(txn);
+ if (!status.isOK()) {
+ warning() << "Failed to reinitialize auth data after rollback: " << status;
+ warn = true;
+ }
- log() << "rollback 0";
+ // Reload the lastOpTimeApplied value in the replcoord and the lastAppliedHash value in
+ // bgsync to reflect our new last op.
+ replCoord->resetLastOpTimeFromOplog(txn);
+ BackgroundSync::get()->loadLastAppliedHash(txn);
+
+ // done
+ if (warn)
+ warning() << "issues during syncRollback, see log";
+ else
+ log() << "rollback done";
+}
+
+unsigned _syncRollback(OperationContext* txn,
+ OplogReader* oplogreader,
+ ReplicationCoordinator* replCoord) {
+ invariant(!txn->lockState()->isLocked());
+
+ log() << "rollback 0";
+
+ Lock::GlobalWrite globalWrite(txn->lockState(), 20000);
+ if (!globalWrite.isLocked()) {
+ warning() << "rollback couldn't get write lock in a reasonable time";
+ return 2;
+ }
- Lock::GlobalWrite globalWrite(txn->lockState(), 20000);
- if (!globalWrite.isLocked()) {
- warning() << "rollback couldn't get write lock in a reasonable time";
- return 2;
- }
+    /** by doing this, we will not service reads (we return an error as we aren't in secondary
+     * state). that perhaps is moot because of the write lock above, but that write lock
+ * probably gets deferred or removed or yielded later anyway.
+ *
+ * also, this is better for status reporting - we know what is happening.
+ */
+ if (!replCoord->setFollowerMode(MemberState::RS_ROLLBACK)) {
+ warning() << "Cannot transition from " << replCoord->getMemberState() << " to "
+ << MemberState(MemberState::RS_ROLLBACK);
+ return 0;
+ }
- /** by doing this, we will not service reads (return an error as we aren't in secondary
- * state. that perhaps is moot because of the write lock above, but that write lock
- * probably gets deferred or removed or yielded later anyway.
- *
- * also, this is better for status reporting - we know what is happening.
- */
- if (!replCoord->setFollowerMode(MemberState::RS_ROLLBACK)) {
- warning() << "Cannot transition from " << replCoord->getMemberState() <<
- " to " << MemberState(MemberState::RS_ROLLBACK);
- return 0;
- }
+ FixUpInfo how;
+ log() << "rollback 1";
+ {
+ oplogreader->resetCursor();
- FixUpInfo how;
- log() << "rollback 1";
- {
- oplogreader->resetCursor();
-
- log() << "rollback 2 FindCommonPoint";
- try {
- StatusWith<FixUpInfo> res = syncRollbackFindCommonPoint(txn, oplogreader->conn());
- if (!res.isOK()) {
- switch (res.getStatus().code()) {
- case ErrorCodes::OplogStartMissing:
- return 1;
- default:
- throw new RSFatalException(res.getStatus().toString());
- }
- }
- else {
- how = res.getValue();
+ log() << "rollback 2 FindCommonPoint";
+ try {
+ StatusWith<FixUpInfo> res = syncRollbackFindCommonPoint(txn, oplogreader->conn());
+ if (!res.isOK()) {
+ switch (res.getStatus().code()) {
+ case ErrorCodes::OplogStartMissing:
+ return 1;
+ default:
+                    throw RSFatalException(res.getStatus().toString());
}
+ } else {
+ how = res.getValue();
}
- catch (RSFatalException& e) {
- error() << string(e.what());
- fassertFailedNoTrace(18752);
- return 2;
- }
- catch (DBException& e) {
- warning() << string("rollback 2 exception ") + e.toString() + "; sleeping 1 min";
+ } catch (RSFatalException& e) {
+ error() << string(e.what());
+ fassertFailedNoTrace(18752);
+ return 2;
+ } catch (DBException& e) {
+ warning() << string("rollback 2 exception ") + e.toString() + "; sleeping 1 min";
- // Release the GlobalWrite lock while sleeping. We should always come here with a
- // GlobalWrite lock
- invariant(txn->lockState()->isW());
- Lock::TempRelease(txn->lockState());
+ // Release the GlobalWrite lock while sleeping. We should always come here with a
+ // GlobalWrite lock
+ invariant(txn->lockState()->isW());
+            Lock::TempRelease release(txn->lockState());
- sleepsecs(60);
- throw;
- }
+ sleepsecs(60);
+ throw;
}
+ }
- log() << "replSet rollback 3 fixup";
+ log() << "replSet rollback 3 fixup";
+ replCoord->incrementRollbackID();
+ try {
+ syncFixUp(txn, how, oplogreader, replCoord);
+ } catch (RSFatalException& e) {
+ error() << "exception during rollback: " << e.what();
+ fassertFailedNoTrace(18753);
+ return 2;
+ } catch (...) {
replCoord->incrementRollbackID();
- try {
- syncFixUp(txn, how, oplogreader, replCoord);
- }
- catch (RSFatalException& e) {
- error() << "exception during rollback: " << e.what();
- fassertFailedNoTrace(18753);
- return 2;
- }
- catch (...) {
- replCoord->incrementRollbackID();
-
- if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
- warning() << "Failed to transition into " <<
- MemberState(MemberState::RS_RECOVERING) << "; expected to be in state " <<
- MemberState(MemberState::RS_ROLLBACK) << "but found self in " <<
- replCoord->getMemberState();
- }
- throw;
- }
- replCoord->incrementRollbackID();
-
- // success - leave "ROLLBACK" state
- // can go to SECONDARY once minvalid is achieved
if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
- warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING) <<
- "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK) <<
- "but found self in " << replCoord->getMemberState();
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
+ << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
+ << "but found self in " << replCoord->getMemberState();
}
- return 0;
+ throw;
}
-} // namespace
-
- void syncRollback(OperationContext* txn,
- OpTime lastOpTimeApplied,
- OplogReader* oplogreader,
- ReplicationCoordinator* replCoord) {
- // check that we are at minvalid, otherwise we cannot rollback as we may be in an
- // inconsistent state
- {
- OpTime minvalid = getMinValid(txn);
- if( minvalid > lastOpTimeApplied ) {
- severe() << "replSet need to rollback, but in inconsistent state" << endl;
- log() << "minvalid: " << minvalid.toString() << " our last optime: "
- << lastOpTimeApplied.toString() << endl;
- fassertFailedNoTrace(18750);
- return;
- }
+ replCoord->incrementRollbackID();
+
+ // success - leave "ROLLBACK" state
+ // can go to SECONDARY once minvalid is achieved
+ if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING)
+ << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK)
+ << "but found self in " << replCoord->getMemberState();
+ }
+
+ return 0;
+}
+} // namespace
+
+void syncRollback(OperationContext* txn,
+ OpTime lastOpTimeApplied,
+ OplogReader* oplogreader,
+ ReplicationCoordinator* replCoord) {
+ // check that we are at minvalid, otherwise we cannot rollback as we may be in an
+ // inconsistent state
+ {
+ OpTime minvalid = getMinValid(txn);
+ if (minvalid > lastOpTimeApplied) {
+ severe() << "replSet need to rollback, but in inconsistent state" << endl;
+ log() << "minvalid: " << minvalid.toString()
+ << " our last optime: " << lastOpTimeApplied.toString() << endl;
+ fassertFailedNoTrace(18750);
+ return;
}
+ }
- log() << "beginning rollback" << rsLog;
+ log() << "beginning rollback" << rsLog;
- unsigned s = _syncRollback(txn, oplogreader, replCoord);
- if (s)
- sleepsecs(s);
-
- log() << "rollback finished" << rsLog;
- }
+ unsigned s = _syncRollback(txn, oplogreader, replCoord);
+ if (s)
+ sleepsecs(s);
+
+ log() << "rollback finished" << rsLog;
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
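
The core of syncRollbackFindCommonPoint above is a two-cursor walk over both oplogs, newest to oldest, comparing (ts, h) pairs and queueing every local-only entry for refetch. A minimal standalone sketch of that walk, assuming plain in-memory logs -- Entry, findCommonPoint, and the sample data below are illustrative, not MongoDB types:

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <vector>

    // Illustrative stand-in for an oplog entry: ts orders entries, h disambiguates
    // entries that share a timestamp, mirroring the ts/h checks in the real scan.
    struct Entry {
        int64_t ts;
        int64_t h;
    };

    // Both vectors are sorted newest-first. Entries counted in rolledBack are the
    // ones the real code hands to refetch() before truncating the local oplog.
    int64_t findCommonPoint(const std::vector<Entry>& ours, const std::vector<Entry>& theirs) {
        size_t i = 0, j = 0, rolledBack = 0;
        while (i < ours.size() && j < theirs.size()) {
            if (ours[i].ts == theirs[j].ts) {
                if (ours[i].h == theirs[j].h) {
                    std::cout << "common point at ts=" << ours[i].ts << ", " << rolledBack
                              << " local ops to roll back\n";
                    return ours[i].ts;
                }
                // same timestamp, different history: roll back ours, advance both
                ++rolledBack;
                ++i;
                ++j;
            } else if (theirs[j].ts > ours[i].ts) {
                ++j;  // their entry is newer; skip it
            } else {
                ++rolledBack;  // our entry is newer; it must be rolled back
                ++i;
            }
        }
        throw std::runtime_error("reached beginning of an oplog without a common point");
    }

    int main() {
        std::vector<Entry> ours = {{7, 70}, {6, 60}, {5, 50}, {4, 40}};
        std::vector<Entry> theirs = {{9, 91}, {8, 81}, {5, 50}, {4, 40}};
        findCommonPoint(ours, theirs);  // prints: common point at ts=5, 2 local ops to roll back
        return 0;
    }

The real scan additionally caps the walk at 30 minutes of divergence and aborts if either cursor is exhausted, as the RS100/RS101 error paths above show.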
diff --git a/src/mongo/db/repl/rs_rollback.h b/src/mongo/db/repl/rs_rollback.h
index 8c5644f816a..955ea5b1199 100644
--- a/src/mongo/db/repl/rs_rollback.h
+++ b/src/mongo/db/repl/rs_rollback.h
@@ -29,39 +29,38 @@
#pragma once
namespace mongo {
- class OperationContext;
- class OpTime;
+class OperationContext;
+class OpTime;
namespace repl {
- class OplogReader;
- class ReplicationCoordinator;
+class OplogReader;
+class ReplicationCoordinator;
- /**
- * Initiates the rollback process.
- * This function assumes the preconditions for undertaking rollback have already been met;
- * we have ops in our oplog that our sync source does not have, and we are not currently
- * PRIMARY.
- * The rollback procedure is:
- * - find the common point between this node and its sync source
- * - undo operations by fetching all documents affected, then replaying
- * the sync source's oplog until we reach the time in the oplog when we fetched the last
- * document.
- * This function can throw std::exception on failures.
- * This function runs a command on the sync source to detect if the sync source rolls back
- * while our rollback is in progress.
- *
- * @param txn Used to read and write from this node's databases
- * @param lastOpTimeWritten The last OpTime applied by the applier
- * @param oplogreader Must already be connected to a sync source. Used to fetch documents.
- * @param replCoord Used to track the rollback ID and to change the follower state
- *
- * Failures: some failure cases are fatal; others throw std::exception.
- */
-
- void syncRollback(OperationContext* txn,
- OpTime lastOpTimeWritten,
- OplogReader* oplogreader,
- ReplicationCoordinator* replCoord);
+/**
+ * Initiates the rollback process.
+ * This function assumes the preconditions for undertaking rollback have already been met;
+ * we have ops in our oplog that our sync source does not have, and we are not currently
+ * PRIMARY.
+ * The rollback procedure is:
+ * - find the common point between this node and its sync source
+ * - undo operations by fetching all documents affected, then replaying
+ * the sync source's oplog until we reach the time in the oplog when we fetched the last
+ * document.
+ * This function can throw std::exception on failures.
+ * This function runs a command on the sync source to detect if the sync source rolls back
+ * while our rollback is in progress.
+ *
+ * @param txn Used to read and write from this node's databases
+ * @param lastOpTimeWritten The last OpTime applied by the applier
+ * @param oplogreader Must already be connected to a sync source. Used to fetch documents.
+ * @param replCoord Used to track the rollback ID and to change the follower state
+ *
+ * Failures: some failure cases are fatal; others throw std::exception.
+ */
+void syncRollback(OperationContext* txn,
+ OpTime lastOpTimeWritten,
+ OplogReader* oplogreader,
+ ReplicationCoordinator* replCoord);
}
}
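
The refetch list behind this interface is a std::set keyed by (ns, _id) -- see the FixUpInfo comment in rs_rollback.cpp -- so a document touched by many rolled-back ops is fetched from the sync source only once. A small sketch of that dedup idea; Doc is an illustrative stand-in for DocID, which compares a namespace and a BSON _id rather than an integer:

    #include <iostream>
    #include <set>
    #include <string>

    // Ordered first by namespace, then by id, the same shape as
    // DocID::operator< in rs_rollback.cpp.
    struct Doc {
        std::string ns;
        long long id;
        bool operator<(const Doc& other) const {
            if (ns != other.ns)
                return ns < other.ns;
            return id < other.id;
        }
    };

    int main() {
        std::set<Doc> toRefetch;
        // Ten $inc ops against the same document collapse to a single refetch.
        for (int i = 0; i < 10; ++i)
            toRefetch.insert(Doc{"test.counters", 42});
        toRefetch.insert(Doc{"test.users", 7});
        std::cout << toRefetch.size() << "\n";  // 2
        return 0;
    }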
diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp
index 75154edcd20..9d8be3778ce 100644
--- a/src/mongo/db/repl/rs_sync.cpp
+++ b/src/mongo/db/repl/rs_sync.cpp
@@ -62,89 +62,85 @@
namespace mongo {
namespace repl {
- void runSyncThread() {
- Client::initThread("rsSync");
- cc().getAuthorizationSession()->grantInternalAuthorization();
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
-
- // Set initial indexPrefetch setting
- const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
- if (!prefetch.empty()) {
- BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
- if (prefetch == "none")
- prefetchConfig = BackgroundSync::PREFETCH_NONE;
- else if (prefetch == "_id_only")
- prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
- else if (prefetch == "all")
- prefetchConfig = BackgroundSync::PREFETCH_ALL;
- else {
- warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
- << "to \"all\"";
- }
- BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
+void runSyncThread() {
+ Client::initThread("rsSync");
+ cc().getAuthorizationSession()->grantInternalAuthorization();
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+
+ // Set initial indexPrefetch setting
+ const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
+ if (!prefetch.empty()) {
+ BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
+ if (prefetch == "none")
+ prefetchConfig = BackgroundSync::PREFETCH_NONE;
+ else if (prefetch == "_id_only")
+ prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
+ else if (prefetch == "all")
+ prefetchConfig = BackgroundSync::PREFETCH_ALL;
+ else {
+ warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
+ << "to \"all\"";
}
+ BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
+ }
- while (!inShutdown()) {
- // After a reconfig, we may not be in the replica set anymore, so
- // check that we are in the set (and not an arbiter) before
- // trying to sync with other replicas.
- // TODO(spencer): Use a condition variable to await loading a config
- if (replCoord->getMemberState().startup()) {
- log() << "replSet warning did not receive a valid config yet, sleeping 5 seconds ";
- sleepsecs(5);
- continue;
- }
+ while (!inShutdown()) {
+ // After a reconfig, we may not be in the replica set anymore, so
+ // check that we are in the set (and not an arbiter) before
+ // trying to sync with other replicas.
+ // TODO(spencer): Use a condition variable to await loading a config
+ if (replCoord->getMemberState().startup()) {
+ log() << "replSet warning did not receive a valid config yet, sleeping 5 seconds ";
+ sleepsecs(5);
+ continue;
+ }
- const MemberState memberState = replCoord->getMemberState();
+ const MemberState memberState = replCoord->getMemberState();
- // An arbiter can never transition to any other state, and doesn't replicate, ever
- if (memberState.arbiter()) {
- break;
- }
+ // An arbiter can never transition to any other state, and doesn't replicate, ever
+ if (memberState.arbiter()) {
+ break;
+ }
+
+ // If we are removed then we don't belong to the set anymore
+ if (memberState.removed()) {
+ sleepsecs(5);
+ continue;
+ }
- // If we are removed then we don't belong to the set anymore
- if (memberState.removed()) {
- sleepsecs(5);
+ try {
+ if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
+ sleepsecs(1);
continue;
}
- try {
-
- if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
- sleepsecs(1);
- continue;
- }
-
- bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
- // Check criteria for doing an initial sync:
- // 1. If the oplog is empty, do an initial sync
- // 2. If minValid has _initialSyncFlag set, do an initial sync
- // 3. If initialSyncRequested is true
- if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
- getInitialSyncFlag() ||
- initialSyncRequested) {
- syncDoInitialSync();
- continue; // start from top again in case sync failed.
- }
- if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
- continue;
- }
-
- /* we have some data. continue tailing. */
- SyncTail tail(BackgroundSync::get(), multiSyncApply);
- tail.oplogApplication();
+ bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
+ // Check criteria for doing an initial sync:
+ // 1. If the oplog is empty, do an initial sync
+ // 2. If minValid has _initialSyncFlag set, do an initial sync
+ // 3. If initialSyncRequested is true
+ if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
+ getInitialSyncFlag() || initialSyncRequested) {
+ syncDoInitialSync();
+ continue; // start from top again in case sync failed.
}
- catch(const DBException& e) {
- log() << "Received exception while syncing: " << e.toString();
- sleepsecs(10);
- }
- catch(const std::exception& e) {
- log() << "Received exception while syncing: " << e.what();
- sleepsecs(10);
+ if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
+ continue;
}
+
+ /* we have some data. continue tailing. */
+ SyncTail tail(BackgroundSync::get(), multiSyncApply);
+ tail.oplogApplication();
+ } catch (const DBException& e) {
+ log() << "Received exception while syncing: " << e.toString();
+ sleepsecs(10);
+ } catch (const std::exception& e) {
+ log() << "Received exception while syncing: " << e.what();
+ sleepsecs(10);
}
- cc().shutdown();
}
+ cc().shutdown();
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
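
Stripped of the sleeps and side effects, each pass of runSyncThread above is a fixed decision ladder. A compact restatement of that ladder as a pure function -- NodeState, Action, and decide are our names for illustration, not MongoDB API:

    #include <iostream>

    enum class Action { WaitForConfig, Exit, WaitRemoved, YieldPrimary, InitialSync, TailOplog };

    struct NodeState {
        bool startup;             // no valid config received yet
        bool arbiter;
        bool removed;
        bool primaryNotDraining;  // primary() && !isWaitingForApplierToDrain()
        bool needsInitialSync;    // empty oplog, minValid initial-sync flag, or explicit request
    };

    Action decide(const NodeState& s) {
        if (s.startup)
            return Action::WaitForConfig;  // sleep 5s and retry
        if (s.arbiter)
            return Action::Exit;  // arbiters never replicate
        if (s.removed)
            return Action::WaitRemoved;  // sleep 5s and retry
        if (s.primaryNotDraining)
            return Action::YieldPrimary;  // sleep 1s and retry
        if (s.needsInitialSync)
            return Action::InitialSync;
        return Action::TailOplog;  // become RECOVERING, then tail with SyncTail
    }

    int main() {
        NodeState fresh{false, false, false, false, true};
        std::cout << (decide(fresh) == Action::InitialSync) << "\n";  // 1
        NodeState steady{false, false, false, false, false};
        std::cout << (decide(steady) == Action::TailOplog) << "\n";  // 1
        return 0;
    }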
diff --git a/src/mongo/db/repl/rs_sync.h b/src/mongo/db/repl/rs_sync.h
index a031f70c611..ad5d9be88a5 100644
--- a/src/mongo/db/repl/rs_sync.h
+++ b/src/mongo/db/repl/rs_sync.h
@@ -41,8 +41,8 @@
namespace mongo {
namespace repl {
- // Body of the thread that will do the background sync.
- void runSyncThread();
+// Body of the thread that will do the background sync.
+void runSyncThread();
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rslog.cpp b/src/mongo/db/repl/rslog.cpp
index 9a02f64ce27..f48d6399847 100644
--- a/src/mongo/db/repl/rslog.cpp
+++ b/src/mongo/db/repl/rslog.cpp
@@ -36,12 +36,12 @@
namespace mongo {
namespace repl {
- static RamLog* _rsLog = RamLog::get("rs");
- logger::Tee* rsLog = _rsLog;
+static RamLog* _rsLog = RamLog::get("rs");
+logger::Tee* rsLog = _rsLog;
- void fillRsLog(std::stringstream* s) {
- _rsLog->toHTML(*s);
- }
+void fillRsLog(std::stringstream* s) {
+ _rsLog->toHTML(*s);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/rslog.h b/src/mongo/db/repl/rslog.h
index 7a6624c876d..5b0b694d7bf 100644
--- a/src/mongo/db/repl/rslog.h
+++ b/src/mongo/db/repl/rslog.h
@@ -33,15 +33,15 @@
namespace mongo {
namespace logger {
- class Tee;
-} // namespace logger
+class Tee;
+} // namespace logger
namespace repl {
- void fillRsLog(std::stringstream* s);
+void fillRsLog(std::stringstream* s);
- // ramlog used for replSet actions
- extern logger::Tee* rsLog;
+// ramlog used for replSet actions
+extern logger::Tee* rsLog;
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/scatter_gather_algorithm.cpp b/src/mongo/db/repl/scatter_gather_algorithm.cpp
index 5e65a8f2df1..78fc22fa38f 100644
--- a/src/mongo/db/repl/scatter_gather_algorithm.cpp
+++ b/src/mongo/db/repl/scatter_gather_algorithm.cpp
@@ -33,7 +33,7 @@
namespace mongo {
namespace repl {
- ScatterGatherAlgorithm::~ScatterGatherAlgorithm() {}
+ScatterGatherAlgorithm::~ScatterGatherAlgorithm() {}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/scatter_gather_algorithm.h b/src/mongo/db/repl/scatter_gather_algorithm.h
index 7622f0e385e..22d05c1f3a4 100644
--- a/src/mongo/db/repl/scatter_gather_algorithm.h
+++ b/src/mongo/db/repl/scatter_gather_algorithm.h
@@ -34,47 +34,47 @@
namespace mongo {
- template <typename T> class StatusWith;
+template <typename T>
+class StatusWith;
namespace repl {
+/**
+ * Interface for a specialization of a scatter-gather algorithm that sends
+ * requests to a set of targets, and then processes responses until it has
+ * seen enough.
+ *
+ * To use, call getRequests() to get a vector of request objects describing network operations.
+ * Start performing the network operations in any order, and then, until
+ * hasReceivedSufficientResponses() returns true, call processResponse for each response as it
+ * arrives. Once hasReceivedSufficientResponses() returns true, you may cancel outstanding network
+ * operations, and must stop calling processResponse. Implementations of this interface may
+ * assume that processResponse() is never called after hasReceivedSufficientResponses() returns
+ * true.
+ */
+class ScatterGatherAlgorithm {
+public:
/**
- * Interface for a specialization of a scatter-gather algorithm that sends
- * requests to a set of targets, and then processes responses until it has
- * seen enough.
- *
- * To use, call getRequests() to get a vector of request objects describing network operations.
- * Start performing the network operations in any order, and then, until
- * hasReceivedSufficientResponses() returns true, call processResponse for each response as it
- * arrives. Once hasReceivedSufficientResponses() you may cancel outstanding network
- * operations, and must stop calling processResponse. Implementations of this interface may
- * assume that processResponse() is never called after hasReceivedSufficientResponses() returns
- * true.
+ * Returns the list of requests that should be sent.
*/
- class ScatterGatherAlgorithm {
- public:
- /**
- * Returns the list of requests that should be sent.
- */
- virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const = 0;
+ virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const = 0;
- /**
- * Method to call once for each received response.
- */
- virtual void processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) = 0;
+ /**
+ * Method to call once for each received response.
+ */
+ virtual void processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response) = 0;
- /**
- * Returns true if no more calls to processResponse are needed to consider the
- * algorithm complete. Once this method returns true, one should no longer
- * call processResponse.
- */
- virtual bool hasReceivedSufficientResponses() const = 0;
+ /**
+ * Returns true if no more calls to processResponse are needed to consider the
+ * algorithm complete. Once this method returns true, one should no longer
+ * call processResponse.
+ */
+ virtual bool hasReceivedSufficientResponses() const = 0;
- protected:
- virtual ~ScatterGatherAlgorithm(); // Shouldn't actually be virtual.
- };
+protected:
+ virtual ~ScatterGatherAlgorithm(); // Shouldn't actually be virtual.
+};
} // namespace repl
} // namespace mongo
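
Annotation: the interface comment above pins down a three-step protocol: getRequests() produces the fan-out, processResponse() consumes replies, and hasReceivedSufficientResponses() tells the caller when to stop. As a sketch only (this class is not part of the patch; it mirrors the test algorithm further down in this diff), a quorum-counting implementation could look like:

    // Illustrative subclass; all names other than the interface are invented.
    class QuorumAlgorithm : public ScatterGatherAlgorithm {
    public:
        QuorumAlgorithm(const std::vector<ReplicationExecutor::RemoteCommandRequest>& requests,
                        size_t quorum)
            : _requests(requests), _quorum(quorum), _responses(0) {}

        virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const {
            return _requests;  // the fan-out: one request per target
        }

        virtual void processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
                                     const ResponseStatus& response) {
            ++_responses;  // a real algorithm would inspect the response here
        }

        virtual bool hasReceivedSufficientResponses() const {
            return _responses >= _quorum;  // after this flips, no more processResponse calls
        }

    private:
        std::vector<ReplicationExecutor::RemoteCommandRequest> _requests;
        size_t _quorum;
        size_t _responses;
    };
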
diff --git a/src/mongo/db/repl/scatter_gather_runner.cpp b/src/mongo/db/repl/scatter_gather_runner.cpp
index ce2d8a7dbb9..15934106f59 100644
--- a/src/mongo/db/repl/scatter_gather_runner.cpp
+++ b/src/mongo/db/repl/scatter_gather_runner.cpp
@@ -41,121 +41,105 @@
namespace mongo {
namespace repl {
- ScatterGatherRunner::ScatterGatherRunner(ScatterGatherAlgorithm* algorithm) :
- _algorithm(algorithm),
- _started(false) {
+ScatterGatherRunner::ScatterGatherRunner(ScatterGatherAlgorithm* algorithm)
+ : _algorithm(algorithm), _started(false) {}
+
+ScatterGatherRunner::~ScatterGatherRunner() {}
+
+static void startTrampoline(const ReplicationExecutor::CallbackData& cbData,
+ ScatterGatherRunner* runner,
+ StatusWith<ReplicationExecutor::EventHandle>* result) {
+ *result = runner->start(cbData.executor);
+}
+
+Status ScatterGatherRunner::run(ReplicationExecutor* executor) {
+ StatusWith<ReplicationExecutor::EventHandle> finishEvh(ErrorCodes::InternalError, "Not set");
+ StatusWith<ReplicationExecutor::CallbackHandle> startCBH = executor->scheduleWork(
+ stdx::bind(startTrampoline, stdx::placeholders::_1, this, &finishEvh));
+ if (!startCBH.isOK()) {
+ return startCBH.getStatus();
}
-
- ScatterGatherRunner::~ScatterGatherRunner() {
+ executor->wait(startCBH.getValue());
+ if (!finishEvh.isOK()) {
+ return finishEvh.getStatus();
}
-
- static void startTrampoline(const ReplicationExecutor::CallbackData& cbData,
- ScatterGatherRunner* runner,
- StatusWith<ReplicationExecutor::EventHandle>* result) {
-
- *result = runner->start(cbData.executor);
- }
-
- Status ScatterGatherRunner::run(ReplicationExecutor* executor) {
- StatusWith<ReplicationExecutor::EventHandle> finishEvh(ErrorCodes::InternalError,
- "Not set");
- StatusWith<ReplicationExecutor::CallbackHandle> startCBH = executor->scheduleWork(
- stdx::bind(startTrampoline, stdx::placeholders::_1, this, &finishEvh));
- if (!startCBH.isOK()) {
- return startCBH.getStatus();
- }
- executor->wait(startCBH.getValue());
- if (!finishEvh.isOK()) {
- return finishEvh.getStatus();
- }
- executor->waitForEvent(finishEvh.getValue());
- return Status::OK();
+ executor->waitForEvent(finishEvh.getValue());
+ return Status::OK();
+}
+
+StatusWith<ReplicationExecutor::EventHandle> ScatterGatherRunner::start(
+ ReplicationExecutor* executor, const stdx::function<void()>& onCompletion) {
+ invariant(!_started);
+ _started = true;
+ _actualResponses = 0;
+ _onCompletion = onCompletion;
+ StatusWith<ReplicationExecutor::EventHandle> evh = executor->makeEvent();
+ if (!evh.isOK()) {
+ return evh;
}
-
- StatusWith<ReplicationExecutor::EventHandle> ScatterGatherRunner::start(
- ReplicationExecutor* executor,
- const stdx::function<void ()>& onCompletion) {
-
- invariant(!_started);
- _started = true;
- _actualResponses = 0;
- _onCompletion = onCompletion;
- StatusWith<ReplicationExecutor::EventHandle> evh = executor->makeEvent();
- if (!evh.isOK()) {
- return evh;
- }
- _sufficientResponsesReceived = evh.getValue();
- ScopeGuard earlyReturnGuard = MakeGuard(
- &ScatterGatherRunner::_signalSufficientResponsesReceived,
- this,
- executor);
-
- const ReplicationExecutor::RemoteCommandCallbackFn cb = stdx::bind(
- &ScatterGatherRunner::_processResponse,
- stdx::placeholders::_1,
- this);
-
- std::vector<ReplicationExecutor::RemoteCommandRequest> requests = _algorithm->getRequests();
- for (size_t i = 0; i < requests.size(); ++i) {
- const StatusWith<ReplicationExecutor::CallbackHandle> cbh =
- executor->scheduleRemoteCommand(requests[i], cb);
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return StatusWith<ReplicationExecutor::EventHandle>(cbh.getStatus());
- }
- fassert(18743, cbh.getStatus());
- _callbacks.push_back(cbh.getValue());
+ _sufficientResponsesReceived = evh.getValue();
+ ScopeGuard earlyReturnGuard =
+ MakeGuard(&ScatterGatherRunner::_signalSufficientResponsesReceived, this, executor);
+
+ const ReplicationExecutor::RemoteCommandCallbackFn cb =
+ stdx::bind(&ScatterGatherRunner::_processResponse, stdx::placeholders::_1, this);
+
+ std::vector<ReplicationExecutor::RemoteCommandRequest> requests = _algorithm->getRequests();
+ for (size_t i = 0; i < requests.size(); ++i) {
+ const StatusWith<ReplicationExecutor::CallbackHandle> cbh =
+ executor->scheduleRemoteCommand(requests[i], cb);
+ if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
+ return StatusWith<ReplicationExecutor::EventHandle>(cbh.getStatus());
}
-
- if (_callbacks.empty() || _algorithm->hasReceivedSufficientResponses()) {
- invariant(_algorithm->hasReceivedSufficientResponses());
- _signalSufficientResponsesReceived(executor);
- }
-
- earlyReturnGuard.Dismiss();
- return evh;
+ fassert(18743, cbh.getStatus());
+ _callbacks.push_back(cbh.getValue());
}
- void ScatterGatherRunner::cancel(ReplicationExecutor* executor) {
- invariant(_started);
+ if (_callbacks.empty() || _algorithm->hasReceivedSufficientResponses()) {
+ invariant(_algorithm->hasReceivedSufficientResponses());
_signalSufficientResponsesReceived(executor);
}
- void ScatterGatherRunner::_processResponse(
- const ReplicationExecutor::RemoteCommandCallbackData& cbData,
- ScatterGatherRunner* runner) {
-
- // It is possible that the ScatterGatherRunner has already gone out of scope, if the
- // response indicates the callback was canceled. In that case, do not access any members
- // of "runner" and return immediately.
- if (cbData.response.getStatus() == ErrorCodes::CallbackCanceled) {
- return;
- }
-
- ++runner->_actualResponses;
- runner->_algorithm->processResponse(cbData.request, cbData.response);
- if (runner->_algorithm->hasReceivedSufficientResponses()) {
- runner->_signalSufficientResponsesReceived(cbData.executor);
- }
- else {
- invariant(runner->_actualResponses < runner->_callbacks.size());
- }
+ earlyReturnGuard.Dismiss();
+ return evh;
+}
+
+void ScatterGatherRunner::cancel(ReplicationExecutor* executor) {
+ invariant(_started);
+ _signalSufficientResponsesReceived(executor);
+}
+
+void ScatterGatherRunner::_processResponse(
+ const ReplicationExecutor::RemoteCommandCallbackData& cbData, ScatterGatherRunner* runner) {
+ // It is possible that the ScatterGatherRunner has already gone out of scope, if the
+ // response indicates the callback was canceled. In that case, do not access any members
+ // of "runner" and return immediately.
+ if (cbData.response.getStatus() == ErrorCodes::CallbackCanceled) {
+ return;
}
- void ScatterGatherRunner::_signalSufficientResponsesReceived(ReplicationExecutor* executor) {
- if (_sufficientResponsesReceived.isValid()) {
- std::for_each(_callbacks.begin(),
- _callbacks.end(),
- stdx::bind(&ReplicationExecutor::cancel,
- executor,
- stdx::placeholders::_1));
- const ReplicationExecutor::EventHandle h = _sufficientResponsesReceived;
- _sufficientResponsesReceived = ReplicationExecutor::EventHandle();
- if (_onCompletion) {
- _onCompletion();
- }
- executor->signalEvent(h);
+ ++runner->_actualResponses;
+ runner->_algorithm->processResponse(cbData.request, cbData.response);
+ if (runner->_algorithm->hasReceivedSufficientResponses()) {
+ runner->_signalSufficientResponsesReceived(cbData.executor);
+ } else {
+ invariant(runner->_actualResponses < runner->_callbacks.size());
+ }
+}
+
+void ScatterGatherRunner::_signalSufficientResponsesReceived(ReplicationExecutor* executor) {
+ if (_sufficientResponsesReceived.isValid()) {
+ std::for_each(_callbacks.begin(),
+ _callbacks.end(),
+ stdx::bind(&ReplicationExecutor::cancel, executor, stdx::placeholders::_1));
+ const ReplicationExecutor::EventHandle h = _sufficientResponsesReceived;
+ _sufficientResponsesReceived = ReplicationExecutor::EventHandle();
+ if (_onCompletion) {
+ _onCompletion();
}
+ executor->signalEvent(h);
}
+}
} // namespace repl
} // namespace mongo
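
Annotation: two details of the implementation above are easy to miss. run() cannot invoke start() directly, because start() must execute in executor context, so it bounces in through startTrampoline via scheduleWork() and then blocks; and start() arms a ScopeGuard so that any early return still signals the finish event, dismissing the guard only on the success path. The trampoline idiom in isolation (a sketch; names other than the executor API are invented):

    // Run a function inside the executor from an outside thread and wait for it.
    static void trampoline(const ReplicationExecutor::CallbackData& cbData, Status* out) {
        *out = doWorkInExecutorContext(cbData.executor);  // hypothetical work function
    }

    Status callInExecutor(ReplicationExecutor* executor) {
        Status result(ErrorCodes::InternalError, "not set");
        StatusWith<ReplicationExecutor::CallbackHandle> cbh = executor->scheduleWork(
            stdx::bind(trampoline, stdx::placeholders::_1, &result));
        if (!cbh.isOK())
            return cbh.getStatus();      // executor is already shutting down
        executor->wait(cbh.getValue());  // block until the trampoline has run
        return result;
    }
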
diff --git a/src/mongo/db/repl/scatter_gather_runner.h b/src/mongo/db/repl/scatter_gather_runner.h
index 63921a91596..551db7a2e7a 100644
--- a/src/mongo/db/repl/scatter_gather_runner.h
+++ b/src/mongo/db/repl/scatter_gather_runner.h
@@ -36,88 +36,90 @@
namespace mongo {
- template <typename T> class StatusWith;
+template <typename T>
+class StatusWith;
namespace repl {
- class ScatterGatherAlgorithm;
+class ScatterGatherAlgorithm;
+
+/**
+ * Implementation of a scatter-gather behavior using a ReplicationExecutor.
+ */
+class ScatterGatherRunner {
+ MONGO_DISALLOW_COPYING(ScatterGatherRunner);
+
+public:
+ /**
+ * Constructs a new runner whose underlying algorithm is "algorithm".
+ *
+ * "algorithm" must remain in scope until the runner's destructor completes.
+ */
+ explicit ScatterGatherRunner(ScatterGatherAlgorithm* algorithm);
+
+ ~ScatterGatherRunner();
+
+ /**
+ * Runs the scatter-gather process using "executor", and blocks until it completes.
+ *
+ * Must _not_ be run from inside the executor context.
+ *
+ * Returns ErrorCodes::ShutdownInProgress if the executor enters or is already in
+ * the shutdown state before run() can schedule execution of the scatter-gather
+ * in the executor. Note that if the executor is shut down after the algorithm
+ * is scheduled but before it completes, this method will return Status::OK(),
+ * just as it does when it runs successfully to completion.
+ */
+ Status run(ReplicationExecutor* executor);
+
+ /**
+ * Starts executing the scatter-gather process using "executor".
+ *
+ * On success, returns an event handle that will be signaled when the runner has
+ * finished executing the scatter-gather process. After that event has been
+ * signaled, it is safe for the caller to examine any state on "algorithm".
+ *
+ * This method must be called inside the executor context.
+ *
+ * onCompletion is an optional callback that will be executed in executor context
+ * immediately prior to signaling the event handle returned here. It must never
+ * throw exceptions. It may examine the state of the algorithm object.
+ *
+ * NOTE: If the executor starts to shut down before onCompletion executes, onCompletion may
+ * never execute, even though the returned event will eventually be signaled.
+ */
+ StatusWith<ReplicationExecutor::EventHandle> start(
+ ReplicationExecutor* executor,
+ const stdx::function<void()>& onCompletion = stdx::function<void()>());
+
+ /**
+ * Informs the runner to cancel further processing. The "executor" argument
+ * must point to the same executor passed to "start()".
+ *
+ * Like start, this method must be called from within the executor context.
+ */
+ void cancel(ReplicationExecutor* executor);
+
+private:
+ /**
+ * Callback invoked once for every response from the network.
+ */
+ static void _processResponse(const ReplicationExecutor::RemoteCommandCallbackData& cbData,
+ ScatterGatherRunner* runner);
/**
- * Implementation of a scatter-gather behavior using a ReplicationExecutor.
+ * Method that performs all actions required when _algorithm indicates a sufficient
+     * number of responses has been received.
*/
- class ScatterGatherRunner {
- MONGO_DISALLOW_COPYING(ScatterGatherRunner);
- public:
- /**
- * Constructs a new runner whose underlying algorithm is "algorithm".
- *
- * "algorithm" must remain in scope until the runner's destructor completes.
- */
- explicit ScatterGatherRunner(ScatterGatherAlgorithm* algorithm);
-
- ~ScatterGatherRunner();
-
- /**
- * Runs the scatter-gather process using "executor", and blocks until it completes.
- *
- * Must _not_ be run from inside the executor context.
- *
- * Returns ErrorCodes::ShutdownInProgress if the executor enters or is already in
- * the shutdown state before run() can schedule execution of the scatter-gather
- * in the executor. Note that if the executor is shut down after the algorithm
- * is scheduled but before it completes, this method will return Status::OK(),
- * just as it does when it runs successfully to completion.
- */
- Status run(ReplicationExecutor* executor);
-
- /**
- * Starts executing the scatter-gather process using "executor".
- *
- * On success, returns an event handle that will be signaled when the runner has
- * finished executing the scatter-gather process. After that event has been
- * signaled, it is safe for the caller to examine any state on "algorithm".
- *
- * This method must be called inside the executor context.
- *
- * onCompletion is an optional callback that will be executed in executor context
- * immediately prior to signaling the event handle returned here. It must never
- * throw exceptions. It may examine the state of the algorithm object.
- *
- * NOTE: If the executor starts to shut down before onCompletion executes, onCompletion may
- * never execute, even though the returned event will eventually be signaled.
- */
- StatusWith<ReplicationExecutor::EventHandle> start(
- ReplicationExecutor* executor,
- const stdx::function<void ()>& onCompletion = stdx::function<void ()>());
-
- /**
- * Informs the runner to cancel further processing. The "executor" argument
- * must point to the same executor passed to "start()".
- *
- * Like start, this method must be called from within the executor context.
- */
- void cancel(ReplicationExecutor* executor);
-
- private:
- /**
- * Callback invoked once for every response from the network.
- */
- static void _processResponse(const ReplicationExecutor::RemoteCommandCallbackData& cbData,
- ScatterGatherRunner* runner);
-
- /**
- * Method that performs all actions required when _algorithm indicates a sufficient
- * number of respones have been received.
- */
- void _signalSufficientResponsesReceived(ReplicationExecutor* executor);
-
- ScatterGatherAlgorithm* _algorithm;
- stdx::function<void ()> _onCompletion;
- ReplicationExecutor::EventHandle _sufficientResponsesReceived;
- std::vector<ReplicationExecutor::CallbackHandle> _callbacks;
- size_t _actualResponses;
- bool _started;
- };
+ void _signalSufficientResponsesReceived(ReplicationExecutor* executor);
+
+ ScatterGatherAlgorithm* _algorithm;
+ stdx::function<void()> _onCompletion;
+ ReplicationExecutor::EventHandle _sufficientResponsesReceived;
+ std::vector<ReplicationExecutor::CallbackHandle> _callbacks;
+ size_t _actualResponses;
+ bool _started;
+};
} // namespace repl
} // namespace mongo
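
Annotation: putting the contract documented above together, the blocking entry point is used roughly as follows (a sketch; "algorithm" and "executor" are assumed to exist, and error details are elided):

    // Synchronous use, from a thread that is NOT running inside the executor.
    ScatterGatherRunner runner(&algorithm);
    Status status = runner.run(executor);  // blocks until sufficient responses, or shutdown
    if (status == ErrorCodes::ShutdownInProgress) {
        // the executor shut down before the scatter-gather could even be scheduled
    } else if (status.isOK()) {
        // per the contract above, it is now safe to examine state on "algorithm"
    }
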
diff --git a/src/mongo/db/repl/scatter_gather_test.cpp b/src/mongo/db/repl/scatter_gather_test.cpp
index 270aa53a499..303ce841211 100644
--- a/src/mongo/db/repl/scatter_gather_test.cpp
+++ b/src/mongo/db/repl/scatter_gather_test.cpp
@@ -41,305 +41,292 @@ namespace mongo {
namespace repl {
namespace {
- /**
- * Algorithm for testing the ScatterGatherRunner, which will finish running when finish() is
- * called, or upon receiving responses from two nodes. Creates a three requests algorithm
- * simulating running an algorithm against three other nodes.
- */
- class ScatterGatherTestAlgorithm : public ScatterGatherAlgorithm {
- public:
- ScatterGatherTestAlgorithm(int64_t maxResponses = 2) :
- _done(false),
- _numResponses(0),
- _maxResponses(maxResponses) {}
-
- virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const {
- std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
- for (int i = 0; i < 3; i++) {
- requests.push_back(ReplicationExecutor::RemoteCommandRequest(
- HostAndPort("hostname", i),
- "admin",
- BSONObj(),
- Milliseconds(30*1000)));
- }
- return requests;
- }
-
- virtual void processResponse(
- const ReplicationExecutor::RemoteCommandRequest& request,
- const ResponseStatus& response) {
- _numResponses++;
- }
-
- void finish() {
- _done = true;
- }
-
- virtual bool hasReceivedSufficientResponses() const {
- if (_done) {
- return _done;
- }
-
- return _numResponses >= _maxResponses;
- }
-
- int getResponseCount() {
- return _numResponses;
+/**
+ * Algorithm for testing the ScatterGatherRunner, which finishes running when finish() is
+ * called, or upon receiving responses from two nodes. Creates three requests, simulating
+ * a run against three other nodes.
+ */
+class ScatterGatherTestAlgorithm : public ScatterGatherAlgorithm {
+public:
+ ScatterGatherTestAlgorithm(int64_t maxResponses = 2)
+ : _done(false), _numResponses(0), _maxResponses(maxResponses) {}
+
+ virtual std::vector<ReplicationExecutor::RemoteCommandRequest> getRequests() const {
+ std::vector<ReplicationExecutor::RemoteCommandRequest> requests;
+ for (int i = 0; i < 3; i++) {
+ requests.push_back(ReplicationExecutor::RemoteCommandRequest(
+ HostAndPort("hostname", i), "admin", BSONObj(), Milliseconds(30 * 1000)));
}
-
- private:
-
- bool _done;
- int64_t _numResponses;
- int64_t _maxResponses;
- };
-
- /**
- * ScatterGatherTest base class which sets up the ReplicationExecutor and NetworkInterfaceMock.
- */
- class ScatterGatherTest : public mongo::unittest::Test {
- protected:
-
- NetworkInterfaceMock* getNet() { return _net; }
- ReplicationExecutor* getExecutor() { return _executor.get(); }
-
- int64_t countLogLinesContaining(const std::string& needle);
- private:
-
- void setUp();
- void tearDown();
-
- // owned by _executor
- NetworkInterfaceMock* _net;
- boost::scoped_ptr<ReplicationExecutor> _executor;
- boost::scoped_ptr<boost::thread> _executorThread;
- };
-
- void ScatterGatherTest::setUp() {
- _net = new NetworkInterfaceMock;
- _executor.reset(new ReplicationExecutor(_net, 1 /* prng seed */));
- _executorThread.reset(new boost::thread(stdx::bind(&ReplicationExecutor::run,
- _executor.get())));
+ return requests;
}
- void ScatterGatherTest::tearDown() {
- _executor->shutdown();
- _executorThread->join();
+ virtual void processResponse(const ReplicationExecutor::RemoteCommandRequest& request,
+ const ResponseStatus& response) {
+ _numResponses++;
}
+ void finish() {
+ _done = true;
+ }
- // Used to run a ScatterGatherRunner in a separate thread, to avoid blocking test execution.
- class ScatterGatherRunnerRunner {
- public:
-
- ScatterGatherRunnerRunner(ScatterGatherRunner* sgr, ReplicationExecutor* executor) :
- _sgr(sgr),
- _executor(executor),
- _result(Status(ErrorCodes::BadValue, "failed to set status")) {}
-
- // Could block if _sgr has not finished
- Status getResult() {
- _thread->join();
- return _result;
- }
-
- void run() {
- _thread.reset(new boost::thread(stdx::bind(&ScatterGatherRunnerRunner::_run,
- this,
- _executor)));
- }
-
- private:
-
- void _run(ReplicationExecutor* executor) {
- _result = _sgr->run(_executor);
+ virtual bool hasReceivedSufficientResponses() const {
+ if (_done) {
+ return _done;
}
- ScatterGatherRunner* _sgr;
- ReplicationExecutor* _executor;
- Status _result;
- boost::scoped_ptr<boost::thread> _thread;
- };
-
- // Simple onCompletion function which will toggle a bool, so that we can check the logs to
- // ensure the onCompletion function ran when expected.
- void onCompletionTestFunction(bool* ran) {
- *ran = true;
+ return _numResponses >= _maxResponses;
}
- // Confirm that running via start() will finish and run the onComplete function once sufficient
- // responses have been received.
- // Confirm that deleting both the ScatterGatherTestAlgorithm and ScatterGatherRunner while
- // scheduled callbacks still exist will not be unsafe (ASAN builder) after the algorithm has
- // completed.
- TEST_F(ScatterGatherTest, DeleteAlgorithmAfterItHasCompleted) {
- ScatterGatherTestAlgorithm* sga = new ScatterGatherTestAlgorithm();
- ScatterGatherRunner* sgr = new ScatterGatherRunner(sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr->start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- ASSERT_OK(status.getStatus());
- ASSERT_FALSE(ranCompletion);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now()+2000,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now()+2000,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now()+5000,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- net->runUntil(net->now()+2000);
- ASSERT_TRUE(ranCompletion);
-
- delete sga;
- delete sgr;
-
- net->runReadyNetworkOperations();
-
- net->exitNetwork();
+ int getResponseCount() {
+ return _numResponses;
}
- // Confirm that shutting the ReplicationExecutor down before calling run() will cause run()
- // to return ErrorCodes::ShutdownInProgress.
- TEST_F(ScatterGatherTest, ShutdownExecutorBeforeRun) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- getExecutor()->shutdown();
- sga.finish();
- Status status = sgr.run(getExecutor());
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status);
- }
+private:
+ bool _done;
+ int64_t _numResponses;
+ int64_t _maxResponses;
+};
- // Confirm that shutting the ReplicationExecutor down after calling run(), but before run()
- // finishes will cause run() to return Status::OK().
- TEST_F(ScatterGatherTest, ShutdownExecutorAfterRun) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
- sgrr.run();
- // need to wait for the scatter-gather to be scheduled in the executor
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->blackHole(noi);
- net->exitNetwork();
- getExecutor()->shutdown();
- Status status = sgrr.getResult();
- ASSERT_OK(status);
+/**
+ * ScatterGatherTest base class which sets up the ReplicationExecutor and NetworkInterfaceMock.
+ */
+class ScatterGatherTest : public mongo::unittest::Test {
+protected:
+ NetworkInterfaceMock* getNet() {
+ return _net;
}
-
- // Confirm that shutting the ReplicationExecutor down before calling start() will cause start()
- // to return ErrorCodes::ShutdownInProgress and should not run onCompletion().
- TEST_F(ScatterGatherTest, ShutdownExecutorBeforeStart) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- getExecutor()->shutdown();
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- sga.finish();
- ASSERT_FALSE(ranCompletion);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status.getStatus());
+ ReplicationExecutor* getExecutor() {
+ return _executor.get();
}
- // Confirm that shutting the ReplicationExecutor down after calling start() will cause start()
- // to return Status::OK and should not run onCompletion().
- TEST_F(ScatterGatherTest, ShutdownExecutorAfterStart) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- getExecutor()->shutdown();
- sga.finish();
- ASSERT_FALSE(ranCompletion);
- ASSERT_OK(status.getStatus());
+ int64_t countLogLinesContaining(const std::string& needle);
+
+private:
+ void setUp();
+ void tearDown();
+
+ // owned by _executor
+ NetworkInterfaceMock* _net;
+ boost::scoped_ptr<ReplicationExecutor> _executor;
+ boost::scoped_ptr<boost::thread> _executorThread;
+};
+
+void ScatterGatherTest::setUp() {
+ _net = new NetworkInterfaceMock;
+ _executor.reset(new ReplicationExecutor(_net, 1 /* prng seed */));
+ _executorThread.reset(
+ new boost::thread(stdx::bind(&ReplicationExecutor::run, _executor.get())));
+}
+
+void ScatterGatherTest::tearDown() {
+ _executor->shutdown();
+ _executorThread->join();
+}
+
+
+// Used to run a ScatterGatherRunner in a separate thread, to avoid blocking test execution.
+class ScatterGatherRunnerRunner {
+public:
+ ScatterGatherRunnerRunner(ScatterGatherRunner* sgr, ReplicationExecutor* executor)
+ : _sgr(sgr),
+ _executor(executor),
+ _result(Status(ErrorCodes::BadValue, "failed to set status")) {}
+
+ // Could block if _sgr has not finished
+ Status getResult() {
+ _thread->join();
+ return _result;
}
- // Confirm that responses are not processed once sufficient responses have been received.
- TEST_F(ScatterGatherTest, DoNotProcessMoreThanSufficientResponses) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- ASSERT_OK(status.getStatus());
- ASSERT_FALSE(ranCompletion);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now()+2000,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now()+2000,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now()+5000,
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- ASSERT_FALSE(ranCompletion);
-
- net->runUntil(net->now()+2000);
- ASSERT_TRUE(ranCompletion);
-
-
- net->runReadyNetworkOperations();
- // the third resposne should not be processed, so the count should not increment
- ASSERT_EQUALS(2, sga.getResponseCount());
-
- net->exitNetwork();
+ void run() {
+ _thread.reset(
+ new boost::thread(stdx::bind(&ScatterGatherRunnerRunner::_run, this, _executor)));
}
- // Confirm that starting with sufficient responses received will immediate complete.
- TEST_F(ScatterGatherTest, DoNotCreateCallbacksIfHasSufficientResponsesReturnsTrueImmediately) {
- ScatterGatherTestAlgorithm sga;
- // set hasReceivedSufficientResponses to return true before the run starts
- sga.finish();
- ScatterGatherRunner sgr(&sga);
- bool ranCompletion = false;
- StatusWith<ReplicationExecutor::EventHandle> status = sgr.start(getExecutor(),
- stdx::bind(&onCompletionTestFunction, &ranCompletion));
- ASSERT_OK(status.getStatus());
- ASSERT_TRUE(ranCompletion);
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- ASSERT_FALSE(net->hasReadyRequests());
- net->exitNetwork();
+private:
+ void _run(ReplicationExecutor* executor) {
+ _result = _sgr->run(_executor);
}
+ ScatterGatherRunner* _sgr;
+ ReplicationExecutor* _executor;
+ Status _result;
+ boost::scoped_ptr<boost::thread> _thread;
+};
+
+// Simple onCompletion function which sets a bool, so that tests can check that the
+// onCompletion function ran when expected.
+void onCompletionTestFunction(bool* ran) {
+ *ran = true;
+}
+
+// Confirm that running via start() will finish and run the onCompletion function once
+// sufficient responses have been received.
+// Confirm that deleting both the ScatterGatherTestAlgorithm and ScatterGatherRunner while
+// scheduled callbacks still exist is safe (exercised by the ASAN builder) after the
+// algorithm has completed.
+TEST_F(ScatterGatherTest, DeleteAlgorithmAfterItHasCompleted) {
+ ScatterGatherTestAlgorithm* sga = new ScatterGatherTestAlgorithm();
+ ScatterGatherRunner* sgr = new ScatterGatherRunner(sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr->start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ ASSERT_OK(status.getStatus());
+ ASSERT_FALSE(ranCompletion);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + 2000,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + 2000,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + 5000,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ net->runUntil(net->now() + 2000);
+ ASSERT_TRUE(ranCompletion);
+
+ delete sga;
+ delete sgr;
+
+ net->runReadyNetworkOperations();
+
+ net->exitNetwork();
+}
+
+// Confirm that shutting the ReplicationExecutor down before calling run() will cause run()
+// to return ErrorCodes::ShutdownInProgress.
+TEST_F(ScatterGatherTest, ShutdownExecutorBeforeRun) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ getExecutor()->shutdown();
+ sga.finish();
+ Status status = sgr.run(getExecutor());
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status);
+}
+
+// Confirm that shutting the ReplicationExecutor down after calling run(), but before run()
+// finishes will cause run() to return Status::OK().
+TEST_F(ScatterGatherTest, ShutdownExecutorAfterRun) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
+ sgrr.run();
+ // need to wait for the scatter-gather to be scheduled in the executor
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->blackHole(noi);
+ net->exitNetwork();
+ getExecutor()->shutdown();
+ Status status = sgrr.getResult();
+ ASSERT_OK(status);
+}
+
+// Confirm that shutting the ReplicationExecutor down before calling start() will cause start()
+// to return ErrorCodes::ShutdownInProgress and should not run onCompletion().
+TEST_F(ScatterGatherTest, ShutdownExecutorBeforeStart) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ getExecutor()->shutdown();
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ sga.finish();
+ ASSERT_FALSE(ranCompletion);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, status.getStatus());
+}
+
+// Confirm that shutting the ReplicationExecutor down after calling start() will cause start()
+// to return Status::OK and should not run onCompletion().
+TEST_F(ScatterGatherTest, ShutdownExecutorAfterStart) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ getExecutor()->shutdown();
+ sga.finish();
+ ASSERT_FALSE(ranCompletion);
+ ASSERT_OK(status.getStatus());
+}
+
+// Confirm that responses are not processed once sufficient responses have been received.
+TEST_F(ScatterGatherTest, DoNotProcessMoreThanSufficientResponses) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ ASSERT_OK(status.getStatus());
+ ASSERT_FALSE(ranCompletion);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + 2000,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + 2000,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now() + 5000,
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ ASSERT_FALSE(ranCompletion);
+
+ net->runUntil(net->now() + 2000);
+ ASSERT_TRUE(ranCompletion);
+
+
+ net->runReadyNetworkOperations();
+    // the third response should not be processed, so the count should not increment
+ ASSERT_EQUALS(2, sga.getResponseCount());
+
+ net->exitNetwork();
+}
+
+// Confirm that starting with sufficient responses already received will complete immediately.
+TEST_F(ScatterGatherTest, DoNotCreateCallbacksIfHasSufficientResponsesReturnsTrueImmediately) {
+ ScatterGatherTestAlgorithm sga;
+ // set hasReceivedSufficientResponses to return true before the run starts
+ sga.finish();
+ ScatterGatherRunner sgr(&sga);
+ bool ranCompletion = false;
+ StatusWith<ReplicationExecutor::EventHandle> status =
+ sgr.start(getExecutor(), stdx::bind(&onCompletionTestFunction, &ranCompletion));
+ ASSERT_OK(status.getStatus());
+ ASSERT_TRUE(ranCompletion);
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ ASSERT_FALSE(net->hasReadyRequests());
+ net->exitNetwork();
+}
+
#if 0
// TODO Enable this test once we have a way to test for invariants.
@@ -383,41 +370,39 @@ namespace {
net->exitNetwork();
ASSERT_FALSE(ranCompletion);
}
-#endif // 0
-
- // Confirm that running via run() will finish once sufficient responses have been received.
- TEST_F(ScatterGatherTest, SuccessfulScatterGatherViaRun) {
- ScatterGatherTestAlgorithm sga;
- ScatterGatherRunner sgr(&sga);
- ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
- sgrr.run();
-
- NetworkInterfaceMock* net = getNet();
- net->enterNetwork();
- NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- net->runReadyNetworkOperations();
-
- noi = net->getNextReadyRequest();
- net->blackHole(noi);
- net->runReadyNetworkOperations();
-
- noi = net->getNextReadyRequest();
- net->scheduleResponse(noi,
- net->now(),
- ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
- BSON("ok" << 1),
- boost::posix_time::milliseconds(10))));
- net->runReadyNetworkOperations();
- net->exitNetwork();
-
- Status status = sgrr.getResult();
- ASSERT_OK(status);
- }
+#endif // 0
+
+// Confirm that running via run() will finish once sufficient responses have been received.
+TEST_F(ScatterGatherTest, SuccessfulScatterGatherViaRun) {
+ ScatterGatherTestAlgorithm sga;
+ ScatterGatherRunner sgr(&sga);
+ ScatterGatherRunnerRunner sgrr(&sgr, getExecutor());
+ sgrr.run();
+
+ NetworkInterfaceMock* net = getNet();
+ net->enterNetwork();
+ NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now(),
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ net->runReadyNetworkOperations();
+
+ noi = net->getNextReadyRequest();
+ net->blackHole(noi);
+ net->runReadyNetworkOperations();
+
+ noi = net->getNextReadyRequest();
+ net->scheduleResponse(noi,
+ net->now(),
+ ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
+ BSON("ok" << 1), boost::posix_time::milliseconds(10))));
+ net->runReadyNetworkOperations();
+ net->exitNetwork();
+
+ Status status = sgrr.getResult();
+ ASSERT_OK(status);
+}
} // namespace
} // namespace repl
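
Annotation: all of the tests above share the same NetworkInterfaceMock choreography, which the repetition can obscure: take control of the virtual network, attach canned replies to the ready requests, then advance virtual time. The skeleton, condensed from the tests themselves (the timing values are illustrative):

    NetworkInterfaceMock* net = getNet();
    net->enterNetwork();                           // take exclusive control of the mock network
    NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
    net->scheduleResponse(noi,                     // queue a canned reply for this request...
                          net->now() + 2000,       // ...delivered 2 virtual seconds from now
                          ResponseStatus(ReplicationExecutor::RemoteCommandResponse(
                              BSON("ok" << 1), boost::posix_time::milliseconds(10))));
    net->runUntil(net->now() + 2000);              // advance the clock; the reply is delivered
    net->runReadyNetworkOperations();              // flush anything left (e.g. cancellations)
    net->exitNetwork();
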
diff --git a/src/mongo/db/repl/scoped_conn.cpp b/src/mongo/db/repl/scoped_conn.cpp
index e4116fcf584..113261733da 100644
--- a/src/mongo/db/repl/scoped_conn.cpp
+++ b/src/mongo/db/repl/scoped_conn.cpp
@@ -39,41 +39,42 @@
namespace mongo {
namespace repl {
- static const int DEFAULT_HEARTBEAT_TIMEOUT_SECS = 10;
+static const int DEFAULT_HEARTBEAT_TIMEOUT_SECS = 10;
- // This is a bitmask with the first bit set. It's used to mark connections that should be kept
- // open during stepdowns
- const unsigned ScopedConn::keepOpen = 1;
- ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M());
- mutex ScopedConn::mapMutex("ScopedConn::mapMutex");
+// This is a bitmask with the first bit set. It's used to mark connections that should be kept
+// open during stepdowns
+const unsigned ScopedConn::keepOpen = 1;
+ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M());
+mutex ScopedConn::mapMutex("ScopedConn::mapMutex");
- ScopedConn::ConnectionInfo::ConnectionInfo() : lock("ConnectionInfo"),
- cc(new DBClientConnection(/*reconnect*/ true,
- /*timeout*/ DEFAULT_HEARTBEAT_TIMEOUT_SECS)),
- connected(false) {
- cc->_logLevel = logger::LogSeverity::Debug(2);
- }
+ScopedConn::ConnectionInfo::ConnectionInfo()
+ : lock("ConnectionInfo"),
+ cc(new DBClientConnection(/*reconnect*/ true,
+ /*timeout*/ DEFAULT_HEARTBEAT_TIMEOUT_SECS)),
+ connected(false) {
+ cc->_logLevel = logger::LogSeverity::Debug(2);
+}
- // we should already be locked...
- bool ScopedConn::connect() {
- std::string err;
- if (!connInfo->cc->connect(HostAndPort(_hostport), err)) {
- log() << "couldn't connect to " << _hostport << ": " << err;
- return false;
- }
- connInfo->connected = true;
- connInfo->tagPort();
-
- // if we cannot authenticate against a member, then either its key file
- // or our key file has to change. if our key file has to change, we'll
- // be rebooting. if their file has to change, they'll be rebooted so the
- // connection created above will go dead, reconnect, and reauth.
- if (getGlobalAuthorizationManager()->isAuthEnabled()) {
- return authenticateInternalUser(connInfo->cc.get());
- }
+// we should already be locked...
+bool ScopedConn::connect() {
+ std::string err;
+ if (!connInfo->cc->connect(HostAndPort(_hostport), err)) {
+ log() << "couldn't connect to " << _hostport << ": " << err;
+ return false;
+ }
+ connInfo->connected = true;
+ connInfo->tagPort();
- return true;
+ // if we cannot authenticate against a member, then either its key file
+ // or our key file has to change. if our key file has to change, we'll
+ // be rebooting. if their file has to change, they'll be rebooted so the
+ // connection created above will go dead, reconnect, and reauth.
+ if (getGlobalAuthorizationManager()->isAuthEnabled()) {
+ return authenticateInternalUser(connInfo->cc.get());
}
-} // namespace repl
-} // namespace mongo
+ return true;
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/scoped_conn.h b/src/mongo/db/repl/scoped_conn.h
index b357d17648e..85f6dd5080f 100644
--- a/src/mongo/db/repl/scoped_conn.h
+++ b/src/mongo/db/repl/scoped_conn.h
@@ -41,118 +41,122 @@
namespace mongo {
namespace repl {
- /** here we keep a single connection (with reconnect) for a set of hosts,
- one each, and allow one user at a time per host. if in use already for that
- host, we block. so this is an easy way to keep a 1-deep pool of connections
- that many threads can share.
+/** here we keep a single connection (with reconnect) for a set of hosts,
+ one each, and allow one user at a time per host. if in use already for that
+ host, we block. so this is an easy way to keep a 1-deep pool of connections
+ that many threads can share.
- thread-safe.
+ thread-safe.
- Example:
- {
- ScopedConn c("foo.acme.com:9999");
- c->runCommand(...);
- }
+ Example:
+ {
+ ScopedConn c("foo.acme.com:9999");
+ c->runCommand(...);
+ }
- throws exception on connect error (but fine to try again later with a new
- scopedconn object for same host).
- */
- class ScopedConn {
- public:
- // A flag to keep ScopedConns open when all other sockets are disconnected
- static const unsigned keepOpen;
-
- /** throws assertions if connect failure etc. */
- ScopedConn(const std::string& hostport);
- ~ScopedConn() {
- // conLock releases...
- }
- void reconnect() {
- connInfo->cc.reset(new DBClientConnection(true, connInfo->getTimeout()));
- connInfo->cc->_logLevel = logger::LogSeverity::Debug(2);
- connInfo->connected = false;
- connect();
- }
+ throws exception on connect error (but fine to try again later with a new
+ scopedconn object for same host).
+*/
+class ScopedConn {
+public:
+ // A flag to keep ScopedConns open when all other sockets are disconnected
+ static const unsigned keepOpen;
+
+    /** throws assertions if the connect fails, etc. */
+ ScopedConn(const std::string& hostport);
+ ~ScopedConn() {
+ // conLock releases...
+ }
+ void reconnect() {
+ connInfo->cc.reset(new DBClientConnection(true, connInfo->getTimeout()));
+ connInfo->cc->_logLevel = logger::LogSeverity::Debug(2);
+ connInfo->connected = false;
+ connect();
+ }
- void setTimeout(time_t timeout) {
- connInfo->setTimeout(timeout);
- }
+ void setTimeout(time_t timeout) {
+ connInfo->setTimeout(timeout);
+ }
- /* If we were to run a query and not exhaust the cursor, future use of the connection would be problematic.
- So here what we do is wrapper known safe methods and not allow cursor-style queries at all. This makes
- ScopedConn limited in functionality but very safe. More non-cursor wrappers can be added here if needed.
- */
- bool runCommand(const std::string &dbname, const BSONObj& cmd, BSONObj &info, int options=0) {
- return conn()->runCommand(dbname, cmd, info, options);
- }
- unsigned long long count(const std::string &ns) {
- return conn()->count(ns);
- }
- BSONObj findOne(const std::string &ns, const Query& q, const BSONObj *fieldsToReturn = 0, int queryOptions = 0) {
- return conn()->findOne(ns, q, fieldsToReturn, queryOptions);
+ /* If we were to run a query and not exhaust the cursor, future use of the connection would be problematic.
+       So what we do here is wrap known safe methods and not allow cursor-style queries at all. This makes
+ ScopedConn limited in functionality but very safe. More non-cursor wrappers can be added here if needed.
+ */
+ bool runCommand(const std::string& dbname, const BSONObj& cmd, BSONObj& info, int options = 0) {
+ return conn()->runCommand(dbname, cmd, info, options);
+ }
+ unsigned long long count(const std::string& ns) {
+ return conn()->count(ns);
+ }
+ BSONObj findOne(const std::string& ns,
+ const Query& q,
+ const BSONObj* fieldsToReturn = 0,
+ int queryOptions = 0) {
+ return conn()->findOne(ns, q, fieldsToReturn, queryOptions);
+ }
+
+private:
+ std::auto_ptr<scoped_lock> connLock;
+ static mongo::mutex mapMutex;
+ struct ConnectionInfo {
+ mongo::mutex lock;
+ boost::scoped_ptr<DBClientConnection> cc;
+ bool connected;
+ ConnectionInfo();
+
+ void tagPort() {
+ MessagingPort& mp = cc->port();
+ mp.tag |= ScopedConn::keepOpen;
}
- private:
- std::auto_ptr<scoped_lock> connLock;
- static mongo::mutex mapMutex;
- struct ConnectionInfo {
- mongo::mutex lock;
- boost::scoped_ptr<DBClientConnection> cc;
- bool connected;
- ConnectionInfo();
-
- void tagPort() {
- MessagingPort& mp = cc->port();
- mp.tag |= ScopedConn::keepOpen;
- }
-
- void setTimeout(time_t timeout) {
- _timeout = timeout;
- cc->setSoTimeout(_timeout);
- }
-
- int getTimeout() {
- return _timeout;
- }
-
- private:
- int _timeout;
- } *connInfo;
- typedef std::map<std::string,ScopedConn::ConnectionInfo*> M;
- static M& _map;
- boost::scoped_ptr<DBClientConnection>& conn() { return connInfo->cc; }
- const std::string _hostport;
-
- // we should already be locked...
- bool connect();
-
- };
-
- inline ScopedConn::ScopedConn(const std::string& hostport) : _hostport(hostport) {
- bool first = false;
- {
- scoped_lock lk(mapMutex);
- connInfo = _map[_hostport];
- if( connInfo == 0 ) {
- connInfo = _map[_hostport] = new ConnectionInfo();
- first = true;
- connLock.reset( new scoped_lock(connInfo->lock) );
- }
+ void setTimeout(time_t timeout) {
+ _timeout = timeout;
+ cc->setSoTimeout(_timeout);
}
- // already locked connLock above
- if (first) {
- connect();
- return;
+ int getTimeout() {
+ return _timeout;
}
- connLock.reset( new scoped_lock(connInfo->lock) );
- if (connInfo->connected) {
- return;
+ private:
+ int _timeout;
+ } * connInfo;
+ typedef std::map<std::string, ScopedConn::ConnectionInfo*> M;
+ static M& _map;
+ boost::scoped_ptr<DBClientConnection>& conn() {
+ return connInfo->cc;
+ }
+ const std::string _hostport;
+
+ // we should already be locked...
+ bool connect();
+};
+
+inline ScopedConn::ScopedConn(const std::string& hostport) : _hostport(hostport) {
+ bool first = false;
+ {
+ scoped_lock lk(mapMutex);
+ connInfo = _map[_hostport];
+ if (connInfo == 0) {
+ connInfo = _map[_hostport] = new ConnectionInfo();
+ first = true;
+ connLock.reset(new scoped_lock(connInfo->lock));
}
+ }
- // Keep trying to connect if we're not yet connected
+ // already locked connLock above
+ if (first) {
connect();
+ return;
}
-} // namespace repl
-} // namespace mongo
+
+ connLock.reset(new scoped_lock(connInfo->lock));
+ if (connInfo->connected) {
+ return;
+ }
+
+ // Keep trying to connect if we're not yet connected
+ connect();
+}
+} // namespace repl
+} // namespace mongo
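
Annotation: the class comment above already shows the basic call shape; what it implies but does not spell out is that two threads aimed at the same host serialize on that host's ConnectionInfo lock, while threads targeting different hosts proceed in parallel. A sketch (the wrapper function is invented for illustration):

    // Blocks if another thread currently holds this host's single pooled connection.
    void pingHost(const std::string& hostport) {
        ScopedConn c(hostport);
        BSONObj info;
        c.runCommand("admin", BSON("ping" << 1), info);
    }  // per-host lock released here; the next waiter for this host may proceed
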
diff --git a/src/mongo/db/repl/server.h b/src/mongo/db/repl/server.h
index 9216c0bc87d..d376bc7faa5 100644
--- a/src/mongo/db/repl/server.h
+++ b/src/mongo/db/repl/server.h
@@ -38,39 +38,43 @@
namespace mongo {
- namespace task {
+namespace task {
- typedef stdx::function<void()> lam;
+typedef stdx::function<void()> lam;
- /** typical usage is: task::fork( new Server("threadname") ); */
- class Server : public Task {
- public:
- /** send a message to the port */
- void send(lam);
+/** typical usage is: task::fork( new Server("threadname") ); */
+class Server : public Task {
+public:
+ /** send a message to the port */
+ void send(lam);
- Server(const std::string& name) : m("server"), _name(name), rq(false) { }
- virtual ~Server() { }
+ Server(const std::string& name) : m("server"), _name(name), rq(false) {}
+ virtual ~Server() {}
- /** send message but block until function completes */
- void call(const lam&);
+ /** send message but block until function completes */
+ void call(const lam&);
- void requeue() { rq = true; }
-
- protected:
- // REMINDER : for use in mongod, you will want to have this call Client::initThread().
- virtual void starting() { }
+ void requeue() {
+ rq = true;
+ }
- private:
- virtual bool initClient() { return true; }
- virtual std::string name() const { return _name; }
- void doWork();
- std::deque<lam> d;
- mongo::mutex m;
- boost::condition c;
- std::string _name;
- bool rq;
- };
+protected:
+    // REMINDER: for use in mongod, you will want to have this call Client::initThread().
+ virtual void starting() {}
+private:
+ virtual bool initClient() {
+ return true;
}
-
+ virtual std::string name() const {
+ return _name;
+ }
+ void doWork();
+ std::deque<lam> d;
+ mongo::mutex m;
+ boost::condition c;
+ std::string _name;
+ bool rq;
+};
+}
}
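
Annotation: the header's own comment gives the intended usage; expanded slightly into a sketch (the lambda bodies are placeholders, and fork() is assumed to behave as that comment describes):

    task::Server* s = new task::Server("threadname");
    task::fork(s);                                  // start the server's thread, per the class comment
    s->send([] { /* fire-and-forget, runs on the server thread */ });
    s->call([] { /* like send(), but blocks the caller until it completes */ });
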
diff --git a/src/mongo/db/repl/sync.cpp b/src/mongo/db/repl/sync.cpp
index bc7d86c8d31..d77827bc57c 100644
--- a/src/mongo/db/repl/sync.cpp
+++ b/src/mongo/db/repl/sync.cpp
@@ -48,109 +48,103 @@
namespace mongo {
- using std::endl;
- using std::string;
+using std::endl;
+using std::string;
namespace repl {
- void Sync::setHostname(const string& hostname) {
- hn = hostname;
- }
+void Sync::setHostname(const string& hostname) {
+ hn = hostname;
+}
- BSONObj Sync::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
- OplogReader missingObjReader; // why are we using OplogReader to run a non-oplog query?
- const char *ns = o.getStringField("ns");
+BSONObj Sync::getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o) {
+ OplogReader missingObjReader; // why are we using OplogReader to run a non-oplog query?
+ const char* ns = o.getStringField("ns");
- // capped collections
- Collection* collection = db->getCollection(ns);
- if ( collection && collection->isCapped() ) {
- log() << "replication missing doc, but this is okay for a capped collection (" << ns << ")" << endl;
- return BSONObj();
- }
+ // capped collections
+ Collection* collection = db->getCollection(ns);
+ if (collection && collection->isCapped()) {
+ log() << "replication missing doc, but this is okay for a capped collection (" << ns << ")"
+ << endl;
+ return BSONObj();
+ }
- const int retryMax = 3;
- for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
- if (retryCount != 1) {
- // if we are retrying, sleep a bit to let the network possibly recover
- sleepsecs(retryCount * retryCount);
- }
- try {
- bool ok = missingObjReader.connect(HostAndPort(hn));
- if (!ok) {
- warning() << "network problem detected while connecting to the "
- << "sync source, attempt " << retryCount << " of "
- << retryMax << endl;
- continue; // try again
- }
- }
- catch (const SocketException&) {
+ const int retryMax = 3;
+ for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
+ if (retryCount != 1) {
+ // if we are retrying, sleep a bit to let the network possibly recover
+ sleepsecs(retryCount * retryCount);
+ }
+ try {
+ bool ok = missingObjReader.connect(HostAndPort(hn));
+ if (!ok) {
warning() << "network problem detected while connecting to the "
- << "sync source, attempt " << retryCount << " of "
- << retryMax << endl;
- continue; // try again
- }
-
- // might be more than just _id in the update criteria
- BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
- BSONObj missingObj;
- try {
- missingObj = missingObjReader.findOne(ns, query);
- }
- catch (const SocketException&) {
- warning() << "network problem detected while fetching a missing document from the "
- << "sync source, attempt " << retryCount << " of "
- << retryMax << endl;
- continue; // try again
- }
- catch (DBException& e) {
- log() << "replication assertion fetching missing object: " << e.what() << endl;
- throw;
+ << "sync source, attempt " << retryCount << " of " << retryMax << endl;
+ continue; // try again
}
+ } catch (const SocketException&) {
+ warning() << "network problem detected while connecting to the "
+ << "sync source, attempt " << retryCount << " of " << retryMax << endl;
+ continue; // try again
+ }
- // success!
- return missingObj;
+ // might be more than just _id in the update criteria
+ BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj();
+ BSONObj missingObj;
+ try {
+ missingObj = missingObjReader.findOne(ns, query);
+ } catch (const SocketException&) {
+ warning() << "network problem detected while fetching a missing document from the "
+ << "sync source, attempt " << retryCount << " of " << retryMax << endl;
+ continue; // try again
+ } catch (DBException& e) {
+ log() << "replication assertion fetching missing object: " << e.what() << endl;
+ throw;
}
- // retry count exceeded
- msgasserted(15916,
- str::stream() << "Can no longer connect to initial sync source: " << hn);
- }
- bool Sync::shouldRetry(OperationContext* txn, const BSONObj& o) {
- const NamespaceString nss(o.getStringField("ns"));
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- // Take an X lock on the database in order to preclude other modifications.
- // Also, the database might not exist yet, so create it.
- AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
- Database* const db = autoDb.getDb();
-
- // we don't have the object yet, which is possible on initial sync. get it.
- log() << "adding missing object" << endl; // rare enough we can log
- BSONObj missingObj = getMissingDoc(txn, db, o);
-
- if( missingObj.isEmpty() ) {
- log() << "missing object not found on source."
- " presumably deleted later in oplog";
- log() << "o2: " << o.getObjectField("o2").toString();
- log() << "o firstfield: " << o.getObjectField("o").firstElementFieldName();
- return false;
- }
- else {
- WriteUnitOfWork wunit(txn);
-
- Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
- invariant(coll);
-
- StatusWith<RecordId> result = coll->insertDocument(txn, missingObj, true);
- uassert(15917,
- str::stream() << "failed to insert missing doc: "
- << result.getStatus().toString(),
- result.isOK() );
- LOG(1) << "inserted missing doc: " << missingObj.toString() << endl;
- wunit.commit();
- return true;
- }
- } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "InsertRetry", nss.ns());
+ // success!
+ return missingObj;
+ }
+ // retry count exceeded
+ msgasserted(15916, str::stream() << "Can no longer connect to initial sync source: " << hn);
+}
+
+bool Sync::shouldRetry(OperationContext* txn, const BSONObj& o) {
+ const NamespaceString nss(o.getStringField("ns"));
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ // Take an X lock on the database in order to preclude other modifications.
+ // Also, the database might not exist yet, so create it.
+ AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
+ Database* const db = autoDb.getDb();
+
+ // we don't have the object yet, which is possible on initial sync. get it.
+ log() << "adding missing object" << endl; // rare enough we can log
+ BSONObj missingObj = getMissingDoc(txn, db, o);
+
+ if (missingObj.isEmpty()) {
+ log() << "missing object not found on source."
+ " presumably deleted later in oplog";
+ log() << "o2: " << o.getObjectField("o2").toString();
+ log() << "o firstfield: " << o.getObjectField("o").firstElementFieldName();
+ return false;
+ } else {
+ WriteUnitOfWork wunit(txn);
+
+ Collection* const coll = db->getOrCreateCollection(txn, nss.toString());
+ invariant(coll);
+
+ StatusWith<RecordId> result = coll->insertDocument(txn, missingObj, true);
+ uassert(
+ 15917,
+ str::stream() << "failed to insert missing doc: " << result.getStatus().toString(),
+ result.isOK());
+ LOG(1) << "inserted missing doc: " << missingObj.toString() << endl;
+ wunit.commit();
+ return true;
+ }
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "InsertRetry", nss.ns());
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
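
Annotation: the retry loop in getMissingDoc() above is the interesting control flow: attempts 2 and 3 are preceded by a quadratic backoff (4s, then 9s), network errors fall through to the next attempt, and only exhausting retryMax raises. The pattern in isolation (tryOnce() is a stand-in for the connect-plus-findOne body):

    const int retryMax = 3;
    for (int retryCount = 1; retryCount <= retryMax; ++retryCount) {
        if (retryCount != 1)
            sleepsecs(retryCount * retryCount);  // quadratic backoff: 4s, then 9s
        if (tryOnce())                           // network failures just continue the loop
            return;                              // success
    }
    msgasserted(15916, "retry count exceeded");  // mirrors the source's final assertion
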
diff --git a/src/mongo/db/repl/sync.h b/src/mongo/db/repl/sync.h
index cdda55f4f13..0788288429d 100644
--- a/src/mongo/db/repl/sync.h
+++ b/src/mongo/db/repl/sync.h
@@ -33,25 +33,26 @@
#include "mongo/db/jsobj.h"
namespace mongo {
- class Database;
- class OperationContext;
+class Database;
+class OperationContext;
namespace repl {
- class Sync {
- protected:
- std::string hn;
- public:
- Sync(const std::string& hostname) : hn(hostname) {}
- virtual ~Sync() {}
- virtual BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o);
-
- /**
- * If applyOperation_inlock should be called again after an update fails.
- */
- virtual bool shouldRetry(OperationContext* txn, const BSONObj& o);
- void setHostname(const std::string& hostname);
- };
-
-} // namespace repl
-} // namespace mongo
+class Sync {
+protected:
+ std::string hn;
+
+public:
+ Sync(const std::string& hostname) : hn(hostname) {}
+ virtual ~Sync() {}
+ virtual BSONObj getMissingDoc(OperationContext* txn, Database* db, const BSONObj& o);
+
+ /**
+ * If applyOperation_inlock should be called again after an update fails.
+ */
+ virtual bool shouldRetry(OperationContext* txn, const BSONObj& o);
+ void setHostname(const std::string& hostname);
+};
+
+} // namespace repl
+} // namespace mongo
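
The shouldRetry()/getMissingDoc() pair above implements the initial-sync "fetch and retry" rule: when an oplog update cannot be applied because the target document is missing locally, the applier fetches the document from the sync source, inserts it inside a WriteUnitOfWork, and reports that the update should be applied once more. The sketch below is not part of the commit; it models only that control flow, and every type and helper in it is a hypothetical stand-in rather than a real MongoDB API.

// fetch_retry_sketch.cpp -- a minimal model of the Sync::shouldRetry() flow.
#include <iostream>
#include <string>

struct Doc {
    std::string json;  // fetched document; an empty string means "not found"
    bool empty() const { return json.empty(); }
};

// Stand-in for Sync::getMissingDoc(): ask the sync source for the document
// named by the failed update's "o2" field.
static Doc fetchMissingDoc(const std::string& sourceHost, const std::string& o2) {
    (void)sourceHost;  // a real implementation would query this host
    return Doc{"{_id: " + o2 + ", x: 42}"};  // pretend the source still has it
}

// Stand-in for Collection::insertDocument() inside a WriteUnitOfWork.
static bool insertLocally(const Doc& d) {
    std::cout << "inserted missing doc: " << d.json << "\n";
    return true;
}

// Returns true if the caller should re-run the failed oplog update.
static bool shouldRetrySketch(const std::string& sourceHost, const std::string& o2) {
    Doc missing = fetchMissingDoc(sourceHost, o2);
    if (missing.empty()) {
        // Presumably deleted later in the oplog; skip the update entirely.
        return false;
    }
    return insertLocally(missing);  // retry once the document exists
}

int main() {
    return shouldRetrySketch("source.example:27017", "1") ? 0 : 1;
}
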
diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp
index c70f101904e..1a1057b8fae 100644
--- a/src/mongo/db/repl/sync_source_feedback.cpp
+++ b/src/mongo/db/repl/sync_source_feedback.cpp
@@ -50,270 +50,261 @@
namespace mongo {
- using std::endl;
- using std::string;
+using std::endl;
+using std::string;
namespace repl {
- // used in replAuthenticate
- static const BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}");
+// used in replAuthenticate
+static const BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}");
- SyncSourceFeedback::SyncSourceFeedback() : _positionChanged(false),
- _handshakeNeeded(false),
- _shutdownSignaled(false) {}
- SyncSourceFeedback::~SyncSourceFeedback() {}
+SyncSourceFeedback::SyncSourceFeedback()
+ : _positionChanged(false), _handshakeNeeded(false), _shutdownSignaled(false) {}
+SyncSourceFeedback::~SyncSourceFeedback() {}
- void SyncSourceFeedback::_resetConnection() {
- LOG(1) << "resetting connection in sync source feedback";
- _connection.reset();
- }
-
- bool SyncSourceFeedback::replAuthenticate() {
- if (!getGlobalAuthorizationManager()->isAuthEnabled())
- return true;
-
- if (!isInternalAuthSet())
- return false;
- return authenticateInternalUser(_connection.get());
- }
-
- void SyncSourceFeedback::ensureMe(OperationContext* txn) {
- string myname = getHostName();
- {
- ScopedTransaction transaction(txn, MODE_IX);
- Lock::DBLock dlk(txn->lockState(), "local", MODE_X);
- Client::Context ctx(txn, "local");
-
- // local.me is an identifier for a server for getLastError w:2+
- if (!Helpers::getSingleton(txn, "local.me", _me) ||
- !_me.hasField("host") ||
- _me["host"].String() != myname) {
-
- WriteUnitOfWork wunit(txn);
+void SyncSourceFeedback::_resetConnection() {
+ LOG(1) << "resetting connection in sync source feedback";
+ _connection.reset();
+}
- // clean out local.me
- Helpers::emptyCollection(txn, "local.me");
-
- // repopulate
- BSONObjBuilder b;
- b.appendOID("_id", 0, true);
- b.append("host", myname);
- _me = b.obj();
- Helpers::putSingleton(txn, "local.me", _me);
+bool SyncSourceFeedback::replAuthenticate() {
+ if (!getGlobalAuthorizationManager()->isAuthEnabled())
+ return true;
- wunit.commit();
- }
- // _me is used outside of a read lock, so we must copy it out of the mmap
- _me = _me.getOwned();
+ if (!isInternalAuthSet())
+ return false;
+ return authenticateInternalUser(_connection.get());
+}
+
+void SyncSourceFeedback::ensureMe(OperationContext* txn) {
+ string myname = getHostName();
+ {
+ ScopedTransaction transaction(txn, MODE_IX);
+ Lock::DBLock dlk(txn->lockState(), "local", MODE_X);
+ Client::Context ctx(txn, "local");
+
+ // local.me is an identifier for a server for getLastError w:2+
+ if (!Helpers::getSingleton(txn, "local.me", _me) || !_me.hasField("host") ||
+ _me["host"].String() != myname) {
+ WriteUnitOfWork wunit(txn);
+
+ // clean out local.me
+ Helpers::emptyCollection(txn, "local.me");
+
+ // repopulate
+ BSONObjBuilder b;
+ b.appendOID("_id", 0, true);
+ b.append("host", myname);
+ _me = b.obj();
+ Helpers::putSingleton(txn, "local.me", _me);
+
+ wunit.commit();
}
+ // _me is used outside of a read lock, so we must copy it out of the mmap
+ _me = _me.getOwned();
}
+}
- bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getMemberState().primary()) {
- // primary has no one to handshake to
- return true;
- }
- // construct a vector of handshake obj for us as well as all chained members
- std::vector<BSONObj> handshakeObjs;
- replCoord->prepareReplSetUpdatePositionCommandHandshakes(&handshakeObjs);
- LOG(1) << "handshaking upstream updater";
- for (std::vector<BSONObj>::iterator it = handshakeObjs.begin();
- it != handshakeObjs.end();
- ++it) {
- BSONObj res;
- try {
- LOG(2) << "Sending to " << _connection.get()->toString() << " the replication "
- "handshake: " << *it;
- if (!_connection->runCommand("admin", *it, res)) {
- std::string errMsg = res["errmsg"].valuestrsafe();
- massert(17447, "upstream updater is not supported by the member from which we"
- " are syncing, please update all nodes to 2.6 or later.",
- errMsg.find("no such cmd") == std::string::npos);
-
- log() << "replSet error while handshaking the upstream updater: "
- << errMsg;
-
- // sleep half a second if we are not in our sync source's config
- // TODO(dannenberg) after 3.0, remove the string comparison
- if (res["code"].numberInt() == ErrorCodes::NodeNotFound ||
- errMsg.find("could not be found in replica set config while attempting "
- "to associate it with") != std::string::npos) {
-
- // black list sync target for 10 seconds and find a new one
- replCoord->blacklistSyncSource(_syncTarget,
- Date_t(curTimeMillis64() + 10*1000));
- BackgroundSync::get()->clearSyncTarget();
- }
-
- _resetConnection();
- return false;
- }
- }
- catch (const DBException& e) {
- log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl;
- _resetConnection();
- return false;
- }
- }
+bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getMemberState().primary()) {
+ // primary has no one to handshake to
return true;
}
-
- bool SyncSourceFeedback::_connect(OperationContext* txn, const HostAndPort& host) {
- if (hasConnection()) {
- return true;
- }
- log() << "replset setting syncSourceFeedback to " << host.toString();
- _connection.reset(new DBClientConnection(false, OplogReader::tcp_timeout));
- string errmsg;
+ // construct a vector of handshake obj for us as well as all chained members
+ std::vector<BSONObj> handshakeObjs;
+ replCoord->prepareReplSetUpdatePositionCommandHandshakes(&handshakeObjs);
+ LOG(1) << "handshaking upstream updater";
+ for (std::vector<BSONObj>::iterator it = handshakeObjs.begin(); it != handshakeObjs.end();
+ ++it) {
+ BSONObj res;
try {
- if (!_connection->connect(host, errmsg) ||
- (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate())) {
+ LOG(2) << "Sending to " << _connection.get()->toString() << " the replication "
+ "handshake: " << *it;
+ if (!_connection->runCommand("admin", *it, res)) {
+ std::string errMsg = res["errmsg"].valuestrsafe();
+ massert(17447,
+ "upstream updater is not supported by the member from which we"
+ " are syncing, please update all nodes to 2.6 or later.",
+ errMsg.find("no such cmd") == std::string::npos);
+
+ log() << "replSet error while handshaking the upstream updater: " << errMsg;
+
+                // if we are not in our sync source's config, find a new sync source
+ // TODO(dannenberg) after 3.0, remove the string comparison
+ if (res["code"].numberInt() == ErrorCodes::NodeNotFound ||
+ errMsg.find(
+ "could not be found in replica set config while attempting "
+ "to associate it with") != std::string::npos) {
+                    // blacklist sync target for 10 seconds and find a new one
+ replCoord->blacklistSyncSource(_syncTarget,
+ Date_t(curTimeMillis64() + 10 * 1000));
+ BackgroundSync::get()->clearSyncTarget();
+ }
+
_resetConnection();
- log() << "repl: " << errmsg << endl;
return false;
}
- }
- catch (const DBException& e) {
- log() << "Error connecting to " << host.toString() << ": " << e.what();
+ } catch (const DBException& e) {
+ log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl;
_resetConnection();
return false;
}
-
- return hasConnection();
}
+ return true;
+}
- void SyncSourceFeedback::forwardSlaveHandshake() {
- boost::unique_lock<boost::mutex> lock(_mtx);
- _handshakeNeeded = true;
- _cond.notify_all();
+bool SyncSourceFeedback::_connect(OperationContext* txn, const HostAndPort& host) {
+ if (hasConnection()) {
+ return true;
}
-
- void SyncSourceFeedback::forwardSlaveProgress() {
- boost::unique_lock<boost::mutex> lock(_mtx);
- _positionChanged = true;
- _cond.notify_all();
+ log() << "replset setting syncSourceFeedback to " << host.toString();
+ _connection.reset(new DBClientConnection(false, OplogReader::tcp_timeout));
+ string errmsg;
+ try {
+ if (!_connection->connect(host, errmsg) ||
+ (getGlobalAuthorizationManager()->isAuthEnabled() && !replAuthenticate())) {
+ _resetConnection();
+ log() << "repl: " << errmsg << endl;
+ return false;
+ }
+ } catch (const DBException& e) {
+ log() << "Error connecting to " << host.toString() << ": " << e.what();
+ _resetConnection();
+ return false;
}
- Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getMemberState().primary()) {
- // primary has no one to update to
+ return hasConnection();
+}
+
+void SyncSourceFeedback::forwardSlaveHandshake() {
+ boost::unique_lock<boost::mutex> lock(_mtx);
+ _handshakeNeeded = true;
+ _cond.notify_all();
+}
+
+void SyncSourceFeedback::forwardSlaveProgress() {
+ boost::unique_lock<boost::mutex> lock(_mtx);
+ _positionChanged = true;
+ _cond.notify_all();
+}
+
+Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getMemberState().primary()) {
+ // primary has no one to update to
+ return Status::OK();
+ }
+ BSONObjBuilder cmd;
+ {
+ boost::unique_lock<boost::mutex> lock(_mtx);
+ if (_handshakeNeeded) {
+ // Don't send updates if there are nodes that haven't yet been handshaked
+ return Status(ErrorCodes::NodeNotFound,
+ "Need to send handshake before updating position upstream");
+ }
+ // the command could not be created, likely because the node was removed from the set
+ if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
return Status::OK();
}
- BSONObjBuilder cmd;
+ }
+ BSONObj res;
+
+ LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
+ try {
+ _connection->runCommand("admin", cmd.obj(), res);
+ } catch (const DBException& e) {
+ log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
+ // blacklist sync target for .5 seconds and find a new one
+ replCoord->blacklistSyncSource(_syncTarget, Date_t(curTimeMillis64() + 500));
+ BackgroundSync::get()->clearSyncTarget();
+ _resetConnection();
+ return e.toStatus();
+ }
+
+ Status status = Command::getStatusFromCommandResult(res);
+ if (!status.isOK()) {
+ log() << "SyncSourceFeedback error sending update, response: " << res.toString() << endl;
+ // blacklist sync target for .5 seconds and find a new one
+ replCoord->blacklistSyncSource(_syncTarget, Date_t(curTimeMillis64() + 500));
+ BackgroundSync::get()->clearSyncTarget();
+ _resetConnection();
+ }
+ return status;
+}
+
+void SyncSourceFeedback::shutdown() {
+ boost::unique_lock<boost::mutex> lock(_mtx);
+ _shutdownSignaled = true;
+ _cond.notify_all();
+}
+
+void SyncSourceFeedback::run() {
+ Client::initThread("SyncSourceFeedback");
+ OperationContextImpl txn;
+
+ bool positionChanged = false;
+ bool handshakeNeeded = false;
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
{
boost::unique_lock<boost::mutex> lock(_mtx);
- if (_handshakeNeeded) {
- // Don't send updates if there are nodes that haven't yet been handshaked
- return Status(ErrorCodes::NodeNotFound,
- "Need to send handshake before updating position upstream");
+ while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
+ _cond.wait(lock);
}
- // the command could not be created, likely because the node was removed from the set
- if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
- return Status::OK();
+
+ if (_shutdownSignaled) {
+ break;
}
- }
- BSONObj res;
- LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
- try {
- _connection->runCommand("admin", cmd.obj(), res);
+ positionChanged = _positionChanged;
+ handshakeNeeded = _handshakeNeeded;
+ _positionChanged = false;
+ _handshakeNeeded = false;
}
- catch (const DBException& e) {
- log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
- // blacklist sync target for .5 seconds and find a new one
- replCoord->blacklistSyncSource(_syncTarget,
- Date_t(curTimeMillis64() + 500));
- BackgroundSync::get()->clearSyncTarget();
+
+ MemberState state = replCoord->getMemberState();
+ if (state.primary() || state.startup()) {
_resetConnection();
- return e.toStatus();
+ continue;
}
-
- Status status = Command::getStatusFromCommandResult(res);
- if (!status.isOK()) {
- log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
- // blacklist sync target for .5 seconds and find a new one
- replCoord->blacklistSyncSource(_syncTarget,
- Date_t(curTimeMillis64() + 500));
- BackgroundSync::get()->clearSyncTarget();
+ const HostAndPort target = BackgroundSync::get()->getSyncTarget();
+ if (_syncTarget != target) {
_resetConnection();
+ _syncTarget = target;
}
- return status;
- }
-
- void SyncSourceFeedback::shutdown() {
- boost::unique_lock<boost::mutex> lock(_mtx);
- _shutdownSignaled = true;
- _cond.notify_all();
- }
-
- void SyncSourceFeedback::run() {
- Client::initThread("SyncSourceFeedback");
- OperationContextImpl txn;
-
- bool positionChanged = false;
- bool handshakeNeeded = false;
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
- {
- boost::unique_lock<boost::mutex> lock(_mtx);
- while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
- _cond.wait(lock);
- }
-
- if (_shutdownSignaled) {
- break;
- }
-
- positionChanged = _positionChanged;
- handshakeNeeded = _handshakeNeeded;
- _positionChanged = false;
- _handshakeNeeded = false;
- }
-
- MemberState state = replCoord->getMemberState();
- if (state.primary() || state.startup()) {
- _resetConnection();
+ if (!hasConnection()) {
+ // fix connection if need be
+ if (target.empty()) {
+ sleepmillis(500);
continue;
}
- const HostAndPort target = BackgroundSync::get()->getSyncTarget();
- if (_syncTarget != target) {
- _resetConnection();
- _syncTarget = target;
+ if (!_connect(&txn, target)) {
+ sleepmillis(500);
+ continue;
}
- if (!hasConnection()) {
- // fix connection if need be
- if (target.empty()) {
- sleepmillis(500);
- continue;
- }
- if (!_connect(&txn, target)) {
- sleepmillis(500);
- continue;
- }
- handshakeNeeded = true;
+ handshakeNeeded = true;
+ }
+ if (handshakeNeeded) {
+ positionChanged = true;
+ if (!replHandshake(&txn)) {
+ boost::unique_lock<boost::mutex> lock(_mtx);
+ _handshakeNeeded = true;
+ continue;
}
- if (handshakeNeeded) {
- positionChanged = true;
- if (!replHandshake(&txn)) {
- boost::unique_lock<boost::mutex> lock(_mtx);
+ }
+ if (positionChanged) {
+ Status status = updateUpstream(&txn);
+ if (!status.isOK()) {
+ boost::unique_lock<boost::mutex> lock(_mtx);
+ _positionChanged = true;
+ if (status == ErrorCodes::NodeNotFound) {
_handshakeNeeded = true;
- continue;
- }
- }
- if (positionChanged) {
- Status status = updateUpstream(&txn);
- if (!status.isOK()) {
- boost::unique_lock<boost::mutex> lock(_mtx);
- _positionChanged = true;
- if (status == ErrorCodes::NodeNotFound) {
- _handshakeNeeded = true;
- }
}
}
}
- cc().shutdown();
}
-} // namespace repl
-} // namespace mongo
+ cc().shutdown();
+}
+} // namespace repl
+} // namespace mongo
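
The run() loop above is a standard wake/drain/act worker: forwardSlaveProgress() and forwardSlaveHandshake() set a flag under _mtx and notify _cond; the feedback thread waits until a flag or the shutdown signal is set, copies and clears the flags while still holding the lock, and only then performs the slow network send, so signalers never block on I/O. A minimal sketch of that shape, using std::mutex and std::condition_variable where the original uses their boost equivalents (all names here are illustrative):

// feedback_loop_sketch.cpp -- the wake/drain/act pattern of run().
#include <condition_variable>
#include <mutex>

class FeedbackLoopSketch {
public:
    void signalPositionChanged() {  // cf. forwardSlaveProgress()
        std::lock_guard<std::mutex> lk(_mtx);
        _positionChanged = true;
        _cond.notify_all();
    }
    void shutdown() {
        std::lock_guard<std::mutex> lk(_mtx);
        _shutdown = true;
        _cond.notify_all();
    }
    void run() {
        while (true) {
            bool positionChanged;
            {
                std::unique_lock<std::mutex> lk(_mtx);
                // Sleep until there is work to forward or we are told to stop.
                _cond.wait(lk, [this] { return _positionChanged || _shutdown; });
                if (_shutdown)
                    break;
                positionChanged = _positionChanged;  // drain under the lock...
                _positionChanged = false;
            }
            if (positionChanged)
                sendProgressUpstream();  // ...act outside it
        }
    }

private:
    void sendProgressUpstream() {}  // placeholder for the updatePosition send

    std::mutex _mtx;
    std::condition_variable _cond;
    bool _positionChanged = false;
    bool _shutdown = false;
};

A caller starts run() on a dedicated thread and invokes signalPositionChanged() from the applier thread, mirroring how forwardSlaveProgress() is driven elsewhere in the replication subsystem.
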
diff --git a/src/mongo/db/repl/sync_source_feedback.h b/src/mongo/db/repl/sync_source_feedback.h
index 0f6d24cb1a0..fce865494a2 100644
--- a/src/mongo/db/repl/sync_source_feedback.h
+++ b/src/mongo/db/repl/sync_source_feedback.h
@@ -38,81 +38,81 @@
#include "mongo/util/net/hostandport.h"
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class SyncSourceFeedback {
- public:
- SyncSourceFeedback();
- ~SyncSourceFeedback();
-
- /// Ensures local.me is populated and populates it if not.
- /// TODO(spencer): Remove this function once the LegacyReplicationCoordinator is gone.
- void ensureMe(OperationContext* txn);
-
- /// Notifies the SyncSourceFeedbackThread to wake up and send a handshake up the replication
- /// chain, upon receiving a handshake.
- void forwardSlaveHandshake();
-
- /// Notifies the SyncSourceFeedbackThread to wake up and send an update upstream of slave
- /// replication progress.
- void forwardSlaveProgress();
-
- /// Loops continuously until shutdown() is called, passing updates when they are present.
- /// TODO(spencer): Currently also can terminate when the global inShutdown() function
- /// returns true. Remove that once the legacy repl coordinator is gone.
- void run();
-
- /// Signals the run() method to terminate.
- void shutdown();
-
- private:
- void _resetConnection();
-
- /**
- * Authenticates _connection using the server's cluster-membership credentials.
- *
- * Returns true on successful authentication.
- */
- bool replAuthenticate();
-
- /* Sends initialization information to our sync target, also determines whether or not they
- * support the updater command.
- */
- bool replHandshake(OperationContext* txn);
-
- /* Inform the sync target of our current position in the oplog, as well as the positions
- * of all secondaries chained through us.
- * ErrorCodes::NodeNotFound indicates that the caller should re-run replHandshake before
- * calling this again.
- */
- Status updateUpstream(OperationContext* txn);
-
- bool hasConnection() {
- return _connection.get();
- }
-
- /// Connect to sync target.
- bool _connect(OperationContext* txn, const HostAndPort& host);
-
- // stores our OID to be passed along in commands
- /// TODO(spencer): Remove this once the LegacyReplicationCoordinator is gone.
- BSONObj _me;
- // the member we are currently syncing from
- HostAndPort _syncTarget;
- // our connection to our sync target
- boost::scoped_ptr<DBClientConnection> _connection;
- // protects cond, _shutdownSignaled, and the indicator bools.
- boost::mutex _mtx;
- // used to alert our thread of changes which need to be passed up the chain
- boost::condition _cond;
- // used to indicate a position change which has not yet been pushed along
- bool _positionChanged;
- // used to indicate a connection change which has not yet been shook on
- bool _handshakeNeeded;
- // Once this is set to true the _run method will terminate
- bool _shutdownSignaled;
- };
-} // namespace repl
-} // namespace mongo
+class SyncSourceFeedback {
+public:
+ SyncSourceFeedback();
+ ~SyncSourceFeedback();
+
+ /// Ensures local.me is populated and populates it if not.
+ /// TODO(spencer): Remove this function once the LegacyReplicationCoordinator is gone.
+ void ensureMe(OperationContext* txn);
+
+ /// Notifies the SyncSourceFeedbackThread to wake up and send a handshake up the replication
+ /// chain, upon receiving a handshake.
+ void forwardSlaveHandshake();
+
+ /// Notifies the SyncSourceFeedbackThread to wake up and send an update upstream of slave
+ /// replication progress.
+ void forwardSlaveProgress();
+
+ /// Loops continuously until shutdown() is called, passing updates when they are present.
+ /// TODO(spencer): Currently also can terminate when the global inShutdown() function
+ /// returns true. Remove that once the legacy repl coordinator is gone.
+ void run();
+
+ /// Signals the run() method to terminate.
+ void shutdown();
+
+private:
+ void _resetConnection();
+
+ /**
+ * Authenticates _connection using the server's cluster-membership credentials.
+ *
+ * Returns true on successful authentication.
+ */
+ bool replAuthenticate();
+
+ /* Sends initialization information to our sync target, also determines whether or not they
+ * support the updater command.
+ */
+ bool replHandshake(OperationContext* txn);
+
+ /* Inform the sync target of our current position in the oplog, as well as the positions
+ * of all secondaries chained through us.
+ * ErrorCodes::NodeNotFound indicates that the caller should re-run replHandshake before
+ * calling this again.
+ */
+ Status updateUpstream(OperationContext* txn);
+
+ bool hasConnection() {
+ return _connection.get();
+ }
+
+ /// Connect to sync target.
+ bool _connect(OperationContext* txn, const HostAndPort& host);
+
+ // stores our OID to be passed along in commands
+ /// TODO(spencer): Remove this once the LegacyReplicationCoordinator is gone.
+ BSONObj _me;
+ // the member we are currently syncing from
+ HostAndPort _syncTarget;
+ // our connection to our sync target
+ boost::scoped_ptr<DBClientConnection> _connection;
+ // protects cond, _shutdownSignaled, and the indicator bools.
+ boost::mutex _mtx;
+ // used to alert our thread of changes which need to be passed up the chain
+ boost::condition _cond;
+ // used to indicate a position change which has not yet been pushed along
+ bool _positionChanged;
+ // used to indicate a connection change which has not yet been shook on
+ bool _handshakeNeeded;
+ // Once this is set to true the _run method will terminate
+ bool _shutdownSignaled;
+};
+} // namespace repl
+} // namespace mongo
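
replHandshake() and updateUpstream() above share one failure policy: blacklist the current sync target for a short interval (10 seconds after a handshake NodeNotFound, half a second after a failed position update), clear it via BackgroundSync, and let the next loop iteration pick a fresh source. The toy expiry map below captures that idea under the assumption that std::chrono stands in for Date_t and curTimeMillis64(); the real blacklist lives inside the ReplicationCoordinator, not in this class.

// blacklist_sketch.cpp -- time-boxed sync source blacklisting.
#include <chrono>
#include <map>
#include <string>

using Clock = std::chrono::steady_clock;

class SyncSourceBlacklistSketch {
public:
    // e.g. blacklist(host, std::chrono::milliseconds(500)) after a failed
    // updatePosition send, or std::chrono::seconds(10) after a handshake
    // NodeNotFound error.
    void blacklist(const std::string& host, Clock::duration d) {
        _until[host] = Clock::now() + d;
    }

    // Sync source selection skips hosts whose entry has not yet expired.
    bool isBlacklisted(const std::string& host) const {
        std::map<std::string, Clock::time_point>::const_iterator it = _until.find(host);
        return it != _until.end() && Clock::now() < it->second;
    }

private:
    std::map<std::string, Clock::time_point> _until;
};
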
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 4b8c8dbfecb..dbde6ca3549 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -59,268 +59,242 @@
namespace mongo {
- using std::endl;
+using std::endl;
namespace repl {
#if defined(MONGO_PLATFORM_64)
- const int replWriterThreadCount = 16;
- const int replPrefetcherThreadCount = 16;
+const int replWriterThreadCount = 16;
+const int replPrefetcherThreadCount = 16;
#elif defined(MONGO_PLATFORM_32)
- const int replWriterThreadCount = 2;
- const int replPrefetcherThreadCount = 2;
+const int replWriterThreadCount = 2;
+const int replPrefetcherThreadCount = 2;
#else
#error need to include something that defines MONGO_PLATFORM_XX
#endif
- static Counter64 opsAppliedStats;
+static Counter64 opsAppliedStats;
- //The oplog entries applied
- static ServerStatusMetricField<Counter64> displayOpsApplied( "repl.apply.ops",
- &opsAppliedStats );
+// The oplog entries applied
+static ServerStatusMetricField<Counter64> displayOpsApplied("repl.apply.ops", &opsAppliedStats);
- MONGO_FP_DECLARE(rsSyncApplyStop);
+MONGO_FP_DECLARE(rsSyncApplyStop);
- // Number and time of each ApplyOps worker pool round
- static TimerStats applyBatchStats;
- static ServerStatusMetricField<TimerStats> displayOpBatchesApplied(
- "repl.apply.batches",
- &applyBatchStats );
- void initializePrefetchThread() {
- if (!ClientBasic::getCurrent()) {
- Client::initThreadIfNotAlready();
- cc().getAuthorizationSession()->grantInternalAuthorization();
- }
+// Number and time of each ApplyOps worker pool round
+static TimerStats applyBatchStats;
+static ServerStatusMetricField<TimerStats> displayOpBatchesApplied("repl.apply.batches",
+ &applyBatchStats);
+void initializePrefetchThread() {
+ if (!ClientBasic::getCurrent()) {
+ Client::initThreadIfNotAlready();
+ cc().getAuthorizationSession()->grantInternalAuthorization();
}
- namespace {
- bool isCrudOpType( const char* field ) {
- switch ( field[0] ) {
- case 'd':
- case 'i':
- case 'u':
- return field[1] == 0;
- }
- return false;
- }
+}
+namespace {
+bool isCrudOpType(const char* field) {
+ switch (field[0]) {
+ case 'd':
+ case 'i':
+ case 'u':
+ return field[1] == 0;
}
+ return false;
+}
+}
- SyncTail::SyncTail(BackgroundSyncInterface *q, MultiSyncApplyFunc func) :
- Sync(""),
- _networkQueue(q),
- _applyFunc(func),
- _writerPool(replWriterThreadCount, "repl writer worker "),
- _prefetcherPool(replPrefetcherThreadCount, "repl prefetch worker ")
- {}
+SyncTail::SyncTail(BackgroundSyncInterface* q, MultiSyncApplyFunc func)
+ : Sync(""),
+ _networkQueue(q),
+ _applyFunc(func),
+ _writerPool(replWriterThreadCount, "repl writer worker "),
+ _prefetcherPool(replPrefetcherThreadCount, "repl prefetch worker ") {}
- SyncTail::~SyncTail() {}
+SyncTail::~SyncTail() {}
- bool SyncTail::peek(BSONObj* op) {
- return _networkQueue->peek(op);
+bool SyncTail::peek(BSONObj* op) {
+ return _networkQueue->peek(op);
+}
+/* apply the log op that is in param o
+ @return bool success (true) or failure (false)
+*/
+bool SyncTail::syncApply(OperationContext* txn, const BSONObj& op, bool convertUpdateToUpsert) {
+ if (inShutdown()) {
+ return true;
}
- /* apply the log op that is in param o
- @return bool success (true) or failure (false)
- */
- bool SyncTail::syncApply(OperationContext* txn,
- const BSONObj &op,
- bool convertUpdateToUpsert) {
- if (inShutdown()) {
- return true;
- }
-
- // Count each log op application as a separate operation, for reporting purposes
- txn->getCurOp()->reset();
+ // Count each log op application as a separate operation, for reporting purposes
+ txn->getCurOp()->reset();
- const char *ns = op.getStringField("ns");
- verify(ns);
+ const char* ns = op.getStringField("ns");
+ verify(ns);
- if ( (*ns == '\0') || (*ns == '.') ) {
- // this is ugly
- // this is often a no-op
- // but can't be 100% sure
- if( *op.getStringField("op") != 'n' ) {
- error() << "replSet skipping bad op in oplog: " << op.toString();
- }
- return true;
+ if ((*ns == '\0') || (*ns == '.')) {
+ // this is ugly
+ // this is often a no-op
+ // but can't be 100% sure
+ if (*op.getStringField("op") != 'n') {
+ error() << "replSet skipping bad op in oplog: " << op.toString();
}
+ return true;
+ }
- const char* opType = op["op"].valuestrsafe();
-
- bool isCommand(opType[0] == 'c');
+ const char* opType = op["op"].valuestrsafe();
- for ( int createCollection = 0; createCollection < 2; createCollection++ ) {
- try {
- boost::scoped_ptr<Lock::GlobalWrite> globalWriteLock;
+ bool isCommand(opType[0] == 'c');
- // DB lock always acquires the global lock
- boost::scoped_ptr<Lock::DBLock> dbLock;
- boost::scoped_ptr<Lock::CollectionLock> collectionLock;
+ for (int createCollection = 0; createCollection < 2; createCollection++) {
+ try {
+ boost::scoped_ptr<Lock::GlobalWrite> globalWriteLock;
- bool isIndexBuild = opType[0] == 'i' &&
- nsToCollectionSubstring( ns ) == "system.indexes";
+ // DB lock always acquires the global lock
+ boost::scoped_ptr<Lock::DBLock> dbLock;
+ boost::scoped_ptr<Lock::CollectionLock> collectionLock;
- if (isCommand) {
- // a command may need a global write lock. so we will conservatively go
- // ahead and grab one here. suboptimal. :-(
- globalWriteLock.reset(new Lock::GlobalWrite(txn->lockState()));
- }
- else if (isIndexBuild) {
- dbLock.reset(new Lock::DBLock(txn->lockState(),
- nsToDatabaseSubstring(ns), MODE_X));
- }
- else if (isCrudOpType(opType)) {
- LockMode mode = createCollection ? MODE_X : MODE_IX;
- dbLock.reset(new Lock::DBLock(txn->lockState(),
- nsToDatabaseSubstring(ns), mode));
- collectionLock.reset(new Lock::CollectionLock(txn->lockState(), ns, mode));
-
- if (!createCollection && !dbHolder().get(txn, nsToDatabaseSubstring(ns))) {
- // need to create database, try again
- continue;
- }
- }
- else {
- // Unknown op?
- dbLock.reset(new Lock::DBLock(txn->lockState(),
- nsToDatabaseSubstring(ns), MODE_X));
- }
+ bool isIndexBuild = opType[0] == 'i' && nsToCollectionSubstring(ns) == "system.indexes";
- Client::Context ctx(txn, ns);
+ if (isCommand) {
+ // a command may need a global write lock. so we will conservatively go
+ // ahead and grab one here. suboptimal. :-(
+ globalWriteLock.reset(new Lock::GlobalWrite(txn->lockState()));
+ } else if (isIndexBuild) {
+ dbLock.reset(new Lock::DBLock(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X));
+ } else if (isCrudOpType(opType)) {
+ LockMode mode = createCollection ? MODE_X : MODE_IX;
+ dbLock.reset(new Lock::DBLock(txn->lockState(), nsToDatabaseSubstring(ns), mode));
+ collectionLock.reset(new Lock::CollectionLock(txn->lockState(), ns, mode));
- if ( createCollection == 0 &&
- !isIndexBuild &&
- isCrudOpType(opType) &&
- ctx.db()->getCollection(ns) == NULL ) {
- // uh, oh, we need to create collection
- // try again
+ if (!createCollection && !dbHolder().get(txn, nsToDatabaseSubstring(ns))) {
+ // need to create database, try again
continue;
}
-
- // For non-initial-sync, we convert updates to upserts
- // to suppress errors when replaying oplog entries.
- bool ok = !applyOperation_inlock(txn, ctx.db(), op, true, convertUpdateToUpsert);
- opsAppliedStats.increment();
- return ok;
+ } else {
+ // Unknown op?
+ dbLock.reset(new Lock::DBLock(txn->lockState(), nsToDatabaseSubstring(ns), MODE_X));
}
- catch (const WriteConflictException&) {
- log() << "WriteConflictException while doing oplog application on: " << ns
- << ", retrying.";
- createCollection--;
+
+ Client::Context ctx(txn, ns);
+
+ if (createCollection == 0 && !isIndexBuild && isCrudOpType(opType) &&
+ ctx.db()->getCollection(ns) == NULL) {
+ // uh, oh, we need to create collection
+ // try again
+ continue;
}
- }
- // Keeps the compiler warnings happy
- invariant(false);
- return false;
+ // For non-initial-sync, we convert updates to upserts
+ // to suppress errors when replaying oplog entries.
+ bool ok = !applyOperation_inlock(txn, ctx.db(), op, true, convertUpdateToUpsert);
+ opsAppliedStats.increment();
+ return ok;
+ } catch (const WriteConflictException&) {
+ log() << "WriteConflictException while doing oplog application on: " << ns
+ << ", retrying.";
+ createCollection--;
+ }
}
- // The pool threads call this to prefetch each op
- void SyncTail::prefetchOp(const BSONObj& op) {
- initializePrefetchThread();
-
- const char *ns = op.getStringField("ns");
- if (ns && (ns[0] != '\0')) {
- try {
- // one possible tweak here would be to stay in the read lock for this database
- // for multiple prefetches if they are for the same database.
- OperationContextImpl txn;
- AutoGetCollectionForRead ctx(&txn, ns);
- Database* db = ctx.getDb();
- if (db) {
- prefetchPagesForReplicatedOp(&txn, db, op);
- }
- }
- catch (const DBException& e) {
- LOG(2) << "ignoring exception in prefetchOp(): " << e.what() << endl;
- }
- catch (const std::exception& e) {
- log() << "Unhandled std::exception in prefetchOp(): " << e.what() << endl;
- fassertFailed(16397);
+ // Keeps the compiler warnings happy
+ invariant(false);
+ return false;
+}
+
+// The pool threads call this to prefetch each op
+void SyncTail::prefetchOp(const BSONObj& op) {
+ initializePrefetchThread();
+
+ const char* ns = op.getStringField("ns");
+ if (ns && (ns[0] != '\0')) {
+ try {
+ // one possible tweak here would be to stay in the read lock for this database
+ // for multiple prefetches if they are for the same database.
+ OperationContextImpl txn;
+ AutoGetCollectionForRead ctx(&txn, ns);
+ Database* db = ctx.getDb();
+ if (db) {
+ prefetchPagesForReplicatedOp(&txn, db, op);
}
+ } catch (const DBException& e) {
+ LOG(2) << "ignoring exception in prefetchOp(): " << e.what() << endl;
+ } catch (const std::exception& e) {
+ log() << "Unhandled std::exception in prefetchOp(): " << e.what() << endl;
+ fassertFailed(16397);
}
}
+}
- // Doles out all the work to the reader pool threads and waits for them to complete
- void SyncTail::prefetchOps(const std::deque<BSONObj>& ops) {
- for (std::deque<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- _prefetcherPool.schedule(&prefetchOp, *it);
- }
- _prefetcherPool.join();
+// Doles out all the work to the reader pool threads and waits for them to complete
+void SyncTail::prefetchOps(const std::deque<BSONObj>& ops) {
+ for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ _prefetcherPool.schedule(&prefetchOp, *it);
}
-
- // Doles out all the work to the writer pool threads and waits for them to complete
- void SyncTail::applyOps(const std::vector< std::vector<BSONObj> >& writerVectors) {
- TimerHolder timer(&applyBatchStats);
- for (std::vector< std::vector<BSONObj> >::const_iterator it = writerVectors.begin();
- it != writerVectors.end();
- ++it) {
- if (!it->empty()) {
- _writerPool.schedule(_applyFunc, boost::cref(*it), this);
- }
+ _prefetcherPool.join();
+}
+
+// Doles out all the work to the writer pool threads and waits for them to complete
+void SyncTail::applyOps(const std::vector<std::vector<BSONObj>>& writerVectors) {
+ TimerHolder timer(&applyBatchStats);
+ for (std::vector<std::vector<BSONObj>>::const_iterator it = writerVectors.begin();
+ it != writerVectors.end();
+ ++it) {
+ if (!it->empty()) {
+ _writerPool.schedule(_applyFunc, boost::cref(*it), this);
}
- _writerPool.join();
}
+ _writerPool.join();
+}
- // Doles out all the work to the writer pool threads and waits for them to complete
- OpTime SyncTail::multiApply(OperationContext* txn, std::deque<BSONObj>& ops) {
+// Doles out all the work to the writer pool threads and waits for them to complete
+OpTime SyncTail::multiApply(OperationContext* txn, std::deque<BSONObj>& ops) {
+ if (getGlobalEnvironment()->getGlobalStorageEngine()->isMmapV1()) {
+ // Use a ThreadPool to prefetch all the operations in a batch.
+ prefetchOps(ops);
+ }
- if (getGlobalEnvironment()->getGlobalStorageEngine()->isMmapV1()) {
- // Use a ThreadPool to prefetch all the operations in a batch.
- prefetchOps(ops);
- }
-
- std::vector< std::vector<BSONObj> > writerVectors(replWriterThreadCount);
- fillWriterVectors(ops, &writerVectors);
- LOG(2) << "replication batch size is " << ops.size() << endl;
- // We must grab this because we're going to grab write locks later.
- // We hold this mutex the entire time we're writing; it doesn't matter
- // because all readers are blocked anyway.
- SimpleMutex::scoped_lock fsynclk(filesLockedFsync);
-
- // stop all readers until we're done
- Lock::ParallelBatchWriterMode pbwm(txn->lockState());
-
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- if (replCoord->getMemberState().primary() &&
- !replCoord->isWaitingForApplierToDrain()) {
-
- severe() << "attempting to replicate ops while primary";
- fassertFailed(28527);
- }
+ std::vector<std::vector<BSONObj>> writerVectors(replWriterThreadCount);
+ fillWriterVectors(ops, &writerVectors);
+ LOG(2) << "replication batch size is " << ops.size() << endl;
+ // We must grab this because we're going to grab write locks later.
+ // We hold this mutex the entire time we're writing; it doesn't matter
+ // because all readers are blocked anyway.
+ SimpleMutex::scoped_lock fsynclk(filesLockedFsync);
+
+ // stop all readers until we're done
+ Lock::ParallelBatchWriterMode pbwm(txn->lockState());
+
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ if (replCoord->getMemberState().primary() && !replCoord->isWaitingForApplierToDrain()) {
+ severe() << "attempting to replicate ops while primary";
+ fassertFailed(28527);
+ }
- applyOps(writerVectors);
+ applyOps(writerVectors);
- if (inShutdown()) {
- return OpTime();
- }
+ if (inShutdown()) {
+ return OpTime();
+ }
- OpTime lastOpTime = writeOpsToOplog(txn, ops);
+ OpTime lastOpTime = writeOpsToOplog(txn, ops);
- BackgroundSync::get()->notify(txn);
+ BackgroundSync::get()->notify(txn);
- return lastOpTime;
- }
+ return lastOpTime;
+}
- void SyncTail::fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors) {
-
- for (std::deque<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- const BSONElement e = it->getField("ns");
- verify(e.type() == String);
- const char* ns = e.valuestr();
- int len = e.valuestrsize();
- uint32_t hash = 0;
- MurmurHash3_x86_32( ns, len, 0, &hash);
-
- const char* opType = it->getField( "op" ).valuestrsafe();
-
- if (getGlobalEnvironment()->getGlobalStorageEngine()->supportsDocLocking() &&
- isCrudOpType(opType)) {
- BSONElement id;
- switch (opType[0]) {
+void SyncTail::fillWriterVectors(const std::deque<BSONObj>& ops,
+ std::vector<std::vector<BSONObj>>* writerVectors) {
+ for (std::deque<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ const BSONElement e = it->getField("ns");
+ verify(e.type() == String);
+ const char* ns = e.valuestr();
+ int len = e.valuestrsize();
+ uint32_t hash = 0;
+ MurmurHash3_x86_32(ns, len, 0, &hash);
+
+ const char* opType = it->getField("op").valuestrsafe();
+
+ if (getGlobalEnvironment()->getGlobalStorageEngine()->supportsDocLocking() &&
+ isCrudOpType(opType)) {
+ BSONElement id;
+ switch (opType[0]) {
case 'u':
id = it->getField("o2").Obj()["_id"];
break;
@@ -328,381 +302,373 @@ namespace repl {
case 'i':
id = it->getField("o").Obj()["_id"];
break;
- }
-
- const size_t idHash = BSONElement::Hasher()( id );
- MurmurHash3_x86_32(&idHash, sizeof(idHash), hash, &hash);
}
- (*writerVectors)[hash % writerVectors->size()].push_back(*it);
+ const size_t idHash = BSONElement::Hasher()(id);
+ MurmurHash3_x86_32(&idHash, sizeof(idHash), hash, &hash);
}
- }
- void SyncTail::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
- _applyOplogUntil(txn, endOpTime);
- }
- /* applies oplog from "now" until endOpTime using the applier threads for initial sync*/
- void SyncTail::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTime) {
- unsigned long long bytesApplied = 0;
- unsigned long long entriesApplied = 0;
- while (true) {
- OpQueue ops;
-
- while (!tryPopAndWaitForMore(txn, &ops, getGlobalReplicationCoordinator())) {
- // nothing came back last time, so go again
- if (ops.empty()) continue;
-
- // Check if we reached the end
- const BSONObj currentOp = ops.back();
- const OpTime currentOpTime = currentOp["ts"]._opTime();
+ (*writerVectors)[hash % writerVectors->size()].push_back(*it);
+ }
+}
+void SyncTail::oplogApplication(OperationContext* txn, const OpTime& endOpTime) {
+ _applyOplogUntil(txn, endOpTime);
+}
- // When we reach the end return this batch
- if (currentOpTime == endOpTime) {
- break;
- }
- else if (currentOpTime > endOpTime) {
- severe() << "Applied past expected end " << endOpTime << " to " << currentOpTime
- << " without seeing it. Rollback?";
- fassertFailedNoTrace(18693);
- }
+/* applies oplog from "now" until endOpTime using the applier threads for initial sync*/
+void SyncTail::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTime) {
+ unsigned long long bytesApplied = 0;
+ unsigned long long entriesApplied = 0;
+ while (true) {
+ OpQueue ops;
- // apply replication batch limits
- if (ops.getSize() > replBatchLimitBytes)
- break;
- if (ops.getDeque().size() > replBatchLimitOperations)
- break;
- };
+ while (!tryPopAndWaitForMore(txn, &ops, getGlobalReplicationCoordinator())) {
+ // nothing came back last time, so go again
+ if (ops.empty())
+ continue;
- if (ops.empty()) {
- severe() << "got no ops for batch...";
- fassertFailedNoTrace(18692);
+ // Check if we reached the end
+ const BSONObj currentOp = ops.back();
+ const OpTime currentOpTime = currentOp["ts"]._opTime();
+
+ // When we reach the end return this batch
+ if (currentOpTime == endOpTime) {
+ break;
+ } else if (currentOpTime > endOpTime) {
+ severe() << "Applied past expected end " << endOpTime << " to " << currentOpTime
+ << " without seeing it. Rollback?";
+ fassertFailedNoTrace(18693);
}
- const BSONObj lastOp = ops.back().getOwned();
+ // apply replication batch limits
+ if (ops.getSize() > replBatchLimitBytes)
+ break;
+ if (ops.getDeque().size() > replBatchLimitOperations)
+ break;
+ };
- // Tally operation information
- bytesApplied += ops.getSize();
- entriesApplied += ops.getDeque().size();
+ if (ops.empty()) {
+ severe() << "got no ops for batch...";
+ fassertFailedNoTrace(18692);
+ }
- const OpTime lastOpTime = multiApply(txn, ops.getDeque());
+ const BSONObj lastOp = ops.back().getOwned();
- if (inShutdown()) {
- return;
- }
+ // Tally operation information
+ bytesApplied += ops.getSize();
+ entriesApplied += ops.getDeque().size();
- // if the last op applied was our end, return
- if (lastOpTime == endOpTime) {
- LOG(1) << "SyncTail applied " << entriesApplied
- << " entries (" << bytesApplied << " bytes)"
- << " and finished at opTime " << endOpTime.toStringPretty();
- return;
- }
- } // end of while (true)
- }
+ const OpTime lastOpTime = multiApply(txn, ops.getDeque());
-namespace {
- void tryToGoLiveAsASecondary(OperationContext* txn, ReplicationCoordinator* replCoord) {
- if (replCoord->isInPrimaryOrSecondaryState()) {
+ if (inShutdown()) {
return;
}
- ScopedTransaction transaction(txn, MODE_S);
- Lock::GlobalRead readLock(txn->lockState());
-
- if (replCoord->getMaintenanceMode()) {
- // we're not actually going live
+ // if the last op applied was our end, return
+ if (lastOpTime == endOpTime) {
+ LOG(1) << "SyncTail applied " << entriesApplied << " entries (" << bytesApplied
+ << " bytes)"
+ << " and finished at opTime " << endOpTime.toStringPretty();
return;
}
+ } // end of while (true)
+}
- // Only state RECOVERING can transition to SECONDARY.
- MemberState state(replCoord->getMemberState());
- if (!state.recovering()) {
- return;
- }
+namespace {
+void tryToGoLiveAsASecondary(OperationContext* txn, ReplicationCoordinator* replCoord) {
+ if (replCoord->isInPrimaryOrSecondaryState()) {
+ return;
+ }
- OpTime minvalid = getMinValid(txn);
- if (minvalid > replCoord->getMyLastOptime()) {
- return;
- }
+ ScopedTransaction transaction(txn, MODE_S);
+ Lock::GlobalRead readLock(txn->lockState());
- bool worked = replCoord->setFollowerMode(MemberState::RS_SECONDARY);
- if (!worked) {
- warning() << "Failed to transition into " << MemberState(MemberState::RS_SECONDARY)
- << ". Current state: " << replCoord->getMemberState();
- }
+ if (replCoord->getMaintenanceMode()) {
+ // we're not actually going live
+ return;
+ }
+
+ // Only state RECOVERING can transition to SECONDARY.
+ MemberState state(replCoord->getMemberState());
+ if (!state.recovering()) {
+ return;
+ }
+
+ OpTime minvalid = getMinValid(txn);
+ if (minvalid > replCoord->getMyLastOptime()) {
+ return;
+ }
+
+ bool worked = replCoord->setFollowerMode(MemberState::RS_SECONDARY);
+ if (!worked) {
+ warning() << "Failed to transition into " << MemberState(MemberState::RS_SECONDARY)
+ << ". Current state: " << replCoord->getMemberState();
}
}
+}
- /* tail an oplog. ok to return, will be re-called. */
- void SyncTail::oplogApplication() {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+/* tail an oplog. ok to return, will be re-called. */
+void SyncTail::oplogApplication() {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- while(!inShutdown()) {
- OpQueue ops;
- OperationContextImpl txn;
+ while (!inShutdown()) {
+ OpQueue ops;
+ OperationContextImpl txn;
- Timer batchTimer;
- int lastTimeChecked = 0;
+ Timer batchTimer;
+ int lastTimeChecked = 0;
- do {
- int now = batchTimer.seconds();
+ do {
+ int now = batchTimer.seconds();
- // apply replication batch limits
- if (!ops.empty()) {
- if (now > replBatchLimitSeconds)
- break;
- if (ops.getDeque().size() > replBatchLimitOperations)
- break;
- }
- // occasionally check some things
- // (always checked in the first iteration of this do-while loop, because
- // ops is empty)
- if (ops.empty() || now > lastTimeChecked) {
- BackgroundSync* bgsync = BackgroundSync::get();
- if (bgsync->getInitialSyncRequestedFlag()) {
- // got a resync command
- return;
- }
- lastTimeChecked = now;
- // can we become secondary?
- // we have to check this before calling mgr, as we must be a secondary to
- // become primary
- tryToGoLiveAsASecondary(&txn, replCoord);
+ // apply replication batch limits
+ if (!ops.empty()) {
+ if (now > replBatchLimitSeconds)
+ break;
+ if (ops.getDeque().size() > replBatchLimitOperations)
+ break;
+ }
+ // occasionally check some things
+ // (always checked in the first iteration of this do-while loop, because
+ // ops is empty)
+ if (ops.empty() || now > lastTimeChecked) {
+ BackgroundSync* bgsync = BackgroundSync::get();
+ if (bgsync->getInitialSyncRequestedFlag()) {
+ // got a resync command
+ return;
}
+ lastTimeChecked = now;
+ // can we become secondary?
+ // we have to check this before calling mgr, as we must be a secondary to
+ // become primary
+ tryToGoLiveAsASecondary(&txn, replCoord);
+ }
- const int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();
- if (!ops.empty() && slaveDelaySecs > 0) {
- const BSONObj& lastOp = ops.getDeque().back();
- const unsigned int opTimestampSecs = lastOp["ts"]._opTime().getSecs();
+ const int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();
+ if (!ops.empty() && slaveDelaySecs > 0) {
+ const BSONObj& lastOp = ops.getDeque().back();
+ const unsigned int opTimestampSecs = lastOp["ts"]._opTime().getSecs();
- // Stop the batch as the lastOp is too new to be applied. If we continue
- // on, we can get ops that are way ahead of the delay and this will
- // make this thread sleep longer when handleSlaveDelay is called
- // and apply ops much sooner than we like.
- if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
- break;
- }
+ // Stop the batch as the lastOp is too new to be applied. If we continue
+ // on, we can get ops that are way ahead of the delay and this will
+ // make this thread sleep longer when handleSlaveDelay is called
+ // and apply ops much sooner than we like.
+ if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
+ break;
}
- // keep fetching more ops as long as we haven't filled up a full batch yet
- } while (!tryPopAndWaitForMore(&txn, &ops, replCoord) && // tryPopAndWaitForMore returns
- // true when we need to end a
- // batch early
- (ops.getSize() < replBatchLimitBytes) &&
- !inShutdown());
-
- // For pausing replication in tests
- while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
- sleepmillis(0);
}
+ // keep fetching more ops as long as we haven't filled up a full batch yet
+ } while (!tryPopAndWaitForMore(&txn, &ops, replCoord) && // tryPopAndWaitForMore returns
+ // true when we need to end a
+ // batch early
+ (ops.getSize() < replBatchLimitBytes) &&
+ !inShutdown());
+
+ // For pausing replication in tests
+ while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
+ sleepmillis(0);
+ }
- if (ops.empty()) {
- continue;
- }
+ if (ops.empty()) {
+ continue;
+ }
- const BSONObj& lastOp = ops.getDeque().back();
- handleSlaveDelay(lastOp);
+ const BSONObj& lastOp = ops.getDeque().back();
+ handleSlaveDelay(lastOp);
- // Set minValid to the last op to be applied in this next batch.
- // This will cause this node to go into RECOVERING state
- // if we should crash and restart before updating the oplog
- OpTime minValid = lastOp["ts"]._opTime();
- setMinValid(&txn, minValid);
- multiApply(&txn, ops.getDeque());
- }
+ // Set minValid to the last op to be applied in this next batch.
+ // This will cause this node to go into RECOVERING state
+ // if we should crash and restart before updating the oplog
+ OpTime minValid = lastOp["ts"]._opTime();
+ setMinValid(&txn, minValid);
+ multiApply(&txn, ops.getDeque());
}
+}
- // Copies ops out of the bgsync queue into the deque passed in as a parameter.
- // Returns true if the batch should be ended early.
- // Batch should end early if we encounter a command, or if
- // there are no further ops in the bgsync queue to read.
- // This function also blocks 1 second waiting for new ops to appear in the bgsync
- // queue. We can't block forever because there are maintenance things we need
- // to periodically check in the loop.
- bool SyncTail::tryPopAndWaitForMore(OperationContext* txn,
- SyncTail::OpQueue* ops,
- ReplicationCoordinator* replCoord) {
- BSONObj op;
- // Check to see if there are ops waiting in the bgsync queue
- bool peek_success = peek(&op);
-
- if (!peek_success) {
- // if we don't have anything in the queue, wait a bit for something to appear
- if (ops->empty()) {
- if (replCoord->isWaitingForApplierToDrain()) {
- BackgroundSync::get()->waitUntilPaused();
- if (peek(&op)) {
- // The producer generated a last batch of ops before pausing so return
- // false so that we'll come back and apply them before signaling the drain
- // is complete.
- return false;
- }
- replCoord->signalDrainComplete(txn);
+// Copies ops out of the bgsync queue into the deque passed in as a parameter.
+// Returns true if the batch should be ended early.
+// Batch should end early if we encounter a command, or if
+// there are no further ops in the bgsync queue to read.
+// This function also blocks 1 second waiting for new ops to appear in the bgsync
+// queue. We can't block forever because there are maintenance things we need
+// to periodically check in the loop.
+bool SyncTail::tryPopAndWaitForMore(OperationContext* txn,
+ SyncTail::OpQueue* ops,
+ ReplicationCoordinator* replCoord) {
+ BSONObj op;
+ // Check to see if there are ops waiting in the bgsync queue
+ bool peek_success = peek(&op);
+
+ if (!peek_success) {
+ // if we don't have anything in the queue, wait a bit for something to appear
+ if (ops->empty()) {
+ if (replCoord->isWaitingForApplierToDrain()) {
+ BackgroundSync::get()->waitUntilPaused();
+ if (peek(&op)) {
+ // The producer generated a last batch of ops before pausing so return
+ // false so that we'll come back and apply them before signaling the drain
+ // is complete.
+ return false;
}
- // block up to 1 second
- _networkQueue->waitForMore();
- return false;
+ replCoord->signalDrainComplete(txn);
}
-
- // otherwise, apply what we have
- return true;
+ // block up to 1 second
+ _networkQueue->waitForMore();
+ return false;
}
- const char* ns = op["ns"].valuestrsafe();
-
- // check for commands
- if ((op["op"].valuestrsafe()[0] == 'c') ||
- // Index builds are acheived through the use of an insert op, not a command op.
- // The following line is the same as what the insert code uses to detect an index build.
- ( *ns != '\0' && nsToCollectionSubstring(ns) == "system.indexes" )) {
-
- if (ops->empty()) {
- // apply commands one-at-a-time
- ops->push_back(op);
- _networkQueue->consume();
- }
+ // otherwise, apply what we have
+ return true;
+ }
- // otherwise, apply what we have so far and come back for the command
- return true;
+ const char* ns = op["ns"].valuestrsafe();
+
+ // check for commands
+ if ((op["op"].valuestrsafe()[0] == 'c') ||
+        // Index builds are achieved through the use of an insert op, not a command op.
+ // The following line is the same as what the insert code uses to detect an index build.
+ (*ns != '\0' && nsToCollectionSubstring(ns) == "system.indexes")) {
+ if (ops->empty()) {
+ // apply commands one-at-a-time
+ ops->push_back(op);
+ _networkQueue->consume();
}
- // check for oplog version change
- BSONElement elemVersion = op["v"];
- int curVersion = 0;
- if (elemVersion.eoo())
- // missing version means version 1
- curVersion = 1;
- else
- curVersion = elemVersion.Int();
-
- if (curVersion != OPLOG_VERSION) {
- severe() << "expected oplog version " << OPLOG_VERSION << " but found version "
- << curVersion << " in oplog entry: " << op;
- fassertFailedNoTrace(18820);
- }
-
- // Copy the op to the deque and remove it from the bgsync queue.
- ops->push_back(op);
- _networkQueue->consume();
+ // otherwise, apply what we have so far and come back for the command
+ return true;
+ }
- // Go back for more ops
- return false;
+ // check for oplog version change
+ BSONElement elemVersion = op["v"];
+ int curVersion = 0;
+ if (elemVersion.eoo())
+ // missing version means version 1
+ curVersion = 1;
+ else
+ curVersion = elemVersion.Int();
+
+ if (curVersion != OPLOG_VERSION) {
+ severe() << "expected oplog version " << OPLOG_VERSION << " but found version "
+ << curVersion << " in oplog entry: " << op;
+ fassertFailedNoTrace(18820);
}
- void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
- ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
- int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();
-
- // ignore slaveDelay if the box is still initializing. once
- // it becomes secondary we can worry about it.
- if( slaveDelaySecs > 0 && replCoord->getMemberState().secondary() ) {
- const OpTime ts = lastOp["ts"]._opTime();
- long long a = ts.getSecs();
- long long b = time(0);
- long long lag = b - a;
- long long sleeptime = slaveDelaySecs - lag;
- if( sleeptime > 0 ) {
- uassert(12000, "rs slaveDelay differential too big check clocks and systems",
- sleeptime < 0x40000000);
- if( sleeptime < 60 ) {
- sleepsecs((int) sleeptime);
- }
- else {
- warning() << "replSet slavedelay causing a long sleep of " << sleeptime
- << " seconds";
- // sleep(hours) would prevent reconfigs from taking effect & such!
- long long waitUntil = b + sleeptime;
- while(time(0) < waitUntil) {
- sleepsecs(6);
-
- // Handle reconfigs that changed the slave delay
- if (replCoord->getSlaveDelaySecs().total_seconds() != slaveDelaySecs)
- break;
- }
+ // Copy the op to the deque and remove it from the bgsync queue.
+ ops->push_back(op);
+ _networkQueue->consume();
+
+ // Go back for more ops
+ return false;
+}
+
+void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
+ ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
+ int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();
+
+ // ignore slaveDelay if the box is still initializing. once
+ // it becomes secondary we can worry about it.
+ if (slaveDelaySecs > 0 && replCoord->getMemberState().secondary()) {
+ const OpTime ts = lastOp["ts"]._opTime();
+ long long a = ts.getSecs();
+ long long b = time(0);
+ long long lag = b - a;
+ long long sleeptime = slaveDelaySecs - lag;
+ if (sleeptime > 0) {
+ uassert(12000,
+ "rs slaveDelay differential too big check clocks and systems",
+ sleeptime < 0x40000000);
+ if (sleeptime < 60) {
+ sleepsecs((int)sleeptime);
+ } else {
+ warning() << "replSet slavedelay causing a long sleep of " << sleeptime
+ << " seconds";
+ // sleep(hours) would prevent reconfigs from taking effect & such!
+ long long waitUntil = b + sleeptime;
+ while (time(0) < waitUntil) {
+ sleepsecs(6);
+
+ // Handle reconfigs that changed the slave delay
+ if (replCoord->getSlaveDelaySecs().total_seconds() != slaveDelaySecs)
+ break;
}
}
- } // endif slaveDelay
- }
+ }
+ } // endif slaveDelay
+}
- static AtomicUInt32 replWriterWorkerId;
+static AtomicUInt32 replWriterWorkerId;
- static void initializeWriterThread() {
- // Only do this once per thread
- if (!ClientBasic::getCurrent()) {
- Client::initThreadIfNotAlready();
- cc().getAuthorizationSession()->grantInternalAuthorization();
- }
+static void initializeWriterThread() {
+ // Only do this once per thread
+ if (!ClientBasic::getCurrent()) {
+ Client::initThreadIfNotAlready();
+ cc().getAuthorizationSession()->grantInternalAuthorization();
}
+}
- // This free function is used by the writer threads to apply each op
- void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
- initializeWriterThread();
+// This free function is used by the writer threads to apply each op
+void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
+ initializeWriterThread();
- OperationContextImpl txn;
+ OperationContextImpl txn;
- // allow us to get through the magic barrier
- txn.lockState()->setIsBatchWriter(true);
+ // allow us to get through the magic barrier
+ txn.lockState()->setIsBatchWriter(true);
- bool convertUpdatesToUpserts = true;
+ bool convertUpdatesToUpserts = true;
- for (std::vector<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- try {
- if (!st->syncApply(&txn, *it, convertUpdatesToUpserts)) {
- fassertFailedNoTrace(16359);
- }
+ for (std::vector<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ try {
+ if (!st->syncApply(&txn, *it, convertUpdatesToUpserts)) {
+ fassertFailedNoTrace(16359);
}
- catch (const DBException& e) {
- error() << "writer worker caught exception: " << causedBy(e)
- << " on: " << it->toString();
+ } catch (const DBException& e) {
+ error() << "writer worker caught exception: " << causedBy(e)
+ << " on: " << it->toString();
- if (inShutdown()) {
- return;
- }
-
- fassertFailedNoTrace(16360);
+ if (inShutdown()) {
+ return;
}
+
+ fassertFailedNoTrace(16360);
}
}
+}
- // This free function is used by the initial sync writer threads to apply each op
- void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
- initializeWriterThread();
-
- OperationContextImpl txn;
+// This free function is used by the initial sync writer threads to apply each op
+void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
+ initializeWriterThread();
- // allow us to get through the magic barrier
- txn.lockState()->setIsBatchWriter(true);
+ OperationContextImpl txn;
- for (std::vector<BSONObj>::const_iterator it = ops.begin();
- it != ops.end();
- ++it) {
- try {
- if (!st->syncApply(&txn, *it)) {
+ // allow us to get through the magic barrier
+ txn.lockState()->setIsBatchWriter(true);
- if (st->shouldRetry(&txn, *it)) {
- if (!st->syncApply(&txn, *it)) {
- fassertFailedNoTrace(15915);
- }
+ for (std::vector<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
+ try {
+ if (!st->syncApply(&txn, *it)) {
+ if (st->shouldRetry(&txn, *it)) {
+ if (!st->syncApply(&txn, *it)) {
+ fassertFailedNoTrace(15915);
}
-
- // If shouldRetry() returns false, fall through.
- // This can happen if the document that was moved and missed by Cloner
- // subsequently got deleted and no longer exists on the Sync Target at all
}
- }
- catch (const DBException& e) {
- error() << "writer worker caught exception: " << causedBy(e)
- << " on: " << it->toString();
- if (inShutdown()) {
- return;
- }
+ // If shouldRetry() returns false, fall through.
+ // This can happen if the document that was moved and missed by Cloner
+ // subsequently got deleted and no longer exists on the Sync Target at all
+ }
+ } catch (const DBException& e) {
+ error() << "writer worker caught exception: " << causedBy(e)
+ << " on: " << it->toString();
- fassertFailedNoTrace(16361);
+ if (inShutdown()) {
+ return;
}
+
+ fassertFailedNoTrace(16361);
}
}
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
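
The slave-delay handling at the top of this hunk avoids one long sleep: it waits in short slices and re-reads the configured delay so a reconfig can take effect mid-wait. Below is a minimal, self-contained sketch of that pattern; delayApplication and the currentDelaySecs callback are illustrative stand-ins, not the SyncTail interface, which reads the delay from the replication coordinator.

    #include <chrono>
    #include <ctime>
    #include <functional>
    #include <thread>

    // Sleep until batchTimeSecs + delaySecs, waking every 6 seconds to re-read
    // the configured delay; if a reconfig changed it, stop waiting on the old
    // value (mirrors the waitUntil loop in handleSlaveDelay above).
    void delayApplication(long long batchTimeSecs,
                          long long delaySecs,
                          const std::function<long long()>& currentDelaySecs) {
        const long long waitUntil = batchTimeSecs + delaySecs;
        while (std::time(nullptr) < waitUntil) {
            std::this_thread::sleep_for(std::chrono::seconds(6));
            if (currentDelaySecs() != delaySecs) {
                break;  // delay was reconfigured; re-evaluate on the next batch
            }
        }
    }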
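
multiInitialSyncApply above applies each op, and on failure retries exactly once after a successful shouldRetry() refetch, treating a second failure as fatal. A minimal sketch of that control flow follows, under stand-in types: Op, apply, and shouldRetry are hypothetical placeholders, not the real SyncTail API.

    #include <functional>
    #include <stdexcept>
    #include <string>
    #include <vector>

    using Op = std::string;  // stand-in for BSONObj

    // Apply each op; on failure, refetch-and-retry once. If shouldRetry()
    // returns false we fall through, because the missed document may have
    // been deleted on the sync target and there is nothing left to apply.
    void applyInitialSyncBatch(const std::vector<Op>& ops,
                               const std::function<bool(const Op&)>& apply,
                               const std::function<bool(const Op&)>& shouldRetry) {
        for (const Op& op : ops) {
            if (apply(op)) {
                continue;
            }
            if (shouldRetry(op) && !apply(op)) {
                // a second failure after a successful refetch is fatal,
                // like fassertFailedNoTrace(15915) in the code above
                throw std::runtime_error("failed to apply op during initial sync");
            }
        }
    }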
diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h
index 1802a05f41c..82db52c02c3 100644
--- a/src/mongo/db/repl/sync_tail.h
+++ b/src/mongo/db/repl/sync_tail.h
@@ -36,108 +36,112 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
- class BackgroundSyncInterface;
- class ReplicationCoordinator;
+class BackgroundSyncInterface;
+class ReplicationCoordinator;
+
+/**
+ * "Normal" replica set syncing
+ */
+class SyncTail : public Sync {
+ typedef void (*MultiSyncApplyFunc)(const std::vector<BSONObj>& ops, SyncTail* st);
+
+public:
+ SyncTail(BackgroundSyncInterface* q, MultiSyncApplyFunc func);
+ virtual ~SyncTail();
+ virtual bool syncApply(OperationContext* txn,
+ const BSONObj& o,
+ bool convertUpdateToUpsert = false);
/**
- * "Normal" replica set syncing
+ * Runs _applyOplogUntil(stopOpTime)
*/
- class SyncTail : public Sync {
- typedef void (*MultiSyncApplyFunc)(const std::vector<BSONObj>& ops, SyncTail* st);
+ virtual void oplogApplication(OperationContext* txn, const OpTime& stopOpTime);
+
+ void oplogApplication();
+ bool peek(BSONObj* obj);
+
+ class OpQueue {
public:
- SyncTail(BackgroundSyncInterface *q, MultiSyncApplyFunc func);
- virtual ~SyncTail();
- virtual bool syncApply(OperationContext* txn,
- const BSONObj &o,
- bool convertUpdateToUpsert = false);
-
- /**
- * Runs _applyOplogUntil(stopOpTime)
- */
- virtual void oplogApplication(OperationContext* txn, const OpTime& stopOpTime);
-
- void oplogApplication();
- bool peek(BSONObj* obj);
-
- class OpQueue {
- public:
- OpQueue() : _size(0) {}
- size_t getSize() { return _size; }
- std::deque<BSONObj>& getDeque() { return _deque; }
- void push_back(BSONObj& op) {
- _deque.push_back(op);
- _size += op.objsize();
- }
- bool empty() {
- return _deque.empty();
- }
-
- BSONObj back() {
- verify(!_deque.empty());
- return _deque.back();
- }
-
- private:
- std::deque<BSONObj> _deque;
- size_t _size;
- };
-
- // returns true if we should continue waiting for BSONObjs, false if we should
- // stop waiting and apply the queue we have. Only returns false if !ops.empty().
- bool tryPopAndWaitForMore(OperationContext* txn,
- OpQueue* ops,
- ReplicationCoordinator* replCoord);
-
- protected:
- // Cap the batches using the limit on journal commits.
- // This works out to be 100 MB (64 bit) or 50 MB (32 bit)
- static const unsigned int replBatchLimitBytes = dur::UncommittedBytesLimit;
- static const int replBatchLimitSeconds = 1;
- static const unsigned int replBatchLimitOperations = 5000;
-
- // Prefetch and write a deque of operations, using the supplied function.
- // Initial Sync and Sync Tail each use a different function.
- // Returns the last OpTime applied.
- OpTime multiApply(OperationContext* txn, std::deque<BSONObj>& ops);
-
- /**
- * Applies oplog entries until reaching "endOpTime".
- *
- * NOTE:Will not transition or check states
- */
- void _applyOplogUntil(OperationContext* txn, const OpTime& endOpTime);
+ OpQueue() : _size(0) {}
+ size_t getSize() {
+ return _size;
+ }
+ std::deque<BSONObj>& getDeque() {
+ return _deque;
+ }
+ void push_back(BSONObj& op) {
+ _deque.push_back(op);
+ _size += op.objsize();
+ }
+ bool empty() {
+ return _deque.empty();
+ }
+
+ BSONObj back() {
+ verify(!_deque.empty());
+ return _deque.back();
+ }
private:
- BackgroundSyncInterface* _networkQueue;
+ std::deque<BSONObj> _deque;
+ size_t _size;
+ };
- // Function to use during applyOps
- MultiSyncApplyFunc _applyFunc;
+ // returns true if we should continue waiting for BSONObjs, false if we should
+ // stop waiting and apply the queue we have. Only returns false if !ops.empty().
+ bool tryPopAndWaitForMore(OperationContext* txn,
+ OpQueue* ops,
+ ReplicationCoordinator* replCoord);
- // Doles out all the work to the reader pool threads and waits for them to complete
- void prefetchOps(const std::deque<BSONObj>& ops);
- // Used by the thread pool readers to prefetch an op
- static void prefetchOp(const BSONObj& op);
+protected:
+ // Cap the batches using the limit on journal commits.
+ // This works out to be 100 MB (64 bit) or 50 MB (32 bit)
+ static const unsigned int replBatchLimitBytes = dur::UncommittedBytesLimit;
+ static const int replBatchLimitSeconds = 1;
+ static const unsigned int replBatchLimitOperations = 5000;
- // Doles out all the work to the writer pool threads and waits for them to complete
- void applyOps(const std::vector< std::vector<BSONObj> >& writerVectors);
+ // Prefetch and write a deque of operations, using the supplied function.
+ // Initial Sync and Sync Tail each use a different function.
+ // Returns the last OpTime applied.
+ OpTime multiApply(OperationContext* txn, std::deque<BSONObj>& ops);
- void fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors);
- void handleSlaveDelay(const BSONObj& op);
+ /**
+ * Applies oplog entries until reaching "endOpTime".
+ *
+ * NOTE: Will not transition or check states
+ */
+ void _applyOplogUntil(OperationContext* txn, const OpTime& endOpTime);
- // persistent pool of worker threads for writing ops to the databases
- threadpool::ThreadPool _writerPool;
- // persistent pool of worker threads for prefetching
- threadpool::ThreadPool _prefetcherPool;
+private:
+ BackgroundSyncInterface* _networkQueue;
- };
+ // Function to use during applyOps
+ MultiSyncApplyFunc _applyFunc;
+
+ // Doles out all the work to the reader pool threads and waits for them to complete
+ void prefetchOps(const std::deque<BSONObj>& ops);
+ // Used by the thread pool readers to prefetch an op
+ static void prefetchOp(const BSONObj& op);
+
+ // Doles out all the work to the writer pool threads and waits for them to complete
+ void applyOps(const std::vector<std::vector<BSONObj>>& writerVectors);
+
+ void fillWriterVectors(const std::deque<BSONObj>& ops,
+ std::vector<std::vector<BSONObj>>* writerVectors);
+ void handleSlaveDelay(const BSONObj& op);
+
+ // persistent pool of worker threads for writing ops to the databases
+ threadpool::ThreadPool _writerPool;
+ // persistent pool of worker threads for prefetching
+ threadpool::ThreadPool _prefetcherPool;
+};
- // These free functions are used by the thread pool workers to write ops to the db.
- void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
- void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
+// These free functions are used by the thread pool workers to write ops to the db.
+void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
+void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st);
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
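
SyncTail caps each batch three ways, per the protected constants above: total bytes (replBatchLimitBytes), operation count (replBatchLimitOperations), and elapsed time (replBatchLimitSeconds). The sketch below shows how such caps might combine; the constants and batchHasRoom are illustrative assumptions, and the real decision lives in tryPopAndWaitForMore, whose byte cap derives from dur::UncommittedBytesLimit.

    #include <cstddef>
    #include <ctime>

    struct BatchLimits {
        std::size_t maxBytes = 100 * 1024 * 1024;  // cf. replBatchLimitBytes (64-bit)
        std::size_t maxOps = 5000;                 // cf. replBatchLimitOperations
        long long maxSeconds = 1;                  // cf. replBatchLimitSeconds
    };

    // Returns true while the batch may keep accumulating ops; once any cap
    // is reached the caller should stop waiting and apply what it has.
    bool batchHasRoom(std::size_t batchBytes,
                      std::size_t batchOps,
                      std::time_t batchStart,
                      const BatchLimits& limits) {
        return batchBytes < limits.maxBytes && batchOps < limits.maxOps &&
            (std::time(nullptr) - batchStart) < limits.maxSeconds;
    }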
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 99738a38421..7ca7ba6aa84 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -39,30 +39,30 @@
namespace mongo {
namespace repl {
namespace {
- static const int kLeaderValue = 0;
- static const int kFollowerValue = 1;
- static const int kCandidateValue = 2;
+static const int kLeaderValue = 0;
+static const int kFollowerValue = 1;
+static const int kCandidateValue = 2;
} // namespace
- const TopologyCoordinator::Role TopologyCoordinator::Role::leader(kLeaderValue);
- const TopologyCoordinator::Role TopologyCoordinator::Role::follower(kFollowerValue);
- const TopologyCoordinator::Role TopologyCoordinator::Role::candidate(kCandidateValue);
+const TopologyCoordinator::Role TopologyCoordinator::Role::leader(kLeaderValue);
+const TopologyCoordinator::Role TopologyCoordinator::Role::follower(kFollowerValue);
+const TopologyCoordinator::Role TopologyCoordinator::Role::candidate(kCandidateValue);
- TopologyCoordinator::Role::Role(int value) : _value(value) {}
+TopologyCoordinator::Role::Role(int value) : _value(value) {}
- std::string TopologyCoordinator::Role::toString() const {
- switch(_value) {
+std::string TopologyCoordinator::Role::toString() const {
+ switch (_value) {
case kLeaderValue:
return "leader";
case kFollowerValue:
return "follower";
case kCandidateValue:
return "candidate";
- }
- invariant(false);
}
+ invariant(false);
+}
- TopologyCoordinator::~TopologyCoordinator() {}
+TopologyCoordinator::~TopologyCoordinator() {}
} // namespace repl
} // namespace mongo
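
Role is a small value class whose only instances are the named statics defined above, so call sites compare roles by value without ever touching the integer encoding. A standalone sketch of the pattern, mirroring the shape of this diff but not the MongoDB implementation itself:

    #include <iostream>
    #include <string>

    class Role {
    public:
        static const Role leader;
        static const Role follower;
        static const Role candidate;

        bool operator==(Role other) const { return _value == other._value; }
        bool operator!=(Role other) const { return _value != other._value; }

        std::string toString() const {
            switch (_value) {
                case 0: return "leader";
                case 1: return "follower";
                default: return "candidate";
            }
        }

    private:
        explicit Role(int value) : _value(value) {}  // only the statics construct
        int _value;
    };

    const Role Role::leader(0);
    const Role Role::follower(1);
    const Role Role::candidate(2);

    int main() {
        Role r = Role::candidate;
        if (r != Role::leader) {
            std::cout << "node is a " << r.toString() << std::endl;  // "candidate"
        }
        return 0;
    }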
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 97f85faf521..315882020f0 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -42,364 +42,367 @@ namespace mongo {
namespace repl {
- class HeartbeatResponseAction;
- class ReplSetHeartbeatArgs;
- class ReplicaSetConfig;
- class TagSubgroup;
- struct MemberState;
+class HeartbeatResponseAction;
+class ReplSetHeartbeatArgs;
+class ReplicaSetConfig;
+class TagSubgroup;
+struct MemberState;
+
+/**
+ * Replication Topology Coordinator interface.
+ *
+ * This object is responsible for managing the topology of the cluster.
+ * Tasks include consensus and leader election, chaining, and configuration management.
+ * Methods of this class should be non-blocking.
+ */
+class TopologyCoordinator {
+ MONGO_DISALLOW_COPYING(TopologyCoordinator);
+
+public:
+ class Role;
+
+ virtual ~TopologyCoordinator();
+
+ ////////////////////////////////////////////////////////////
+ //
+ // State inspection methods.
+ //
+ ////////////////////////////////////////////////////////////
+
+ /**
+ * Gets the role of this member in the replication protocol.
+ */
+ virtual Role getRole() const = 0;
+
+ /**
+ * Gets the MemberState of this member in the replica set.
+ */
+ virtual MemberState getMemberState() const = 0;
+
+ /**
+ * Returns the address of the current sync source, or an empty HostAndPort if there is no
+ * current sync source.
+ */
+ virtual HostAndPort getSyncSourceAddress() const = 0;
+
+ /**
+ * Retrieves a vector of HostAndPorts containing all nodes that are neither DOWN nor
+ * ourself.
+ */
+ virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const = 0;
+
+ /**
+ * Gets the earliest time the current node will stand for election.
+ */
+ virtual Date_t getStepDownTime() const = 0;
+
+ /**
+ * Gets the current value of the maintenance mode counter.
+ */
+ virtual int getMaintenanceCount() const = 0;
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Basic state manipulation methods.
+ //
+ ////////////////////////////////////////////////////////////
+
+ /**
+ * Sets the index into the config used when we next choose a sync source
+ */
+ virtual void setForceSyncSourceIndex(int index) = 0;
+
+ /**
+ * Chooses and sets a new sync source, based on our current knowledge of the world.
+ */
+ virtual HostAndPort chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied) = 0;
+
+ /**
+ * Suppresses selecting "host" as sync source until "until".
+ */
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
+
+ /**
+ * Removes a single entry "host" from the list of potential sync sources which we
+ * have blacklisted, if it is supposed to be unblacklisted by "now".
+ */
+ virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now) = 0;
+
+ /**
+ * Clears the list of potential sync sources we have blacklisted.
+ */
+ virtual void clearSyncSourceBlacklist() = 0;
+
+ /**
+ * Determines if a new sync source should be chosen, if a better candidate sync source is
+ * available. If the current sync source's last optime is more than _maxSyncSourceLagSecs
+ * behind any syncable source, this function returns true.
+ *
+ * "now" is used to skip over currently blacklisted sync sources.
+ */
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const = 0;
/**
- * Replication Topology Coordinator interface.
+ * Checks whether we are a single node set and we are not in a stepdown period. If so,
+ * puts us into candidate mode, otherwise does nothing. This is used to ensure that
+ * nodes in a single node replset become primary again when their stepdown period ends.
+ */
+ virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) = 0;
+
+ /**
+ * Sets the earliest time the current node will stand for election to "newTime".
*
- * This object is responsible for managing the topology of the cluster.
- * Tasks include consensus and leader election, chaining, and configuration management.
- * Methods of this class should be non-blocking.
+ * Until this time, while the node may report itself as electable, it will not stand
+ * for election.
+ */
+ virtual void setElectionSleepUntil(Date_t newTime) = 0;
+
+ /**
+ * Sets the reported mode of this node to one of RS_SECONDARY, RS_STARTUP2, RS_ROLLBACK or
+ * RS_RECOVERING, when getRole() == Role::follower. This is the interface by which the
+ * applier changes the reported member state of the current node, and enables or suppresses
+ * electability of the current node. All modes but RS_SECONDARY indicate an unelectable
+ * follower state (one that cannot transition to candidate).
*/
- class TopologyCoordinator {
- MONGO_DISALLOW_COPYING(TopologyCoordinator);
- public:
- class Role;
-
- virtual ~TopologyCoordinator();
-
- ////////////////////////////////////////////////////////////
- //
- // State inspection methods.
- //
- ////////////////////////////////////////////////////////////
-
- /**
- * Gets the role of this member in the replication protocol.
- */
- virtual Role getRole() const = 0;
-
- /**
- * Gets the MemberState of this member in the replica set.
- */
- virtual MemberState getMemberState() const = 0;
-
- /**
- * Returns the address of the current sync source, or an empty HostAndPort if there is no
- * current sync source.
- */
- virtual HostAndPort getSyncSourceAddress() const = 0;
-
- /**
- * Retrieves a vector of HostAndPorts containing all nodes that are neither DOWN nor
- * ourself.
- */
- virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const = 0;
-
- /**
- * Gets the earliest time the current node will stand for election.
- */
- virtual Date_t getStepDownTime() const = 0;
-
- /**
- * Gets the current value of the maintenance mode counter.
- */
- virtual int getMaintenanceCount() const = 0;
-
- ////////////////////////////////////////////////////////////
- //
- // Basic state manipulation methods.
- //
- ////////////////////////////////////////////////////////////
-
- /**
- * Sets the index into the config used when we next choose a sync source
- */
- virtual void setForceSyncSourceIndex(int index) = 0;
-
- /**
- * Chooses and sets a new sync source, based on our current knowledge of the world.
- */
- virtual HostAndPort chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied) = 0;
-
- /**
- * Suppresses selecting "host" as sync source until "until".
- */
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) = 0;
-
- /**
- * Removes a single entry "host" from the list of potential sync sources which we
- * have blacklisted, if it is supposed to be unblacklisted by "now".
- */
- virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now) = 0;
-
- /**
- * Clears the list of potential sync sources we have blacklisted.
- */
- virtual void clearSyncSourceBlacklist() = 0;
-
- /**
- * Determines if a new sync source should be chosen, if a better candidate sync source is
- * available. If the current sync source's last optime is more than _maxSyncSourceLagSecs
- * behind any syncable source, this function returns true.
- *
- * "now" is used to skip over currently blacklisted sync sources.
- */
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const = 0;
-
- /**
- * Checks whether we are a single node set and we are not in a stepdown period. If so,
- * puts us into candidate mode, otherwise does nothing. This is used to ensure that
- * nodes in a single node replset become primary again when their stepdown period ends.
- */
- virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) = 0;
-
- /**
- * Sets the earliest time the current node will stand for election to "newTime".
- *
- * Until this time, while the node may report itself as electable, it will not stand
- * for election.
- */
- virtual void setElectionSleepUntil(Date_t newTime) = 0;
-
- /**
- * Sets the reported mode of this node to one of RS_SECONDARY, RS_STARTUP2, RS_ROLLBACK or
- * RS_RECOVERING, when getRole() == Role::follower. This is the interface by which the
- * applier changes the reported member state of the current node, and enables or suppresses
- * electability of the current node. All modes but RS_SECONDARY indicate an unelectable
- * follower state (one that cannot transition to candidate).
- */
- virtual void setFollowerMode(MemberState::MS newMode) = 0;
-
- /**
- * Adjusts the maintenance mode count by "inc".
- *
- * It is an error to call this method if getRole() does not return Role::follower.
- * It is an error to allow the maintenance count to go negative.
- */
- virtual void adjustMaintenanceCountBy(int inc) = 0;
-
- ////////////////////////////////////////////////////////////
- //
- // Methods that prepare responses to command requests.
- //
- ////////////////////////////////////////////////////////////
-
- // produces a reply to a replSetSyncFrom command
- virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackData& data,
- const HostAndPort& target,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to a replSetFresh command
- virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
- Date_t now,
- OpTime lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to a received electCmd
- virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
- Date_t now,
- OpTime lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to a heartbeat
- virtual Status prepareHeartbeatResponse(Date_t now,
- const ReplSetHeartbeatArgs& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response) = 0;
-
- // produce a reply to a status request
- virtual void prepareStatusResponse(const ReplicationExecutor::CallbackData& data,
- Date_t now,
- unsigned uptime,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) = 0;
-
- // produce a reply to an ismaster request. It is only valid to call this if we are a
- // replset.
- virtual void fillIsMasterForReplSet(IsMasterResponse* response) = 0;
-
- // produce a reply to a freeze request
- virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response) = 0;
-
- ////////////////////////////////////////////////////////////
- //
- // Methods for sending and receiving heartbeats,
- // reconfiguring and handling the results of standing for
- // election.
- //
- ////////////////////////////////////////////////////////////
-
- /**
- * Updates the topology coordinator's notion of the replica set configuration.
- *
- * "newConfig" is the new configuration, and "selfIndex" is the index of this
- * node's configuration information in "newConfig", or "selfIndex" is -1 to
- * indicate that this node is not a member of "newConfig".
- *
- * newConfig.isInitialized() should be true, though implementations may accept
- * configurations where this is not true, for testing purposes.
- */
- virtual void updateConfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now,
- OpTime lastOpApplied) = 0;
-
- /**
- * Prepares a heartbeat request appropriate for sending to "target", assuming the
- * current time is "now". "ourSetName" is used as the name for our replica set if
- * the topology coordinator does not have a valid configuration installed.
- *
- * The returned pair contains proper arguments for a replSetHeartbeat command, and
- * an amount of time to wait for the response.
- *
- * This call should be paired (with intervening network communication) with a call to
- * processHeartbeatResponse for the same "target".
- */
- virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target) = 0;
-
- /**
- * Processes a heartbeat response from "target" that arrived around "now", having
- * spent "networkRoundTripTime" millis on the network.
- *
- * Updates internal topology coordinator state, and returns instructions about what action
- * to take next.
- *
- * If the next action indicates StartElection, the topology coordinator has transitioned to
- * the "candidate" role, and will remain there until processWinElection or
- * processLoseElection are called.
- *
- * If the next action indicates "StepDownSelf", the topology coordinator has transitioned
- * to the "follower" role from "leader", and the caller should take any necessary actions
- * to become a follower.
- *
- * If the next action indicates "StepDownRemotePrimary", the caller should take steps to
- * cause the specified remote host to step down from primary to secondary.
- *
- * If the next action indicates "Reconfig", the caller should verify the configuration in
- * hbResponse is acceptable, perform any other reconfiguration actions it must, and call
- * updateConfig with the new configuration and the appropriate value for "selfIndex". It
- * must also wrap up any outstanding elections (by calling processLoseElection or
- * processWinElection) before calling updateConfig.
- *
- * This call should be paired (with intervening network communication) with a call to
- * prepareHeartbeatRequest for the same "target".
- */
- virtual HeartbeatResponseAction processHeartbeatResponse(
- Date_t now,
- Milliseconds networkRoundTripTime,
- const HostAndPort& target,
- const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
- OpTime myLastOpApplied) = 0;
-
- /**
- * If getRole() == Role::candidate and this node has not voted too recently, updates the
- * lastVote tracker and returns true. Otherwise, returns false.
- */
- virtual bool voteForMyself(Date_t now) = 0;
-
- /**
- * Performs state updates associated with winning an election.
- *
- * It is an error to call this if the topology coordinator is not in candidate mode.
- *
- * Exactly one of either processWinElection or processLoseElection must be called if
- * processHeartbeatResponse returns StartElection, to exit candidate mode.
- */
- virtual void processWinElection(OID electionId, OpTime electionOpTime) = 0;
-
- /**
- * Performs state updates associated with losing an election.
- *
- * It is an error to call this if the topology coordinator is not in candidate mode.
- *
- * Exactly one of either processWinElection or processLoseElection must be called if
- * processHeartbeatResponse returns StartElection, to exit candidate mode.
- */
- virtual void processLoseElection() = 0;
-
- /**
- * Tries to transition the coordinator from the leader role to the follower role.
- *
- * Fails if "force" is not set and no follower is known to be up. It is illegal
- * to call this method if the node is not leader.
- *
- * Returns whether or not the step down succeeded.
- */
- virtual bool stepDown(Date_t until, bool force, OpTime lastOpApplied) = 0;
-
- /**
- * Sometimes a request to step down comes in (like via a heartbeat), but we don't have the
- * global exclusive lock so we can't actually stepdown at that moment. When that happens
- * we record that a stepdown request is pending and schedule work to stepdown in the global
- * lock. This method is called after holding the global lock to perform the actual
- * stepdown, but only if the node hasn't already stepped down another way since the work was
- * scheduled. Returns true if it actually steps down, and false otherwise.
- */
- virtual bool stepDownIfPending() = 0;
-
- /**
- * Considers whether or not this node should stand for election, and returns true
- * if the node has transitioned to candidate role as a result of the call.
- */
- virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) = 0;
-
- /**
- * Set the outgoing heartbeat message from self
- */
- virtual void setMyHeartbeatMessage(const Date_t now, const std::string& s) = 0;
-
- /**
- * Writes into 'output' all the information needed to generate a summary of the current
- * replication state for use by the web interface.
- */
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
-
- protected:
- TopologyCoordinator() {}
- };
+ virtual void setFollowerMode(MemberState::MS newMode) = 0;
/**
- * Type that denotes the role of a node in the replication protocol.
+ * Adjusts the maintenance mode count by "inc".
*
- * The role is distinct from MemberState, in that it only deals with the
- * roles a node plays in the basic protocol -- leader, follower and candidate.
- * The mapping between MemberState and Role is complex -- several MemberStates
- * map to the follower role, and MemberState::RS_SECONDARY maps to either
- * follower or candidate roles, e.g.
+ * It is an error to call this method if getRole() does not return Role::follower.
+ * It is an error to allow the maintenance count to go negative.
*/
- class TopologyCoordinator::Role {
- public:
- /**
- * Constant indicating leader role.
- */
- static const Role leader;
+ virtual void adjustMaintenanceCountBy(int inc) = 0;
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Methods that prepare responses to command requests.
+ //
+ ////////////////////////////////////////////////////////////
+
+ // produces a reply to a replSetSyncFrom command
+ virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackData& data,
+ const HostAndPort& target,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to a replSetFresh command
+ virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
+ Date_t now,
+ OpTime lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to a received electCmd
+ virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
+ Date_t now,
+ OpTime lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to a heartbeat
+ virtual Status prepareHeartbeatResponse(Date_t now,
+ const ReplSetHeartbeatArgs& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response) = 0;
+
+ // produce a reply to a status request
+ virtual void prepareStatusResponse(const ReplicationExecutor::CallbackData& data,
+ Date_t now,
+ unsigned uptime,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) = 0;
+
+ // produce a reply to an ismaster request. It is only valid to call this if we are a
+ // replset.
+ virtual void fillIsMasterForReplSet(IsMasterResponse* response) = 0;
+
+ // produce a reply to a freeze request
+ virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response) = 0;
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Methods for sending and receiving heartbeats,
+ // reconfiguring and handling the results of standing for
+ // election.
+ //
+ ////////////////////////////////////////////////////////////
- /**
- * Constant indicating follower role.
- */
- static const Role follower;
+ /**
+ * Updates the topology coordinator's notion of the replica set configuration.
+ *
+ * "newConfig" is the new configuration, and "selfIndex" is the index of this
+ * node's configuration information in "newConfig", or "selfIndex" is -1 to
+ * indicate that this node is not a member of "newConfig".
+ *
+ * newConfig.isInitialized() should be true, though implementations may accept
+ * configurations where this is not true, for testing purposes.
+ */
+ virtual void updateConfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now,
+ OpTime lastOpApplied) = 0;
- /**
- * Constant indicating candidate role
- */
- static const Role candidate;
+ /**
+ * Prepares a heartbeat request appropriate for sending to "target", assuming the
+ * current time is "now". "ourSetName" is used as the name for our replica set if
+ * the topology coordinator does not have a valid configuration installed.
+ *
+ * The returned pair contains proper arguments for a replSetHeartbeat command, and
+ * an amount of time to wait for the response.
+ *
+ * This call should be paired (with intervening network communication) with a call to
+ * processHeartbeatResponse for the same "target".
+ */
+ virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target) = 0;
+
+ /**
+ * Processes a heartbeat response from "target" that arrived around "now", having
+ * spent "networkRoundTripTime" millis on the network.
+ *
+ * Updates internal topology coordinator state, and returns instructions about what action
+ * to take next.
+ *
+ * If the next action indicates StartElection, the topology coordinator has transitioned to
+ * the "candidate" role, and will remain there until processWinElection or
+ * processLoseElection are called.
+ *
+ * If the next action indicates "StepDownSelf", the topology coordinator has transitioned
+ * to the "follower" role from "leader", and the caller should take any necessary actions
+ * to become a follower.
+ *
+ * If the next action indicates "StepDownRemotePrimary", the caller should take steps to
+ * cause the specified remote host to step down from primary to secondary.
+ *
+ * If the next action indicates "Reconfig", the caller should verify the configuration in
+ * hbResponse is acceptable, perform any other reconfiguration actions it must, and call
+ * updateConfig with the new configuration and the appropriate value for "selfIndex". It
+ * must also wrap up any outstanding elections (by calling processLoseElection or
+ * processWinElection) before calling updateConfig.
+ *
+ * This call should be paired (with intervening network communication) with a call to
+ * prepareHeartbeatRequest for the same "target".
+ */
+ virtual HeartbeatResponseAction processHeartbeatResponse(
+ Date_t now,
+ Milliseconds networkRoundTripTime,
+ const HostAndPort& target,
+ const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
+ OpTime myLastOpApplied) = 0;
+
+ /**
+ * If getRole() == Role::candidate and this node has not voted too recently, updates the
+ * lastVote tracker and returns true. Otherwise, returns false.
+ */
+ virtual bool voteForMyself(Date_t now) = 0;
+
+ /**
+ * Performs state updates associated with winning an election.
+ *
+ * It is an error to call this if the topology coordinator is not in candidate mode.
+ *
+ * Exactly one of either processWinElection or processLoseElection must be called if
+ * processHeartbeatResponse returns StartElection, to exit candidate mode.
+ */
+ virtual void processWinElection(OID electionId, OpTime electionOpTime) = 0;
+
+ /**
+ * Performs state updates associated with losing an election.
+ *
+ * It is an error to call this if the topology coordinator is not in candidate mode.
+ *
+ * Exactly one of either processWinElection or processLoseElection must be called if
+ * processHeartbeatResponse returns StartElection, to exit candidate mode.
+ */
+ virtual void processLoseElection() = 0;
+
+ /**
+ * Tries to transition the coordinator from the leader role to the follower role.
+ *
+ * Fails if "force" is not set and no follower is known to be up. It is illegal
+ * to call this method if the node is not leader.
+ *
+ * Returns whether or not the step down succeeded.
+ */
+ virtual bool stepDown(Date_t until, bool force, OpTime lastOpApplied) = 0;
+
+ /**
+ * Sometimes a request to step down comes in (like via a heartbeat), but we don't have the
+ * global exclusive lock so we can't actually stepdown at that moment. When that happens
+ * we record that a stepdown request is pending and schedule work to stepdown in the global
+ * lock. This method is called after holding the global lock to perform the actual
+ * stepdown, but only if the node hasn't already stepped down another way since the work was
+ * scheduled. Returns true if it actually steps down, and false otherwise.
+ */
+ virtual bool stepDownIfPending() = 0;
+
+ /**
+ * Considers whether or not this node should stand for election, and returns true
+ * if the node has transitioned to candidate role as a result of the call.
+ */
+ virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) = 0;
+
+ /**
+ * Set the outgoing heartbeat message from self
+ */
+ virtual void setMyHeartbeatMessage(const Date_t now, const std::string& s) = 0;
+
+ /**
+ * Writes into 'output' all the information needed to generate a summary of the current
+ * replication state for use by the web interface.
+ */
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output) = 0;
+
+protected:
+ TopologyCoordinator() {}
+};
+
+/**
+ * Type that denotes the role of a node in the replication protocol.
+ *
+ * The role is distinct from MemberState, in that it only deals with the
+ * roles a node plays in the basic protocol -- leader, follower and candidate.
+ * The mapping between MemberState and Role is complex -- several MemberStates
+ * map to the follower role, and MemberState::RS_SECONDARY maps to either
+ * follower or candidate roles, e.g.
+ */
+class TopologyCoordinator::Role {
+public:
+ /**
+ * Constant indicating leader role.
+ */
+ static const Role leader;
+
+ /**
+ * Constant indicating follower role.
+ */
+ static const Role follower;
+
+ /**
+ * Constant indicating candidate role
+ */
+ static const Role candidate;
- Role() {}
+ Role() {}
- bool operator==(Role other) const { return _value == other._value; }
- bool operator!=(Role other) const { return _value != other._value; }
+ bool operator==(Role other) const {
+ return _value == other._value;
+ }
+ bool operator!=(Role other) const {
+ return _value != other._value;
+ }
- std::string toString() const;
+ std::string toString() const;
- private:
- explicit Role(int value);
+private:
+ explicit Role(int value);
- int _value;
- };
+ int _value;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
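
The processHeartbeatResponse contract above implies a caller-side dispatch over the returned action. The sketch below illustrates that protocol only; HbAction and handleHeartbeatAction are hypothetical stand-ins for HeartbeatResponseAction and the replication coordinator's internals, not the real API.

    // Hypothetical stand-in for HeartbeatResponseAction's action kinds.
    enum class HbAction { NoAction, StartElection, StepDownSelf, StepDownRemotePrimary, Reconfig };

    // Caller-side dispatch implied by the processHeartbeatResponse contract.
    void handleHeartbeatAction(HbAction action) {
        switch (action) {
            case HbAction::StartElection:
                // We are now a candidate; exactly one of processWinElection /
                // processLoseElection must be called to leave candidate mode.
                break;
            case HbAction::StepDownSelf:
                // We transitioned leader -> follower; relinquish primary duties.
                break;
            case HbAction::StepDownRemotePrimary:
                // Ask the named remote primary to step down to secondary.
                break;
            case HbAction::Reconfig:
                // Verify the config from the heartbeat response, settle any
                // outstanding election, then call updateConfig().
                break;
            case HbAction::NoAction:
                break;
        }
    }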
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 9cb5701faef..8d78463fe6c 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -52,1200 +52,1136 @@
namespace mongo {
namespace repl {
- using std::vector;
+using std::vector;
- const Seconds TopologyCoordinatorImpl::LastVote::leaseTime = Seconds(30);
+const Seconds TopologyCoordinatorImpl::LastVote::leaseTime = Seconds(30);
namespace {
- template <typename T>
- int indexOfIterator(const std::vector<T>& vec,
- typename std::vector<T>::const_iterator& it) {
- return static_cast<int>(it - vec.begin());
- }
-
- // Interval between the time the last heartbeat from a node was received successfully, or
- // the time when we gave up retrying, and when the next heartbeat should be sent to a target.
- const Milliseconds kHeartbeatInterval(Seconds(2).total_milliseconds());
+template <typename T>
+int indexOfIterator(const std::vector<T>& vec, typename std::vector<T>::const_iterator& it) {
+ return static_cast<int>(it - vec.begin());
+}
- // Maximum number of retries for a failed heartbeat.
- const int kMaxHeartbeatRetries = 2;
+// Interval between the time the last heartbeat from a node was received successfully, or
+// the time when we gave up retrying, and when the next heartbeat should be sent to a target.
+const Milliseconds kHeartbeatInterval(Seconds(2).total_milliseconds());
- /**
- * Returns true if the only up heartbeats are auth errors.
- */
- bool _hasOnlyAuthErrorUpHeartbeats(const std::vector<MemberHeartbeatData>& hbdata,
- const int selfIndex) {
- bool foundAuthError = false;
- for (std::vector<MemberHeartbeatData>::const_iterator it = hbdata.begin();
- it != hbdata.end();
- ++it) {
- if (indexOfIterator(hbdata, it) == selfIndex) {
- continue;
- }
+// Maximum number of retries for a failed heartbeat.
+const int kMaxHeartbeatRetries = 2;
- if (it->up()) {
- return false;
- }
+/**
+ * Returns true if the only up heartbeats are auth errors.
+ */
+bool _hasOnlyAuthErrorUpHeartbeats(const std::vector<MemberHeartbeatData>& hbdata,
+ const int selfIndex) {
+ bool foundAuthError = false;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = hbdata.begin(); it != hbdata.end();
+ ++it) {
+ if (indexOfIterator(hbdata, it) == selfIndex) {
+ continue;
+ }
- if (it->hasAuthIssue()) {
- foundAuthError = true;
- }
+ if (it->up()) {
+ return false;
}
- return foundAuthError;
+ if (it->hasAuthIssue()) {
+ foundAuthError = true;
+ }
}
+ return foundAuthError;
+}
+
} // namespace
- PingStats::PingStats() :
- count(0),
- value(std::numeric_limits<unsigned int>::max()),
- _lastHeartbeatStartDate(0),
- _numFailuresSinceLastStart(std::numeric_limits<int>::max()) {
+PingStats::PingStats()
+ : count(0),
+ value(std::numeric_limits<unsigned int>::max()),
+ _lastHeartbeatStartDate(0),
+ _numFailuresSinceLastStart(std::numeric_limits<int>::max()) {}
+
+void PingStats::start(Date_t now) {
+ _lastHeartbeatStartDate = now;
+ _numFailuresSinceLastStart = 0;
+}
+
+void PingStats::hit(int millis) {
+ _numFailuresSinceLastStart = std::numeric_limits<int>::max();
+ ++count;
+ value = value == std::numeric_limits<unsigned int>::max()
+ ? millis
+ : static_cast<unsigned long>((value * .8) + (millis * .2));
+}
+
+void PingStats::miss() {
+ ++_numFailuresSinceLastStart;
+}
+
+TopologyCoordinatorImpl::TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs)
+ : _role(Role::follower),
+ _currentPrimaryIndex(-1),
+ _forceSyncSourceIndex(-1),
+ _maxSyncSourceLagSecs(maxSyncSourceLagSecs),
+ _selfIndex(-1),
+ _stepDownPending(false),
+ _stepDownUntil(0),
+ _electionSleepUntil(0),
+ _maintenanceModeCalls(0),
+ _followerMode(MemberState::RS_STARTUP2) {
+ invariant(getMemberState() == MemberState::RS_STARTUP);
+}
+
+TopologyCoordinator::Role TopologyCoordinatorImpl::getRole() const {
+ return _role;
+}
+
+void TopologyCoordinatorImpl::setForceSyncSourceIndex(int index) {
+ invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
+ _forceSyncSourceIndex = index;
+}
+
+HostAndPort TopologyCoordinatorImpl::getSyncSourceAddress() const {
+ return _syncSource;
+}
+
+HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied) {
+ // If we are primary, then we aren't syncing from anyone (else).
+ if (_iAmPrimary()) {
+ return HostAndPort();
+ }
+
+ // If we are not a member of the current replica set configuration, no sync source is valid.
+ if (_selfIndex == -1) {
+ LOG(2) << "Cannot sync from any members because we are not in the replica set config";
+ return HostAndPort();
+ }
+
+ // if we have a target we've requested to sync from, use it
+ if (_forceSyncSourceIndex != -1) {
+ invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
+ _syncSource = _rsConfig.getMemberAt(_forceSyncSourceIndex).getHostAndPort();
+ _forceSyncSourceIndex = -1;
+ std::string msg(str::stream() << "syncing from: " << _syncSource.toString()
+ << " by request");
+ log() << msg << rsLog;
+ setMyHeartbeatMessage(now, msg);
+ return _syncSource;
}
- void PingStats::start(Date_t now) {
- _lastHeartbeatStartDate = now;
- _numFailuresSinceLastStart = 0;
- }
+ // wait for 2N pings (not counting ourselves) before choosing a sync target
+ int needMorePings = (_hbdata.size() - 1) * 2 - _getTotalPings();
- void PingStats::hit(int millis) {
- _numFailuresSinceLastStart = std::numeric_limits<int>::max();
- ++count;
- value = value == std::numeric_limits<unsigned int>::max() ? millis :
- static_cast<unsigned long>((value * .8) + (millis * .2));
- }
-
- void PingStats::miss() {
- ++_numFailuresSinceLastStart;
+ if (needMorePings > 0) {
+ OCCASIONALLY log() << "waiting for " << needMorePings
+ << " pings from other members before syncing";
+ _syncSource = HostAndPort();
+ return _syncSource;
}
- TopologyCoordinatorImpl::TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs) :
- _role(Role::follower),
- _currentPrimaryIndex(-1),
- _forceSyncSourceIndex(-1),
- _maxSyncSourceLagSecs(maxSyncSourceLagSecs),
- _selfIndex(-1),
- _stepDownPending(false),
- _stepDownUntil(0),
- _electionSleepUntil(0),
- _maintenanceModeCalls(0),
- _followerMode(MemberState::RS_STARTUP2)
- {
- invariant(getMemberState() == MemberState::RS_STARTUP);
+ // If we are only allowed to sync from the primary, set that
+ if (!_rsConfig.isChainingAllowed()) {
+ if (_currentPrimaryIndex == -1) {
+ LOG(1) << "Cannot select sync source because chaining is"
+ " not allowed and primary is unknown/down";
+ _syncSource = HostAndPort();
+ return _syncSource;
+ } else if (_memberIsBlacklisted(*_currentPrimaryMember(), now)) {
+ LOG(1) << "Cannot select sync source because chaining is"
+ "not allowed and primary is not currently accepting our updates";
+ _syncSource = HostAndPort();
+ return _syncSource;
+ } else {
+ _syncSource = _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort();
+ std::string msg(str::stream() << "syncing from primary: " << _syncSource.toString());
+ log() << msg << rsLog;
+ setMyHeartbeatMessage(now, msg);
+ return _syncSource;
+ }
}
- TopologyCoordinator::Role TopologyCoordinatorImpl::getRole() const {
- return _role;
- }
+ // find the member with the lowest ping time that is ahead of me
- void TopologyCoordinatorImpl::setForceSyncSourceIndex(int index) {
- invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
- _forceSyncSourceIndex = index;
+ // Find primary's oplog time. Reject sync candidates that are more than
+ // maxSyncSourceLagSecs seconds behind.
+ OpTime primaryOpTime;
+ if (_currentPrimaryIndex != -1) {
+ primaryOpTime = _hbdata[_currentPrimaryIndex].getOpTime();
+ } else {
+ // choose a time that will exclude no candidates, since we don't see a primary
+ primaryOpTime = OpTime(_maxSyncSourceLagSecs.total_seconds(), 0);
}
- HostAndPort TopologyCoordinatorImpl::getSyncSourceAddress() const {
- return _syncSource;
+ if (primaryOpTime.getSecs() <
+ static_cast<unsigned int>(_maxSyncSourceLagSecs.total_seconds())) {
+ // erh - I think this means there was just a new election
+ // and we don't yet know the new primary's optime
+ primaryOpTime = OpTime(_maxSyncSourceLagSecs.total_seconds(), 0);
}
- HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now,
- const OpTime& lastOpApplied) {
- // If we are primary, then we aren't syncing from anyone (else).
- if (_iAmPrimary()) {
- return HostAndPort();
- }
+ OpTime oldestSyncOpTime(primaryOpTime.getSecs() - _maxSyncSourceLagSecs.total_seconds(), 0);
- // If we are not a member of the current replica set configuration, no sync source is valid.
- if (_selfIndex == -1) {
- LOG(2) << "Cannot sync from any members because we are not in the replica set config";
- return HostAndPort();
- }
+ int closestIndex = -1;
- // if we have a target we've requested to sync from, use it
- if (_forceSyncSourceIndex != -1) {
- invariant(_forceSyncSourceIndex < _rsConfig.getNumMembers());
- _syncSource = _rsConfig.getMemberAt(_forceSyncSourceIndex).getHostAndPort();
- _forceSyncSourceIndex = -1;
- std::string msg(str::stream() << "syncing from: "
- << _syncSource.toString() << " by request");
- log() << msg << rsLog;
- setMyHeartbeatMessage(now, msg);
- return _syncSource;
- }
-
- // wait for 2N pings (not counting ourselves) before choosing a sync target
- int needMorePings = (_hbdata.size() - 1) * 2 - _getTotalPings();
-
- if (needMorePings > 0) {
- OCCASIONALLY log() << "waiting for " << needMorePings
- << " pings from other members before syncing";
- _syncSource = HostAndPort();
- return _syncSource;
- }
-
- // If we are only allowed to sync from the primary, set that
- if (!_rsConfig.isChainingAllowed()) {
- if (_currentPrimaryIndex == -1) {
- LOG(1) << "Cannot select sync source because chaining is"
- " not allowed and primary is unknown/down";
- _syncSource = HostAndPort();
- return _syncSource;
+ // Make two attempts. The first attempt, we ignore those nodes with
+ // slave delay higher than our own, hidden nodes, and nodes that are excessively lagged.
+ // The second attempt includes such nodes, in case those are the only ones we can reach.
+ // This loop attempts to set 'closestIndex'.
+ for (int attempts = 0; attempts < 2; ++attempts) {
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
+ it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ // Don't consider ourselves.
+ if (itIndex == _selfIndex) {
+ continue;
}
- else if (_memberIsBlacklisted(*_currentPrimaryMember(), now)) {
- LOG(1) << "Cannot select sync source because chaining is"
- "not allowed and primary is not currently accepting our updates";
- _syncSource = HostAndPort();
- return _syncSource;
+ // Candidate must be up to be considered.
+ if (!it->up()) {
+ continue;
}
- else {
- _syncSource = _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort();
- std::string msg(str::stream() << "syncing from primary: "
- << _syncSource.toString());
- log() << msg << rsLog;
- setMyHeartbeatMessage(now, msg);
- return _syncSource;
+ // Candidate must be PRIMARY or SECONDARY state to be considered.
+ if (!it->getState().readable()) {
+ continue;
}
- }
-
- // find the member with the lowest ping time that is ahead of me
- // Find primary's oplog time. Reject sync candidates that are more than
- // maxSyncSourceLagSecs seconds behind.
- OpTime primaryOpTime;
- if (_currentPrimaryIndex != -1) {
- primaryOpTime = _hbdata[_currentPrimaryIndex].getOpTime();
- }
- else {
- // choose a time that will exclude no candidates, since we don't see a primary
- primaryOpTime = OpTime(_maxSyncSourceLagSecs.total_seconds(), 0);
- }
-
- if (primaryOpTime.getSecs() <
- static_cast<unsigned int>(_maxSyncSourceLagSecs.total_seconds())) {
- // erh - I think this means there was just a new election
- // and we don't yet know the new primary's optime
- primaryOpTime = OpTime(_maxSyncSourceLagSecs.total_seconds(), 0);
- }
-
- OpTime oldestSyncOpTime(primaryOpTime.getSecs() - _maxSyncSourceLagSecs.total_seconds(), 0);
-
- int closestIndex = -1;
-
- // Make two attempts. The first attempt, we ignore those nodes with
- // slave delay higher than our own, hidden nodes, and nodes that are excessively lagged.
- // The second attempt includes such nodes, in case those are the only ones we can reach.
- // This loop attempts to set 'closestIndex'.
- for (int attempts = 0; attempts < 2; ++attempts) {
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- // Don't consider ourselves.
- if (itIndex == _selfIndex) {
- continue;
- }
- // Candidate must be up to be considered.
- if (!it->up()) {
- continue;
- }
- // Candidate must be PRIMARY or SECONDARY state to be considered.
- if (!it->getState().readable()) {
- continue;
- }
-
- const MemberConfig& itMemberConfig(_rsConfig.getMemberAt(itIndex));
-
- // Candidate must build indexes if we build indexes, to be considered.
- if (_selfConfig().shouldBuildIndexes()) {
- if (!itMemberConfig.shouldBuildIndexes()) {
- continue;
- }
- }
+ const MemberConfig& itMemberConfig(_rsConfig.getMemberAt(itIndex));
- // only consider candidates that are ahead of where we are
- if (it->getOpTime() <= lastOpApplied) {
+ // Candidate must build indexes if we build indexes, to be considered.
+ if (_selfConfig().shouldBuildIndexes()) {
+ if (!itMemberConfig.shouldBuildIndexes()) {
continue;
}
+ }
- // omit candidates that are excessively behind, on the first attempt at least.
- if (attempts == 0 &&
- it->getOpTime() < oldestSyncOpTime) {
- continue;
- }
+ // only consider candidates that are ahead of where we are
+ if (it->getOpTime() <= lastOpApplied) {
+ continue;
+ }
- // omit nodes that are more latent than anything we've already considered
- if ((closestIndex != -1) &&
- (_getPing(itMemberConfig.getHostAndPort())
- > _getPing(_rsConfig.getMemberAt(closestIndex).getHostAndPort()))) {
- continue;
- }
+ // omit candidates that are excessively behind, on the first attempt at least.
+ if (attempts == 0 && it->getOpTime() < oldestSyncOpTime) {
+ continue;
+ }
- if (attempts == 0) {
- if (_selfConfig().getSlaveDelay() < itMemberConfig.getSlaveDelay()
- || itMemberConfig.isHidden()) {
- continue; // skip this one in the first attempt
- }
- }
+ // omit nodes that are more latent than anything we've already considered
+ if ((closestIndex != -1) &&
+ (_getPing(itMemberConfig.getHostAndPort()) >
+ _getPing(_rsConfig.getMemberAt(closestIndex).getHostAndPort()))) {
+ continue;
+ }
- if (_memberIsBlacklisted(itMemberConfig, now)) {
- continue;
+ if (attempts == 0) {
+ if (_selfConfig().getSlaveDelay() < itMemberConfig.getSlaveDelay() ||
+ itMemberConfig.isHidden()) {
+ continue; // skip this one in the first attempt
}
-
- // This candidate has passed all tests; set 'closestIndex'
- closestIndex = itIndex;
}
- if (closestIndex != -1) break; // no need for second attempt
- }
- if (closestIndex == -1) {
- // Did not find any members to sync from
- std::string msg("could not find member to sync from");
- // Only log when we had a valid sync source before
- if (!_syncSource.empty()) {
- log() << msg << rsLog;
+ if (_memberIsBlacklisted(itMemberConfig, now)) {
+ continue;
}
- setMyHeartbeatMessage(now, msg);
- _syncSource = HostAndPort();
- return _syncSource;
+ // This candidate has passed all tests; set 'closestIndex'
+ closestIndex = itIndex;
}
- _syncSource = _rsConfig.getMemberAt(closestIndex).getHostAndPort();
- std::string msg(str::stream() << "syncing from: " << _syncSource.toString(), 0);
- log() << msg << rsLog;
- setMyHeartbeatMessage(now, msg);
- return _syncSource;
+ if (closestIndex != -1)
+ break; // no need for second attempt
}
- bool TopologyCoordinatorImpl::_memberIsBlacklisted(const MemberConfig& memberConfig,
- Date_t now) const {
- std::map<HostAndPort,Date_t>::const_iterator blacklisted =
- _syncSourceBlacklist.find(memberConfig.getHostAndPort());
- if (blacklisted != _syncSourceBlacklist.end()) {
- if (blacklisted->second > now) {
- return true;
- }
+ if (closestIndex == -1) {
+ // Did not find any members to sync from
+ std::string msg("could not find member to sync from");
+ // Only log when we had a valid sync source before
+ if (!_syncSource.empty()) {
+ log() << msg << rsLog;
}
- return false;
- }
+ setMyHeartbeatMessage(now, msg);
- void TopologyCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
- LOG(2) << "blacklisting " << host << " until " << until.toString();
- _syncSourceBlacklist[host] = until;
+ _syncSource = HostAndPort();
+ return _syncSource;
}
-
- void TopologyCoordinatorImpl::unblacklistSyncSource(const HostAndPort& host, Date_t now) {
- std::map<HostAndPort, Date_t>::iterator hostItr = _syncSourceBlacklist.find(host);
- if (hostItr != _syncSourceBlacklist.end() && now >= hostItr->second) {
- LOG(2) << "unblacklisting " << host;
- _syncSourceBlacklist.erase(hostItr);
+ _syncSource = _rsConfig.getMemberAt(closestIndex).getHostAndPort();
+ std::string msg(str::stream() << "syncing from: " << _syncSource.toString(), 0);
+ log() << msg << rsLog;
+ setMyHeartbeatMessage(now, msg);
+ return _syncSource;
+}
+
+bool TopologyCoordinatorImpl::_memberIsBlacklisted(const MemberConfig& memberConfig,
+ Date_t now) const {
+ std::map<HostAndPort, Date_t>::const_iterator blacklisted =
+ _syncSourceBlacklist.find(memberConfig.getHostAndPort());
+ if (blacklisted != _syncSourceBlacklist.end()) {
+ if (blacklisted->second > now) {
+ return true;
}
}
+ return false;
+}
- void TopologyCoordinatorImpl::clearSyncSourceBlacklist() {
- _syncSourceBlacklist.clear();
- }
-
- void TopologyCoordinatorImpl::prepareSyncFromResponse(
- const ReplicationExecutor::CallbackData& data,
- const HostAndPort& target,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
- if (data.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- response->append("syncFromRequested", target.toString());
-
- if (_selfIndex == -1) {
- *result = Status(ErrorCodes::NotSecondary,
- "Removed and uninitialized nodes do not sync");
- return;
- }
+void TopologyCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Date_t until) {
+ LOG(2) << "blacklisting " << host << " until " << until.toString();
+ _syncSourceBlacklist[host] = until;
+}
- const MemberConfig& selfConfig = _selfConfig();
- if (selfConfig.isArbiter()) {
- *result = Status(ErrorCodes::NotSecondary, "arbiters don't sync");
- return;
- }
- if (_selfIndex == _currentPrimaryIndex) {
- *result = Status(ErrorCodes::NotSecondary, "primaries don't sync");
- return;
- }
-
- ReplicaSetConfig::MemberIterator targetConfig = _rsConfig.membersEnd();
- int targetIndex = 0;
- for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
- it != _rsConfig.membersEnd(); ++it) {
- if (it->getHostAndPort() == target) {
- targetConfig = it;
- break;
- }
- ++targetIndex;
- }
- if (targetConfig == _rsConfig.membersEnd()) {
- *result = Status(ErrorCodes::NodeNotFound,
- str::stream() << "Could not find member \"" << target.toString() <<
- "\" in replica set");
- return;
- }
- if (targetIndex == _selfIndex) {
- *result = Status(ErrorCodes::InvalidOptions, "I cannot sync from myself");
- return;
- }
- if (targetConfig->isArbiter()) {
- *result = Status(ErrorCodes::InvalidOptions,
- str::stream() << "Cannot sync from \"" << target.toString() <<
- "\" because it is an arbiter");
- return;
- }
- if (!targetConfig->shouldBuildIndexes() && selfConfig.shouldBuildIndexes()) {
- *result = Status(ErrorCodes::InvalidOptions,
- str::stream() << "Cannot sync from \"" << target.toString() <<
- "\" because it does not build indexes");
- return;
- }
-
- const MemberHeartbeatData& hbdata = _hbdata[targetIndex];
- if (hbdata.hasAuthIssue()) {
- *result = Status(ErrorCodes::Unauthorized,
- str::stream() << "not authorized to communicate with " <<
- target.toString());
- return;
- }
- if (hbdata.getHealth() == 0) {
- *result = Status(ErrorCodes::HostUnreachable,
- str::stream() << "I cannot reach the requested member: " <<
- target.toString());
- return;
- }
- if (hbdata.getOpTime().getSecs()+10 < lastOpApplied.getSecs()) {
- warning() << "attempting to sync from " << target
- << ", but its latest opTime is " << hbdata.getOpTime().getSecs()
- << " and ours is " << lastOpApplied.getSecs() << " so this may not work";
- response->append("warning",
- str::stream() << "requested member \"" << target.toString() <<
- "\" is more than 10 seconds behind us");
- // not returning bad Status, just warning
- }
+void TopologyCoordinatorImpl::unblacklistSyncSource(const HostAndPort& host, Date_t now) {
+ std::map<HostAndPort, Date_t>::iterator hostItr = _syncSourceBlacklist.find(host);
+ if (hostItr != _syncSourceBlacklist.end() && now >= hostItr->second) {
+ LOG(2) << "unblacklisting " << host;
+ _syncSourceBlacklist.erase(hostItr);
+ }
+}
- HostAndPort prevSyncSource = getSyncSourceAddress();
- if (!prevSyncSource.empty()) {
- response->append("prevSyncTarget", prevSyncSource.toString());
- }
+void TopologyCoordinatorImpl::clearSyncSourceBlacklist() {
+ _syncSourceBlacklist.clear();
+}
- setForceSyncSourceIndex(targetIndex);
- *result = Status::OK();
+void TopologyCoordinatorImpl::prepareSyncFromResponse(const ReplicationExecutor::CallbackData& data,
+ const HostAndPort& target,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (data.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
}
- void TopologyCoordinatorImpl::prepareFreshResponse(
- const ReplicationCoordinator::ReplSetFreshArgs& args,
- const Date_t now,
- const OpTime lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
-
- if (_selfIndex == -1) {
- *result = Status(ErrorCodes::ReplicaSetNotFound,
- "Cannot participate in elections because not initialized");
- return;
- }
+ response->append("syncFromRequested", target.toString());
- if (args.setName != _rsConfig.getReplSetName()) {
- *result = Status(ErrorCodes::ReplicaSetNotFound,
- str::stream() << "Wrong repl set name. Expected: " <<
- _rsConfig.getReplSetName() <<
- ", received: " << args.setName);
- return;
- }
+ if (_selfIndex == -1) {
+ *result = Status(ErrorCodes::NotSecondary, "Removed and uninitialized nodes do not sync");
+ return;
+ }
- if (args.id == static_cast<unsigned>(_selfConfig().getId())) {
- *result = Status(ErrorCodes::BadValue,
- str::stream() << "Received replSetFresh command from member with the "
- "same member ID as ourself: " << args.id);
- return;
- }
+ const MemberConfig& selfConfig = _selfConfig();
+ if (selfConfig.isArbiter()) {
+ *result = Status(ErrorCodes::NotSecondary, "arbiters don't sync");
+ return;
+ }
+ if (_selfIndex == _currentPrimaryIndex) {
+ *result = Status(ErrorCodes::NotSecondary, "primaries don't sync");
+ return;
+ }
- bool weAreFresher = false;
- if( _rsConfig.getConfigVersion() > args.cfgver ) {
- log() << "replSet member " << args.who << " is not yet aware its cfg version "
- << args.cfgver << " is stale";
- response->append("info", "config version stale");
- weAreFresher = true;
- }
- // check not only our own optime, but any other member we can reach
- else if (args.opTime < _latestKnownOpTime(lastOpApplied)) {
- weAreFresher = true;
- }
- response->appendDate("opTime", lastOpApplied.asDate());
- response->append("fresher", weAreFresher);
-
- std::string errmsg;
- bool doVeto = _shouldVetoMember(args, now, lastOpApplied, &errmsg);
- response->append("veto", doVeto);
- if (doVeto) {
- response->append("errmsg", errmsg);
+ ReplicaSetConfig::MemberIterator targetConfig = _rsConfig.membersEnd();
+ int targetIndex = 0;
+ for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
+ it != _rsConfig.membersEnd();
+ ++it) {
+ if (it->getHostAndPort() == target) {
+ targetConfig = it;
+ break;
}
- *result = Status::OK();
+ ++targetIndex;
+ }
+ if (targetConfig == _rsConfig.membersEnd()) {
+ *result = Status(ErrorCodes::NodeNotFound,
+ str::stream() << "Could not find member \"" << target.toString()
+ << "\" in replica set");
+ return;
+ }
+ if (targetIndex == _selfIndex) {
+ *result = Status(ErrorCodes::InvalidOptions, "I cannot sync from myself");
+ return;
+ }
+ if (targetConfig->isArbiter()) {
+ *result = Status(ErrorCodes::InvalidOptions,
+ str::stream() << "Cannot sync from \"" << target.toString()
+ << "\" because it is an arbiter");
+ return;
+ }
+ if (!targetConfig->shouldBuildIndexes() && selfConfig.shouldBuildIndexes()) {
+ *result = Status(ErrorCodes::InvalidOptions,
+ str::stream() << "Cannot sync from \"" << target.toString()
+ << "\" because it does not build indexes");
+ return;
+ }
+
+ const MemberHeartbeatData& hbdata = _hbdata[targetIndex];
+ if (hbdata.hasAuthIssue()) {
+ *result =
+ Status(ErrorCodes::Unauthorized,
+ str::stream() << "not authorized to communicate with " << target.toString());
+ return;
+ }
+ if (hbdata.getHealth() == 0) {
+ *result =
+ Status(ErrorCodes::HostUnreachable,
+ str::stream() << "I cannot reach the requested member: " << target.toString());
+ return;
+ }
+ if (hbdata.getOpTime().getSecs() + 10 < lastOpApplied.getSecs()) {
+ warning() << "attempting to sync from " << target << ", but its latest opTime is "
+ << hbdata.getOpTime().getSecs() << " and ours is " << lastOpApplied.getSecs()
+ << " so this may not work";
+ response->append("warning",
+ str::stream() << "requested member \"" << target.toString()
+ << "\" is more than 10 seconds behind us");
+ // not returning bad Status, just warning
+ }
+
+ HostAndPort prevSyncSource = getSyncSourceAddress();
+ if (!prevSyncSource.empty()) {
+ response->append("prevSyncTarget", prevSyncSource.toString());
+ }
+
+ setForceSyncSourceIndex(targetIndex);
+ *result = Status::OK();
+}
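
// [Sketch] The sync-from validation above is a first-failure-wins cascade in
// which only the "more than 10 seconds behind" case degrades to a warning.
// This standalone reduction covers a subset of the checks with illustrative
// types; the real code works on MemberConfig/MemberHeartbeatData.
#include <iostream>
#include <optional>
#include <string>

struct SyncFromInput {
    bool selfIsArbiter, selfIsPrimary, targetIsArbiter, targetReachable;
    long long targetOpTimeSecs, ourOpTimeSecs;
};

std::optional<std::string> validateSyncFrom(const SyncFromInput& in,
                                            std::string* warning) {
    if (in.selfIsArbiter)
        return "arbiters don't sync";
    if (in.selfIsPrimary)
        return "primaries don't sync";
    if (in.targetIsArbiter)
        return "cannot sync from an arbiter";
    if (!in.targetReachable)
        return "cannot reach the requested member";
    if (in.targetOpTimeSecs + 10 < in.ourOpTimeSecs) {
        *warning = "requested member is more than 10 seconds behind us";
    }
    return std::nullopt;  // OK, possibly with a warning attached
}

int main() {
    std::string warning;
    const SyncFromInput in{false, false, false, true, 100, 120};
    const auto err = validateSyncFrom(in, &warning);
    // prints "ok" plus the staleness warning, since 100 + 10 < 120
    std::cout << (err ? *err : "ok") << " / " << warning << '\n';
}
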
+
+void TopologyCoordinatorImpl::prepareFreshResponse(
+ const ReplicationCoordinator::ReplSetFreshArgs& args,
+ const Date_t now,
+ const OpTime lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (_selfIndex == -1) {
+ *result = Status(ErrorCodes::ReplicaSetNotFound,
+ "Cannot participate in elections because not initialized");
+ return;
+ }
+
+ if (args.setName != _rsConfig.getReplSetName()) {
+ *result =
+ Status(ErrorCodes::ReplicaSetNotFound,
+ str::stream() << "Wrong repl set name. Expected: " << _rsConfig.getReplSetName()
+ << ", received: " << args.setName);
+ return;
+ }
+
+ if (args.id == static_cast<unsigned>(_selfConfig().getId())) {
+ *result = Status(ErrorCodes::BadValue,
+ str::stream() << "Received replSetFresh command from member with the "
+ "same member ID as ourself: " << args.id);
+ return;
+ }
+
+ bool weAreFresher = false;
+ if (_rsConfig.getConfigVersion() > args.cfgver) {
+ log() << "replSet member " << args.who << " is not yet aware its cfg version "
+ << args.cfgver << " is stale";
+ response->append("info", "config version stale");
+ weAreFresher = true;
+ }
+    // check not only our own optime, but also that of any other member we can reach
+ else if (args.opTime < _latestKnownOpTime(lastOpApplied)) {
+ weAreFresher = true;
+ }
+ response->appendDate("opTime", lastOpApplied.asDate());
+ response->append("fresher", weAreFresher);
+
+ std::string errmsg;
+ bool doVeto = _shouldVetoMember(args, now, lastOpApplied, &errmsg);
+ response->append("veto", doVeto);
+ if (doVeto) {
+ response->append("errmsg", errmsg);
+ }
+ *result = Status::OK();
+}
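
// [Sketch] The freshness decision above, reduced to plain integers: we are
// "fresher" than the caller when its config version is stale, or when its
// optime trails the newest optime we know of anywhere in the set (our own or
// any reachable member's, mirroring _latestKnownOpTime). Names here are
// illustrative, not the real API.
#include <algorithm>
#include <iostream>
#include <vector>

bool weAreFresher(long long ourCfgVer, long long theirCfgVer,
                  long long theirOpTimeSecs, long long ourOpTimeSecs,
                  const std::vector<long long>& reachableOpTimeSecs) {
    if (ourCfgVer > theirCfgVer)
        return true;  // caller's config is stale
    long long latest = ourOpTimeSecs;
    for (const long long t : reachableOpTimeSecs)
        latest = std::max(latest, t);
    return theirOpTimeSecs < latest;
}

int main() {
    // Caller is at 90; we are at 80 but can see a member at 100.
    std::cout << weAreFresher(5, 5, 90, 80, {100}) << '\n';   // 1: we are fresher
    std::cout << weAreFresher(5, 5, 100, 80, {100}) << '\n';  // 0: caller is current
}
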
+
+bool TopologyCoordinatorImpl::_shouldVetoMember(
+ const ReplicationCoordinator::ReplSetFreshArgs& args,
+ const Date_t& now,
+ const OpTime& lastOpApplied,
+ std::string* errmsg) const {
+ if (_rsConfig.getConfigVersion() < args.cfgver) {
+ // We are stale; do not veto.
+ return false;
}
- bool TopologyCoordinatorImpl::_shouldVetoMember(
- const ReplicationCoordinator::ReplSetFreshArgs& args,
- const Date_t& now,
- const OpTime& lastOpApplied,
- std::string* errmsg) const {
+ const unsigned int memberID = args.id;
+ const int hopefulIndex = _getMemberIndex(memberID);
+ invariant(hopefulIndex != _selfIndex);
+ const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
- if (_rsConfig.getConfigVersion() < args.cfgver) {
- // We are stale; do not veto.
- return false;
- }
-
- const unsigned int memberID = args.id;
- const int hopefulIndex = _getMemberIndex(memberID);
- invariant(hopefulIndex != _selfIndex);
- const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
-
- if (hopefulIndex == -1) {
- *errmsg = str::stream() << "replSet couldn't find member with id " << memberID;
- return true;
- }
+ if (hopefulIndex == -1) {
+ *errmsg = str::stream() << "replSet couldn't find member with id " << memberID;
+ return true;
+ }
- if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getOpTime()) {
- // hbinfo is not updated for ourself, so if we are primary we have to check the
- // primary's last optime separately
- *errmsg = str::stream() << "I am already primary, " <<
- _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString() <<
- " can try again once I've stepped down";
- return true;
- }
+ if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getOpTime()) {
+        // hbinfo is not updated for ourselves, so if we are primary we have to check the
+ // primary's last optime separately
+ *errmsg = str::stream() << "I am already primary, "
+ << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
+ << " can try again once I've stepped down";
+ return true;
+ }
- if (_currentPrimaryIndex != -1 &&
- (hopefulIndex != _currentPrimaryIndex) &&
- (_hbdata[_currentPrimaryIndex].getOpTime() >=
- _hbdata[hopefulIndex].getOpTime())) {
- // other members might be aware of more up-to-date nodes
- *errmsg = str::stream() <<
- _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString() <<
- " is trying to elect itself but " <<
- _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort().toString() <<
- " is already primary and more up-to-date";
- return true;
- }
+ if (_currentPrimaryIndex != -1 && (hopefulIndex != _currentPrimaryIndex) &&
+ (_hbdata[_currentPrimaryIndex].getOpTime() >= _hbdata[hopefulIndex].getOpTime())) {
+ // other members might be aware of more up-to-date nodes
+ *errmsg =
+ str::stream() << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
+ << " is trying to elect itself but "
+ << _rsConfig.getMemberAt(_currentPrimaryIndex).getHostAndPort().toString()
+ << " is already primary and more up-to-date";
+ return true;
+ }
- if ((highestPriorityIndex != -1)) {
- const MemberConfig& hopefulMember = _rsConfig.getMemberAt(hopefulIndex);
- const MemberConfig& priorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
-
- if (priorityMember.getPriority() > hopefulMember.getPriority()) {
- *errmsg = str::stream()
- << hopefulMember.getHostAndPort().toString()
- << " has lower priority of " << hopefulMember.getPriority() << " than "
- << priorityMember.getHostAndPort().toString()
- << " which has a priority of " << priorityMember.getPriority();
- return true;
- }
- }
+ if ((highestPriorityIndex != -1)) {
+ const MemberConfig& hopefulMember = _rsConfig.getMemberAt(hopefulIndex);
+ const MemberConfig& priorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
- UnelectableReasonMask reason = _getUnelectableReason(hopefulIndex, lastOpApplied);
- reason &= ~RefusesToStand;
- if (reason) {
- *errmsg = str::stream()
- << "I don't think "
- << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
- << " is electable because the " << _getUnelectableReasonString(reason);
+ if (priorityMember.getPriority() > hopefulMember.getPriority()) {
+ *errmsg = str::stream() << hopefulMember.getHostAndPort().toString()
+ << " has lower priority of " << hopefulMember.getPriority()
+ << " than " << priorityMember.getHostAndPort().toString()
+ << " which has a priority of " << priorityMember.getPriority();
return true;
}
-
- return false;
}
- // produce a reply to a received electCmd
- void TopologyCoordinatorImpl::prepareElectResponse(
- const ReplicationCoordinator::ReplSetElectArgs& args,
- const Date_t now,
- const OpTime lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
-
- if (_selfIndex == -1) {
- *result = Status(ErrorCodes::ReplicaSetNotFound,
- "Cannot participate in election because not initialized");
- return;
- }
-
- const long long myver = _rsConfig.getConfigVersion();
- const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
-
- const MemberConfig* primary = _currentPrimaryMember();
- const MemberConfig* hopeful = _rsConfig.findMemberByID(args.whoid);
- const MemberConfig* highestPriority = highestPriorityIndex == -1 ? NULL :
- &_rsConfig.getMemberAt(highestPriorityIndex);
-
- int vote = 0;
- if (args.set != _rsConfig.getReplSetName()) {
- log() << "replSet error received an elect request for '" << args.set
- << "' but our set name is '" <<
- _rsConfig.getReplSetName() << "'";
- }
- else if ( myver < args.cfgver ) {
- // we are stale. don't vote
- log() << "replSetElect not voting because our config version is stale. Our version: " <<
- myver << ", their version: " << args.cfgver;
- }
- else if ( myver > args.cfgver ) {
- // they are stale!
- log() << "replSetElect command received stale config version # during election. "
- "Our version: " << myver << ", their version: " << args.cfgver;
- vote = -10000;
- }
- else if (!hopeful) {
- log() << "replSetElect couldn't find member with id " << args.whoid;
- vote = -10000;
- }
- else if (_iAmPrimary()) {
- log() << "I am already primary, " << hopeful->getHostAndPort().toString()
- << " can try again once I've stepped down";
- vote = -10000;
- }
- else if (primary) {
- log() << hopeful->getHostAndPort().toString() << " is trying to elect itself but "
- << primary->getHostAndPort().toString() << " is already primary";
- vote = -10000;
- }
- else if (highestPriority && highestPriority->getPriority() > hopeful->getPriority()) {
- // TODO(spencer): What if the lower-priority member is more up-to-date?
- log() << hopeful->getHostAndPort().toString() << " has lower priority than "
- << highestPriority->getHostAndPort().toString();
- vote = -10000;
- }
- else if (_lastVote.when.millis + LastVote::leaseTime.total_milliseconds() >= now.millis &&
- _lastVote.whoId != args.whoid) {
- log() << "replSet voting no for "
- << hopeful->getHostAndPort().toString()
- << "; voted for " << _lastVote.whoHostAndPort.toString() << ' '
- << (now.millis - _lastVote.when.millis) / 1000 << " secs ago";
- }
- else {
- _lastVote.when = now;
- _lastVote.whoId = args.whoid;
- _lastVote.whoHostAndPort = hopeful->getHostAndPort();
- vote = _selfConfig().getNumVotes();
- invariant(hopeful->getId() == args.whoid);
- if (vote > 0) {
- log() << "replSetElect voting yea for " << hopeful->getHostAndPort().toString()
- << " (" << args.whoid << ')';
- }
- }
-
- response->append("vote", vote);
- response->append("round", args.round);
- *result = Status::OK();
+ UnelectableReasonMask reason = _getUnelectableReason(hopefulIndex, lastOpApplied);
+ reason &= ~RefusesToStand;
+ if (reason) {
+ *errmsg = str::stream() << "I don't think "
+ << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
+ << " is electable because the "
+ << _getUnelectableReasonString(reason);
+ return true;
}
- // produce a reply to a heartbeat
- Status TopologyCoordinatorImpl::prepareHeartbeatResponse(
- Date_t now,
- const ReplSetHeartbeatArgs& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response) {
-
- if (args.getProtocolVersion() != 1) {
+ return false;
+}
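
// [Sketch] The priority clause of the veto logic above: a hopeful is vetoed
// only when some electable member has strictly higher configured priority,
// so equal priorities are never grounds for a veto. Doubles stand in for
// MemberConfig priorities.
#include <iostream>

bool vetoOnPriority(double hopefulPriority, double highestElectablePriority) {
    return highestElectablePriority > hopefulPriority;
}

int main() {
    std::cout << vetoOnPriority(1.0, 2.0) << '\n';  // 1: vetoed
    std::cout << vetoOnPriority(2.0, 2.0) << '\n';  // 0: a tie is not vetoed
}
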
+
+// produce a reply to a received electCmd
+void TopologyCoordinatorImpl::prepareElectResponse(
+ const ReplicationCoordinator::ReplSetElectArgs& args,
+ const Date_t now,
+ const OpTime lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (_selfIndex == -1) {
+ *result = Status(ErrorCodes::ReplicaSetNotFound,
+ "Cannot participate in election because not initialized");
+ return;
+ }
+
+ const long long myver = _rsConfig.getConfigVersion();
+ const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
+
+ const MemberConfig* primary = _currentPrimaryMember();
+ const MemberConfig* hopeful = _rsConfig.findMemberByID(args.whoid);
+ const MemberConfig* highestPriority =
+ highestPriorityIndex == -1 ? NULL : &_rsConfig.getMemberAt(highestPriorityIndex);
+
+ int vote = 0;
+ if (args.set != _rsConfig.getReplSetName()) {
+ log() << "replSet error received an elect request for '" << args.set
+ << "' but our set name is '" << _rsConfig.getReplSetName() << "'";
+ } else if (myver < args.cfgver) {
+ // we are stale. don't vote
+ log() << "replSetElect not voting because our config version is stale. Our version: "
+ << myver << ", their version: " << args.cfgver;
+ } else if (myver > args.cfgver) {
+ // they are stale!
+ log() << "replSetElect command received stale config version # during election. "
+ "Our version: " << myver << ", their version: " << args.cfgver;
+ vote = -10000;
+ } else if (!hopeful) {
+ log() << "replSetElect couldn't find member with id " << args.whoid;
+ vote = -10000;
+ } else if (_iAmPrimary()) {
+ log() << "I am already primary, " << hopeful->getHostAndPort().toString()
+ << " can try again once I've stepped down";
+ vote = -10000;
+ } else if (primary) {
+ log() << hopeful->getHostAndPort().toString() << " is trying to elect itself but "
+ << primary->getHostAndPort().toString() << " is already primary";
+ vote = -10000;
+ } else if (highestPriority && highestPriority->getPriority() > hopeful->getPriority()) {
+ // TODO(spencer): What if the lower-priority member is more up-to-date?
+ log() << hopeful->getHostAndPort().toString() << " has lower priority than "
+ << highestPriority->getHostAndPort().toString();
+ vote = -10000;
+ } else if (_lastVote.when.millis + LastVote::leaseTime.total_milliseconds() >= now.millis &&
+ _lastVote.whoId != args.whoid) {
+ log() << "replSet voting no for " << hopeful->getHostAndPort().toString() << "; voted for "
+ << _lastVote.whoHostAndPort.toString() << ' '
+ << (now.millis - _lastVote.when.millis) / 1000 << " secs ago";
+ } else {
+ _lastVote.when = now;
+ _lastVote.whoId = args.whoid;
+ _lastVote.whoHostAndPort = hopeful->getHostAndPort();
+ vote = _selfConfig().getNumVotes();
+ invariant(hopeful->getId() == args.whoid);
+ if (vote > 0) {
+ log() << "replSetElect voting yea for " << hopeful->getHostAndPort().toString() << " ("
+ << args.whoid << ')';
+ }
+ }
+
+ response->append("vote", vote);
+ response->append("round", args.round);
+ *result = Status::OK();
+}
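
// [Sketch] The abstention branch near the end of prepareElectResponse: after
// voting for one candidate, this node abstains (vote 0, not a -10000 veto)
// for the lease period unless the same candidate asks again. The 30-second
// lease is an assumption standing in for LastVote::leaseTime; plain ints
// stand in for Date_t millis.
#include <iostream>

struct LastVoteRecord {
    long long whenMillis;
    int whoId;
};

bool leaseForcesAbstain(const LastVoteRecord& last, long long nowMillis,
                        int candidateId, long long leaseMillis = 30000) {
    return last.whenMillis + leaseMillis >= nowMillis && last.whoId != candidateId;
}

int main() {
    const LastVoteRecord last{10000, 1};
    std::cout << leaseForcesAbstain(last, 25000, 2) << '\n';  // 1: abstain
    std::cout << leaseForcesAbstain(last, 25000, 1) << '\n';  // 0: same candidate
    std::cout << leaseForcesAbstain(last, 50000, 2) << '\n';  // 0: lease expired
}
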
+
+// produce a reply to a heartbeat
+Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now,
+ const ReplSetHeartbeatArgs& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response) {
+ if (args.getProtocolVersion() != 1) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "replset: incompatible replset protocol version: "
+ << args.getProtocolVersion());
+ }
+
+ // Verify that replica set names match
+ const std::string rshb = args.getSetName();
+ if (ourSetName != rshb) {
+ log() << "replSet set names do not match, ours: " << ourSetName
+ << "; remote node's: " << rshb;
+ response->noteMismatched();
+ return Status(ErrorCodes::InconsistentReplicaSetNames,
+ str::stream() << "Our set name of " << ourSetName << " does not match name "
+ << rshb << " reported by remote node");
+ }
+
+ const MemberState myState = getMemberState();
+ if (_selfIndex == -1) {
+ if (myState.removed()) {
+ return Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Our replica set configuration is invalid or does not include us");
+ }
+ } else {
+ invariant(_rsConfig.getReplSetName() == args.getSetName());
+ if (args.getSenderId() == _selfConfig().getId()) {
return Status(ErrorCodes::BadValue,
- str::stream() << "replset: incompatible replset protocol version: "
- << args.getProtocolVersion());
- }
-
- // Verify that replica set names match
- const std::string rshb = args.getSetName();
- if (ourSetName != rshb) {
- log() << "replSet set names do not match, ours: " << ourSetName <<
- "; remote node's: " << rshb;
- response->noteMismatched();
- return Status(ErrorCodes::InconsistentReplicaSetNames, str::stream() <<
- "Our set name of " << ourSetName << " does not match name " << rshb <<
- " reported by remote node");
+ str::stream() << "Received heartbeat from member with the same "
+ "member ID as ourself: " << args.getSenderId());
}
+ }
- const MemberState myState = getMemberState();
- if (_selfIndex == -1) {
- if (myState.removed()) {
- return Status(ErrorCodes::InvalidReplicaSetConfig,
- "Our replica set configuration is invalid or does not include us");
- }
- }
- else {
- invariant(_rsConfig.getReplSetName() == args.getSetName());
- if (args.getSenderId() == _selfConfig().getId()) {
- return Status(ErrorCodes::BadValue,
- str::stream() << "Received heartbeat from member with the same "
- "member ID as ourself: " << args.getSenderId());
- }
- }
-
- // This is a replica set
- response->noteReplSet();
-
- // For 2.6 compatibility
- if (_rsConfig.isInitialized()) {
- response->setSetName(ourSetName);
- }
- response->setState(myState.s);
- if (myState.primary()) {
- response->setElectionTime(_electionTime);
- }
-
- // Are we electable
- response->setElectable(!_getMyUnelectableReason(now, lastOpApplied));
-
- // Heartbeat status message
- response->setHbMsg(_getHbmsg(now));
- response->setTime(Seconds(Milliseconds(now.asInt64()).total_seconds()));
- response->setOpTime(lastOpApplied.asDate());
-
- if (!_syncSource.empty()) {
- response->setSyncingTo(_syncSource.toString());
- }
+ // This is a replica set
+ response->noteReplSet();
- if (!_rsConfig.isInitialized()) {
- response->setVersion(-2);
- return Status::OK();
- }
+ // For 2.6 compatibility
+ if (_rsConfig.isInitialized()) {
+ response->setSetName(ourSetName);
+ }
+ response->setState(myState.s);
+ if (myState.primary()) {
+ response->setElectionTime(_electionTime);
+ }
- const long long v = _rsConfig.getConfigVersion();
- response->setVersion(v);
- // Deliver new config if caller's version is older than ours
- if (v > args.getConfigVersion()) {
- response->setConfig(_rsConfig);
- }
+ // Are we electable
+ response->setElectable(!_getMyUnelectableReason(now, lastOpApplied));
- // Resolve the caller's id in our Member list
- int from = -1;
- if (v == args.getConfigVersion() && args.getSenderId() != -1) {
- from = _getMemberIndex(args.getSenderId());
- }
- if (from == -1) {
- // Can't find the member, so we leave out the stateDisagreement field
- return Status::OK();
- }
- invariant(from != _selfIndex);
+ // Heartbeat status message
+ response->setHbMsg(_getHbmsg(now));
+ response->setTime(Seconds(Milliseconds(now.asInt64()).total_seconds()));
+ response->setOpTime(lastOpApplied.asDate());
- // if we thought that this node is down, let it know
- if (!_hbdata[from].up()) {
- response->noteStateDisagreement();
- }
+ if (!_syncSource.empty()) {
+ response->setSyncingTo(_syncSource.toString());
+ }
- // note that we got a heartbeat from this node
- _hbdata[from].setLastHeartbeatRecv(now);
+ if (!_rsConfig.isInitialized()) {
+ response->setVersion(-2);
return Status::OK();
}
-
- int TopologyCoordinatorImpl::_getMemberIndex(int id) const {
- int index = 0;
- for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
- it != _rsConfig.membersEnd();
- ++it, ++index) {
- if (it->getId() == id) {
- return index;
- }
- }
- return -1;
+ const long long v = _rsConfig.getConfigVersion();
+ response->setVersion(v);
+ // Deliver new config if caller's version is older than ours
+ if (v > args.getConfigVersion()) {
+ response->setConfig(_rsConfig);
}
- std::pair<ReplSetHeartbeatArgs, Milliseconds> TopologyCoordinatorImpl::prepareHeartbeatRequest(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target) {
-
- PingStats& hbStats = _pings[target];
- Milliseconds alreadyElapsed(now.asInt64() - hbStats.getLastHeartbeatStartDate().asInt64());
- if (!_rsConfig.isInitialized() ||
- (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries) ||
- (alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
-
- // This is either the first request ever for "target", or the heartbeat timeout has
- // passed, so we're starting a "new" heartbeat.
- hbStats.start(now);
- alreadyElapsed = Milliseconds(0);
- }
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setProtocolVersion(1);
- hbArgs.setCheckEmpty(false);
- if (_rsConfig.isInitialized()) {
- hbArgs.setSetName(_rsConfig.getReplSetName());
- hbArgs.setConfigVersion(_rsConfig.getConfigVersion());
- if (_selfIndex >= 0) {
- const MemberConfig& me = _selfConfig();
- hbArgs.setSenderHost(me.getHostAndPort());
- hbArgs.setSenderId(me.getId());
- }
- }
- else {
- hbArgs.setSetName(ourSetName);
- hbArgs.setConfigVersion(-2);
- }
-
- const Milliseconds timeoutPeriod(
- _rsConfig.isInitialized() ?
- _rsConfig.getHeartbeatTimeoutPeriodMillis() :
- Milliseconds(
- ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.total_milliseconds()));
- const Milliseconds timeout(
- timeoutPeriod.total_milliseconds() - alreadyElapsed.total_milliseconds());
- return std::make_pair(hbArgs, timeout);
- }
-
- HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse(
- Date_t now,
- Milliseconds networkRoundTripTime,
- const HostAndPort& target,
- const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
- OpTime myLastOpApplied) {
-
- const MemberState originalState = getMemberState();
- PingStats& hbStats = _pings[target];
- invariant(hbStats.getLastHeartbeatStartDate() != Date_t(0));
- if (!hbResponse.isOK()) {
- hbStats.miss();
- }
- else {
- hbStats.hit(networkRoundTripTime.total_milliseconds());
- // Log diagnostics.
- if (hbResponse.getValue().isStateDisagreement()) {
- LOG(1) << target <<
- " thinks that we are down because they cannot send us heartbeats.";
- }
- }
-
- const bool isUnauthorized =
- (hbResponse.getStatus().code() == ErrorCodes::Unauthorized) ||
- (hbResponse.getStatus().code() == ErrorCodes::AuthenticationFailed);
-
- Milliseconds alreadyElapsed(now.asInt64() - hbStats.getLastHeartbeatStartDate().asInt64());
- Date_t nextHeartbeatStartDate;
- // determine next start time
- if (_rsConfig.isInitialized() &&
- (hbStats.getNumFailuresSinceLastStart() <= kMaxHeartbeatRetries) &&
- (alreadyElapsed < _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
-
- if (isUnauthorized) {
- nextHeartbeatStartDate = now + kHeartbeatInterval.total_milliseconds();
- } else {
- nextHeartbeatStartDate = now;
- }
- }
- else {
+ // Resolve the caller's id in our Member list
+ int from = -1;
+ if (v == args.getConfigVersion() && args.getSenderId() != -1) {
+ from = _getMemberIndex(args.getSenderId());
+ }
+ if (from == -1) {
+ // Can't find the member, so we leave out the stateDisagreement field
+ return Status::OK();
+ }
+ invariant(from != _selfIndex);
+
+ // if we thought that this node is down, let it know
+ if (!_hbdata[from].up()) {
+ response->noteStateDisagreement();
+ }
+
+ // note that we got a heartbeat from this node
+ _hbdata[from].setLastHeartbeatRecv(now);
+ return Status::OK();
+}
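
// [Sketch] The config handshake embedded in the heartbeat reply above: an
// uninitialized node reports the sentinel version -2, and the full config is
// attached only when the caller's version is older than ours. Plain structs
// stand in for ReplicaSetConfig and ReplSetHeartbeatResponse.
#include <iostream>

struct ReplyVersion {
    long long version;
    bool includesConfig;
};

ReplyVersion versionForReply(bool configInitialized, long long ourVersion,
                             long long callerVersion) {
    if (!configInitialized)
        return {-2, false};  // sentinel: "I have no config yet"
    return {ourVersion, ourVersion > callerVersion};
}

int main() {
    const ReplyVersion a = versionForReply(false, 0, 3);
    std::cout << a.version << ' ' << a.includesConfig << '\n';  // -2 0
    const ReplyVersion b = versionForReply(true, 5, 3);
    std::cout << b.version << ' ' << b.includesConfig << '\n';  // 5 1
}
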
+
+
+int TopologyCoordinatorImpl::_getMemberIndex(int id) const {
+ int index = 0;
+ for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
+ it != _rsConfig.membersEnd();
+ ++it, ++index) {
+ if (it->getId() == id) {
+ return index;
+ }
+ }
+ return -1;
+}
+
+std::pair<ReplSetHeartbeatArgs, Milliseconds> TopologyCoordinatorImpl::prepareHeartbeatRequest(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target) {
+ PingStats& hbStats = _pings[target];
+ Milliseconds alreadyElapsed(now.asInt64() - hbStats.getLastHeartbeatStartDate().asInt64());
+ if (!_rsConfig.isInitialized() ||
+ (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries) ||
+ (alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
+ // This is either the first request ever for "target", or the heartbeat timeout has
+ // passed, so we're starting a "new" heartbeat.
+ hbStats.start(now);
+ alreadyElapsed = Milliseconds(0);
+ }
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setCheckEmpty(false);
+ if (_rsConfig.isInitialized()) {
+ hbArgs.setSetName(_rsConfig.getReplSetName());
+ hbArgs.setConfigVersion(_rsConfig.getConfigVersion());
+ if (_selfIndex >= 0) {
+ const MemberConfig& me = _selfConfig();
+ hbArgs.setSenderHost(me.getHostAndPort());
+ hbArgs.setSenderId(me.getId());
+ }
+ } else {
+ hbArgs.setSetName(ourSetName);
+ hbArgs.setConfigVersion(-2);
+ }
+
+ const Milliseconds timeoutPeriod(
+ _rsConfig.isInitialized()
+ ? _rsConfig.getHeartbeatTimeoutPeriodMillis()
+ : Milliseconds(ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.total_milliseconds()));
+ const Milliseconds timeout(timeoutPeriod.total_milliseconds() -
+ alreadyElapsed.total_milliseconds());
+ return std::make_pair(hbArgs, timeout);
+}
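
// [Sketch] The timeout arithmetic at the end of prepareHeartbeatRequest: all
// retries of one logical heartbeat share a single timeout budget, so each
// attempt gets the period minus what has already elapsed, and alreadyElapsed
// resets to zero whenever a "new" heartbeat starts. Plain milliseconds stand
// in for the Milliseconds type.
#include <iostream>

long long remainingTimeoutMillis(long long timeoutPeriodMillis,
                                 long long alreadyElapsedMillis) {
    return timeoutPeriodMillis - alreadyElapsedMillis;
}

int main() {
    std::cout << remainingTimeoutMillis(10000, 0) << '\n';     // 10000: fresh start
    std::cout << remainingTimeoutMillis(10000, 7500) << '\n';  // 2500: a retry
}
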
+
+HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse(
+ Date_t now,
+ Milliseconds networkRoundTripTime,
+ const HostAndPort& target,
+ const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
+ OpTime myLastOpApplied) {
+ const MemberState originalState = getMemberState();
+ PingStats& hbStats = _pings[target];
+ invariant(hbStats.getLastHeartbeatStartDate() != Date_t(0));
+ if (!hbResponse.isOK()) {
+ hbStats.miss();
+ } else {
+ hbStats.hit(networkRoundTripTime.total_milliseconds());
+ // Log diagnostics.
+ if (hbResponse.getValue().isStateDisagreement()) {
+ LOG(1) << target << " thinks that we are down because they cannot send us heartbeats.";
+ }
+ }
+
+ const bool isUnauthorized = (hbResponse.getStatus().code() == ErrorCodes::Unauthorized) ||
+ (hbResponse.getStatus().code() == ErrorCodes::AuthenticationFailed);
+
+ Milliseconds alreadyElapsed(now.asInt64() - hbStats.getLastHeartbeatStartDate().asInt64());
+ Date_t nextHeartbeatStartDate;
+ // determine next start time
+ if (_rsConfig.isInitialized() &&
+ (hbStats.getNumFailuresSinceLastStart() <= kMaxHeartbeatRetries) &&
+ (alreadyElapsed < _rsConfig.getHeartbeatTimeoutPeriodMillis())) {
+ if (isUnauthorized) {
nextHeartbeatStartDate = now + kHeartbeatInterval.total_milliseconds();
+ } else {
+ nextHeartbeatStartDate = now;
}
+ } else {
+ nextHeartbeatStartDate = now + kHeartbeatInterval.total_milliseconds();
+ }
- if (hbResponse.isOK() && hbResponse.getValue().hasConfig()) {
- const long long currentConfigVersion =
- _rsConfig.isInitialized() ? _rsConfig.getConfigVersion() : -2;
- const ReplicaSetConfig& newConfig = hbResponse.getValue().getConfig();
- if (newConfig.getConfigVersion() > currentConfigVersion) {
- HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeReconfigAction();
- nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
- return nextAction;
- }
- else {
- // Could be we got the newer version before we got the response, or the
- // target erroneously sent us one, even through it isn't newer.
- if (newConfig.getConfigVersion() < currentConfigVersion) {
- LOG(1) << "Config version from heartbeat was older than ours.";
- }
- else {
- LOG(2) << "Config from heartbeat response was same as ours.";
- }
- if (logger::globalLogDomain()->shouldLog(
- MongoLogDefaultComponent_component,
- ::mongo::LogstreamBuilder::severityCast(2))) {
- LogstreamBuilder lsb = log();
- if (_rsConfig.isInitialized()) {
- lsb << "Current config: " << _rsConfig.toBSON() << "; ";
- }
- lsb << "Config in heartbeat: " << newConfig.toBSON();
- }
- }
- }
-
- // Check if the heartbeat target is in our config. If it isn't, there's nothing left to do,
- // so return early.
- if (!_rsConfig.isInitialized()) {
- HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
- nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
- return nextAction;
- }
- const int memberIndex = _rsConfig.findMemberIndexByHostAndPort(target);
- if (memberIndex == -1) {
- LOG(1) << "replset: Could not find " << target << " in current config so ignoring --"
- " current config: " << _rsConfig.toBSON();
- HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
+ if (hbResponse.isOK() && hbResponse.getValue().hasConfig()) {
+ const long long currentConfigVersion =
+ _rsConfig.isInitialized() ? _rsConfig.getConfigVersion() : -2;
+ const ReplicaSetConfig& newConfig = hbResponse.getValue().getConfig();
+ if (newConfig.getConfigVersion() > currentConfigVersion) {
+ HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeReconfigAction();
nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
return nextAction;
- }
- invariant(memberIndex != _selfIndex);
-
- MemberHeartbeatData& hbData = _hbdata[memberIndex];
- const MemberConfig member = _rsConfig.getMemberAt(memberIndex);
- if (!hbResponse.isOK()) {
- if (isUnauthorized) {
- LOG(1) << "setAuthIssue: heartbeat response failed due to authentication"
- " issue for member _id:" << member.getId();
- hbData.setAuthIssue(now);
- }
- else if (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries ||
- alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis()) {
-
- LOG(1) << "setDownValues: heartbeat response failed for member _id:"
- << member.getId() << ", msg: "
- << hbResponse.getStatus().reason();
-
- hbData.setDownValues(now, hbResponse.getStatus().reason());
+ } else {
+ // Could be we got the newer version before we got the response, or the
+            // target erroneously sent us one, even though it isn't newer.
+ if (newConfig.getConfigVersion() < currentConfigVersion) {
+ LOG(1) << "Config version from heartbeat was older than ours.";
+ } else {
+ LOG(2) << "Config from heartbeat response was same as ours.";
}
- else {
- LOG(3) << "Bad heartbeat response from " << target <<
- "; trying again; Retries left: " <<
- (kMaxHeartbeatRetries - hbStats.getNumFailuresSinceLastStart()) <<
- "; " << alreadyElapsed.total_milliseconds() << "ms have already elapsed";
+ if (logger::globalLogDomain()->shouldLog(MongoLogDefaultComponent_component,
+ ::mongo::LogstreamBuilder::severityCast(2))) {
+ LogstreamBuilder lsb = log();
+ if (_rsConfig.isInitialized()) {
+ lsb << "Current config: " << _rsConfig.toBSON() << "; ";
+ }
+ lsb << "Config in heartbeat: " << newConfig.toBSON();
}
}
- else {
- ReplSetHeartbeatResponse hbr = hbResponse.getValue();
- LOG(3) << "setUpValues: heartbeat response good for member _id:"
- << member.getId() << ", msg: "
- << hbr.getHbMsg();
- hbData.setUpValues(now, member.getHostAndPort(), hbr);
- }
- HeartbeatResponseAction nextAction = _updateHeartbeatDataImpl(
- memberIndex,
- originalState,
- now,
- myLastOpApplied);
+ }
+ // Check if the heartbeat target is in our config. If it isn't, there's nothing left to do,
+ // so return early.
+ if (!_rsConfig.isInitialized()) {
+ HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
return nextAction;
}
+ const int memberIndex = _rsConfig.findMemberIndexByHostAndPort(target);
+ if (memberIndex == -1) {
+ LOG(1) << "replset: Could not find " << target << " in current config so ignoring --"
+ " current config: " << _rsConfig.toBSON();
+ HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction();
+ nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
+ return nextAction;
+ }
+ invariant(memberIndex != _selfIndex);
+
+ MemberHeartbeatData& hbData = _hbdata[memberIndex];
+ const MemberConfig member = _rsConfig.getMemberAt(memberIndex);
+ if (!hbResponse.isOK()) {
+ if (isUnauthorized) {
+ LOG(1) << "setAuthIssue: heartbeat response failed due to authentication"
+ " issue for member _id:" << member.getId();
+ hbData.setAuthIssue(now);
+ } else if (hbStats.getNumFailuresSinceLastStart() > kMaxHeartbeatRetries ||
+ alreadyElapsed >= _rsConfig.getHeartbeatTimeoutPeriodMillis()) {
+ LOG(1) << "setDownValues: heartbeat response failed for member _id:" << member.getId()
+ << ", msg: " << hbResponse.getStatus().reason();
+
+ hbData.setDownValues(now, hbResponse.getStatus().reason());
+ } else {
+ LOG(3) << "Bad heartbeat response from " << target << "; trying again; Retries left: "
+ << (kMaxHeartbeatRetries - hbStats.getNumFailuresSinceLastStart()) << "; "
+ << alreadyElapsed.total_milliseconds() << "ms have already elapsed";
+ }
+ } else {
+ ReplSetHeartbeatResponse hbr = hbResponse.getValue();
+ LOG(3) << "setUpValues: heartbeat response good for member _id:" << member.getId()
+ << ", msg: " << hbr.getHbMsg();
+ hbData.setUpValues(now, member.getHostAndPort(), hbr);
+ }
+ HeartbeatResponseAction nextAction =
+ _updateHeartbeatDataImpl(memberIndex, originalState, now, myLastOpApplied);
+
+ nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
+ return nextAction;
+}
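
// [Sketch] The failure branch of processHeartbeatResponse: an auth error is
// recorded as such, a member is marked down only once the retry budget or
// the timeout window is exhausted, and earlier failures simply retry.
// kMaxRetries = 2 is an assumption mirroring kMaxHeartbeatRetries; the enum
// and other names are illustrative.
#include <iostream>

const int kMaxRetries = 2;

enum class Outcome { AuthIssue, MarkDown, RetryImmediately };

Outcome onHeartbeatFailure(bool isUnauthorized, int failuresSinceStart,
                           long long elapsedMillis, long long timeoutMillis) {
    if (isUnauthorized)
        return Outcome::AuthIssue;
    if (failuresSinceStart > kMaxRetries || elapsedMillis >= timeoutMillis)
        return Outcome::MarkDown;
    return Outcome::RetryImmediately;
}

int main() {
    std::cout << int(onHeartbeatFailure(false, 1, 500, 10000)) << '\n';  // 2: retry
    std::cout << int(onHeartbeatFailure(false, 3, 500, 10000)) << '\n';  // 1: mark down
}
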
+
+HeartbeatResponseAction TopologyCoordinatorImpl::_updateHeartbeatDataImpl(
+ int updatedConfigIndex,
+ const MemberState& originalState,
+ Date_t now,
+ const OpTime& lastOpApplied) {
+ // This method has two interrelated responsibilities, performed in two phases.
+ //
+    // First, it updates the local notion of which remote node, if any, is primary. In the
+ // process, it may request a remote primary to step down because there is a higher priority
+ // node waiting, or because the local node thinks it is primary and that it has a more
+ // recent electionTime. It may instead decide that the local node should step down itself,
+ // because a remote has a more recent election time.
+ //
+ // Second, if there is no remote primary, and the local node is not primary, it considers
+ // whether or not to stand for election.
+ invariant(updatedConfigIndex != _selfIndex);
+
+ // We are missing from the config, so do not participate in primary maintenance or election.
+ if (_selfIndex == -1) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+
+ ////////////////////
+ // Phase 1
+ ////////////////////
+
+ // If we believe the node whose data was just updated is primary, confirm that
+ // the updated data supports that notion. If not, erase our notion of who is primary.
+ if (updatedConfigIndex == _currentPrimaryIndex) {
+ const MemberHeartbeatData& updatedHBData = _hbdata[updatedConfigIndex];
+ if (!updatedHBData.up() || !updatedHBData.getState().primary()) {
+ _currentPrimaryIndex = -1;
+ }
+ }
+
+ // If the current primary is not highest priority and up to date (within 10s),
+ // have them/me stepdown.
+ if (_currentPrimaryIndex != -1) {
+ // check if we should ask the primary (possibly ourselves) to step down
+ const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
+ if (highestPriorityIndex != -1) {
+ const MemberConfig& currentPrimaryMember = _rsConfig.getMemberAt(_currentPrimaryIndex);
+ const MemberConfig& highestPriorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
+ const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex
+ ? lastOpApplied
+ : _hbdata[highestPriorityIndex].getOpTime();
- HeartbeatResponseAction TopologyCoordinatorImpl::_updateHeartbeatDataImpl(
- int updatedConfigIndex,
- const MemberState& originalState,
- Date_t now,
- const OpTime& lastOpApplied) {
-
- // This method has two interrelated responsibilities, performed in two phases.
- //
- // First, it updates the local notion of which remote node, if any is primary. In the
- // process, it may request a remote primary to step down because there is a higher priority
- // node waiting, or because the local node thinks it is primary and that it has a more
- // recent electionTime. It may instead decide that the local node should step down itself,
- // because a remote has a more recent election time.
- //
- // Second, if there is no remote primary, and the local node is not primary, it considers
- // whether or not to stand for election.
- invariant(updatedConfigIndex != _selfIndex);
-
- // We are missing from the config, so do not participate in primary maintenance or election.
- if (_selfIndex == -1) {
- return HeartbeatResponseAction::makeNoAction();
- }
-
- ////////////////////
- // Phase 1
- ////////////////////
-
- // If we believe the node whose data was just updated is primary, confirm that
- // the updated data supports that notion. If not, erase our notion of who is primary.
- if (updatedConfigIndex == _currentPrimaryIndex) {
- const MemberHeartbeatData& updatedHBData = _hbdata[updatedConfigIndex];
- if (!updatedHBData.up() || !updatedHBData.getState().primary()) {
- _currentPrimaryIndex = -1;
- }
- }
+ if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) &&
+ _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime, lastOpApplied)) {
+ const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied);
- // If the current primary is not highest priority and up to date (within 10s),
- // have them/me stepdown.
- if (_currentPrimaryIndex != -1) {
- // check if we should ask the primary (possibly ourselves) to step down
- const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied);
- if (highestPriorityIndex != -1) {
- const MemberConfig& currentPrimaryMember =
- _rsConfig.getMemberAt(_currentPrimaryIndex);
- const MemberConfig& highestPriorityMember =
- _rsConfig.getMemberAt(highestPriorityIndex);
- const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex ?
- lastOpApplied : _hbdata[highestPriorityIndex].getOpTime();
-
- if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) &&
- _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime,
- lastOpApplied)) {
- const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied);
-
- if (_iAmPrimary()) {
- if (_stepDownPending) {
- return HeartbeatResponseAction::makeNoAction();
- }
- _stepDownPending = true;
- log() << "Stepping down self (priority "
- << currentPrimaryMember.getPriority() << ") because "
- << highestPriorityMember.getHostAndPort() << " has higher priority "
- << highestPriorityMember.getPriority() << " and is only "
- << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
- << " seconds behind me";
- const Date_t until = now +
- LastVote::leaseTime.total_milliseconds() +
- kHeartbeatInterval.total_milliseconds();
- if (_electionSleepUntil < until) {
- _electionSleepUntil = until;
- }
- return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
+ if (_iAmPrimary()) {
+ if (_stepDownPending) {
+ return HeartbeatResponseAction::makeNoAction();
}
- else if ((highestPriorityMemberOptime == _selfIndex) &&
- (_electionSleepUntil <= now)) {
- // If this node is the highest priority node, and it is not in
- // an inter-election sleep period, ask the current primary to step down.
- // This is an optimization, because the remote primary will almost certainly
- // notice this node's electability promptly, via its own heartbeat process.
- log() << "Requesting that " << currentPrimaryMember.getHostAndPort()
- << " (priority " << currentPrimaryMember.getPriority()
- << ") step down because I have higher priority "
- << highestPriorityMember.getPriority() << " and am only "
- << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
- << " seconds behind it";
- int primaryIndex = _currentPrimaryIndex;
- _currentPrimaryIndex = -1;
- return HeartbeatResponseAction::makeStepDownRemoteAction(primaryIndex);
+ _stepDownPending = true;
+ log() << "Stepping down self (priority " << currentPrimaryMember.getPriority()
+ << ") because " << highestPriorityMember.getHostAndPort()
+ << " has higher priority " << highestPriorityMember.getPriority()
+ << " and is only "
+ << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
+ << " seconds behind me";
+ const Date_t until = now + LastVote::leaseTime.total_milliseconds() +
+ kHeartbeatInterval.total_milliseconds();
+ if (_electionSleepUntil < until) {
+ _electionSleepUntil = until;
}
+ return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
+            } else if ((highestPriorityIndex == _selfIndex) &&
+ (_electionSleepUntil <= now)) {
+ // If this node is the highest priority node, and it is not in
+ // an inter-election sleep period, ask the current primary to step down.
+ // This is an optimization, because the remote primary will almost certainly
+ // notice this node's electability promptly, via its own heartbeat process.
+ log() << "Requesting that " << currentPrimaryMember.getHostAndPort()
+ << " (priority " << currentPrimaryMember.getPriority()
+ << ") step down because I have higher priority "
+ << highestPriorityMember.getPriority() << " and am only "
+ << (latestOpTime.getSecs() - highestPriorityMemberOptime.getSecs())
+ << " seconds behind it";
+ int primaryIndex = _currentPrimaryIndex;
+ _currentPrimaryIndex = -1;
+ return HeartbeatResponseAction::makeStepDownRemoteAction(primaryIndex);
}
}
}
+ }
- // Scan the member list's heartbeat data for who is primary, and update
- // _currentPrimaryIndex and _role, or request a remote to step down, as necessary.
- {
- int remotePrimaryIndex = -1;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- continue;
- }
-
- if( it->getState().primary() && it->up() ) {
- if (remotePrimaryIndex != -1) {
- // two other nodes think they are primary (asynchronously polled)
- // -- wait for things to settle down.
- log() << "replSet info two remote primaries (transiently)";
- return HeartbeatResponseAction::makeNoAction();
- }
- remotePrimaryIndex = itIndex;
- }
+ // Scan the member list's heartbeat data for who is primary, and update
+ // _currentPrimaryIndex and _role, or request a remote to step down, as necessary.
+ {
+ int remotePrimaryIndex = -1;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
+ it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ continue;
}
- if (remotePrimaryIndex != -1) {
- // If it's the same as last time, don't do anything further.
- if (_currentPrimaryIndex == remotePrimaryIndex) {
+ if (it->getState().primary() && it->up()) {
+ if (remotePrimaryIndex != -1) {
+ // two other nodes think they are primary (asynchronously polled)
+ // -- wait for things to settle down.
+ log() << "replSet info two remote primaries (transiently)";
return HeartbeatResponseAction::makeNoAction();
}
- // Clear last heartbeat message on ourselves (why?)
- setMyHeartbeatMessage(now, "");
-
- // If we are also primary, this is a problem. Determine who should step down.
- if (_iAmPrimary()) {
- OpTime remoteElectionTime = _hbdata[remotePrimaryIndex].getElectionTime();
- log() << "replset: another primary seen with election time "
- << remoteElectionTime << " my election time is " << _electionTime;
-
- // Step down whomever has the older election time.
- if (remoteElectionTime > _electionTime) {
- if (_stepDownPending) {
- return HeartbeatResponseAction::makeNoAction();
- }
- _stepDownPending = true;
- log() << "stepping down; another primary was elected more recently";
- return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
- }
- else {
- log() << "another PRIMARY detected and it should step down"
- " since it was elected earlier than me";
- return HeartbeatResponseAction::makeStepDownRemoteAction(
- remotePrimaryIndex);
- }
- }
-
- _currentPrimaryIndex = remotePrimaryIndex;
- return HeartbeatResponseAction::makeNoAction();
+ remotePrimaryIndex = itIndex;
}
}
- ////////////////////
- // Phase 2
- ////////////////////
+ if (remotePrimaryIndex != -1) {
+ // If it's the same as last time, don't do anything further.
+ if (_currentPrimaryIndex == remotePrimaryIndex) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+ // Clear last heartbeat message on ourselves (why?)
+ setMyHeartbeatMessage(now, "");
- // We do not believe any remote to be primary.
+ // If we are also primary, this is a problem. Determine who should step down.
+ if (_iAmPrimary()) {
+ OpTime remoteElectionTime = _hbdata[remotePrimaryIndex].getElectionTime();
+ log() << "replset: another primary seen with election time " << remoteElectionTime
+ << " my election time is " << _electionTime;
- // If we are primary, check if we can still see majority of the set;
- // stepdown if we can't.
- if (_iAmPrimary()) {
- if (CannotSeeMajority & _getMyUnelectableReason(now, lastOpApplied)) {
- if (_stepDownPending) {
- return HeartbeatResponseAction::makeNoAction();
+ // Step down whomever has the older election time.
+ if (remoteElectionTime > _electionTime) {
+ if (_stepDownPending) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+ _stepDownPending = true;
+ log() << "stepping down; another primary was elected more recently";
+ return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
+ } else {
+ log() << "another PRIMARY detected and it should step down"
+ " since it was elected earlier than me";
+ return HeartbeatResponseAction::makeStepDownRemoteAction(remotePrimaryIndex);
}
- _stepDownPending = true;
- log() << "can't see a majority of the set, relinquishing primary";
- return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
}
- LOG(2) << "Choosing to remain primary";
+ _currentPrimaryIndex = remotePrimaryIndex;
return HeartbeatResponseAction::makeNoAction();
}
+ }
- fassert(18505, _currentPrimaryIndex == -1);
-
- const MemberState currentState = getMemberState();
- if (originalState.recovering() && currentState.secondary()) {
- // We just transitioned from RECOVERING to SECONDARY, this can only happen if we
- // received a heartbeat with an auth error when previously all the heartbeats we'd
- // received had auth errors. In this case, don't return makeElectAction() because
- // that could cause the election to start before the ReplicationCoordinator has updated
- // its notion of the member state to SECONDARY. Instead return noAction so that the
- // ReplicationCooridinator knows to update its tracking of the member state off of the
- // TopologyCoordinator, and leave starting the election until the next heartbeat comes
- // back.
- return HeartbeatResponseAction::makeNoAction();
- }
+ ////////////////////
+ // Phase 2
+ ////////////////////
- // At this point, there is no primary anywhere. Check to see if we should become a
- // candidate.
- if (!checkShouldStandForElection(now, lastOpApplied)) {
- return HeartbeatResponseAction::makeNoAction();
+ // We do not believe any remote to be primary.
+
+ // If we are primary, check if we can still see majority of the set;
+ // stepdown if we can't.
+ if (_iAmPrimary()) {
+ if (CannotSeeMajority & _getMyUnelectableReason(now, lastOpApplied)) {
+ if (_stepDownPending) {
+ return HeartbeatResponseAction::makeNoAction();
+ }
+ _stepDownPending = true;
+ log() << "can't see a majority of the set, relinquishing primary";
+ return HeartbeatResponseAction::makeStepDownSelfAction(_selfIndex);
}
- return HeartbeatResponseAction::makeElectAction();
+
+ LOG(2) << "Choosing to remain primary";
+ return HeartbeatResponseAction::makeNoAction();
}
- bool TopologyCoordinatorImpl::checkShouldStandForElection(
- Date_t now, const OpTime& lastOpApplied) {
- if (_currentPrimaryIndex != -1) {
- return false;
- }
- invariant (_role != Role::leader);
+ fassert(18505, _currentPrimaryIndex == -1);
- if (_role == Role::candidate) {
- LOG(2) << "Not standing for election again; already candidate";
- return false;
- }
+ const MemberState currentState = getMemberState();
+ if (originalState.recovering() && currentState.secondary()) {
+        // We just transitioned from RECOVERING to SECONDARY; this can only happen if we
+ // received a heartbeat with an auth error when previously all the heartbeats we'd
+ // received had auth errors. In this case, don't return makeElectAction() because
+ // that could cause the election to start before the ReplicationCoordinator has updated
+ // its notion of the member state to SECONDARY. Instead return noAction so that the
+        // ReplicationCoordinator knows to update its tracking of the member state off of the
+ // TopologyCoordinator, and leave starting the election until the next heartbeat comes
+ // back.
+ return HeartbeatResponseAction::makeNoAction();
+ }
- const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, lastOpApplied);
- if (NotCloseEnoughToLatestOptime & unelectableReason) {
- LOG(2) << "Not standing for election because " <<
- _getUnelectableReasonString(unelectableReason) << "; my last optime is " <<
- lastOpApplied << " and the newest is " << _latestKnownOpTime(lastOpApplied);
- return false;
- }
- if (unelectableReason) {
- LOG(2) << "Not standing for election because " <<
- _getUnelectableReasonString(unelectableReason);
- return false;
- }
- if (_electionSleepUntil > now) {
- LOG(2) << "Not standing for election before " <<
- dateToISOStringLocal(_electionSleepUntil) << " because I stood too recently";
- return false;
- }
- // All checks passed, become a candidate and start election proceedings.
- _role = Role::candidate;
- return true;
+ // At this point, there is no primary anywhere. Check to see if we should become a
+ // candidate.
+ if (!checkShouldStandForElection(now, lastOpApplied)) {
+ return HeartbeatResponseAction::makeNoAction();
}
+ return HeartbeatResponseAction::makeElectAction();
+}
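
// [Sketch] The "two primaries" resolution from phase 1 above, condensed: when
// a remote claims PRIMARY while we are PRIMARY, the node with the older
// election time steps down. The enum and integer election times are
// illustrative stand-ins for HeartbeatResponseAction and OpTime.
#include <iostream>

enum class Action { None, StepDownSelf, StepDownRemote };

Action resolveTwoPrimaries(bool iAmPrimary, long long myElectionTime,
                           long long remoteElectionTime) {
    if (!iAmPrimary)
        return Action::None;  // just record the remote as primary
    return remoteElectionTime > myElectionTime ? Action::StepDownSelf
                                               : Action::StepDownRemote;
}

int main() {
    std::cout << int(resolveTwoPrimaries(true, 100, 200)) << '\n';  // 1: I step down
    std::cout << int(resolveTwoPrimaries(true, 200, 100)) << '\n';  // 2: remote steps down
}
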
- bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const {
- int vUp = 0;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex || it->up()) {
- vUp += _rsConfig.getMemberAt(itIndex).getNumVotes();
- }
- }
+bool TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) {
+ if (_currentPrimaryIndex != -1) {
+ return false;
+ }
+ invariant(_role != Role::leader);
- return vUp * 2 > _rsConfig.getTotalVotingMembers();
+ if (_role == Role::candidate) {
+ LOG(2) << "Not standing for election again; already candidate";
+ return false;
}
- bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(
- const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const {
- const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied);
- // Use addition instead of subtraction to avoid overflow.
- return otherOpTime.getSecs() + 10 >= (latestKnownOpTime.getSecs());
+ const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, lastOpApplied);
+ if (NotCloseEnoughToLatestOptime & unelectableReason) {
+ LOG(2) << "Not standing for election because "
+ << _getUnelectableReasonString(unelectableReason) << "; my last optime is "
+ << lastOpApplied << " and the newest is " << _latestKnownOpTime(lastOpApplied);
+ return false;
+ }
+ if (unelectableReason) {
+ LOG(2) << "Not standing for election because "
+ << _getUnelectableReasonString(unelectableReason);
+ return false;
+ }
+ if (_electionSleepUntil > now) {
+ LOG(2) << "Not standing for election before " << dateToISOStringLocal(_electionSleepUntil)
+ << " because I stood too recently";
+ return false;
}
+ // All checks passed, become a candidate and start election proceedings.
+ _role = Role::candidate;
+ return true;
+}
- bool TopologyCoordinatorImpl::_iAmPrimary() const {
- if (_role == Role::leader) {
- invariant(_currentPrimaryIndex == _selfIndex);
- return true;
+bool TopologyCoordinatorImpl::_aMajoritySeemsToBeUp() const {
+ int vUp = 0;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex || it->up()) {
+ vUp += _rsConfig.getMemberAt(itIndex).getNumVotes();
}
- return false;
}
- OpTime TopologyCoordinatorImpl::_latestKnownOpTime(OpTime ourLastOpApplied) const {
- OpTime latest = ourLastOpApplied;
+ return vUp * 2 > _rsConfig.getTotalVotingMembers();
+}
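
// [Sketch] The strict-majority test above in isolation: votes are tallied for
// ourselves plus every member whose heartbeats look up, and the comparison
// vUp * 2 > totalVotes stays in integer arithmetic (no floating point, no
// rounding). Names are illustrative.
#include <iostream>

bool majoritySeemsUp(int votesSeenUp, int totalVotingVotes) {
    return votesSeenUp * 2 > totalVotingVotes;
}

int main() {
    std::cout << majoritySeemsUp(2, 3) << '\n';  // 1: 2 of 3 is a majority
    std::cout << majoritySeemsUp(1, 2) << '\n';  // 0: exactly half is not
}
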
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
+bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(
+ const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const {
+ const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied);
+ // Use addition instead of subtraction to avoid overflow.
+ return otherOpTime.getSecs() + 10 >= (latestKnownOpTime.getSecs());
+}
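
// [Sketch] Why the comparison above is written with addition: the optime
// seconds are unsigned, so "latest - other <= 10" would wrap around whenever
// "other" is ahead of "latest" and wrongly report the node as too far behind.
// The addition form cannot wrap in that case.
#include <cstdint>
#include <iostream>

bool closeEnoughToElect(uint32_t otherSecs, uint32_t latestSecs) {
    return otherSecs + 10 >= latestSecs;  // addition avoids the underflow
}

int main() {
    std::cout << closeEnoughToElect(20, 5) << '\n';   // 1: ahead of latest
    std::cout << closeEnoughToElect(5, 20) << '\n';   // 0: 15s behind > 10s window
    std::cout << closeEnoughToElect(10, 20) << '\n';  // 1: exactly 10s behind
}
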
- if (indexOfIterator(_hbdata, it) == _selfIndex) {
- continue;
- }
- if (!it->up()) {
- continue;
- }
+bool TopologyCoordinatorImpl::_iAmPrimary() const {
+ if (_role == Role::leader) {
+ invariant(_currentPrimaryIndex == _selfIndex);
+ return true;
+ }
+ return false;
+}
- OpTime optime = it->getOpTime();
+OpTime TopologyCoordinatorImpl::_latestKnownOpTime(OpTime ourLastOpApplied) const {
+ OpTime latest = ourLastOpApplied;
- if (optime > latest) {
- latest = optime;
- }
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ if (indexOfIterator(_hbdata, it) == _selfIndex) {
+ continue;
}
+ if (!it->up()) {
+ continue;
+ }
+
+ OpTime optime = it->getOpTime();
- return latest;
+ if (optime > latest) {
+ latest = optime;
+ }
}
- bool TopologyCoordinatorImpl::_isMemberHigherPriority(int memberOneIndex,
- int memberTwoIndex) const {
- if (memberOneIndex == -1)
- return false;
+ return latest;
+}
- if (memberTwoIndex == -1)
- return true;
+bool TopologyCoordinatorImpl::_isMemberHigherPriority(int memberOneIndex,
+ int memberTwoIndex) const {
+ if (memberOneIndex == -1)
+ return false;
- return _rsConfig.getMemberAt(memberOneIndex).getPriority() >
- _rsConfig.getMemberAt(memberTwoIndex).getPriority();
- }
+ if (memberTwoIndex == -1)
+ return true;
- int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(
- Date_t now, OpTime lastOpApplied) const {
- int maxIndex = -1;
- for (int currentIndex = 0; currentIndex < _rsConfig.getNumMembers(); currentIndex++) {
- UnelectableReasonMask reason = currentIndex == _selfIndex ?
- _getMyUnelectableReason(now, lastOpApplied) :
- _getUnelectableReason(currentIndex, lastOpApplied);
- if (None == reason && _isMemberHigherPriority(currentIndex, maxIndex)) {
- maxIndex = currentIndex;
- }
- }
+ return _rsConfig.getMemberAt(memberOneIndex).getPriority() >
+ _rsConfig.getMemberAt(memberTwoIndex).getPriority();
+}
- return maxIndex;
+int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(Date_t now,
+ OpTime lastOpApplied) const {
+ int maxIndex = -1;
+ for (int currentIndex = 0; currentIndex < _rsConfig.getNumMembers(); currentIndex++) {
+ UnelectableReasonMask reason = currentIndex == _selfIndex
+ ? _getMyUnelectableReason(now, lastOpApplied)
+ : _getUnelectableReason(currentIndex, lastOpApplied);
+ if (None == reason && _isMemberHigherPriority(currentIndex, maxIndex)) {
+ maxIndex = currentIndex;
+ }
}
- void TopologyCoordinatorImpl::changeMemberState_forTest(const MemberState& newMemberState,
- OpTime electionTime) {
- invariant(_selfIndex != -1);
- if (newMemberState == getMemberState())
- return;
- switch(newMemberState.s) {
+ return maxIndex;
+}
+
+void TopologyCoordinatorImpl::changeMemberState_forTest(const MemberState& newMemberState,
+ OpTime electionTime) {
+ invariant(_selfIndex != -1);
+ if (newMemberState == getMemberState())
+ return;
+ switch (newMemberState.s) {
case MemberState::RS_PRIMARY:
_role = Role::candidate;
processWinElection(OID(), electionTime);
@@ -1263,708 +1199,675 @@ namespace {
}
break;
case MemberState::RS_STARTUP:
- updateConfig(
- ReplicaSetConfig(),
- -1,
- Date_t(),
- OpTime());
+ updateConfig(ReplicaSetConfig(), -1, Date_t(), OpTime());
break;
default:
severe() << "Cannot switch to state " << newMemberState;
invariant(false);
- }
- if (getMemberState() != newMemberState.s) {
- severe() << "Expected to enter state " << newMemberState << " but am now in " <<
- getMemberState();
- invariant(false);
- }
- log() << "replSet " << newMemberState;
- }
-
- void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) {
- if (primaryIndex == _selfIndex) {
- changeMemberState_forTest(MemberState::RS_PRIMARY);
- }
- else {
- if (_iAmPrimary()) {
- changeMemberState_forTest(MemberState::RS_SECONDARY);
- }
- if (primaryIndex != -1) {
- ReplSetHeartbeatResponse hbResponse;
- hbResponse.setState(MemberState::RS_PRIMARY);
- hbResponse.setElectionTime(OpTime());
- hbResponse.setOpTime(_hbdata[primaryIndex].getOpTime());
- hbResponse.setSyncingTo("");
- hbResponse.setHbMsg("");
- _hbdata[primaryIndex].setUpValues(
- _hbdata[primaryIndex].getLastHeartbeat(),
- _rsConfig.getMemberAt(primaryIndex).getHostAndPort(),
- hbResponse);
- }
- _currentPrimaryIndex = primaryIndex;
- }
}
-
- const MemberConfig* TopologyCoordinatorImpl::_currentPrimaryMember() const {
- if (_currentPrimaryIndex == -1)
- return NULL;
-
- return &(_rsConfig.getMemberAt(_currentPrimaryIndex));
+ if (getMemberState() != newMemberState.s) {
+ severe() << "Expected to enter state " << newMemberState << " but am now in "
+ << getMemberState();
+ invariant(false);
}
+ log() << "replSet " << newMemberState;
+}
- void TopologyCoordinatorImpl::prepareStatusResponse(
- const ReplicationExecutor::CallbackData& data,
- Date_t now,
- unsigned selfUptime,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result) {
- if (data.status == ErrorCodes::CallbackCanceled) {
- *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
- return;
- }
-
- // output for each member
- vector<BSONObj> membersOut;
- const MemberState myState = getMemberState();
+void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) {
+ if (primaryIndex == _selfIndex) {
+ changeMemberState_forTest(MemberState::RS_PRIMARY);
+ } else {
+ if (_iAmPrimary()) {
+ changeMemberState_forTest(MemberState::RS_SECONDARY);
+ }
+ if (primaryIndex != -1) {
+ ReplSetHeartbeatResponse hbResponse;
+ hbResponse.setState(MemberState::RS_PRIMARY);
+ hbResponse.setElectionTime(OpTime());
+ hbResponse.setOpTime(_hbdata[primaryIndex].getOpTime());
+ hbResponse.setSyncingTo("");
+ hbResponse.setHbMsg("");
+ _hbdata[primaryIndex].setUpValues(_hbdata[primaryIndex].getLastHeartbeat(),
+ _rsConfig.getMemberAt(primaryIndex).getHostAndPort(),
+ hbResponse);
+ }
+ _currentPrimaryIndex = primaryIndex;
+ }
+}
+
+const MemberConfig* TopologyCoordinatorImpl::_currentPrimaryMember() const {
+ if (_currentPrimaryIndex == -1)
+ return NULL;
+
+ return &(_rsConfig.getMemberAt(_currentPrimaryIndex));
+}
+
+void TopologyCoordinatorImpl::prepareStatusResponse(const ReplicationExecutor::CallbackData& data,
+ Date_t now,
+ unsigned selfUptime,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result) {
+ if (data.status == ErrorCodes::CallbackCanceled) {
+ *result = Status(ErrorCodes::ShutdownInProgress, "replication system is shutting down");
+ return;
+ }
+
+ // output for each member
+ vector<BSONObj> membersOut;
+ const MemberState myState = getMemberState();
+
+ if (_selfIndex == -1) {
+ // We're REMOVED or have an invalid config
+ response->append("state", static_cast<int>(myState.s));
+ response->append("stateStr", myState.toString());
+ response->append("uptime", selfUptime);
+ response->append("optime", lastOpApplied);
+ response->appendDate("optimeDate", Date_t(lastOpApplied.getSecs() * 1000ULL));
+ if (_maintenanceModeCalls) {
+ response->append("maintenanceMode", _maintenanceModeCalls);
+ }
+ std::string s = _getHbmsg(now);
+ if (!s.empty())
+ response->append("infoMessage", s);
+ *result = Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Our replica set config is invalid or we are not a member of it");
+ return;
+ }
+
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ // add self
+ BSONObjBuilder bb;
+ bb.append("_id", _selfConfig().getId());
+ bb.append("name", _selfConfig().getHostAndPort().toString());
+ bb.append("health", 1.0);
+ bb.append("state", static_cast<int>(myState.s));
+ bb.append("stateStr", myState.toString());
+ bb.append("uptime", selfUptime);
+ if (!_selfConfig().isArbiter()) {
+ bb.append("optime", lastOpApplied);
+ bb.appendDate("optimeDate", Date_t(lastOpApplied.getSecs() * 1000ULL));
+ }
+
+ if (!_syncSource.empty() && !_iAmPrimary()) {
+ bb.append("syncingTo", _syncSource.toString());
+ }
- if (_selfIndex == -1) {
- // We're REMOVED or have an invalid config
- response->append("state", static_cast<int>(myState.s));
- response->append("stateStr", myState.toString());
- response->append("uptime", selfUptime);
- response->append("optime", lastOpApplied);
- response->appendDate("optimeDate", Date_t(lastOpApplied.getSecs() * 1000ULL));
if (_maintenanceModeCalls) {
- response->append("maintenanceMode", _maintenanceModeCalls);
+ bb.append("maintenanceMode", _maintenanceModeCalls);
}
- std::string s = _getHbmsg(now);
- if( !s.empty() )
- response->append("infoMessage", s);
- *result = Status(ErrorCodes::InvalidReplicaSetConfig,
- "Our replica set config is invalid or we are not a member of it");
- return;
- }
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- // add self
- BSONObjBuilder bb;
- bb.append("_id", _selfConfig().getId());
- bb.append("name", _selfConfig().getHostAndPort().toString());
- bb.append("health", 1.0);
- bb.append("state", static_cast<int>(myState.s));
- bb.append("stateStr", myState.toString());
- bb.append("uptime", selfUptime);
- if (!_selfConfig().isArbiter()) {
- bb.append("optime", lastOpApplied);
- bb.appendDate("optimeDate", Date_t(lastOpApplied.getSecs() * 1000ULL));
- }
-
- if (!_syncSource.empty() && !_iAmPrimary()) {
- bb.append("syncingTo", _syncSource.toString());
- }
-
- if (_maintenanceModeCalls) {
- bb.append("maintenanceMode", _maintenanceModeCalls);
- }
-
- std::string s = _getHbmsg(now);
- if( !s.empty() )
- bb.append("infoMessage", s);
-
- if (myState.primary()) {
- bb.append("electionTime", _electionTime);
- bb.appendDate("electionDate", Date_t(_electionTime.getSecs() * 1000ULL));
- }
- bb.appendIntOrLL("configVersion", _rsConfig.getConfigVersion());
- bb.append("self", true);
- membersOut.push_back(bb.obj());
+ std::string s = _getHbmsg(now);
+ if (!s.empty())
+ bb.append("infoMessage", s);
+
+ if (myState.primary()) {
+ bb.append("electionTime", _electionTime);
+ bb.appendDate("electionDate", Date_t(_electionTime.getSecs() * 1000ULL));
+ }
+ bb.appendIntOrLL("configVersion", _rsConfig.getConfigVersion());
+ bb.append("self", true);
+ membersOut.push_back(bb.obj());
+ } else {
+ // add non-self member
+ const MemberConfig& itConfig = _rsConfig.getMemberAt(itIndex);
+ BSONObjBuilder bb;
+ bb.append("_id", itConfig.getId());
+ bb.append("name", itConfig.getHostAndPort().toString());
+ double h = it->getHealth();
+ bb.append("health", h);
+ const MemberState state = it->getState();
+ bb.append("state", static_cast<int>(state.s));
+ if (h == 0) {
+ // if we can't connect the state info is from the past
+ // and could be confusing to show
+ bb.append("stateStr", "(not reachable/healthy)");
+ } else {
+ bb.append("stateStr", it->getState().toString());
+ }
+
+ const unsigned int uptime = static_cast<unsigned int>(
+ (it->getUpSince() ? (now - it->getUpSince()) / 1000 /* convert millis to secs */
+ : 0));
+ bb.append("uptime", uptime);
+ if (!itConfig.isArbiter()) {
+ bb.append("optime", it->getOpTime());
+ bb.appendDate("optimeDate", Date_t(it->getOpTime().getSecs() * 1000ULL));
+ }
+ bb.appendDate("lastHeartbeat", it->getLastHeartbeat());
+ bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv());
+ const int ping = _getPing(itConfig.getHostAndPort());
+ if (ping != -1) {
+ bb.append("pingMs", ping);
+ std::string s = it->getLastHeartbeatMsg();
+ if (!s.empty())
+ bb.append("lastHeartbeatMessage", s);
+ }
+ if (it->hasAuthIssue()) {
+ bb.append("authenticated", false);
+ }
+ const std::string syncSource = it->getSyncSource();
+ if (!syncSource.empty() && !state.primary()) {
+ bb.append("syncingTo", syncSource);
}
- else {
- // add non-self member
- const MemberConfig& itConfig = _rsConfig.getMemberAt(itIndex);
- BSONObjBuilder bb;
- bb.append("_id", itConfig.getId());
- bb.append("name", itConfig.getHostAndPort().toString());
- double h = it->getHealth();
- bb.append("health", h);
- const MemberState state = it->getState();
- bb.append("state", static_cast<int>(state.s));
- if( h == 0 ) {
- // if we can't connect the state info is from the past
- // and could be confusing to show
- bb.append("stateStr", "(not reachable/healthy)");
- }
- else {
- bb.append("stateStr", it->getState().toString());
- }
-
- const unsigned int uptime = static_cast<unsigned int> ((it->getUpSince() ?
- (now - it->getUpSince()) / 1000 /* convert millis to secs */ : 0));
- bb.append("uptime", uptime);
- if (!itConfig.isArbiter()) {
- bb.append("optime", it->getOpTime());
- bb.appendDate("optimeDate", Date_t(it->getOpTime().getSecs() * 1000ULL));
- }
- bb.appendDate("lastHeartbeat", it->getLastHeartbeat());
- bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv());
- const int ping = _getPing(itConfig.getHostAndPort());
- if (ping != -1) {
- bb.append("pingMs", ping);
- std::string s = it->getLastHeartbeatMsg();
- if( !s.empty() )
- bb.append("lastHeartbeatMessage", s);
- }
- if (it->hasAuthIssue()) {
- bb.append("authenticated", false);
- }
- const std::string syncSource = it->getSyncSource();
- if (!syncSource.empty() && !state.primary()) {
- bb.append("syncingTo", syncSource);
- }
- if (state == MemberState::RS_PRIMARY) {
- bb.append("electionTime", it->getElectionTime());
- bb.appendDate("electionDate",
- Date_t(it->getElectionTime().getSecs() * 1000ULL));
- }
- bb.appendIntOrLL("configVersion", it->getConfigVersion());
- membersOut.push_back(bb.obj());
+ if (state == MemberState::RS_PRIMARY) {
+ bb.append("electionTime", it->getElectionTime());
+ bb.appendDate("electionDate", Date_t(it->getElectionTime().getSecs() * 1000ULL));
}
+ bb.appendIntOrLL("configVersion", it->getConfigVersion());
+ membersOut.push_back(bb.obj());
}
+ }
- // sort members bson
- sort(membersOut.begin(), membersOut.end());
+ // sort members bson
+ sort(membersOut.begin(), membersOut.end());
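+    // (BSONObj comparison walks fields in document order, so this effectively
+    // sorts the member documents by their leading "_id" field.)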
- response->append("set",
- _rsConfig.isInitialized() ? _rsConfig.getReplSetName() : "");
- response->append("date", now);
- response->append("myState", myState.s);
+ response->append("set", _rsConfig.isInitialized() ? _rsConfig.getReplSetName() : "");
+ response->append("date", now);
+ response->append("myState", myState.s);
- // Add sync source info
- if (!_syncSource.empty() && !myState.primary() && !myState.removed()) {
- response->append("syncingTo", _syncSource.toString());
- }
-
- response->append("members", membersOut);
- *result = Status::OK();
+ // Add sync source info
+ if (!_syncSource.empty() && !myState.primary() && !myState.removed()) {
+ response->append("syncingTo", _syncSource.toString());
}
- void TopologyCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
-
- const MemberState myState = getMemberState();
- if (!_rsConfig.isInitialized() || myState.removed()) {
- response->markAsNoConfig();
- return;
- }
+ response->append("members", membersOut);
+ *result = Status::OK();
+}
- response->setReplSetName(_rsConfig.getReplSetName());
- response->setReplSetVersion(_rsConfig.getConfigVersion());
- response->setIsMaster(myState.primary());
- response->setIsSecondary(myState.secondary());
+void TopologyCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) {
+ const MemberState myState = getMemberState();
+ if (!_rsConfig.isInitialized() || myState.removed()) {
+ response->markAsNoConfig();
+ return;
+ }
- {
- for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
- it != _rsConfig.membersEnd(); ++it) {
- if (it->isHidden() || it->getSlaveDelay().total_seconds() > 0) {
- continue;
- }
+ response->setReplSetName(_rsConfig.getReplSetName());
+ response->setReplSetVersion(_rsConfig.getConfigVersion());
+ response->setIsMaster(myState.primary());
+ response->setIsSecondary(myState.secondary());
- if (it->isElectable()) {
- response->addHost(it->getHostAndPort());
- }
- else if (it->isArbiter()) {
- response->addArbiter(it->getHostAndPort());
- }
- else {
- response->addPassive(it->getHostAndPort());
- }
+ {
+ for (ReplicaSetConfig::MemberIterator it = _rsConfig.membersBegin();
+ it != _rsConfig.membersEnd();
+ ++it) {
+ if (it->isHidden() || it->getSlaveDelay().total_seconds() > 0) {
+ continue;
}
- }
-
- const MemberConfig* curPrimary = _currentPrimaryMember();
- if (curPrimary) {
- response->setPrimary(curPrimary->getHostAndPort());
- }
- const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
- if (selfConfig.isArbiter()) {
- response->setIsArbiterOnly(true);
- }
- else if (selfConfig.getPriority() == 0) {
- response->setIsPassive(true);
- }
- if (selfConfig.getSlaveDelay().total_seconds()) {
- response->setSlaveDelay(selfConfig.getSlaveDelay());
- }
- if (selfConfig.isHidden()) {
- response->setIsHidden(true);
- }
- if (!selfConfig.shouldBuildIndexes()) {
- response->setShouldBuildIndexes(false);
- }
- const ReplicaSetTagConfig tagConfig = _rsConfig.getTagConfig();
- if (selfConfig.hasTags(tagConfig)) {
- for (MemberConfig::TagIterator tag = selfConfig.tagsBegin();
- tag != selfConfig.tagsEnd(); ++tag) {
- std::string tagKey = tagConfig.getTagKey(*tag);
- if (tagKey[0] == '$') {
- // Filter out internal tags
- continue;
- }
- response->addTag(tagKey, tagConfig.getTagValue(*tag));
+ if (it->isElectable()) {
+ response->addHost(it->getHostAndPort());
+ } else if (it->isArbiter()) {
+ response->addArbiter(it->getHostAndPort());
+ } else {
+ response->addPassive(it->getHostAndPort());
}
}
- response->setMe(selfConfig.getHostAndPort());
- if (_iAmPrimary()) {
- response->setElectionId(_electionId);
- }
}
- void TopologyCoordinatorImpl::prepareFreezeResponse(
- Date_t now, int secs, BSONObjBuilder* response) {
-
- if (secs == 0) {
- _stepDownUntil = now;
- log() << "replSet info 'unfreezing'";
- response->append("info", "unfreezing");
-
- if (_followerMode == MemberState::RS_SECONDARY &&
- _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- // If we are a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
- // we must transition to candidate now that our stepdown period
- // is no longer active, in leiu of heartbeats.
-                // is no longer active, in lieu of heartbeats.
- }
- }
- else {
- if ( secs == 1 )
- response->append("warning", "you really want to freeze for only 1 second?");
+ const MemberConfig* curPrimary = _currentPrimaryMember();
+ if (curPrimary) {
+ response->setPrimary(curPrimary->getHostAndPort());
+ }
- if (!_iAmPrimary()) {
- _stepDownUntil = std::max(_stepDownUntil, Date_t(now + (secs * 1000)));
- log() << "replSet info 'freezing' for " << secs << " seconds";
- }
- else {
- log() << "replSet info received freeze command but we are primary";
+ const MemberConfig& selfConfig = _rsConfig.getMemberAt(_selfIndex);
+ if (selfConfig.isArbiter()) {
+ response->setIsArbiterOnly(true);
+ } else if (selfConfig.getPriority() == 0) {
+ response->setIsPassive(true);
+ }
+ if (selfConfig.getSlaveDelay().total_seconds()) {
+ response->setSlaveDelay(selfConfig.getSlaveDelay());
+ }
+ if (selfConfig.isHidden()) {
+ response->setIsHidden(true);
+ }
+ if (!selfConfig.shouldBuildIndexes()) {
+ response->setShouldBuildIndexes(false);
+ }
+ const ReplicaSetTagConfig tagConfig = _rsConfig.getTagConfig();
+ if (selfConfig.hasTags(tagConfig)) {
+ for (MemberConfig::TagIterator tag = selfConfig.tagsBegin(); tag != selfConfig.tagsEnd();
+ ++tag) {
+ std::string tagKey = tagConfig.getTagKey(*tag);
+ if (tagKey[0] == '$') {
+ // Filter out internal tags
+ continue;
}
+ response->addTag(tagKey, tagConfig.getTagValue(*tag));
}
}
+ response->setMe(selfConfig.getHostAndPort());
+ if (_iAmPrimary()) {
+ response->setElectionId(_electionId);
+ }
+}
- bool TopologyCoordinatorImpl::becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) {
- if (_stepDownUntil > now) {
- return false;
- }
+void TopologyCoordinatorImpl::prepareFreezeResponse(Date_t now,
+ int secs,
+ BSONObjBuilder* response) {
+ if (secs == 0) {
+ _stepDownUntil = now;
+ log() << "replSet info 'unfreezing'";
+ response->append("info", "unfreezing");
- if (_followerMode == MemberState::RS_SECONDARY &&
- _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- // If the new config describes a one-node replica set, we're the one member,
+ if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ // If we are a one-node replica set, we're the one member,
// we're electable, and we are currently in followerMode SECONDARY,
-            // we must transition to candidate, in lieu of heartbeats.
+ // we must transition to candidate now that our stepdown period
+            // is no longer active, in lieu of heartbeats.
_role = Role::candidate;
- return true;
}
- return false;
- }
+ } else {
+ if (secs == 1)
+ response->append("warning", "you really want to freeze for only 1 second?");
- void TopologyCoordinatorImpl::setElectionSleepUntil(Date_t newTime) {
- if (_electionSleepUntil < newTime) {
- _electionSleepUntil = newTime;
+ if (!_iAmPrimary()) {
+ _stepDownUntil = std::max(_stepDownUntil, Date_t(now + (secs * 1000)));
+ log() << "replSet info 'freezing' for " << secs << " seconds";
+ } else {
+ log() << "replSet info received freeze command but we are primary";
}
}
+}
- OpTime TopologyCoordinatorImpl::getElectionTime() const {
- return _electionTime;
- }
-
- OID TopologyCoordinatorImpl::getElectionId() const {
- return _electionId;
- }
-
- int TopologyCoordinatorImpl::getCurrentPrimaryIndex() const {
- return _currentPrimaryIndex;
+bool TopologyCoordinatorImpl::becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now) {
+ if (_stepDownUntil > now) {
+ return false;
}
- Date_t TopologyCoordinatorImpl::getStepDownTime() const {
- return _stepDownUntil;
+ if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ // If the new config describes a one-node replica set, we're the one member,
+ // we're electable, and we are currently in followerMode SECONDARY,
+        // we must transition to candidate, in lieu of heartbeats.
+ _role = Role::candidate;
+ return true;
}
-
- void TopologyCoordinatorImpl::_updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now) {
- std::vector<MemberHeartbeatData> oldHeartbeats;
- _hbdata.swap(oldHeartbeats);
-
- int index = 0;
- for (ReplicaSetConfig::MemberIterator it = newConfig.membersBegin();
- it != newConfig.membersEnd();
- ++it, ++index) {
- const MemberConfig& newMemberConfig = *it;
- // TODO: C++11: use emplace_back()
- if (index == selfIndex) {
- // Insert placeholder for ourself, though we will never consult it.
- _hbdata.push_back(MemberHeartbeatData());
- }
- else {
- MemberHeartbeatData newHeartbeatData;
- for (int oldIndex = 0; oldIndex < _rsConfig.getNumMembers(); ++oldIndex) {
- const MemberConfig& oldMemberConfig = _rsConfig.getMemberAt(oldIndex);
- if (oldMemberConfig.getId() == newMemberConfig.getId() &&
- oldMemberConfig.getHostAndPort() == newMemberConfig.getHostAndPort()) {
- // This member existed in the old config with the same member ID and
- // HostAndPort, so copy its heartbeat data over.
- newHeartbeatData = oldHeartbeats[oldIndex];
- break;
- }
+ return false;
+}
+
+void TopologyCoordinatorImpl::setElectionSleepUntil(Date_t newTime) {
+ if (_electionSleepUntil < newTime) {
+ _electionSleepUntil = newTime;
+ }
+}
+
+OpTime TopologyCoordinatorImpl::getElectionTime() const {
+ return _electionTime;
+}
+
+OID TopologyCoordinatorImpl::getElectionId() const {
+ return _electionId;
+}
+
+int TopologyCoordinatorImpl::getCurrentPrimaryIndex() const {
+ return _currentPrimaryIndex;
+}
+
+Date_t TopologyCoordinatorImpl::getStepDownTime() const {
+ return _stepDownUntil;
+}
+
+void TopologyCoordinatorImpl::_updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now) {
+ std::vector<MemberHeartbeatData> oldHeartbeats;
+ _hbdata.swap(oldHeartbeats);
+
+ int index = 0;
+ for (ReplicaSetConfig::MemberIterator it = newConfig.membersBegin();
+ it != newConfig.membersEnd();
+ ++it, ++index) {
+ const MemberConfig& newMemberConfig = *it;
+ // TODO: C++11: use emplace_back()
+ if (index == selfIndex) {
+ // Insert placeholder for ourself, though we will never consult it.
+ _hbdata.push_back(MemberHeartbeatData());
+ } else {
+ MemberHeartbeatData newHeartbeatData;
+ for (int oldIndex = 0; oldIndex < _rsConfig.getNumMembers(); ++oldIndex) {
+ const MemberConfig& oldMemberConfig = _rsConfig.getMemberAt(oldIndex);
+ if (oldMemberConfig.getId() == newMemberConfig.getId() &&
+ oldMemberConfig.getHostAndPort() == newMemberConfig.getHostAndPort()) {
+ // This member existed in the old config with the same member ID and
+ // HostAndPort, so copy its heartbeat data over.
+ newHeartbeatData = oldHeartbeats[oldIndex];
+ break;
}
- _hbdata.push_back(newHeartbeatData);
}
+ _hbdata.push_back(newHeartbeatData);
}
}
+}
- // This function installs a new config object and recreates MemberHeartbeatData objects
- // that reflect the new config.
- void TopologyCoordinatorImpl::updateConfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now,
- OpTime lastOpApplied) {
- invariant(_role != Role::candidate);
- invariant(selfIndex < newConfig.getNumMembers());
+// This function installs a new config object and recreates MemberHeartbeatData objects
+// that reflect the new config.
+void TopologyCoordinatorImpl::updateConfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now,
+ OpTime lastOpApplied) {
+ invariant(_role != Role::candidate);
+ invariant(selfIndex < newConfig.getNumMembers());
- _updateHeartbeatDataForReconfig(newConfig, selfIndex, now);
- _rsConfig = newConfig;
- _selfIndex = selfIndex;
- _forceSyncSourceIndex = -1;
+ _updateHeartbeatDataForReconfig(newConfig, selfIndex, now);
+ _rsConfig = newConfig;
+ _selfIndex = selfIndex;
+ _forceSyncSourceIndex = -1;
- if (_role == Role::leader) {
- if (_selfIndex == -1) {
- log() << "Could not remain primary because no longer a member of the replica set";
- }
- else if (!_selfConfig().isElectable()) {
- log() <<" Could not remain primary because no longer electable";
- }
- else {
- // Don't stepdown if you don't have to.
- _currentPrimaryIndex = _selfIndex;
- return;
- }
- _role = Role::follower;
+ if (_role == Role::leader) {
+ if (_selfIndex == -1) {
+ log() << "Could not remain primary because no longer a member of the replica set";
+ } else if (!_selfConfig().isElectable()) {
+ log() << " Could not remain primary because no longer electable";
+ } else {
+ // Don't stepdown if you don't have to.
+ _currentPrimaryIndex = _selfIndex;
+ return;
}
+ _role = Role::follower;
+ }
- // By this point we know we are in Role::follower
- _currentPrimaryIndex = -1; // force secondaries to re-detect who the primary is
- _stepDownPending = false;
-
- if (_followerMode == MemberState::RS_SECONDARY &&
- _rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- // If the new config describes a one-node replica set, we're the one member,
- // we're electable, and we are currently in followerMode SECONDARY,
-        // we must transition to candidate, in lieu of heartbeats.
- _role = Role::candidate;
- }
+ // By this point we know we are in Role::follower
+ _currentPrimaryIndex = -1; // force secondaries to re-detect who the primary is
+ _stepDownPending = false;
+
+ if (_followerMode == MemberState::RS_SECONDARY && _rsConfig.getNumMembers() == 1 &&
+ _selfIndex == 0 && _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ // If the new config describes a one-node replica set, we're the one member,
+ // we're electable, and we are currently in followerMode SECONDARY,
+        // we must transition to candidate, in lieu of heartbeats.
+ _role = Role::candidate;
}
- std::string TopologyCoordinatorImpl::_getHbmsg(Date_t now) const {
- // ignore messages over 2 minutes old
- if ((now - _hbmsgTime) > 120) {
- return "";
- }
- return _hbmsg;
+}
+std::string TopologyCoordinatorImpl::_getHbmsg(Date_t now) const {
+ // ignore messages over 2 minutes old
+ if ((now - _hbmsgTime) > 120) {
+ return "";
}
+ return _hbmsg;
+}
- void TopologyCoordinatorImpl::setMyHeartbeatMessage(const Date_t now,
- const std::string& message) {
- _hbmsgTime = now;
- _hbmsg = message;
- }
+void TopologyCoordinatorImpl::setMyHeartbeatMessage(const Date_t now, const std::string& message) {
+ _hbmsgTime = now;
+ _hbmsg = message;
+}
+
+const MemberConfig& TopologyCoordinatorImpl::_selfConfig() const {
+ return _rsConfig.getMemberAt(_selfIndex);
+}
- const MemberConfig& TopologyCoordinatorImpl::_selfConfig() const {
- return _rsConfig.getMemberAt(_selfIndex);
+TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnelectableReason(
+ int index, const OpTime& lastOpApplied) const {
+ invariant(index != _selfIndex);
+ const MemberConfig& memberConfig = _rsConfig.getMemberAt(index);
+ const MemberHeartbeatData& hbData = _hbdata[index];
+ UnelectableReasonMask result = None;
+ if (memberConfig.isArbiter()) {
+ result |= ArbiterIAm;
}
+ if (memberConfig.getPriority() <= 0) {
+ result |= NoPriority;
+ }
+ if (hbData.getState() != MemberState::RS_SECONDARY) {
+ result |= NotSecondary;
+ }
+ if (!_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) {
+ result |= NotCloseEnoughToLatestOptime;
+ }
+ if (hbData.up() && hbData.isUnelectable()) {
+ result |= RefusesToStand;
+ }
+ invariant(result || memberConfig.isElectable());
+ return result;
+}
- TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnelectableReason(
- int index,
- const OpTime& lastOpApplied) const {
- invariant(index != _selfIndex);
- const MemberConfig& memberConfig = _rsConfig.getMemberAt(index);
- const MemberHeartbeatData& hbData = _hbdata[index];
- UnelectableReasonMask result = None;
- if (memberConfig.isArbiter()) {
- result |= ArbiterIAm;
- }
- if (memberConfig.getPriority() <= 0) {
- result |= NoPriority;
- }
- if (hbData.getState() != MemberState::RS_SECONDARY) {
- result |= NotSecondary;
- }
- if (!_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) {
- result |= NotCloseEnoughToLatestOptime;
- }
- if (hbData.up() && hbData.isUnelectable()) {
- result |= RefusesToStand;
- }
- invariant(result || memberConfig.isElectable());
+TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUnelectableReason(
+ const Date_t now, const OpTime lastApplied) const {
+ UnelectableReasonMask result = None;
+ if (lastApplied.isNull()) {
+ result |= NoData;
+ }
+ if (!_aMajoritySeemsToBeUp()) {
+ result |= CannotSeeMajority;
+ }
+ if (_selfIndex == -1) {
+ result |= NotInitialized;
return result;
}
+ if (_selfConfig().isArbiter()) {
+ result |= ArbiterIAm;
+ }
+ if (_selfConfig().getPriority() <= 0) {
+ result |= NoPriority;
+ }
+ if (_stepDownUntil > now) {
+ result |= StepDownPeriodActive;
+ }
+ if (_lastVote.whoId != -1 && _lastVote.whoId != _rsConfig.getMemberAt(_selfIndex).getId() &&
+ _lastVote.when.millis + LastVote::leaseTime.total_milliseconds() >= now.millis) {
+ result |= VotedTooRecently;
+ }
- TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUnelectableReason(
- const Date_t now,
- const OpTime lastApplied) const {
-
- UnelectableReasonMask result = None;
- if (lastApplied.isNull()) {
- result |= NoData;
- }
- if (!_aMajoritySeemsToBeUp()) {
- result |= CannotSeeMajority;
- }
- if (_selfIndex == -1) {
- result |= NotInitialized;
- return result;
- }
- if (_selfConfig().isArbiter()) {
- result |= ArbiterIAm;
- }
- if (_selfConfig().getPriority() <= 0) {
- result |= NoPriority;
- }
- if (_stepDownUntil > now) {
- result |= StepDownPeriodActive;
- }
- if (_lastVote.whoId != -1 &&
- _lastVote.whoId !=_rsConfig.getMemberAt(_selfIndex).getId() &&
- _lastVote.when.millis + LastVote::leaseTime.total_milliseconds() >= now.millis) {
- result |= VotedTooRecently;
- }
-
- // Cannot be electable unless secondary or already primary
- if (!getMemberState().secondary() && !_iAmPrimary()) {
- result |= NotSecondary;
- }
- if (!_isOpTimeCloseEnoughToLatestToElect(lastApplied, lastApplied)) {
- result |= NotCloseEnoughToLatestOptime;
- }
- return result;
+ // Cannot be electable unless secondary or already primary
+ if (!getMemberState().secondary() && !_iAmPrimary()) {
+ result |= NotSecondary;
}
+ if (!_isOpTimeCloseEnoughToLatestToElect(lastApplied, lastApplied)) {
+ result |= NotCloseEnoughToLatestOptime;
+ }
+ return result;
+}
- std::string TopologyCoordinatorImpl::_getUnelectableReasonString(
- const UnelectableReasonMask ur) const {
- invariant(ur);
- str::stream ss;
- bool hasWrittenToStream = false;
- if (ur & NoData) {
- ss << "node has no applied oplog entries";
- hasWrittenToStream = true;
- }
- if (ur & VotedTooRecently) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "I recently voted for " << _lastVote.whoHostAndPort.toString();
- }
- if (ur & CannotSeeMajority) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "I cannot see a majority";
- }
- if (ur & ArbiterIAm) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member is an arbiter";
+std::string TopologyCoordinatorImpl::_getUnelectableReasonString(
+ const UnelectableReasonMask ur) const {
+ invariant(ur);
+ str::stream ss;
+ bool hasWrittenToStream = false;
+ if (ur & NoData) {
+ ss << "node has no applied oplog entries";
+ hasWrittenToStream = true;
+ }
+ if (ur & VotedTooRecently) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NoPriority) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member has zero priority";
+ hasWrittenToStream = true;
+ ss << "I recently voted for " << _lastVote.whoHostAndPort.toString();
+ }
+ if (ur & CannotSeeMajority) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & StepDownPeriodActive) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "I am still waiting for stepdown period to end at " <<
- dateToISOStringLocal(_stepDownUntil);
+ hasWrittenToStream = true;
+ ss << "I cannot see a majority";
+ }
+ if (ur & ArbiterIAm) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NotSecondary) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member is not currently a secondary";
+ hasWrittenToStream = true;
+ ss << "member is an arbiter";
+ }
+ if (ur & NoPriority) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NotCloseEnoughToLatestOptime) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "member is more than 10 seconds behind the most up-to-date member";
+ hasWrittenToStream = true;
+ ss << "member has zero priority";
+ }
+ if (ur & StepDownPeriodActive) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & NotInitialized) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "node is not a member of a valid replica set configuration";
+ hasWrittenToStream = true;
+ ss << "I am still waiting for stepdown period to end at "
+ << dateToISOStringLocal(_stepDownUntil);
+ }
+ if (ur & NotSecondary) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (ur & RefusesToStand) {
- if (hasWrittenToStream) {
- ss << "; ";
- }
- hasWrittenToStream = true;
- ss << "most recent heartbeat indicates node will not stand for election";
+ hasWrittenToStream = true;
+ ss << "member is not currently a secondary";
+ }
+ if (ur & NotCloseEnoughToLatestOptime) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- if (!hasWrittenToStream) {
- severe() << "Invalid UnelectableReasonMask value 0x" << integerToHex(ur);
- fassertFailed(26011);
+ hasWrittenToStream = true;
+ ss << "member is more than 10 seconds behind the most up-to-date member";
+ }
+ if (ur & NotInitialized) {
+ if (hasWrittenToStream) {
+ ss << "; ";
}
- ss << " (mask 0x" << integerToHex(ur) << ")";
- return ss;
+ hasWrittenToStream = true;
+ ss << "node is not a member of a valid replica set configuration";
}
-
- int TopologyCoordinatorImpl::_getPing(const HostAndPort& host) {
- return _pings[host].getMillis();
+ if (ur & RefusesToStand) {
+ if (hasWrittenToStream) {
+ ss << "; ";
+ }
+ hasWrittenToStream = true;
+ ss << "most recent heartbeat indicates node will not stand for election";
}
-
- void TopologyCoordinatorImpl::_setElectionTime(const OpTime& newElectionTime) {
- _electionTime = newElectionTime;
+ if (!hasWrittenToStream) {
+ severe() << "Invalid UnelectableReasonMask value 0x" << integerToHex(ur);
+ fassertFailed(26011);
}
+ ss << " (mask 0x" << integerToHex(ur) << ")";
+ return ss;
+}
- int TopologyCoordinatorImpl::_getTotalPings() {
- PingMap::iterator it = _pings.begin();
- PingMap::iterator end = _pings.end();
- int totalPings = 0;
- while (it != end) {
- totalPings += it->second.getCount();
- it++;
- }
- return totalPings;
- }
+int TopologyCoordinatorImpl::_getPing(const HostAndPort& host) {
+ return _pings[host].getMillis();
+}
- std::vector<HostAndPort> TopologyCoordinatorImpl::getMaybeUpHostAndPorts() const {
- std::vector<HostAndPort> upHosts;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- continue; // skip ourselves
- }
- if (!it->maybeUp()) {
- continue; // skip DOWN nodes
- }
+void TopologyCoordinatorImpl::_setElectionTime(const OpTime& newElectionTime) {
+ _electionTime = newElectionTime;
+}
- upHosts.push_back(_rsConfig.getMemberAt(itIndex).getHostAndPort());
- }
- return upHosts;
+int TopologyCoordinatorImpl::_getTotalPings() {
+ PingMap::iterator it = _pings.begin();
+ PingMap::iterator end = _pings.end();
+ int totalPings = 0;
+ while (it != end) {
+ totalPings += it->second.getCount();
+ it++;
}
+ return totalPings;
+}
- bool TopologyCoordinatorImpl::voteForMyself(Date_t now) {
- if (_role != Role::candidate) {
- return false;
+std::vector<HostAndPort> TopologyCoordinatorImpl::getMaybeUpHostAndPorts() const {
+ std::vector<HostAndPort> upHosts;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ continue; // skip ourselves
}
- int selfId = _selfConfig().getId();
- if ((_lastVote.when + LastVote::leaseTime.total_milliseconds() >= now)
- && (_lastVote.whoId != selfId)) {
- log() << "replSet not voting yea for " << selfId <<
- " voted for " << _lastVote.whoHostAndPort.toString() << ' ' <<
- (now - _lastVote.when) / 1000 << " secs ago";
- return false;
+ if (!it->maybeUp()) {
+ continue; // skip DOWN nodes
}
- _lastVote.when = now;
- _lastVote.whoId = selfId;
- _lastVote.whoHostAndPort = _selfConfig().getHostAndPort();
- return true;
+
+ upHosts.push_back(_rsConfig.getMemberAt(itIndex).getHostAndPort());
}
+ return upHosts;
+}
- MemberState TopologyCoordinatorImpl::getMemberState() const {
- if (_selfIndex == -1) {
- if (_rsConfig.isInitialized()) {
- return MemberState::RS_REMOVED;
- }
- return MemberState::RS_STARTUP;
- }
- if (_role == Role::leader) {
- invariant(_currentPrimaryIndex == _selfIndex);
- return MemberState::RS_PRIMARY;
- }
- const MemberConfig& myConfig = _selfConfig();
- if (myConfig.isArbiter()) {
- return MemberState::RS_ARBITER;
- }
- if (((_maintenanceModeCalls > 0) || (_hasOnlyAuthErrorUpHeartbeats(_hbdata, _selfIndex)))
- && (_followerMode == MemberState::RS_SECONDARY)) {
- return MemberState::RS_RECOVERING;
- }
- return _followerMode;
+bool TopologyCoordinatorImpl::voteForMyself(Date_t now) {
+ if (_role != Role::candidate) {
+ return false;
}
+ int selfId = _selfConfig().getId();
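+    // Respect the vote lease: if our last vote went to another member and the
+    // lease has not yet expired, do not vote for ourselves.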
+ if ((_lastVote.when + LastVote::leaseTime.total_milliseconds() >= now) &&
+ (_lastVote.whoId != selfId)) {
+ log() << "replSet not voting yea for " << selfId << " voted for "
+ << _lastVote.whoHostAndPort.toString() << ' ' << (now - _lastVote.when) / 1000
+ << " secs ago";
+ return false;
+ }
+ _lastVote.when = now;
+ _lastVote.whoId = selfId;
+ _lastVote.whoHostAndPort = _selfConfig().getHostAndPort();
+ return true;
+}
- void TopologyCoordinatorImpl::processWinElection(
- OID electionId,
- OpTime electionOpTime) {
- invariant(_role == Role::candidate);
- _electionTime = electionOpTime;
- _electionId = electionId;
- _role = Role::leader;
- _currentPrimaryIndex = _selfIndex;
- _syncSource = HostAndPort();
- _forceSyncSourceIndex = -1;
+MemberState TopologyCoordinatorImpl::getMemberState() const {
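+    // Precedence: no valid self index (REMOVED/STARTUP), then leader role
+    // (PRIMARY), then arbiter, then follower mode, where maintenance mode or
+    // all-auth-error heartbeats map SECONDARY to RECOVERING.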
+ if (_selfIndex == -1) {
+ if (_rsConfig.isInitialized()) {
+ return MemberState::RS_REMOVED;
+ }
+ return MemberState::RS_STARTUP;
+ }
+ if (_role == Role::leader) {
+ invariant(_currentPrimaryIndex == _selfIndex);
+ return MemberState::RS_PRIMARY;
+ }
+ const MemberConfig& myConfig = _selfConfig();
+ if (myConfig.isArbiter()) {
+ return MemberState::RS_ARBITER;
}
+ if (((_maintenanceModeCalls > 0) || (_hasOnlyAuthErrorUpHeartbeats(_hbdata, _selfIndex))) &&
+ (_followerMode == MemberState::RS_SECONDARY)) {
+ return MemberState::RS_RECOVERING;
+ }
+ return _followerMode;
+}
- void TopologyCoordinatorImpl::processLoseElection() {
- invariant(_role == Role::candidate);
- const HostAndPort syncSourceAddress = getSyncSourceAddress();
- _electionTime = OpTime(0, 0);
- _electionId = OID();
- _role = Role::follower;
+void TopologyCoordinatorImpl::processWinElection(OID electionId, OpTime electionOpTime) {
+ invariant(_role == Role::candidate);
+ _electionTime = electionOpTime;
+ _electionId = electionId;
+ _role = Role::leader;
+ _currentPrimaryIndex = _selfIndex;
+ _syncSource = HostAndPort();
+ _forceSyncSourceIndex = -1;
+}
- // Clear lastVote time, if we voted for ourselves in this election.
- // This will allow us to vote for others.
- if (_lastVote.whoId == _selfConfig().getId()) {
- _lastVote.when = 0;
- }
+void TopologyCoordinatorImpl::processLoseElection() {
+ invariant(_role == Role::candidate);
+ const HostAndPort syncSourceAddress = getSyncSourceAddress();
+ _electionTime = OpTime(0, 0);
+ _electionId = OID();
+ _role = Role::follower;
+
+ // Clear lastVote time, if we voted for ourselves in this election.
+ // This will allow us to vote for others.
+ if (_lastVote.whoId == _selfConfig().getId()) {
+ _lastVote.when = 0;
}
+}
- bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, OpTime lastOpApplied) {
- bool canStepDown = force;
- for (int i = 0; !canStepDown && i < _rsConfig.getNumMembers(); ++i) {
- if (i == _selfIndex) {
- continue;
- }
- UnelectableReasonMask reason = _getUnelectableReason(i, lastOpApplied);
- if (!reason && _hbdata[i].getOpTime() >= lastOpApplied) {
- canStepDown = true;
- }
+bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, OpTime lastOpApplied) {
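+    // Unless forced, we may step down only if some other member is electable
+    // and has replicated at least our last applied op.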
+ bool canStepDown = force;
+ for (int i = 0; !canStepDown && i < _rsConfig.getNumMembers(); ++i) {
+ if (i == _selfIndex) {
+ continue;
}
-
- if (!canStepDown) {
- return false;
+ UnelectableReasonMask reason = _getUnelectableReason(i, lastOpApplied);
+ if (!reason && _hbdata[i].getOpTime() >= lastOpApplied) {
+ canStepDown = true;
}
- _stepDownUntil = until;
- _stepDownSelfAndReplaceWith(-1);
- return true;
}
- void TopologyCoordinatorImpl::setFollowerMode(MemberState::MS newMode) {
- invariant(_role == Role::follower);
- switch (newMode) {
+ if (!canStepDown) {
+ return false;
+ }
+ _stepDownUntil = until;
+ _stepDownSelfAndReplaceWith(-1);
+ return true;
+}
+
+void TopologyCoordinatorImpl::setFollowerMode(MemberState::MS newMode) {
+ invariant(_role == Role::follower);
+ switch (newMode) {
case MemberState::RS_RECOVERING:
case MemberState::RS_ROLLBACK:
case MemberState::RS_SECONDARY:
@@ -1973,129 +1876,126 @@ namespace {
break;
default:
invariant(false);
- }
+ }
- if (_followerMode != MemberState::RS_SECONDARY) {
- return;
- }
+ if (_followerMode != MemberState::RS_SECONDARY) {
+ return;
+ }
- // When a single node replica set transitions to SECONDARY, we must check if we should
- // be a candidate here. This is necessary because a single node replica set has no
- // heartbeats that would normally change the role to candidate.
+ // When a single node replica set transitions to SECONDARY, we must check if we should
+ // be a candidate here. This is necessary because a single node replica set has no
+ // heartbeats that would normally change the role to candidate.
- if (_rsConfig.getNumMembers() == 1 &&
- _selfIndex == 0 &&
- _rsConfig.getMemberAt(_selfIndex).isElectable()) {
- _role = Role::candidate;
- }
+ if (_rsConfig.getNumMembers() == 1 && _selfIndex == 0 &&
+ _rsConfig.getMemberAt(_selfIndex).isElectable()) {
+ _role = Role::candidate;
}
+}
- bool TopologyCoordinatorImpl::stepDownIfPending() {
- if (!_stepDownPending) {
- return false;
- }
+bool TopologyCoordinatorImpl::stepDownIfPending() {
+ if (!_stepDownPending) {
+ return false;
+ }
- int remotePrimaryIndex = -1;
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end(); ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- if (itIndex == _selfIndex) {
- continue;
- }
+ int remotePrimaryIndex = -1;
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ if (itIndex == _selfIndex) {
+ continue;
+ }
- if (it->getState().primary() && it->up()) {
- if (remotePrimaryIndex != -1) {
- // two other nodes think they are primary (asynchronously polled)
- // -- wait for things to settle down.
- remotePrimaryIndex = -1;
- log() << "replSet info two remote primaries (transiently)";
- break;
- }
- remotePrimaryIndex = itIndex;
+ if (it->getState().primary() && it->up()) {
+ if (remotePrimaryIndex != -1) {
+ // two other nodes think they are primary (asynchronously polled)
+ // -- wait for things to settle down.
+ remotePrimaryIndex = -1;
+ log() << "replSet info two remote primaries (transiently)";
+ break;
}
- }
- _stepDownSelfAndReplaceWith(remotePrimaryIndex);
+ remotePrimaryIndex = itIndex;
+ }
+ }
+ _stepDownSelfAndReplaceWith(remotePrimaryIndex);
+ return true;
+}
+
+void TopologyCoordinatorImpl::_stepDownSelfAndReplaceWith(int newPrimary) {
+ invariant(_role == Role::leader);
+ invariant(_selfIndex != -1);
+ invariant(_selfIndex != newPrimary);
+ invariant(_selfIndex == _currentPrimaryIndex);
+ _currentPrimaryIndex = newPrimary;
+ _role = Role::follower;
+ _stepDownPending = false;
+}
+
+void TopologyCoordinatorImpl::adjustMaintenanceCountBy(int inc) {
+ invariant(_role == Role::follower);
+ _maintenanceModeCalls += inc;
+ invariant(_maintenanceModeCalls >= 0);
+}
+
+int TopologyCoordinatorImpl::getMaintenanceCount() const {
+ return _maintenanceModeCalls;
+}
+
+bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource,
+ Date_t now) const {
+ // Methodology:
+ // If there exists a viable sync source member other than currentSource, whose oplog has
+ // reached an optime greater than _maxSyncSourceLagSecs later than currentSource's, return
+ // true.
+
+ // If the user requested a sync source change, return true.
+ if (_forceSyncSourceIndex != -1) {
return true;
}
- void TopologyCoordinatorImpl::_stepDownSelfAndReplaceWith(int newPrimary) {
- invariant(_role == Role::leader);
- invariant(_selfIndex != -1);
- invariant(_selfIndex != newPrimary);
- invariant(_selfIndex == _currentPrimaryIndex);
- _currentPrimaryIndex = newPrimary;
- _role = Role::follower;
- _stepDownPending = false;
- }
-
- void TopologyCoordinatorImpl::adjustMaintenanceCountBy(int inc) {
- invariant(_role == Role::follower);
- _maintenanceModeCalls += inc;
- invariant(_maintenanceModeCalls >= 0);
+ const int currentMemberIndex = _rsConfig.findMemberIndexByHostAndPort(currentSource);
+ if (currentMemberIndex == -1) {
+ return true;
}
+ invariant(currentMemberIndex != _selfIndex);
- int TopologyCoordinatorImpl::getMaintenanceCount() const {
- return _maintenanceModeCalls;
+ OpTime currentOpTime = _hbdata[currentMemberIndex].getOpTime();
+ if (currentOpTime.isNull()) {
+ // Haven't received a heartbeat from the sync source yet, so can't tell if we should
+ // change.
+ return false;
}
-
- bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentSource,
- Date_t now) const {
- // Methodology:
- // If there exists a viable sync source member other than currentSource, whose oplog has
- // reached an optime greater than _maxSyncSourceLagSecs later than currentSource's, return
- // true.
-
- // If the user requested a sync source change, return true.
- if (_forceSyncSourceIndex != -1) {
+ unsigned int currentSecs = currentOpTime.getSecs();
+ unsigned int goalSecs = currentSecs + _maxSyncSourceLagSecs.total_seconds();
+
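+    // A candidate must be strictly ahead of goalSecs, i.e. more than
+    // _maxSyncSourceLagSecs ahead of the current source, to justify switching.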
+ for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end();
+ ++it) {
+ const int itIndex = indexOfIterator(_hbdata, it);
+ const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
+ if (it->up() &&
+ (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
+ it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) &&
+ goalSecs < it->getOpTime().getSecs()) {
+ log() << "changing sync target because current sync target's most recent OpTime is "
+ << currentOpTime.toStringLong() << " which is more than "
+ << _maxSyncSourceLagSecs.total_seconds() << " seconds behind member "
+ << candidateConfig.getHostAndPort().toString() << " whose most recent OpTime is "
+ << it->getOpTime().toStringLong();
+ invariant(itIndex != _selfIndex);
return true;
}
-
- const int currentMemberIndex = _rsConfig.findMemberIndexByHostAndPort(currentSource);
- if (currentMemberIndex == -1) {
- return true;
- }
- invariant(currentMemberIndex != _selfIndex);
-
- OpTime currentOpTime = _hbdata[currentMemberIndex].getOpTime();
- if (currentOpTime.isNull()) {
- // Haven't received a heartbeat from the sync source yet, so can't tell if we should
- // change.
- return false;
- }
- unsigned int currentSecs = currentOpTime.getSecs();
- unsigned int goalSecs = currentSecs + _maxSyncSourceLagSecs.total_seconds();
-
- for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin();
- it != _hbdata.end();
- ++it) {
- const int itIndex = indexOfIterator(_hbdata, it);
- const MemberConfig& candidateConfig = _rsConfig.getMemberAt(itIndex);
- if (it->up() &&
- (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
- it->getState().readable() &&
- !_memberIsBlacklisted(candidateConfig, now) &&
- goalSecs < it->getOpTime().getSecs()) {
- log() << "changing sync target because current sync target's most recent OpTime is "
- << currentOpTime.toStringLong() << " which is more than "
- << _maxSyncSourceLagSecs.total_seconds() << " seconds behind member "
- << candidateConfig.getHostAndPort().toString()
- << " whose most recent OpTime is " << it->getOpTime().toStringLong();
- invariant(itIndex != _selfIndex);
- return true;
- }
- }
- return false;
}
+ return false;
+}
- void TopologyCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
- output->setConfig(_rsConfig);
- output->setHBData(_hbdata);
- output->setSelfIndex(_selfIndex);
- output->setPrimaryIndex(_currentPrimaryIndex);
- output->setSelfState(getMemberState());
- output->setSelfHeartbeatMessage(_hbmsg);
- }
+void TopologyCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) {
+ output->setConfig(_rsConfig);
+ output->setHBData(_hbdata);
+ output->setSelfIndex(_selfIndex);
+ output->setPrimaryIndex(_currentPrimaryIndex);
+ output->setSelfState(getMemberState());
+ output->setSelfHeartbeatMessage(_hbmsg);
+}
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index 55c199fbd10..64b085bea12 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -41,360 +41,358 @@
namespace mongo {
- class OperationContext;
+class OperationContext;
namespace repl {
+/**
+ * Represents a latency measurement for each replica set member based on heartbeat requests.
+ * The measurement is an average weighted 80% to the old value, and 20% to the new value.
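+ * For example, a stored value of 100 ms combined with a new sample of 50 ms
+ * yields 0.8 * 100 + 0.2 * 50 = 90 ms.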
+ *
+ * Also stores information about heartbeat progress and retries.
+ */
+class PingStats {
+public:
+ PingStats();
+
/**
- * Represents a latency measurement for each replica set member based on heartbeat requests.
- * The measurement is an average weighted 80% to the old value, and 20% to the new value.
+ * Records that a new heartbeat request started at "now".
*
- * Also stores information about heartbeat progress and retries.
+ * This resets the failure count used in determining whether the next request to a target
+ * should be a retry or a regularly scheduled heartbeat message.
*/
- class PingStats {
- public:
- PingStats();
-
- /**
- * Records that a new heartbeat request started at "now".
- *
- * This resets the failure count used in determining whether the next request to a target
- * should be a retry or a regularly scheduled heartbeat message.
- */
- void start(Date_t now);
-
- /**
- * Records that a heartbeat request completed successfully, and that "millis" milliseconds
- * were spent for a single network roundtrip plus remote processing time.
- */
- void hit(int millis);
-
- /**
- * Records that a heartbeat request failed.
- */
- void miss();
-
- /**
- * Gets the number of hit() calls.
- */
- unsigned int getCount() const { return count; }
-
- /**
- * Gets the weighted average round trip time for heartbeat messages to the target.
- */
- unsigned int getMillis() const { return value; }
-
- /**
- * Gets the date at which start() was last called, which is used to determine if
- * a heartbeat should be retried or if the time limit has expired.
- */
- Date_t getLastHeartbeatStartDate() const { return _lastHeartbeatStartDate; }
-
- /**
- * Gets the number of failures since start() was last called.
- *
- * This value is incremented by calls to miss(), cleared by calls to start() and
- * set to the maximum possible value by calls to hit().
- */
- int getNumFailuresSinceLastStart() const { return _numFailuresSinceLastStart; }
-
- private:
- unsigned int count;
- unsigned int value;
- Date_t _lastHeartbeatStartDate;
- int _numFailuresSinceLastStart;
- };
+ void start(Date_t now);
+
+ /**
+ * Records that a heartbeat request completed successfully, and that "millis" milliseconds
+ * were spent for a single network roundtrip plus remote processing time.
+ */
+ void hit(int millis);
+
+ /**
+ * Records that a heartbeat request failed.
+ */
+ void miss();
+
+ /**
+ * Gets the number of hit() calls.
+ */
+ unsigned int getCount() const {
+ return count;
+ }
+
+ /**
+ * Gets the weighted average round trip time for heartbeat messages to the target.
+ */
+ unsigned int getMillis() const {
+ return value;
+ }
- class TopologyCoordinatorImpl : public TopologyCoordinator {
- public:
- /**
- * Constructs a Topology Coordinator object.
- * @param maxSyncSourceLagSecs a sync source is re-evaluated after it lags behind further
- * than this amount.
- **/
- TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs);
-
- ////////////////////////////////////////////////////////////
- //
- // Implementation of TopologyCoordinator interface
- //
- ////////////////////////////////////////////////////////////
-
- virtual Role getRole() const;
- virtual MemberState getMemberState() const;
- virtual HostAndPort getSyncSourceAddress() const;
- virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const;
- virtual int getMaintenanceCount() const;
- virtual void setForceSyncSourceIndex(int index);
- virtual HostAndPort chooseNewSyncSource(Date_t now,
- const OpTime& lastOpApplied);
- virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
- virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now);
- virtual void clearSyncSourceBlacklist();
- virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const;
- virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now);
- virtual void setElectionSleepUntil(Date_t newTime);
- virtual void setFollowerMode(MemberState::MS newMode);
- virtual void adjustMaintenanceCountBy(int inc);
- virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackData& data,
- const HostAndPort& target,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
- Date_t now,
- OpTime lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
- Date_t now,
- OpTime lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual Status prepareHeartbeatResponse(Date_t now,
- const ReplSetHeartbeatArgs& args,
- const std::string& ourSetName,
- const OpTime& lastOpApplied,
- ReplSetHeartbeatResponse* response);
- virtual void prepareStatusResponse(const ReplicationExecutor::CallbackData& data,
- Date_t now,
- unsigned uptime,
- const OpTime& lastOpApplied,
- BSONObjBuilder* response,
- Status* result);
- virtual void fillIsMasterForReplSet(IsMasterResponse* response);
- virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response);
- virtual void updateConfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now,
- OpTime lastOpApplied);
- virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
- Date_t now,
- const std::string& ourSetName,
- const HostAndPort& target);
- virtual HeartbeatResponseAction processHeartbeatResponse(
- Date_t now,
- Milliseconds networkRoundTripTime,
- const HostAndPort& target,
- const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
- OpTime myLastOpApplied);
- virtual bool voteForMyself(Date_t now);
- virtual void processWinElection(OID electionId, OpTime electionOpTime);
- virtual void processLoseElection();
- virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied);
- virtual void setMyHeartbeatMessage(const Date_t now, const std::string& message);
- virtual bool stepDown(Date_t until, bool force, OpTime lastOpApplied);
- virtual bool stepDownIfPending();
- virtual Date_t getStepDownTime() const;
- virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
-
- ////////////////////////////////////////////////////////////
- //
- // Test support methods
- //
- ////////////////////////////////////////////////////////////
-
- // Changes _memberState to newMemberState. Only for testing.
- void changeMemberState_forTest(const MemberState& newMemberState,
- OpTime electionTime = OpTime(0,0));
-
- // Sets "_electionTime" to "newElectionTime". Only for testing.
- void _setElectionTime(const OpTime& newElectionTime);
-
- // Sets _currentPrimaryIndex to the given index. Should only be used in unit tests!
- // TODO(spencer): Remove this once we can easily call for an election in unit tests to
- // set the current primary.
- void _setCurrentPrimaryForTest(int primaryIndex);
-
- // Returns _electionTime. Only used in unittests.
- OpTime getElectionTime() const;
-
- // Returns _electionId. Only used in unittests.
- OID getElectionId() const;
-
- // Returns _currentPrimaryIndex. Only used in unittests.
- int getCurrentPrimaryIndex() const;
-
- private:
-
- enum UnelectableReason {
- None = 0,
- CannotSeeMajority = 1 << 0,
- NotCloseEnoughToLatestOptime = 1 << 1,
- ArbiterIAm = 1 << 2,
- NotSecondary = 1 << 3,
- NoPriority = 1 << 4,
- StepDownPeriodActive = 1 << 5,
- NoData = 1 << 6,
- NotInitialized = 1 << 7,
- VotedTooRecently = 1 << 8,
- RefusesToStand = 1 << 9
- };
- typedef int UnelectableReasonMask;
-
- // Returns the number of heartbeat pings which have occurred.
- int _getTotalPings();
-
- // Returns the current "ping" value for the given member by their address
- int _getPing(const HostAndPort& host);
-
- // Determines if we will veto the member specified by "args.id", given that the last op
- // we have applied locally is "lastOpApplied".
- // If we veto, the errmsg will be filled in with a reason
- bool _shouldVetoMember(const ReplicationCoordinator::ReplSetFreshArgs& args,
- const Date_t& now,
- const OpTime& lastOpApplied,
- std::string* errmsg) const;
-
- // Returns the index of the member with the matching id, or -1 if none match.
- int _getMemberIndex(int id) const;
-
-        // Checks whether a majority of the votes are held by members that are currently "up"
- bool _aMajoritySeemsToBeUp() const;
-
-        // Returns true if otherOpTime is close enough (within 10 seconds) to the latest
-        // known optime to qualify the member for election
- bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime,
- const OpTime& ourLastOpApplied) const;
-
- // Returns reason why "self" member is unelectable
- UnelectableReasonMask _getMyUnelectableReason(
- const Date_t now,
- const OpTime lastOpApplied) const;
-
- // Returns reason why memberIndex is unelectable
- UnelectableReasonMask _getUnelectableReason(
- int memberIndex,
- const OpTime& lastOpApplied) const;
-
-        // Returns human-readable text explaining why the node is unelectable
- std::string _getUnelectableReasonString(UnelectableReasonMask ur) const;
-
-        // Returns true if we are currently primary
- bool _iAmPrimary() const;
-
-        // Scans through all members that are 'up' and returns the latest known optime.
- OpTime _latestKnownOpTime(OpTime ourLastOpApplied) const;
-
- // Scans the electable set and returns the highest priority member index
- int _getHighestPriorityElectableIndex(Date_t now, OpTime lastOpApplied) const;
-
- // Returns true if "one" member is higher priority than "two" member
- bool _isMemberHigherPriority(int memberOneIndex, int memberTwoIndex) const;
-
- // Helper shortcut to self config
- const MemberConfig& _selfConfig() const;
-
- // Returns NULL if there is no primary, or the MemberConfig* for the current primary
- const MemberConfig* _currentPrimaryMember() const;
-
- /**
-         * Updates "_hbdata" and "_currentPrimaryIndex" on behalf of processHeartbeatResponse().
- */
- HeartbeatResponseAction _updateHeartbeatDataImpl(
- int updatedConfigIndex,
- const MemberState& originalState,
- Date_t now,
- const OpTime& lastOpApplied);
-
- /**
- * Updates _hbdata based on the newConfig, ensuring that every member in the newConfig
- * has an entry in _hbdata. If any nodes in the newConfig are also present in
- * _currentConfig, copies their heartbeat info into the corresponding entry in the updated
- * _hbdata vector.
- */
- void _updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
- int selfIndex,
- Date_t now);
-
- void _stepDownSelfAndReplaceWith(int newPrimary);
-
- MemberState _getMyState() const;
-
- /**
-         * Looks up the provided member in the blacklist and returns true if the member's
-         * blacklist expire time is after 'now'. Returns false otherwise, whether because the
-         * expire time is at or before 'now' or because the member is not present in the
-         * blacklist at all.
- **/
- bool _memberIsBlacklisted(const MemberConfig& memberConfig, Date_t now) const;
-
- // This node's role in the replication protocol.
- Role _role;
-
- // This is a unique id that is generated and set each time we transition to PRIMARY, as the
- // result of an election.
- OID _electionId;
- // The time at which the current PRIMARY was elected.
- OpTime _electionTime;
-
- // the index of the member we currently believe is primary, if one exists, otherwise -1
- int _currentPrimaryIndex;
-
- // the hostandport we are currently syncing from
- // empty if no sync source (we are primary, or we cannot connect to anyone yet)
- HostAndPort _syncSource;
- // These members are not chosen as sync sources for a period of time, due to connection
- // issues with them
- std::map<HostAndPort, Date_t> _syncSourceBlacklist;
- // The next sync source to be chosen, requested via a replSetSyncFrom command
- int _forceSyncSourceIndex;
- // How far this node must fall behind before considering switching sync sources
- Seconds _maxSyncSourceLagSecs;
-
-        // "heartbeat message"
-        // sent in the requestHeartbeat response, in the "hbm" field
- std::string _hbmsg;
- Date_t _hbmsgTime; // when it was logged
-
- // heartbeat msg to send to others; descriptive diagnostic info
- std::string _getHbmsg(Date_t now) const;
-
- int _selfIndex; // this node's index in _members and _currentConfig
-
- ReplicaSetConfig _rsConfig; // The current config, including a vector of MemberConfigs
-
-        // heartbeat data for each member. It is guaranteed that this vector will be maintained
-        // in the same order as the MemberConfigs in _currentConfig, so the member config
-        // index can be used to index into this vector as well.
- std::vector<MemberHeartbeatData> _hbdata;
-
-        // Indicates that we've received a request to step down from PRIMARY (likely via a heartbeat)
- bool _stepDownPending;
-
- // Time when stepDown command expires
- Date_t _stepDownUntil;
-
- // A time before which this node will not stand for election.
- Date_t _electionSleepUntil;
-
- // The number of calls we have had to enter maintenance mode
- int _maintenanceModeCalls;
-
- // The sub-mode of follower that we are in. Legal values are RS_SECONDARY, RS_RECOVERING,
- // RS_STARTUP2 (initial sync) and RS_ROLLBACK. Only meaningful if _role == Role::follower.
- // Configured via setFollowerMode(). If the sub-mode is RS_SECONDARY, then the effective
- // sub-mode is either RS_SECONDARY or RS_RECOVERING, depending on _maintenanceModeCalls.
-        // Rather than accessing this variable directly, one should use the getMemberState() method,
- // which computes the replica set node state on the fly.
- MemberState::MS _followerMode;
-
- typedef std::map<HostAndPort, PingStats> PingMap;
-        // Ping stats for each member, keyed by HostAndPort.
- PingMap _pings;
-
- // Last vote info from the election
- struct LastVote {
-
- static const Seconds leaseTime;
-
- LastVote() : when(0), whoId(-1) { }
- Date_t when;
- int whoId;
- HostAndPort whoHostAndPort;
- } _lastVote;
+ /**
+ * Gets the date at which start() was last called, which is used to determine if
+ * a heartbeat should be retried or if the time limit has expired.
+ */
+ Date_t getLastHeartbeatStartDate() const {
+ return _lastHeartbeatStartDate;
+ }
+ /**
+ * Gets the number of failures since start() was last called.
+ *
+ * This value is incremented by calls to miss(), cleared by calls to start() and
+ * set to the maximum possible value by calls to hit().
+ */
+ int getNumFailuresSinceLastStart() const {
+ return _numFailuresSinceLastStart;
+ }
+
+private:
+ unsigned int count;
+ unsigned int value;
+ Date_t _lastHeartbeatStartDate;
+ int _numFailuresSinceLastStart;
+};
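The two accessors above complete PingStats' retry bookkeeping: start() stamps the attempt date and zeroes the failure count, miss() increments the count, and hit() saturates it so that no further retries are attempted. A minimal sketch of that contract, with a hypothetical kMaxHeartbeatRetries limit and plain millisecond integers standing in for Date_t (illustrative only, not the class's actual implementation):

    // Sketch of the documented start()/hit()/miss() contract (hypothetical).
    class RetrySketch {
    public:
        static const int kMaxHeartbeatRetries = 2;  // assumed limit, not from the source

        void start(long long nowMillis) {  // new attempt: reset the bookkeeping
            _lastStartMillis = nowMillis;
            _failures = 0;
        }
        void hit() { _failures = kMaxHeartbeatRetries; }  // success: stop retrying
        void miss() { ++_failures; }                      // failure: count one retry
        bool shouldRetry(long long nowMillis, long long deadlineMillis) const {
            return _failures < kMaxHeartbeatRetries && nowMillis < deadlineMillis;
        }

    private:
        long long _lastStartMillis;
        int _failures;
    };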
+
+class TopologyCoordinatorImpl : public TopologyCoordinator {
+public:
+ /**
+ * Constructs a Topology Coordinator object.
+     * @param maxSyncSourceLagSecs a sync source is re-evaluated once it lags behind by more
+     * than this amount.
+ **/
+ TopologyCoordinatorImpl(Seconds maxSyncSourceLagSecs);
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Implementation of TopologyCoordinator interface
+ //
+ ////////////////////////////////////////////////////////////
+
+ virtual Role getRole() const;
+ virtual MemberState getMemberState() const;
+ virtual HostAndPort getSyncSourceAddress() const;
+ virtual std::vector<HostAndPort> getMaybeUpHostAndPorts() const;
+ virtual int getMaintenanceCount() const;
+ virtual void setForceSyncSourceIndex(int index);
+ virtual HostAndPort chooseNewSyncSource(Date_t now, const OpTime& lastOpApplied);
+ virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
+ virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now);
+ virtual void clearSyncSourceBlacklist();
+ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, Date_t now) const;
+ virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now);
+ virtual void setElectionSleepUntil(Date_t newTime);
+ virtual void setFollowerMode(MemberState::MS newMode);
+ virtual void adjustMaintenanceCountBy(int inc);
+ virtual void prepareSyncFromResponse(const ReplicationExecutor::CallbackData& data,
+ const HostAndPort& target,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args,
+ Date_t now,
+ OpTime lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args,
+ Date_t now,
+ OpTime lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual Status prepareHeartbeatResponse(Date_t now,
+ const ReplSetHeartbeatArgs& args,
+ const std::string& ourSetName,
+ const OpTime& lastOpApplied,
+ ReplSetHeartbeatResponse* response);
+ virtual void prepareStatusResponse(const ReplicationExecutor::CallbackData& data,
+ Date_t now,
+ unsigned uptime,
+ const OpTime& lastOpApplied,
+ BSONObjBuilder* response,
+ Status* result);
+ virtual void fillIsMasterForReplSet(IsMasterResponse* response);
+ virtual void prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response);
+ virtual void updateConfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now,
+ OpTime lastOpApplied);
+ virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest(
+ Date_t now, const std::string& ourSetName, const HostAndPort& target);
+ virtual HeartbeatResponseAction processHeartbeatResponse(
+ Date_t now,
+ Milliseconds networkRoundTripTime,
+ const HostAndPort& target,
+ const StatusWith<ReplSetHeartbeatResponse>& hbResponse,
+ OpTime myLastOpApplied);
+ virtual bool voteForMyself(Date_t now);
+ virtual void processWinElection(OID electionId, OpTime electionOpTime);
+ virtual void processLoseElection();
+ virtual bool checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied);
+ virtual void setMyHeartbeatMessage(const Date_t now, const std::string& message);
+ virtual bool stepDown(Date_t until, bool force, OpTime lastOpApplied);
+ virtual bool stepDownIfPending();
+ virtual Date_t getStepDownTime() const;
+ virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
+
+ ////////////////////////////////////////////////////////////
+ //
+ // Test support methods
+ //
+ ////////////////////////////////////////////////////////////
+
+ // Changes _memberState to newMemberState. Only for testing.
+ void changeMemberState_forTest(const MemberState& newMemberState,
+ OpTime electionTime = OpTime(0, 0));
+
+ // Sets "_electionTime" to "newElectionTime". Only for testing.
+ void _setElectionTime(const OpTime& newElectionTime);
+
+ // Sets _currentPrimaryIndex to the given index. Should only be used in unit tests!
+ // TODO(spencer): Remove this once we can easily call for an election in unit tests to
+ // set the current primary.
+ void _setCurrentPrimaryForTest(int primaryIndex);
+
+ // Returns _electionTime. Only used in unittests.
+ OpTime getElectionTime() const;
+
+ // Returns _electionId. Only used in unittests.
+ OID getElectionId() const;
+
+ // Returns _currentPrimaryIndex. Only used in unittests.
+ int getCurrentPrimaryIndex() const;
+
+private:
+ enum UnelectableReason {
+ None = 0,
+ CannotSeeMajority = 1 << 0,
+ NotCloseEnoughToLatestOptime = 1 << 1,
+ ArbiterIAm = 1 << 2,
+ NotSecondary = 1 << 3,
+ NoPriority = 1 << 4,
+ StepDownPeriodActive = 1 << 5,
+ NoData = 1 << 6,
+ NotInitialized = 1 << 7,
+ VotedTooRecently = 1 << 8,
+ RefusesToStand = 1 << 9
};
+ typedef int UnelectableReasonMask;
+
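UnelectableReasonMask packs the enum values above as bit flags, so several reasons can be reported at once; a node is electable only when the mask is None. A brief sketch of how such a mask is composed and queried (the helper is hypothetical, not part of this class):

    // Hypothetical helper: combine flags with |=, test them with &.
    UnelectableReasonMask exampleMask(bool isArbiter, bool inStepDown) {
        UnelectableReasonMask reason = None;
        if (isArbiter)
            reason |= ArbiterIAm;
        if (inStepDown)
            reason |= StepDownPeriodActive;
        return reason;  // electable only when reason == None;
                        // test a single cause with (reason & ArbiterIAm) != 0
    }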
+ // Returns the number of heartbeat pings which have occurred.
+ int _getTotalPings();
-} // namespace repl
-} // namespace mongo
+ // Returns the current "ping" value for the given member by their address
+ int _getPing(const HostAndPort& host);
+
+ // Determines if we will veto the member specified by "args.id", given that the last op
+ // we have applied locally is "lastOpApplied".
+ // If we veto, the errmsg will be filled in with a reason
+ bool _shouldVetoMember(const ReplicationCoordinator::ReplSetFreshArgs& args,
+ const Date_t& now,
+ const OpTime& lastOpApplied,
+ std::string* errmsg) const;
+
+ // Returns the index of the member with the matching id, or -1 if none match.
+ int _getMemberIndex(int id) const;
+
+ // Sees if a majority number of votes are held by members who are currently "up"
+    // Checks whether a majority of the votes are held by members that are currently "up"
+
+    // Returns true if otherOpTime is close enough (within 10 seconds) to the latest known
+    // optime to qualify the member for election
+ bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime,
+ const OpTime& ourLastOpApplied) const;
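As a worked sketch of the 10-second rule described above, assuming only that OpTime exposes getSecs(), as the tests in this patch do (this is not the method's actual body):

    // Illustrative closeness test; the real implementation may differ.
    bool closeEnoughSketch(const OpTime& other, const OpTime& latestKnown) {
        return other.getSecs() + 10 >= latestKnown.getSecs();
    }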
+
+ // Returns reason why "self" member is unelectable
+ UnelectableReasonMask _getMyUnelectableReason(const Date_t now,
+ const OpTime lastOpApplied) const;
+
+ // Returns reason why memberIndex is unelectable
+ UnelectableReasonMask _getUnelectableReason(int memberIndex, const OpTime& lastOpApplied) const;
+
+    // Returns human-readable text explaining why the node is unelectable
+ std::string _getUnelectableReasonString(UnelectableReasonMask ur) const;
+
+    // Returns true if we are currently primary
+ bool _iAmPrimary() const;
+
+    // Scans through all members that are 'up' and returns the latest known optime.
+ OpTime _latestKnownOpTime(OpTime ourLastOpApplied) const;
+
+ // Scans the electable set and returns the highest priority member index
+ int _getHighestPriorityElectableIndex(Date_t now, OpTime lastOpApplied) const;
+
+ // Returns true if "one" member is higher priority than "two" member
+ bool _isMemberHigherPriority(int memberOneIndex, int memberTwoIndex) const;
+
+ // Helper shortcut to self config
+ const MemberConfig& _selfConfig() const;
+
+ // Returns NULL if there is no primary, or the MemberConfig* for the current primary
+ const MemberConfig* _currentPrimaryMember() const;
+
+ /**
+     * Updates "_hbdata" and "_currentPrimaryIndex" on behalf of processHeartbeatResponse().
+ */
+ HeartbeatResponseAction _updateHeartbeatDataImpl(int updatedConfigIndex,
+ const MemberState& originalState,
+ Date_t now,
+ const OpTime& lastOpApplied);
+
+ /**
+ * Updates _hbdata based on the newConfig, ensuring that every member in the newConfig
+ * has an entry in _hbdata. If any nodes in the newConfig are also present in
+ * _currentConfig, copies their heartbeat info into the corresponding entry in the updated
+ * _hbdata vector.
+ */
+ void _updateHeartbeatDataForReconfig(const ReplicaSetConfig& newConfig,
+ int selfIndex,
+ Date_t now);
+
+ void _stepDownSelfAndReplaceWith(int newPrimary);
+
+ MemberState _getMyState() const;
+
+ /**
+     * Looks up the provided member in the blacklist and returns true if the member's
+     * blacklist expire time is after 'now'. Returns false otherwise, whether because the
+     * expire time is at or before 'now' or because the member is not present in the
+     * blacklist at all.
+ **/
+ bool _memberIsBlacklisted(const MemberConfig& memberConfig, Date_t now) const;
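A compact sketch of the lookup described above, written against the _syncSourceBlacklist map type declared later in this class (illustrative, not the member's actual body):

    // Illustrative: blacklisted iff an entry exists and has not yet expired.
    bool isBlacklistedSketch(const std::map<HostAndPort, Date_t>& blacklist,
                             const HostAndPort& host,
                             Date_t now) {
        std::map<HostAndPort, Date_t>::const_iterator it = blacklist.find(host);
        return it != blacklist.end() && it->second > now;
    }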
+
+ // This node's role in the replication protocol.
+ Role _role;
+
+ // This is a unique id that is generated and set each time we transition to PRIMARY, as the
+ // result of an election.
+ OID _electionId;
+ // The time at which the current PRIMARY was elected.
+ OpTime _electionTime;
+
+ // the index of the member we currently believe is primary, if one exists, otherwise -1
+ int _currentPrimaryIndex;
+
+ // the hostandport we are currently syncing from
+ // empty if no sync source (we are primary, or we cannot connect to anyone yet)
+ HostAndPort _syncSource;
+ // These members are not chosen as sync sources for a period of time, due to connection
+ // issues with them
+ std::map<HostAndPort, Date_t> _syncSourceBlacklist;
+ // The next sync source to be chosen, requested via a replSetSyncFrom command
+ int _forceSyncSourceIndex;
+ // How far this node must fall behind before considering switching sync sources
+ Seconds _maxSyncSourceLagSecs;
+
+    // "heartbeat message"
+    // sent in the requestHeartbeat response, in the "hbm" field
+ std::string _hbmsg;
+ Date_t _hbmsgTime; // when it was logged
+
+ // heartbeat msg to send to others; descriptive diagnostic info
+ std::string _getHbmsg(Date_t now) const;
+
+ int _selfIndex; // this node's index in _members and _currentConfig
+
+ ReplicaSetConfig _rsConfig; // The current config, including a vector of MemberConfigs
+
+    // heartbeat data for each member. It is guaranteed that this vector will be maintained
+    // in the same order as the MemberConfigs in _currentConfig, so the member config
+    // index can be used to index into this vector as well.
+ std::vector<MemberHeartbeatData> _hbdata;
+
+    // Indicates that we've received a request to step down from PRIMARY (likely via a heartbeat)
+ bool _stepDownPending;
+
+ // Time when stepDown command expires
+ Date_t _stepDownUntil;
+
+ // A time before which this node will not stand for election.
+ Date_t _electionSleepUntil;
+
+ // The number of calls we have had to enter maintenance mode
+ int _maintenanceModeCalls;
+
+ // The sub-mode of follower that we are in. Legal values are RS_SECONDARY, RS_RECOVERING,
+ // RS_STARTUP2 (initial sync) and RS_ROLLBACK. Only meaningful if _role == Role::follower.
+ // Configured via setFollowerMode(). If the sub-mode is RS_SECONDARY, then the effective
+ // sub-mode is either RS_SECONDARY or RS_RECOVERING, depending on _maintenanceModeCalls.
+    // Rather than accessing this variable directly, one should use the getMemberState() method,
+ // which computes the replica set node state on the fly.
+ MemberState::MS _followerMode;
+
+ typedef std::map<HostAndPort, PingStats> PingMap;
+    // Ping stats for each member, keyed by HostAndPort.
+ PingMap _pings;
+
+ // Last vote info from the election
+ struct LastVote {
+ static const Seconds leaseTime;
+
+ LastVote() : when(0), whoId(-1) {}
+ Date_t when;
+ int whoId;
+ HostAndPort whoHostAndPort;
+ } _lastVote;
+};
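The LastVote record above, together with its static leaseTime, backs the VotedTooRecently flag: a fresh vote is refused while the previous vote's lease is still running. A hedged sketch of that check, using the public .millis field that Date_t exposes elsewhere in this patch (the helper is illustrative, not the actual election code):

    // Illustrative: a vote lease is active until leaseTime after 'when'.
    bool voteLeaseActiveSketch(Date_t when, Date_t now, Seconds leaseTime) {
        return now.millis < when.millis + leaseTime.total_milliseconds();
    }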
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index ade27637a32..78751dc2a01 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -51,1807 +51,1240 @@ namespace mongo {
namespace repl {
namespace {
- bool stringContains(const std::string &haystack, const std::string& needle) {
- return haystack.find(needle) != std::string::npos;
+bool stringContains(const std::string& haystack, const std::string& needle) {
+ return haystack.find(needle) != std::string::npos;
+}
+
+class TopoCoordTest : public mongo::unittest::Test {
+public:
+ virtual void setUp() {
+ _topo.reset(new TopologyCoordinatorImpl(Seconds(100)));
+ _now = 0;
+ _selfIndex = -1;
+ _cbData.reset(new ReplicationExecutor::CallbackData(
+ NULL, ReplicationExecutor::CallbackHandle(), Status::OK()));
}
- class TopoCoordTest : public mongo::unittest::Test {
- public:
- virtual void setUp() {
- _topo.reset(new TopologyCoordinatorImpl(Seconds(100)));
- _now = 0;
- _selfIndex = -1;
- _cbData.reset(new ReplicationExecutor::CallbackData(
- NULL, ReplicationExecutor::CallbackHandle(), Status::OK()));
- }
-
- virtual void tearDown() {
- _topo.reset(NULL);
- _cbData.reset(NULL);
- }
-
- protected:
- TopologyCoordinatorImpl& getTopoCoord() {return *_topo;}
- ReplicationExecutor::CallbackData cbData() {return *_cbData;}
- Date_t& now() {return _now;}
-
- int64_t countLogLinesContaining(const std::string& needle) {
- return std::count_if(getCapturedLogMessages().begin(),
- getCapturedLogMessages().end(),
- stdx::bind(stringContains,
- stdx::placeholders::_1,
- needle));
- }
-
- void makeSelfPrimary(const OpTime& electionOpTime = OpTime(0,0)) {
- getTopoCoord().changeMemberState_forTest(MemberState::RS_PRIMARY, electionOpTime);
- getTopoCoord()._setCurrentPrimaryForTest(_selfIndex);
- }
-
- void setSelfMemberState(const MemberState& newState) {
- getTopoCoord().changeMemberState_forTest(newState);
- }
-
- int getCurrentPrimaryIndex() {
- return getTopoCoord().getCurrentPrimaryIndex();
- }
- // Update config and set selfIndex
- // If "now" is passed in, set _now to now+1
- void updateConfig(BSONObj cfg,
- int selfIndex,
- Date_t now = Date_t(-1),
- OpTime lastOp = OpTime()) {
- ReplicaSetConfig config;
- ASSERT_OK(config.initialize(cfg));
- ASSERT_OK(config.validate());
-
- _selfIndex = selfIndex;
-
- if (now == Date_t(-1)) {
- getTopoCoord().updateConfig(config, selfIndex, _now++, lastOp);
- }
- else {
- invariant(now > _now);
- getTopoCoord().updateConfig(config, selfIndex, now, lastOp);
- _now = now + 1;
- }
- }
-
- HeartbeatResponseAction receiveUpHeartbeat(
- const HostAndPort& member,
- const std::string& setName,
- MemberState memberState,
- OpTime electionTime,
- OpTime lastOpTimeSender,
- OpTime lastOpTimeReceiver) {
- return _receiveHeartbeatHelper(Status::OK(),
- member,
- setName,
- memberState,
- electionTime,
- lastOpTimeSender,
- lastOpTimeReceiver,
- Milliseconds(1));
- }
-
- HeartbeatResponseAction receiveDownHeartbeat(
- const HostAndPort& member,
- const std::string& setName,
- OpTime lastOpTimeReceiver,
- ErrorCodes::Error errcode = ErrorCodes::HostUnreachable) {
-            // Simulate a timed-out heartbeat to mark the node as down
-
- Milliseconds roundTripTime(
- ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.total_milliseconds());
- return _receiveHeartbeatHelper(Status(errcode, ""),
- member,
- setName,
- MemberState::RS_UNKNOWN,
- OpTime(),
- OpTime(),
- lastOpTimeReceiver,
- roundTripTime);
- }
-
- HeartbeatResponseAction heartbeatFromMember(const HostAndPort& member,
- const std::string& setName,
- MemberState memberState,
- OpTime lastOpTimeSender,
- Milliseconds roundTripTime = Milliseconds(1)) {
- return _receiveHeartbeatHelper(Status::OK(),
- member,
- setName,
- memberState,
- OpTime(),
- lastOpTimeSender,
- OpTime(),
- roundTripTime);
- }
-
- private:
-
- HeartbeatResponseAction _receiveHeartbeatHelper(Status responseStatus,
- const HostAndPort& member,
- const std::string& setName,
- MemberState memberState,
- OpTime electionTime,
- OpTime lastOpTimeSender,
- OpTime lastOpTimeReceiver,
- Milliseconds roundTripTime) {
- StatusWith<ReplSetHeartbeatResponse> hbResponse =
- StatusWith<ReplSetHeartbeatResponse>(responseStatus);
-
- if (responseStatus.isOK()) {
- ReplSetHeartbeatResponse hb;
- hb.setVersion(1);
- hb.setState(memberState);
- hb.setOpTime(lastOpTimeSender);
- hb.setElectionTime(electionTime);
- hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
- }
- getTopoCoord().prepareHeartbeatRequest(now(),
- setName,
- member);
- now() += roundTripTime.total_milliseconds();
- return getTopoCoord().processHeartbeatResponse(now(),
- roundTripTime,
- member,
- hbResponse,
- lastOpTimeReceiver);
- }
-
- private:
- scoped_ptr<TopologyCoordinatorImpl> _topo;
- scoped_ptr<ReplicationExecutor::CallbackData> _cbData;
- Date_t _now;
- int _selfIndex;
- };
-
- TEST_F(TopoCoordTest, ChooseSyncSourceBasic) {
- // if we do not have an index in the config, we should get an empty syncsource
- HostAndPort newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_TRUE(newSyncSource.empty());
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // member h2 is the furthest ahead
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1,0));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
-
- // We start with no sync source
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // Fail due to insufficient number of pings
- newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // Record 2nd round of pings to allow choosing a new sync source; all members equidistant
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1,0));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
-
- // Should choose h2, since it is furthest ahead
- newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // h3 becomes further ahead, so it should be chosen
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2,0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- // h3 becomes an invalid candidate for sync source; should choose h2 again
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, OpTime(2,0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // h3 back in SECONDARY and ahead
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2,0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- // h3 goes down
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // h3 back up and ahead
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2,0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
+ virtual void tearDown() {
+ _topo.reset(NULL);
+ _cbData.reset(NULL);
}
- TEST_F(TopoCoordTest, ChooseSyncSourceCandidates) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself") <<
- BSON("_id" << 10 << "host" << "h1") <<
- BSON("_id" << 20 << "host" << "h2" <<
- "buildIndexes" << false << "priority" << 0) <<
- BSON("_id" << 30 << "host" << "h3" <<
- "hidden" << true << "priority" << 0 << "votes" << 0) <<
- BSON("_id" << 40 << "host" << "h4" <<"arbiterOnly" << true) <<
- BSON("_id" << 50 << "host" << "h5" <<
- "slaveDelay" << 1 << "priority" << 0) <<
- BSON("_id" << 60 << "host" << "h6") <<
- BSON("_id" << 70 << "host" << "hprimary"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
- OpTime lastOpTimeWeApplied = OpTime(100,0);
-
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(700));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(600));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(500));
- heartbeatFromMember(HostAndPort("h4"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(400));
- heartbeatFromMember(HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(300));
-
-        // This node lags by more than maxSyncSourceLagSecs.
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- OpTime(499, 0), Milliseconds(200));
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY,
- OpTime(600, 0), Milliseconds(100));
- ASSERT_EQUALS(7, getCurrentPrimaryIndex());
-
- // Record 2nd round of pings to allow choosing a new sync source
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(700));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(600));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(500));
- heartbeatFromMember(HostAndPort("h4"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(400));
- heartbeatFromMember(HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY,
- OpTime(501, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- OpTime(499, 0), Milliseconds(200));
- heartbeatFromMember(HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY,
- OpTime(600, 0), Milliseconds(100));
-
- // Should choose primary first; it's closest
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("hprimary"), getTopoCoord().getSyncSourceAddress());
-
- // Primary goes far far away
- heartbeatFromMember(HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY,
- OpTime(600, 0), Milliseconds(100000000));
-
-        // Should choose h4 (if an arbiter has an oplog, it is a valid sync source).
-        // h6 is not considered because it is outside the maxSyncSourceLagSecs window.
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h4"), getTopoCoord().getSyncSourceAddress());
-
- // h4 goes down; should choose h1
- receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress());
-
- // Primary and h1 go down; should choose h6
- receiveDownHeartbeat(HostAndPort("h1"), "rs0", OpTime());
- receiveDownHeartbeat(HostAndPort("hprimary"), "rs0", OpTime());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
-
- // h6 goes down; should choose h5
- receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
-
- // h5 goes down; should choose h3
- receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- // h3 goes down; no sync source candidates remain
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
- getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+protected:
+ TopologyCoordinatorImpl& getTopoCoord() {
+ return *_topo;
}
-
-
- TEST_F(TopoCoordTest, ChooseSyncSourceChainingNotAllowed) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "settings" << BSON("chainingAllowed" << false) <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(0, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(0, 0), Milliseconds(300));
-
- // No primary situation: should choose no sync source.
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // Add primary
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY,
- OpTime(0, 0), Milliseconds(300));
- ASSERT_EQUALS(2, getCurrentPrimaryIndex());
-
- // h3 is primary and should be chosen as sync source, despite being further away than h2
- // and the primary (h3) being behind our most recently applied optime
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(10,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
+ ReplicationExecutor::CallbackData cbData() {
+ return *_cbData;
}
-
- TEST_F(TopoCoordTest, EmptySyncSourceOnPrimary) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(0, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(0, 0), Milliseconds(300));
-
-        // No primary situation: should choose h2 as the sync source.
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // Become primary
- makeSelfPrimary(OpTime(3.0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // Check sync source
- ASSERT_EQUALS(HostAndPort(), getTopoCoord().getSyncSourceAddress());
+ Date_t& now() {
+ return _now;
}
- TEST_F(TopoCoordTest, ForceSyncSource) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // two rounds of heartbeat pings from each member
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
-
- // force should overrule other defaults
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
- getTopoCoord().setForceSyncSourceIndex(1);
- // force should cause shouldChangeSyncSource() to return true
- // even if the currentSource is the force target
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h2"), now()));
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h3"), now()));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // force should only work for one call to chooseNewSyncSource
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
- }
-
- TEST_F(TopoCoordTest, BlacklistSyncSource) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
-
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
-
- Date_t expireTime = 1000;
- getTopoCoord().blacklistSyncSource(HostAndPort("h3"), expireTime);
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- // Should choose second best choice now that h3 is blacklisted.
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- // After time has passed, should go back to original sync source
- getTopoCoord().chooseNewSyncSource(expireTime, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
- }
-
- TEST_F(TopoCoordTest, BlacklistSyncSourceNoChaining) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "settings" << BSON("chainingAllowed" << false) <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY,
- OpTime(2, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY,
- OpTime(2, 0), Milliseconds(100));
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
-
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
-
- Date_t expireTime = 1000;
- getTopoCoord().blacklistSyncSource(HostAndPort("h2"), expireTime);
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- // Can't choose any sync source now.
- ASSERT(getTopoCoord().getSyncSourceAddress().empty());
-
- // After time has passed, should go back to the primary
- getTopoCoord().chooseNewSyncSource(expireTime, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
- }
-
- TEST_F(TopoCoordTest, OnlyUnauthorizedUpCausesRecovering) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- // Generate enough heartbeats to select a sync source below
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY,
- OpTime(2, 0), Milliseconds(100));
-
- ASSERT_EQUALS(HostAndPort("h3"),
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0)));
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- // Good state setup done
-
- // Mark nodes down, ensure that we have no source and are secondary
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
- ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0)).empty());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
-
- // Mark nodes down + unauth, ensure that we have no source and are secondary
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::Unauthorized);
- ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0)).empty());
- ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s);
-
- // Having an auth error but with another node up should bring us out of RECOVERING
- HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("h2"),
- "rs0",
- MemberState::RS_SECONDARY,
- OpTime(0, 0),
- OpTime(2, 0),
- OpTime(2, 0));
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- // Test that the heartbeat that brings us from RECOVERING to SECONDARY doesn't initiate
- // an election (SERVER-17164)
- ASSERT_NO_ACTION(action.getAction());
- }
-
- TEST_F(TopoCoordTest, ReceiveHeartbeatWhileAbsentFromConfig) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "h1") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- -1);
- ASSERT_NO_ACTION(heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY,
- OpTime(1, 0), Milliseconds(300)).getAction());
- }
-
- TEST_F(TopoCoordTest, PrepareSyncFromResponse) {
- OpTime staleOpTime(1, 1);
- OpTime ourOpTime(staleOpTime.getSecs() + 11, 1);
-
- Status result = Status::OK();
- BSONObjBuilder response;
-
- // if we do not have an index in the config, we should get ErrorCodes::NotSecondary
- getTopoCoord().prepareSyncFromResponse(cbData(), HostAndPort("h1"),
- ourOpTime, &response, &result);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
- ASSERT_EQUALS("Removed and uninitialized nodes do not sync", result.reason());
-
- // Test trying to sync from another node when we are an arbiter
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 <<
- "host" << "hself" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 1 <<
- "host" << "h1"))),
- 0);
-
- getTopoCoord().prepareSyncFromResponse(cbData(), HostAndPort("h1"),
- ourOpTime, &response, &result);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
- ASSERT_EQUALS("arbiters don't sync", result.reason());
-
- // Set up config for the rest of the tests
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself") <<
- BSON("_id" << 1 << "host" << "h1" << "arbiterOnly" << true) <<
- BSON("_id" << 2 << "host" << "h2" <<
- "priority" << 0 << "buildIndexes" << false) <<
- BSON("_id" << 3 << "host" << "h3") <<
- BSON("_id" << 4 << "host" << "h4") <<
- BSON("_id" << 5 << "host" << "h5") <<
- BSON("_id" << 6 << "host" << "h6"))),
- 0);
-
- // Try to sync while PRIMARY
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary();
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- getTopoCoord()._setCurrentPrimaryForTest(0);
- BSONObjBuilder response1;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h3"), ourOpTime, &response1, &result);
- ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
- ASSERT_EQUALS("primaries don't sync", result.reason());
- ASSERT_EQUALS("h3:27017", response1.obj()["syncFromRequested"].String());
-
- // Try to sync from non-existent member
- setSelfMemberState(MemberState::RS_SECONDARY);
- getTopoCoord()._setCurrentPrimaryForTest(-1);
- BSONObjBuilder response2;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("fakemember"), ourOpTime, &response2, &result);
- ASSERT_EQUALS(ErrorCodes::NodeNotFound, result);
- ASSERT_EQUALS("Could not find member \"fakemember:27017\" in replica set", result.reason());
-
- // Try to sync from self
- BSONObjBuilder response3;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("hself"), ourOpTime, &response3, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
- ASSERT_EQUALS("I cannot sync from myself", result.reason());
-
- // Try to sync from an arbiter
- BSONObjBuilder response4;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h1"), ourOpTime, &response4, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
- ASSERT_EQUALS("Cannot sync from \"h1:27017\" because it is an arbiter", result.reason());
-
- // Try to sync from a node that doesn't build indexes
- BSONObjBuilder response5;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h2"), ourOpTime, &response5, &result);
- ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
- ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it does not build indexes",
- result.reason());
-
- // Try to sync from a member that is down
- receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
-
- BSONObjBuilder response7;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h4"), ourOpTime, &response7, &result);
- ASSERT_EQUALS(ErrorCodes::HostUnreachable, result);
- ASSERT_EQUALS("I cannot reach the requested member: h4:27017", result.reason());
-
- // Sync successfully from a member that is stale
- heartbeatFromMember(HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY,
- staleOpTime, Milliseconds(100));
-
- BSONObjBuilder response8;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h5"), ourOpTime, &response8, &result);
- ASSERT_OK(result);
- ASSERT_EQUALS("requested member \"h5:27017\" is more than 10 seconds behind us",
- response8.obj()["warning"].String());
- getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
- ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
-
- // Sync successfully from an up-to-date member
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- ourOpTime, Milliseconds(100));
-
- BSONObjBuilder response9;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h6"), ourOpTime, &response9, &result);
- ASSERT_OK(result);
- BSONObj response9Obj = response9.obj();
- ASSERT_FALSE(response9Obj.hasField("warning"));
- ASSERT_EQUALS(HostAndPort("h5").toString(), response9Obj["prevSyncTarget"].String());
- getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
- ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
-
- // node goes down between forceSync and chooseNewSyncSource
- BSONObjBuilder response10;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h6"), ourOpTime, &response10, &result);
- BSONObj response10Obj = response10.obj();
- ASSERT_FALSE(response10Obj.hasField("warning"));
- ASSERT_EQUALS(HostAndPort("h6").toString(), response10Obj["prevSyncTarget"].String());
- receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
- HostAndPort syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h6"), syncSource);
-
- // Try to sync from a member that is unauth'd
- receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime(), ErrorCodes::Unauthorized);
-
- BSONObjBuilder response11;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h5"), ourOpTime, &response11, &result);
- ASSERT_NOT_OK(result);
- ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
- ASSERT_EQUALS("not authorized to communicate with h5:27017",
- result.reason());
-
- // Sync successfully from an up-to-date member.
- heartbeatFromMember(HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY,
- ourOpTime, Milliseconds(100));
- BSONObjBuilder response12;
- getTopoCoord().prepareSyncFromResponse(
- cbData(), HostAndPort("h6"), ourOpTime, &response12, &result);
- ASSERT_OK(result);
- syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
- ASSERT_EQUALS(HostAndPort("h6"), syncSource);
+ int64_t countLogLinesContaining(const std::string& needle) {
+ return std::count_if(getCapturedLogMessages().begin(),
+ getCapturedLogMessages().end(),
+ stdx::bind(stringContains, stdx::placeholders::_1, needle));
}
- TEST_F(TopoCoordTest, ReplSetGetStatus) {
- // This test starts by configuring a TopologyCoordinator as a member of a 4 node replica
- // set, with each node in a different state.
-        // The first node is DOWN, as if we tried heartbeating it and the attempt failed.
-        // The second node is in state SECONDARY, as if we've received a valid heartbeat from it.
-        // The third node is in state UNKNOWN, as if we've not yet had any heartbeat activity
-        // with it. The fourth node is PRIMARY and corresponds to ourself, which gets its
- // information for replSetGetStatus from a different source than the nodes that aren't
- // ourself. After this setup, we call prepareStatusResponse and make sure that the fields
- // returned for each member match our expectations.
- Date_t startupTime(100);
- Date_t heartbeatTime = 5000;
- Seconds uptimeSecs(10);
- Date_t curTime = heartbeatTime + uptimeSecs.total_milliseconds();
- OpTime electionTime(1, 2);
- OpTime oplogProgress(3, 4);
- std::string setName = "mySet";
-
- updateConfig(BSON("_id" << setName <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test0:1234") <<
- BSON("_id" << 1 << "host" << "test1:1234") <<
- BSON("_id" << 2 << "host" << "test2:1234") <<
- BSON("_id" << 3 << "host" << "test3:1234"))),
- 3,
- startupTime + 1);
-
- // Now that the replica set is setup, put the members into the states we want them in.
- HostAndPort member = HostAndPort("test0:1234");
- StatusWith<ReplSetHeartbeatResponse> hbResponse =
- StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, ""));
-
- getTopoCoord().prepareHeartbeatRequest(startupTime + 2, setName, member);
- Date_t timeoutTime = startupTime + 2 +
- ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.total_milliseconds();
- getTopoCoord().processHeartbeatResponse(timeoutTime,
- Milliseconds(5000),
- member,
- hbResponse,
- OpTime(0,0));
-
- member = HostAndPort("test1:1234");
- ReplSetHeartbeatResponse hb;
- hb.setVersion(1);
- hb.setState(MemberState::RS_SECONDARY);
- hb.setElectionTime(electionTime);
- hb.setHbMsg("READY");
- hb.setOpTime(oplogProgress);
- hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
- getTopoCoord().prepareHeartbeatRequest(startupTime + 2,
- setName,
- member);
- getTopoCoord().processHeartbeatResponse(heartbeatTime,
- Milliseconds(4000),
- member,
- hbResponse,
- OpTime(0,0));
- makeSelfPrimary();
-
- // Now node 0 is down, node 1 is up, and for node 2 we have no heartbeat data yet.
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- curTime,
- uptimeSecs.total_seconds(),
- oplogProgress,
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
-
- // Test results for all non-self members
- ASSERT_EQUALS(setName, rsStatus["set"].String());
- ASSERT_EQUALS(curTime.asInt64(), rsStatus["date"].Date().asInt64());
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- ASSERT_EQUALS(4U, memberArray.size());
- BSONObj member0Status = memberArray[0].Obj();
- BSONObj member1Status = memberArray[1].Obj();
- BSONObj member2Status = memberArray[2].Obj();
-
- // Test member 0, the node that's DOWN
- ASSERT_EQUALS(0, member0Status["_id"].numberInt());
- ASSERT_EQUALS("test0:1234", member0Status["name"].str());
- ASSERT_EQUALS(0, member0Status["health"].numberDouble());
- ASSERT_EQUALS(MemberState::RS_DOWN, member0Status["state"].numberInt());
- ASSERT_EQUALS("(not reachable/healthy)", member0Status["stateStr"].str());
- ASSERT_EQUALS(0, member0Status["uptime"].numberInt());
- ASSERT_EQUALS(OpTime(), OpTime(member0Status["optime"].timestampValue()));
- ASSERT_TRUE(member0Status.hasField("optimeDate"));
- ASSERT_EQUALS(Date_t(OpTime().getSecs() * 1000ULL),
- member0Status["optimeDate"].Date().millis);
- ASSERT_EQUALS(timeoutTime, member0Status["lastHeartbeat"].date());
- ASSERT_EQUALS(Date_t(), member0Status["lastHeartbeatRecv"].date());
-
- // Test member 1, the node that's SECONDARY
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS("test1:1234", member1Status["name"].String());
- ASSERT_EQUALS(1, member1Status["health"].Double());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, member1Status["state"].numberInt());
- ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
- member1Status["stateStr"].String());
- ASSERT_EQUALS(uptimeSecs.total_seconds(), member1Status["uptime"].numberInt());
- ASSERT_EQUALS(oplogProgress, OpTime(member1Status["optime"].timestampValue()));
- ASSERT_TRUE(member1Status.hasField("optimeDate"));
- ASSERT_EQUALS(Date_t(oplogProgress.getSecs() * 1000ULL),
- member1Status["optimeDate"].Date().millis);
- ASSERT_EQUALS(heartbeatTime, member1Status["lastHeartbeat"].date());
- ASSERT_EQUALS(Date_t(), member1Status["lastHeartbeatRecv"].date());
- ASSERT_EQUALS("READY", member1Status["lastHeartbeatMessage"].str());
-
- // Test member 2, the node that's UNKNOWN
- ASSERT_EQUALS(2, member2Status["_id"].numberInt());
- ASSERT_EQUALS("test2:1234", member2Status["name"].str());
- ASSERT_EQUALS(-1, member2Status["health"].numberDouble());
- ASSERT_EQUALS(MemberState::RS_UNKNOWN, member2Status["state"].numberInt());
- ASSERT_EQUALS(MemberState(MemberState::RS_UNKNOWN).toString(),
- member2Status["stateStr"].str());
- ASSERT_TRUE(member2Status.hasField("uptime"));
- ASSERT_TRUE(member2Status.hasField("optime"));
- ASSERT_TRUE(member2Status.hasField("optimeDate"));
-        ASSERT_FALSE(member2Status.hasField("lastHeartbeat"));
-        ASSERT_FALSE(member2Status.hasField("lastHeartbeatRecv"));
-
- // Now test results for ourself, the PRIMARY
- ASSERT_EQUALS(MemberState::RS_PRIMARY, rsStatus["myState"].numberInt());
- BSONObj selfStatus = memberArray[3].Obj();
- ASSERT_TRUE(selfStatus["self"].boolean());
- ASSERT_EQUALS(3, selfStatus["_id"].numberInt());
- ASSERT_EQUALS("test3:1234", selfStatus["name"].str());
- ASSERT_EQUALS(1, selfStatus["health"].numberDouble());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, selfStatus["state"].numberInt());
- ASSERT_EQUALS(MemberState(MemberState::RS_PRIMARY).toString(),
- selfStatus["stateStr"].str());
- ASSERT_EQUALS(uptimeSecs.total_seconds(), selfStatus["uptime"].numberInt());
- ASSERT_EQUALS(oplogProgress, OpTime(selfStatus["optime"].timestampValue()));
- ASSERT_TRUE(selfStatus.hasField("optimeDate"));
- ASSERT_EQUALS(Date_t(oplogProgress.getSecs() * 1000ULL),
- selfStatus["optimeDate"].Date().millis);
-
- // TODO(spencer): Test electionTime and pingMs are set properly
+ void makeSelfPrimary(const OpTime& electionOpTime = OpTime(0, 0)) {
+ getTopoCoord().changeMemberState_forTest(MemberState::RS_PRIMARY, electionOpTime);
+ getTopoCoord()._setCurrentPrimaryForTest(_selfIndex);
}
- TEST_F(TopoCoordTest, ReplSetGetStatusFails) {
- // This test starts by configuring a TopologyCoordinator to NOT be a member of a 3 node
- // replica set. Then running prepareStatusResponse should fail.
- Date_t startupTime(100);
- Date_t heartbeatTime = 5000;
- Seconds uptimeSecs(10);
- Date_t curTime = heartbeatTime + uptimeSecs.total_milliseconds();
- OpTime oplogProgress(3, 4);
- std::string setName = "mySet";
-
- updateConfig(BSON("_id" << setName <<
- "version" << 1 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "test0:1234") <<
- BSON("_id" << 1 << "host" << "test1:1234") <<
- BSON("_id" << 2 << "host" << "test2:1234"))),
- -1, // This one is not part of the replica set.
- startupTime + 1);
-
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- curTime,
- uptimeSecs.total_seconds(),
- oplogProgress,
- &statusBuilder,
- &resultStatus);
- ASSERT_NOT_OK(resultStatus);
- ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, resultStatus);
+ void setSelfMemberState(const MemberState& newState) {
+ getTopoCoord().changeMemberState_forTest(newState);
}
- TEST_F(TopoCoordTest, PrepareFreshResponse) {
- ReplicationCoordinator::ReplSetFreshArgs args;
- OpTime freshestOpTime(15, 10);
- OpTime ourOpTime(10, 10);
- OpTime staleOpTime(1, 1);
- Status internalErrorStatus(ErrorCodes::InternalError, "didn't set status");
-
- // if we do not have an index in the config, we should get ErrorCodes::ReplicaSetNotFound
- BSONObjBuilder responseBuilder;
- Status status = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status);
- ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
- ASSERT_EQUALS("Cannot participate in elections because not initialized", status.reason());
- ASSERT_TRUE(responseBuilder.obj().isEmpty());
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 10 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 <<
- "host" << "hself" <<
- "priority" << 10) <<
- BSON("_id" << 20 << "host" << "h1") <<
- BSON("_id" << 30 << "host" << "h2") <<
- BSON("_id" << 40 <<
- "host" << "h3" <<
- "priority" << 10))),
- 0);
-
- // Test with incorrect replset name
- args.setName = "fakeset";
-
- BSONObjBuilder responseBuilder0;
- Status status0 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder0, &status0);
- ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status0);
- ASSERT_TRUE(responseBuilder0.obj().isEmpty());
-
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
-
- // Test with old config version
- args.setName = "rs0";
- args.cfgver = 5;
- args.id = 20;
- args.who = HostAndPort("h1");
- args.opTime = ourOpTime;
-
- BSONObjBuilder responseBuilder1;
- Status status1 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder1, &status1);
- ASSERT_OK(status1);
- BSONObj response1 = responseBuilder1.obj();
- ASSERT_EQUALS("config version stale", response1["info"].String());
- ASSERT_EQUALS(ourOpTime, OpTime(response1["opTime"].timestampValue()));
- ASSERT_TRUE(response1["fresher"].Bool());
- ASSERT_FALSE(response1["veto"].Bool());
- ASSERT_FALSE(response1.hasField("errmsg"));
-
- // Test with non-existent node.
- args.cfgver = 10;
- args.id = 0;
- args.who = HostAndPort("fakenode");
-
- BSONObjBuilder responseBuilder2;
- Status status2 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder2, &status2);
- ASSERT_OK(status2);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(ourOpTime, OpTime(response2["opTime"].timestampValue()));
- ASSERT_FALSE(response2["fresher"].Bool());
- ASSERT_TRUE(response2["veto"].Bool());
- ASSERT_EQUALS("replSet couldn't find member with id 0", response2["errmsg"].String());
-
-
- // Test when we are primary.
- args.id = 20;
- args.who = HostAndPort("h1");
-
- makeSelfPrimary();
-
- BSONObjBuilder responseBuilder3;
- Status status3 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder3, &status3);
- ASSERT_OK(status3);
- BSONObj response3 = responseBuilder3.obj();
- ASSERT_FALSE(response3.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response3["opTime"].timestampValue()));
- ASSERT_FALSE(response3["fresher"].Bool());
- ASSERT_TRUE(response3["veto"].Bool());
- ASSERT_EQUALS("I am already primary, h1:27017 can try again once I've stepped down",
- response3["errmsg"].String());
-
-
- // Test when someone else is primary.
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
- setSelfMemberState(MemberState::RS_SECONDARY);
- getTopoCoord()._setCurrentPrimaryForTest(2);
-
- BSONObjBuilder responseBuilder4;
- Status status4 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder4, &status4);
- ASSERT_OK(status4);
- BSONObj response4 = responseBuilder4.obj();
- ASSERT_FALSE(response4.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response4["opTime"].timestampValue()));
- ASSERT_FALSE(response4["fresher"].Bool());
- ASSERT_TRUE(response4["veto"].Bool());
- ASSERT_EQUALS(
- "h1:27017 is trying to elect itself but h2:27017 is already primary and more "
- "up-to-date",
- response4["errmsg"].String());
-
-
- // Test trying to elect a node that is caught up but isn't the highest priority node.
- heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
-
- BSONObjBuilder responseBuilder5;
- Status status5 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder5, &status5);
- ASSERT_OK(status5);
- BSONObj response5 = responseBuilder5.obj();
- ASSERT_FALSE(response5.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response5["opTime"].timestampValue()));
- ASSERT_FALSE(response5["fresher"].Bool());
- ASSERT_TRUE(response5["veto"].Bool());
- ASSERT(response5["errmsg"].String().find("h1:27017 has lower priority of 1 than") !=
- std::string::npos) << response5["errmsg"].String();
-
-        // Test trying to elect a node that isn't electable because it's down
- args.id = 40;
- args.who = HostAndPort("h3");
-
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
-
- BSONObjBuilder responseBuilder6;
- Status status6 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder6, &status6);
- ASSERT_OK(status6);
- BSONObj response6 = responseBuilder6.obj();
- ASSERT_FALSE(response6.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response6["opTime"].timestampValue()));
- ASSERT_FALSE(response6["fresher"].Bool());
- ASSERT_TRUE(response6["veto"].Bool());
- ASSERT_NE(std::string::npos, response6["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response6["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because it's PRIMARY
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
- ASSERT_EQUALS(3, getCurrentPrimaryIndex());
-
- BSONObjBuilder responseBuilder7;
- Status status7 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder7, &status7);
- ASSERT_OK(status7);
- BSONObj response7 = responseBuilder7.obj();
- ASSERT_FALSE(response7.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response7["opTime"].timestampValue()));
- ASSERT_FALSE(response7["fresher"].Bool());
- ASSERT_TRUE(response7["veto"].Bool());
- ASSERT_NE(std::string::npos, response7["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response7["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because it's STARTUP
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_STARTUP, ourOpTime);
-
- BSONObjBuilder responseBuilder8;
- Status status8 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder8, &status8);
- ASSERT_OK(status8);
- BSONObj response8 = responseBuilder8.obj();
- ASSERT_FALSE(response8.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response8["opTime"].timestampValue()));
- ASSERT_FALSE(response8["fresher"].Bool());
- ASSERT_TRUE(response8["veto"].Bool());
- ASSERT_NE(std::string::npos, response8["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response8["errmsg"].String();
-
- // Test trying to elect a node that isn't electable because it's RECOVERING
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, ourOpTime);
-
- BSONObjBuilder responseBuilder9;
- Status status9 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder9, &status9);
- ASSERT_OK(status9);
- BSONObj response9 = responseBuilder9.obj();
- ASSERT_FALSE(response9.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response9["opTime"].timestampValue()));
- ASSERT_FALSE(response9["fresher"].Bool());
- ASSERT_TRUE(response9["veto"].Bool());
- ASSERT_NE(std::string::npos, response9["errmsg"].String().find(
- "I don't think h3:27017 is electable because the member is not "
- "currently a secondary")) << response9["errmsg"].String();
-
- // Test trying to elect a node that is fresher but lower priority than the existing primary
- args.id = 30;
- args.who = HostAndPort("h2");
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
- ASSERT_EQUALS(3, getCurrentPrimaryIndex());
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, freshestOpTime);
-
- BSONObjBuilder responseBuilder10;
- Status status10 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder10, &status10);
- ASSERT_OK(status10);
- BSONObj response10 = responseBuilder10.obj();
- ASSERT_FALSE(response10.hasField("info"));
- ASSERT_EQUALS(ourOpTime, OpTime(response10["opTime"].timestampValue()));
- ASSERT_TRUE(response10["fresher"].Bool());
- ASSERT_TRUE(response10["veto"].Bool());
- ASSERT_TRUE(response10.hasField("errmsg"));
-
-
- // Test trying to elect a valid node
- args.id = 40;
- args.who = HostAndPort("h3");
-
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
-
- BSONObjBuilder responseBuilder11;
- Status status11 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(
- args, Date_t(), ourOpTime, &responseBuilder11, &status11);
- ASSERT_OK(status11);
- BSONObj response11 = responseBuilder11.obj();
- ASSERT_FALSE(response11.hasField("info")) << response11.toString();
- ASSERT_EQUALS(ourOpTime, OpTime(response11["opTime"].timestampValue()));
- ASSERT_FALSE(response11["fresher"].Bool()) << response11.toString();
- ASSERT_FALSE(response11["veto"].Bool()) << response11.toString();
- ASSERT_FALSE(response11.hasField("errmsg")) << response11.toString();
-
- // Test with our id
- args.id = 10;
- BSONObjBuilder responseBuilder12;
- Status status12 = internalErrorStatus;
- getTopoCoord().prepareFreshResponse(
- args, Date_t(), ourOpTime, &responseBuilder12, &status12);
- ASSERT_EQUALS(ErrorCodes::BadValue, status12);
- ASSERT_EQUALS(
- "Received replSetFresh command from member with the same member ID as ourself: 10",
- status12.reason());
- ASSERT_TRUE(responseBuilder12.obj().isEmpty());
-
+ int getCurrentPrimaryIndex() {
+ return getTopoCoord().getCurrentPrimaryIndex();
}
-
- class HeartbeatResponseTest : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- }
-
- };
-
- class HeartbeatResponseTestOneRetry : public HeartbeatResponseTest {
- public:
- virtual void setUp() {
- HeartbeatResponseTest::setUp();
-
- // Bring up the node we are heartbeating.
- _target = HostAndPort("host2", 27017);
- Date_t _upRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T12:55Z"));
- std::pair<ReplSetHeartbeatArgs, Milliseconds> uppingRequest =
- getTopoCoord().prepareHeartbeatRequest(_upRequestDate,
- "rs0",
- _target);
- HeartbeatResponseAction upAction =
- getTopoCoord().processHeartbeatResponse(
- _upRequestDate,
- Milliseconds(0),
- _target,
- StatusWith<ReplSetHeartbeatResponse>(Status::OK()),
- OpTime(0, 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
-
-
- // Time of first request for this heartbeat period
- _firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
-
- // Initial heartbeat attempt prepared, at t + 0.
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(_firstRequestDate,
- "rs0",
- _target);
- // 5 seconds to successfully complete the heartbeat before the timeout expires.
- ASSERT_EQUALS(5000, request.second.total_milliseconds());
-
- // Initial heartbeat request fails at t + 4000ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- _firstRequestDate + 4000, // 4 seconds elapsed, retry allowed.
- Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network.
- _target,
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit,
- "Took too long"),
- OpTime(0, 0)); // We've never applied anything.
-
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat failed without timing out, we expect to retry immediately.
- ASSERT_EQUALS(Date_t(_firstRequestDate + 4000), action.getNextHeartbeatStartDate());
-
- // First heartbeat retry prepared, at t + 4000ms.
- request =
- getTopoCoord().prepareHeartbeatRequest(
- _firstRequestDate + 4000,
- "rs0",
- _target);
- // One second left to complete the heartbeat.
- ASSERT_EQUALS(1000, request.second.total_milliseconds());
-
- // Ensure a single failed heartbeat did not cause the node to be marked down
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- _firstRequestDate + 4000,
- 10,
- OpTime(100,0),
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- BSONObj member1Status = memberArray[1].Obj();
-
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS(1, member1Status["health"].Double());
-
- }
-
- Date_t firstRequestDate() {
- return _firstRequestDate;
+ // Update config and set selfIndex
+ // If "now" is passed in, set _now to now+1
+ void updateConfig(BSONObj cfg,
+ int selfIndex,
+ Date_t now = Date_t(-1),
+ OpTime lastOp = OpTime()) {
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(cfg));
+ ASSERT_OK(config.validate());
+
+ _selfIndex = selfIndex;
+
+ if (now == Date_t(-1)) {
+ getTopoCoord().updateConfig(config, selfIndex, _now++, lastOp);
+ } else {
+ invariant(now > _now);
+ getTopoCoord().updateConfig(config, selfIndex, now, lastOp);
+ _now = now + 1;
}
-
- HostAndPort target() {
- return _target;
- }
-
- private:
- Date_t _firstRequestDate;
- HostAndPort _target;
-
- };
-
- class HeartbeatResponseTestTwoRetries : public HeartbeatResponseTestOneRetry {
- public:
- virtual void setUp() {
- HeartbeatResponseTestOneRetry::setUp();
- // First retry fails at t + 4500ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4500, // 4.5 of the 5 seconds elapsed; could retry.
- Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
- OpTime(0, 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the first retry failed without timing out, we expect to retry immediately.
- ASSERT_EQUALS(Date_t(firstRequestDate() + 4500), action.getNextHeartbeatStartDate());
-
- // Second retry prepared at t + 4500ms.
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(
- firstRequestDate() + 4500,
- "rs0",
- target());
- // 500ms left to complete the heartbeat.
- ASSERT_EQUALS(500, request.second.total_milliseconds());
-
- // Ensure a second failed heartbeat did not cause the node to be marked down
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- firstRequestDate() + 4000,
- 10,
- OpTime(100,0),
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- BSONObj member1Status = memberArray[1].Obj();
-
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS(1, member1Status["health"].Double());
- }
- };
-
- class HeartbeatResponseHighVerbosityTest : public HeartbeatResponseTest {
- public:
-
- virtual void setUp() {
- HeartbeatResponseTest::setUp();
- // set verbosity as high as the highest verbosity log message we'd like to check for
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- }
-
- virtual void tearDown() {
- HeartbeatResponseTest::tearDown();
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
- }
-
- };
-
-    TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataNodeBelievesWeAreDown) {
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
-
- ReplSetHeartbeatResponse believesWeAreDownResponse;
- believesWeAreDownResponse.noteReplSet();
- believesWeAreDownResponse.setSetName("rs0");
- believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
- believesWeAreDownResponse.setElectable(true);
- believesWeAreDownResponse.noteStateDisagreement();
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host2"),
- StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
-
}
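
The Date_t(-1) default in the updateConfig helper above acts as a sentinel meaning "just tick the fixture clock". A minimal sketch of the two calling forms the tests below rely on; `configBson` and the index/time values are illustrative stand-ins, not names from the commit:

    // Common form: let the fixture clock advance by one tick per call.
    updateConfig(configBson, 0);

    // Explicit form: pin the coordinator's notion of "now" (which must be later
    // than the fixture clock); the fixture clock then resumes at now + 1.
    updateConfig(configBson, 3, startupTime + 1);
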
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataMemberNotInConfig) {
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host5"));
-
- ReplSetHeartbeatResponse memberMissingResponse;
- memberMissingResponse.noteReplSet();
- memberMissingResponse.setSetName("rs0");
- memberMissingResponse.setState(MemberState::RS_SECONDARY);
- memberMissingResponse.setElectable(true);
- memberMissingResponse.noteStateDisagreement();
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host5"),
- StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config"));
+ HeartbeatResponseAction receiveUpHeartbeat(const HostAndPort& member,
+ const std::string& setName,
+ MemberState memberState,
+ OpTime electionTime,
+ OpTime lastOpTimeSender,
+ OpTime lastOpTimeReceiver) {
+ return _receiveHeartbeatHelper(Status::OK(),
+ member,
+ setName,
+ memberState,
+ electionTime,
+ lastOpTimeSender,
+ lastOpTimeReceiver,
+ Milliseconds(1));
}
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) {
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
-
- // construct a copy of the original config for log message checking later
- // see HeartbeatResponseTest for the origin of the original config
- ReplicaSetConfig originalConfig;
- originalConfig.initialize(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)));
-
- ReplSetHeartbeatResponse sameConfigResponse;
- sameConfigResponse.noteReplSet();
- sameConfigResponse.setSetName("rs0");
- sameConfigResponse.setState(MemberState::RS_SECONDARY);
- sameConfigResponse.setElectable(true);
- sameConfigResponse.noteStateDisagreement();
- sameConfigResponse.setVersion(2);
- sameConfigResponse.setConfig(originalConfig);
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host2"),
- StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("Config from heartbeat response was "
- "same as ours."));
+ HeartbeatResponseAction receiveDownHeartbeat(
+ const HostAndPort& member,
+ const std::string& setName,
+ OpTime lastOpTimeReceiver,
+ ErrorCodes::Error errcode = ErrorCodes::HostUnreachable) {
+        // Simulate a timed-out heartbeat: report a round-trip time equal to the
+        // configured heartbeat timeout, which is what marks the node as down.
+
+ Milliseconds roundTripTime(
+ ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.total_milliseconds());
+ return _receiveHeartbeatHelper(Status(errcode, ""),
+ member,
+ setName,
+ MemberState::RS_UNKNOWN,
+ OpTime(),
+ OpTime(),
+ lastOpTimeReceiver,
+ roundTripTime);
}
- TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataOldConfig) {
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- // request heartbeat
- std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
-
- ReplSetHeartbeatResponse believesWeAreDownResponse;
- believesWeAreDownResponse.noteReplSet();
- believesWeAreDownResponse.setSetName("rs0");
- believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
- believesWeAreDownResponse.setElectable(true);
- believesWeAreDownResponse.noteStateDisagreement();
- startCapturingLogMessages();
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
- now()++, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- HostAndPort("host2"),
- StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
- lastOpTimeApplied);
- stopCapturingLogMessages();
- ASSERT_NO_ACTION(action.getAction());
- ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
-
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToReconfig) {
- // Confirm that action responses can come back from retries; in this, expect a Reconfig
- // action.
- ReplicaSetConfig newConfig;
- ASSERT_OK(newConfig.initialize(
- BSON("_id" << "rs0" <<
- "version" << 7 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017") <<
- BSON("_id" << 3 << "host" << "host4:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5))));
- ASSERT_OK(newConfig.validate());
-
- ReplSetHeartbeatResponse reconfigResponse;
- reconfigResponse.noteReplSet();
- reconfigResponse.setSetName("rs0");
- reconfigResponse.setState(MemberState::RS_SECONDARY);
- reconfigResponse.setElectable(true);
- reconfigResponse.setVersion(7);
- reconfigResponse.setConfig(newConfig);
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4500, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(reconfigResponse),
- OpTime(0, 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::Reconfig, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownRemotePrimary) {
- // Confirm that action responses can come back from retries; in this, expect a
- // StepDownRemotePrimary action.
-
- // make self primary
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(OpTime(5,0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(true);
- electedMoreRecentlyResponse.setElectionTime(OpTime(3,0));
- electedMoreRecentlyResponse.setVersion(5);
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4500, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime(0,0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
- ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownSelf) {
- // Confirm that action responses can come back from retries; in this, expect a StepDownSelf
- // action.
-
- // acknowledge the other member so that we see a majority
- HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"),
- "rs0",
- OpTime(100, 0));
- ASSERT_NO_ACTION(action.getAction());
-
- // make us PRIMARY
- makeSelfPrimary();
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(false);
- electedMoreRecentlyResponse.setElectionTime(OpTime(10,0));
- electedMoreRecentlyResponse.setVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4500, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime(0, 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
- ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, DecideToStartElection) {
- // Confirm that action responses can come back from retries; in this, expect a StartElection
- // action.
-
- // acknowledge the other member so that we see a majority
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
- HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(action.getAction());
-
- // make sure we are electable
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- ReplSetHeartbeatResponse startElectionResponse;
- startElectionResponse.noteReplSet();
- startElectionResponse.setSetName("rs0");
- startElectionResponse.setState(MemberState::RS_SECONDARY);
- startElectionResponse.setElectable(true);
- startElectionResponse.setVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4500, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
- election);
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
+ HeartbeatResponseAction heartbeatFromMember(const HostAndPort& member,
+ const std::string& setName,
+ MemberState memberState,
+ OpTime lastOpTimeSender,
+ Milliseconds roundTripTime = Milliseconds(1)) {
+ return _receiveHeartbeatHelper(Status::OK(),
+ member,
+ setName,
+ memberState,
+ OpTime(),
+ lastOpTimeSender,
+ OpTime(),
+ roundTripTime);
}
- TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatRetriesAtMostTwice) {
- // Confirm that the topology coordinator attempts to retry a failed heartbeat two times
- // after initial failure, assuming that the heartbeat timeout (set to 5 seconds in the
- // fixture) has not expired.
- //
- // Failed heartbeats propose taking no action, other than scheduling the next heartbeat. We
- // can detect a retry vs the next regularly scheduled heartbeat because retries are
- // scheduled immediately, while subsequent heartbeats are scheduled after the hard-coded
- // heartbeat interval of 2 seconds.
-
- // Second retry fails at t + 4800ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4800, // 4.8 of the 5 seconds elapsed; could still retry.
- Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
- OpTime(0, 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because this is the second retry, rather than retry again, we expect to wait for the
- // heartbeat interval of 2 seconds to elapse.
- ASSERT_EQUALS(Date_t(firstRequestDate() + 6800), action.getNextHeartbeatStartDate());
-
- // Ensure a third failed heartbeat caused the node to be marked down
- BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- firstRequestDate() + 4900,
- 10,
- OpTime(100,0),
- &statusBuilder,
- &resultStatus);
- ASSERT_OK(resultStatus);
- BSONObj rsStatus = statusBuilder.obj();
- std::vector<BSONElement> memberArray = rsStatus["members"].Array();
- BSONObj member1Status = memberArray[1].Obj();
-
- ASSERT_EQUALS(1, member1Status["_id"].Int());
- ASSERT_EQUALS(0, member1Status["health"].Double());
+private:
+ HeartbeatResponseAction _receiveHeartbeatHelper(Status responseStatus,
+ const HostAndPort& member,
+ const std::string& setName,
+ MemberState memberState,
+ OpTime electionTime,
+ OpTime lastOpTimeSender,
+ OpTime lastOpTimeReceiver,
+ Milliseconds roundTripTime) {
+ StatusWith<ReplSetHeartbeatResponse> hbResponse =
+ StatusWith<ReplSetHeartbeatResponse>(responseStatus);
+
+ if (responseStatus.isOK()) {
+ ReplSetHeartbeatResponse hb;
+ hb.setVersion(1);
+ hb.setState(memberState);
+ hb.setOpTime(lastOpTimeSender);
+ hb.setElectionTime(electionTime);
+ hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+ }
+ getTopoCoord().prepareHeartbeatRequest(now(), setName, member);
+ now() += roundTripTime.total_milliseconds();
+ return getTopoCoord().processHeartbeatResponse(
+ now(), roundTripTime, member, hbResponse, lastOpTimeReceiver);
}
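
Taken together, HeartbeatResponseTestOneRetry, HeartbeatResponseTestTwoRetries, and the removed HeartbeatRetriesAtMostTwice test above encode one fixed timeline. A sketch of it, derived only from the constants asserted in this diff (heartbeatTimeoutSecs = 5, heartbeat interval = 2 seconds):

    // t + 0ms    : heartbeat request prepared; 5000ms timeout budget remains
    // t + 4000ms : initial attempt fails (not a timeout) -> retry immediately, 1000ms left
    // t + 4500ms : first retry fails                     -> retry immediately,  500ms left
    // t + 4800ms : second retry fails -> no third retry; the member is marked down (health 0)
    // t + 6800ms : next regular heartbeat, one 2000ms interval after the final failure
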
- TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownRemotePrimary) {
- // Confirm that action responses can come back from retries; in this, expect a
- // StepDownRemotePrimary action.
-
- // make self primary
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(OpTime(5,0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(true);
- electedMoreRecentlyResponse.setElectionTime(OpTime(3,0));
- electedMoreRecentlyResponse.setVersion(5);
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 5000, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime(0,0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
- ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 7000), action.getNextHeartbeatStartDate());
+private:
+ scoped_ptr<TopologyCoordinatorImpl> _topo;
+ scoped_ptr<ReplicationExecutor::CallbackData> _cbData;
+ Date_t _now;
+ int _selfIndex;
+};
+
+TEST_F(TopoCoordTest, ChooseSyncSourceBasic) {
+    // if we do not have an index in the config, we should get an empty sync source
+ HostAndPort newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_TRUE(newSyncSource.empty());
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // member h2 is the furthest ahead
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0));
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+
+ // We start with no sync source
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+
+ // Fail due to insufficient number of pings
+ newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+
+ // Record 2nd round of pings to allow choosing a new sync source; all members equidistant
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0));
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+
+ // Should choose h2, since it is furthest ahead
+ newSyncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(getTopoCoord().getSyncSourceAddress(), newSyncSource);
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 becomes further ahead, so it should be chosen
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 becomes an invalid candidate for sync source; should choose h2 again
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, OpTime(2, 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 back in SECONDARY and ahead
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 goes down
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 back up and ahead
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, ChooseSyncSourceCandidates) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself")
+ << BSON("_id" << 10 << "host"
+ << "h1")
+ << BSON("_id" << 20 << "host"
+ << "h2"
+ << "buildIndexes" << false << "priority" << 0)
+ << BSON("_id" << 30 << "host"
+ << "h3"
+ << "hidden" << true << "priority" << 0 << "votes"
+ << 0) << BSON("_id" << 40 << "host"
+ << "h4"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 50 << "host"
+ << "h5"
+ << "slaveDelay" << 1 << "priority" << 0)
+ << BSON("_id" << 60 << "host"
+ << "h6") << BSON("_id" << 70 << "host"
+ << "hprimary"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ OpTime lastOpTimeWeApplied = OpTime(100, 0);
+
+ heartbeatFromMember(
+ HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(700));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(600));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(500));
+ heartbeatFromMember(
+ HostAndPort("h4"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(400));
+ heartbeatFromMember(
+ HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(300));
+
+ // This node is lagged further than maxSyncSourceLagSeconds.
+ heartbeatFromMember(
+ HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, OpTime(499, 0), Milliseconds(200));
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(
+ HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY, OpTime(600, 0), Milliseconds(100));
+ ASSERT_EQUALS(7, getCurrentPrimaryIndex());
+
+ // Record 2nd round of pings to allow choosing a new sync source
+ heartbeatFromMember(
+ HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(700));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(600));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(500));
+ heartbeatFromMember(
+ HostAndPort("h4"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(400));
+ heartbeatFromMember(
+ HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, OpTime(501, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, OpTime(499, 0), Milliseconds(200));
+ heartbeatFromMember(
+ HostAndPort("hprimary"), "rs0", MemberState::RS_PRIMARY, OpTime(600, 0), Milliseconds(100));
+
+ // Should choose primary first; it's closest
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("hprimary"), getTopoCoord().getSyncSourceAddress());
+
+ // Primary goes far far away
+ heartbeatFromMember(HostAndPort("hprimary"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ OpTime(600, 0),
+ Milliseconds(100000000));
+
+    // Should choose h4 (if an arbiter has an oplog, it is a valid sync source).
+    // h6 is not considered because it is outside the maxSyncSourceLagSeconds window.
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h4"), getTopoCoord().getSyncSourceAddress());
+
+ // h4 goes down; should choose h1
+ receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress());
+
+ // Primary and h1 go down; should choose h6
+ receiveDownHeartbeat(HostAndPort("h1"), "rs0", OpTime());
+ receiveDownHeartbeat(HostAndPort("hprimary"), "rs0", OpTime());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
+
+ // h6 goes down; should choose h5
+ receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
+
+ // h5 goes down; should choose h3
+ receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ // h3 goes down; no sync source candidates remain
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+ getTopoCoord().chooseNewSyncSource(now()++, lastOpTimeWeApplied);
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+}
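
The candidate test above doubles as a compact statement of the sync-source selection rules; distilled from its assertions alone (not from any separate specification):

    // - among eligible members, the closest by ping time wins (hprimary is taken first)
    // - arbiters with an oplog (h4), hidden members (h3), and slave-delayed members
    //   (h5) are all legal sync sources, chosen as fresher nodes go down
    // - a member lagged more than maxSyncSourceLagSeconds behind the freshest visible
    //   member is vetoed (h6 at optime 499 is skipped while hprimary at 600 is up,
    //   and becomes eligible once the primary goes down)
    // - a buildIndexes:false member is never chosen by a node that builds indexes
    //   (h2 is the one member never selected)
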
+
+
+TEST_F(TopoCoordTest, ChooseSyncSourceChainingNotAllowed) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "settings" << BSON("chainingAllowed" << false)
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0), Milliseconds(300));
+
+ // No primary situation: should choose no sync source.
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+
+ // Add primary
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, OpTime(0, 0), Milliseconds(300));
+ ASSERT_EQUALS(2, getCurrentPrimaryIndex());
+
+ // h3 is primary and should be chosen as sync source, despite being further away than h2
+ // and the primary (h3) being behind our most recently applied optime
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(10, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, EmptySyncSourceOnPrimary) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0), Milliseconds(300));
+
+    // No primary situation: should choose h2 as sync source.
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // Become primary
+    makeSelfPrimary(OpTime(3, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // Check sync source
+ ASSERT_EQUALS(HostAndPort(), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, ForceSyncSource) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // two rounds of heartbeat pings from each member
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+
+ // force should overrule other defaults
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+ getTopoCoord().setForceSyncSourceIndex(1);
+ // force should cause shouldChangeSyncSource() to return true
+ // even if the currentSource is the force target
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h2"), now()));
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h3"), now()));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // force should only work for one call to chooseNewSyncSource
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, BlacklistSyncSource) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+
+ Date_t expireTime = 1000;
+ getTopoCoord().blacklistSyncSource(HostAndPort("h3"), expireTime);
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+    // Should choose the second-best candidate now that h3 is blacklisted.
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ // After time has passed, should go back to original sync source
+ getTopoCoord().chooseNewSyncSource(expireTime, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, BlacklistSyncSourceNoChaining) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "settings" << BSON("chainingAllowed" << false)
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(2, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(2, 0), Milliseconds(100));
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+
+ Date_t expireTime = 1000;
+ getTopoCoord().blacklistSyncSource(HostAndPort("h2"), expireTime);
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ // Can't choose any sync source now.
+ ASSERT(getTopoCoord().getSyncSourceAddress().empty());
+
+ // After time has passed, should go back to the primary
+ getTopoCoord().chooseNewSyncSource(expireTime, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress());
+}
+
+TEST_F(TopoCoordTest, OnlyUnauthorizedUpCausesRecovering) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ // Generate enough heartbeats to select a sync source below
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+ heartbeatFromMember(
+ HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(2, 0), Milliseconds(100));
+
+ ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0)));
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+ // Good state setup done
+
+ // Mark nodes down, ensure that we have no source and are secondary
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
+ ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0)).empty());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // Mark nodes down + unauth, ensure that we have no source and are secondary
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout);
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::Unauthorized);
+ ASSERT_TRUE(getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0)).empty());
+ ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s);
+
+ // Having an auth error but with another node up should bring us out of RECOVERING
+ HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("h2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ OpTime(0, 0),
+ OpTime(2, 0),
+ OpTime(2, 0));
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+ // Test that the heartbeat that brings us from RECOVERING to SECONDARY doesn't initiate
+ // an election (SERVER-17164)
+ ASSERT_NO_ACTION(action.getAction());
+}
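
The state transitions this test pins down are worth stating in one place; summarizing only what the assertions above establish:

    // - every candidate down with ordinary network errors: no sync source, but the
    //   node stays SECONDARY
    // - candidates down and at least one failed with ErrorCodes::Unauthorized: the
    //   node drops to RECOVERING
    // - a candidate coming back up restores SECONDARY, and that same heartbeat must
    //   not kick off an election (SERVER-17164)
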
+
+TEST_F(TopoCoordTest, ReceiveHeartbeatWhileAbsentFromConfig) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "h1")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ -1);
+ ASSERT_NO_ACTION(
+ heartbeatFromMember(
+ HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0), Milliseconds(300))
+ .getAction());
+}
+
+TEST_F(TopoCoordTest, PrepareSyncFromResponse) {
+ OpTime staleOpTime(1, 1);
+ OpTime ourOpTime(staleOpTime.getSecs() + 11, 1);
+
+ Status result = Status::OK();
+ BSONObjBuilder response;
+
+ // if we do not have an index in the config, we should get ErrorCodes::NotSecondary
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h1"), ourOpTime, &response, &result);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
+ ASSERT_EQUALS("Removed and uninitialized nodes do not sync", result.reason());
+
+ // Test trying to sync from another node when we are an arbiter
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 1 << "host"
+ << "h1"))),
+ 0);
+
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h1"), ourOpTime, &response, &result);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
+ ASSERT_EQUALS("arbiters don't sync", result.reason());
+
+ // Set up config for the rest of the tests
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself")
+ << BSON("_id" << 1 << "host"
+ << "h1"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 0 << "buildIndexes" << false)
+ << BSON("_id" << 3 << "host"
+ << "h3") << BSON("_id" << 4 << "host"
+ << "h4")
+ << BSON("_id" << 5 << "host"
+ << "h5") << BSON("_id" << 6 << "host"
+ << "h6"))),
+ 0);
+
+ // Try to sync while PRIMARY
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary();
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ getTopoCoord()._setCurrentPrimaryForTest(0);
+ BSONObjBuilder response1;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h3"), ourOpTime, &response1, &result);
+ ASSERT_EQUALS(ErrorCodes::NotSecondary, result);
+ ASSERT_EQUALS("primaries don't sync", result.reason());
+ ASSERT_EQUALS("h3:27017", response1.obj()["syncFromRequested"].String());
+
+ // Try to sync from non-existent member
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ getTopoCoord()._setCurrentPrimaryForTest(-1);
+ BSONObjBuilder response2;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("fakemember"), ourOpTime, &response2, &result);
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, result);
+ ASSERT_EQUALS("Could not find member \"fakemember:27017\" in replica set", result.reason());
+
+ // Try to sync from self
+ BSONObjBuilder response3;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("hself"), ourOpTime, &response3, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
+ ASSERT_EQUALS("I cannot sync from myself", result.reason());
+
+ // Try to sync from an arbiter
+ BSONObjBuilder response4;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h1"), ourOpTime, &response4, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
+ ASSERT_EQUALS("Cannot sync from \"h1:27017\" because it is an arbiter", result.reason());
+
+ // Try to sync from a node that doesn't build indexes
+ BSONObjBuilder response5;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h2"), ourOpTime, &response5, &result);
+ ASSERT_EQUALS(ErrorCodes::InvalidOptions, result);
+ ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it does not build indexes",
+ result.reason());
+
+ // Try to sync from a member that is down
+ receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime());
+
+ BSONObjBuilder response7;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h4"), ourOpTime, &response7, &result);
+ ASSERT_EQUALS(ErrorCodes::HostUnreachable, result);
+ ASSERT_EQUALS("I cannot reach the requested member: h4:27017", result.reason());
+
+ // Sync successfully from a member that is stale
+ heartbeatFromMember(
+ HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, staleOpTime, Milliseconds(100));
+
+ BSONObjBuilder response8;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h5"), ourOpTime, &response8, &result);
+ ASSERT_OK(result);
+ ASSERT_EQUALS("requested member \"h5:27017\" is more than 10 seconds behind us",
+ response8.obj()["warning"].String());
+ getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
+ ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress());
+
+ // Sync successfully from an up-to-date member
+ heartbeatFromMember(
+ HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100));
+
+ BSONObjBuilder response9;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h6"), ourOpTime, &response9, &result);
+ ASSERT_OK(result);
+ BSONObj response9Obj = response9.obj();
+ ASSERT_FALSE(response9Obj.hasField("warning"));
+ ASSERT_EQUALS(HostAndPort("h5").toString(), response9Obj["prevSyncTarget"].String());
+ getTopoCoord().chooseNewSyncSource(now()++, ourOpTime);
+ ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress());
+
+ // node goes down between forceSync and chooseNewSyncSource
+ BSONObjBuilder response10;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h6"), ourOpTime, &response10, &result);
+ BSONObj response10Obj = response10.obj();
+ ASSERT_FALSE(response10Obj.hasField("warning"));
+ ASSERT_EQUALS(HostAndPort("h6").toString(), response10Obj["prevSyncTarget"].String());
+ receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime());
+ HostAndPort syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h6"), syncSource);
+
+ // Try to sync from a member that is unauth'd
+ receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime(), ErrorCodes::Unauthorized);
+
+ BSONObjBuilder response11;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h5"), ourOpTime, &response11, &result);
+ ASSERT_NOT_OK(result);
+ ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code());
+ ASSERT_EQUALS("not authorized to communicate with h5:27017", result.reason());
+
+ // Sync successfully from an up-to-date member.
+ heartbeatFromMember(
+ HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100));
+ BSONObjBuilder response12;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("h6"), ourOpTime, &response12, &result);
+ ASSERT_OK(result);
+ syncSource = getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+ ASSERT_EQUALS(HostAndPort("h6"), syncSource);
+}
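+
+// For reference, the error mapping exercised above (inferred from this test's assertions;
+// not an exhaustive list of prepareSyncFromResponse outcomes):
+//   target is an arbiter          -> ErrorCodes::InvalidOptions
+//   target does not build indexes -> ErrorCodes::InvalidOptions
+//   target is down                -> ErrorCodes::HostUnreachable
+//   target is unauthorized        -> ErrorCodes::Unauthorized
+//   target is >10s behind us      -> Status::OK() plus a "warning" field in the response
+//   target is up-to-date          -> Status::OK(); "prevSyncTarget" reports the old source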
+
+TEST_F(TopoCoordTest, ReplSetGetStatus) {
+    // This test starts by configuring a TopologyCoordinator as a member of a 4 node replica
+    // set, with each node in a different state.
+    // The first node is DOWN, as if we tried heartbeating it and the attempt failed.
+    // The second node is in state SECONDARY, as if we've received a valid heartbeat from it.
+    // The third node is in state UNKNOWN, as if we've had no heartbeating activity with it
+    // yet. The fourth node is PRIMARY and corresponds to ourself; replSetGetStatus draws its
+    // information for ourself from a different source than it does for the other nodes.
+    // After this setup, we call prepareStatusResponse and make sure that the fields returned
+    // for each member match our expectations.
+ Date_t startupTime(100);
+ Date_t heartbeatTime = 5000;
+ Seconds uptimeSecs(10);
+ Date_t curTime = heartbeatTime + uptimeSecs.total_milliseconds();
+ OpTime electionTime(1, 2);
+ OpTime oplogProgress(3, 4);
+ std::string setName = "mySet";
+
+ updateConfig(
+ BSON("_id" << setName << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test0:1234")
+ << BSON("_id" << 1 << "host"
+ << "test1:1234") << BSON("_id" << 2 << "host"
+ << "test2:1234")
+ << BSON("_id" << 3 << "host"
+ << "test3:1234"))),
+ 3,
+ startupTime + 1);
+
+    // Now that the replica set is set up, put the members into the states we want them in.
+ HostAndPort member = HostAndPort("test0:1234");
+ StatusWith<ReplSetHeartbeatResponse> hbResponse =
+ StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, ""));
+
+ getTopoCoord().prepareHeartbeatRequest(startupTime + 2, setName, member);
+ Date_t timeoutTime =
+ startupTime + 2 + ReplicaSetConfig::kDefaultHeartbeatTimeoutPeriod.total_milliseconds();
+ getTopoCoord().processHeartbeatResponse(
+ timeoutTime, Milliseconds(5000), member, hbResponse, OpTime(0, 0));
+
+ member = HostAndPort("test1:1234");
+ ReplSetHeartbeatResponse hb;
+ hb.setVersion(1);
+ hb.setState(MemberState::RS_SECONDARY);
+ hb.setElectionTime(electionTime);
+ hb.setHbMsg("READY");
+ hb.setOpTime(oplogProgress);
+ hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+ getTopoCoord().prepareHeartbeatRequest(startupTime + 2, setName, member);
+ getTopoCoord().processHeartbeatResponse(
+ heartbeatTime, Milliseconds(4000), member, hbResponse, OpTime(0, 0));
+ makeSelfPrimary();
+
+ // Now node 0 is down, node 1 is up, and for node 2 we have no heartbeat data yet.
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(cbData(),
+ curTime,
+ uptimeSecs.total_seconds(),
+ oplogProgress,
+ &statusBuilder,
+ &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+
+ // Test results for all non-self members
+ ASSERT_EQUALS(setName, rsStatus["set"].String());
+ ASSERT_EQUALS(curTime.asInt64(), rsStatus["date"].Date().asInt64());
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ ASSERT_EQUALS(4U, memberArray.size());
+ BSONObj member0Status = memberArray[0].Obj();
+ BSONObj member1Status = memberArray[1].Obj();
+ BSONObj member2Status = memberArray[2].Obj();
+
+ // Test member 0, the node that's DOWN
+ ASSERT_EQUALS(0, member0Status["_id"].numberInt());
+ ASSERT_EQUALS("test0:1234", member0Status["name"].str());
+ ASSERT_EQUALS(0, member0Status["health"].numberDouble());
+ ASSERT_EQUALS(MemberState::RS_DOWN, member0Status["state"].numberInt());
+ ASSERT_EQUALS("(not reachable/healthy)", member0Status["stateStr"].str());
+ ASSERT_EQUALS(0, member0Status["uptime"].numberInt());
+ ASSERT_EQUALS(OpTime(), OpTime(member0Status["optime"].timestampValue()));
+ ASSERT_TRUE(member0Status.hasField("optimeDate"));
+ ASSERT_EQUALS(Date_t(OpTime().getSecs() * 1000ULL), member0Status["optimeDate"].Date().millis);
+ ASSERT_EQUALS(timeoutTime, member0Status["lastHeartbeat"].date());
+ ASSERT_EQUALS(Date_t(), member0Status["lastHeartbeatRecv"].date());
+
+ // Test member 1, the node that's SECONDARY
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS("test1:1234", member1Status["name"].String());
+ ASSERT_EQUALS(1, member1Status["health"].Double());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, member1Status["state"].numberInt());
+ ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY).toString(),
+ member1Status["stateStr"].String());
+ ASSERT_EQUALS(uptimeSecs.total_seconds(), member1Status["uptime"].numberInt());
+ ASSERT_EQUALS(oplogProgress, OpTime(member1Status["optime"].timestampValue()));
+ ASSERT_TRUE(member1Status.hasField("optimeDate"));
+ ASSERT_EQUALS(Date_t(oplogProgress.getSecs() * 1000ULL),
+ member1Status["optimeDate"].Date().millis);
+ ASSERT_EQUALS(heartbeatTime, member1Status["lastHeartbeat"].date());
+ ASSERT_EQUALS(Date_t(), member1Status["lastHeartbeatRecv"].date());
+ ASSERT_EQUALS("READY", member1Status["lastHeartbeatMessage"].str());
+
+ // Test member 2, the node that's UNKNOWN
+ ASSERT_EQUALS(2, member2Status["_id"].numberInt());
+ ASSERT_EQUALS("test2:1234", member2Status["name"].str());
+ ASSERT_EQUALS(-1, member2Status["health"].numberDouble());
+ ASSERT_EQUALS(MemberState::RS_UNKNOWN, member2Status["state"].numberInt());
+ ASSERT_EQUALS(MemberState(MemberState::RS_UNKNOWN).toString(), member2Status["stateStr"].str());
+ ASSERT_TRUE(member2Status.hasField("uptime"));
+ ASSERT_TRUE(member2Status.hasField("optime"));
+ ASSERT_TRUE(member2Status.hasField("optimeDate"));
+ ASSERT_FALSE(member2Status.hasField("lastHearbeat"));
+ ASSERT_FALSE(member2Status.hasField("lastHearbeatRecv"));
+
+ // Now test results for ourself, the PRIMARY
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, rsStatus["myState"].numberInt());
+ BSONObj selfStatus = memberArray[3].Obj();
+ ASSERT_TRUE(selfStatus["self"].boolean());
+ ASSERT_EQUALS(3, selfStatus["_id"].numberInt());
+ ASSERT_EQUALS("test3:1234", selfStatus["name"].str());
+ ASSERT_EQUALS(1, selfStatus["health"].numberDouble());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, selfStatus["state"].numberInt());
+ ASSERT_EQUALS(MemberState(MemberState::RS_PRIMARY).toString(), selfStatus["stateStr"].str());
+ ASSERT_EQUALS(uptimeSecs.total_seconds(), selfStatus["uptime"].numberInt());
+ ASSERT_EQUALS(oplogProgress, OpTime(selfStatus["optime"].timestampValue()));
+ ASSERT_TRUE(selfStatus.hasField("optimeDate"));
+ ASSERT_EQUALS(Date_t(oplogProgress.getSecs() * 1000ULL),
+ selfStatus["optimeDate"].Date().millis);
+
+ // TODO(spencer): Test electionTime and pingMs are set properly
+}
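+
+// A minimal sketch of the member document shape asserted above for a healthy SECONDARY
+// (field set taken from this test's expectations; values are illustrative):
+//   { "_id" : 1, "name" : "test1:1234", "health" : 1, "state" : 2,
+//     "stateStr" : "SECONDARY", "uptime" : 10, "optime" : Timestamp(3, 4),
+//     "optimeDate" : ISODate(...), "lastHeartbeat" : ISODate(...),
+//     "lastHeartbeatRecv" : ISODate(...), "lastHeartbeatMessage" : "READY" }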
+
+TEST_F(TopoCoordTest, ReplSetGetStatusFails) {
+    // This test configures a TopologyCoordinator that is NOT a member of the 3 node
+    // replica set it is handed. Running prepareStatusResponse should then fail.
+ Date_t startupTime(100);
+ Date_t heartbeatTime = 5000;
+ Seconds uptimeSecs(10);
+ Date_t curTime = heartbeatTime + uptimeSecs.total_milliseconds();
+ OpTime oplogProgress(3, 4);
+ std::string setName = "mySet";
+
+ updateConfig(
+ BSON("_id" << setName << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test0:1234")
+ << BSON("_id" << 1 << "host"
+ << "test1:1234") << BSON("_id" << 2 << "host"
+ << "test2:1234"))),
+ -1, // This one is not part of the replica set.
+ startupTime + 1);
+
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(cbData(),
+ curTime,
+ uptimeSecs.total_seconds(),
+ oplogProgress,
+ &statusBuilder,
+ &resultStatus);
+ ASSERT_NOT_OK(resultStatus);
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, resultStatus);
+}
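+
+// Note: passing -1 as updateConfig's index argument models a node that is not listed in
+// the config it was handed; prepareStatusResponse is expected to reject that with
+// InvalidReplicaSetConfig rather than fabricate a member entry for us.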
+
+TEST_F(TopoCoordTest, PrepareFreshResponse) {
+ ReplicationCoordinator::ReplSetFreshArgs args;
+ OpTime freshestOpTime(15, 10);
+ OpTime ourOpTime(10, 10);
+ OpTime staleOpTime(1, 1);
+ Status internalErrorStatus(ErrorCodes::InternalError, "didn't set status");
+
+    // If we do not have our own index in the config (i.e., we are uninitialized), we should
+    // get ErrorCodes::ReplicaSetNotFound
+ BSONObjBuilder responseBuilder;
+ Status status = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status);
+ ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
+ ASSERT_EQUALS("Cannot participate in elections because not initialized", status.reason());
+ ASSERT_TRUE(responseBuilder.obj().isEmpty());
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 10 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself"
+ << "priority" << 10)
+ << BSON("_id" << 20 << "host"
+ << "h1") << BSON("_id" << 30 << "host"
+ << "h2")
+ << BSON("_id" << 40 << "host"
+ << "h3"
+ << "priority" << 10))),
+ 0);
+
+ // Test with incorrect replset name
+ args.setName = "fakeset";
+
+ BSONObjBuilder responseBuilder0;
+ Status status0 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder0, &status0);
+ ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status0);
+ ASSERT_TRUE(responseBuilder0.obj().isEmpty());
+
+ heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+ // Test with old config version
+ args.setName = "rs0";
+ args.cfgver = 5;
+ args.id = 20;
+ args.who = HostAndPort("h1");
+ args.opTime = ourOpTime;
+
+ BSONObjBuilder responseBuilder1;
+ Status status1 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder1, &status1);
+ ASSERT_OK(status1);
+ BSONObj response1 = responseBuilder1.obj();
+ ASSERT_EQUALS("config version stale", response1["info"].String());
+ ASSERT_EQUALS(ourOpTime, OpTime(response1["opTime"].timestampValue()));
+ ASSERT_TRUE(response1["fresher"].Bool());
+ ASSERT_FALSE(response1["veto"].Bool());
+ ASSERT_FALSE(response1.hasField("errmsg"));
+
+ // Test with non-existent node.
+ args.cfgver = 10;
+ args.id = 0;
+ args.who = HostAndPort("fakenode");
+
+ BSONObjBuilder responseBuilder2;
+ Status status2 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder2, &status2);
+ ASSERT_OK(status2);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(ourOpTime, OpTime(response2["opTime"].timestampValue()));
+ ASSERT_FALSE(response2["fresher"].Bool());
+ ASSERT_TRUE(response2["veto"].Bool());
+ ASSERT_EQUALS("replSet couldn't find member with id 0", response2["errmsg"].String());
+
+
+ // Test when we are primary.
+ args.id = 20;
+ args.who = HostAndPort("h1");
+
+ makeSelfPrimary();
+
+ BSONObjBuilder responseBuilder3;
+ Status status3 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder3, &status3);
+ ASSERT_OK(status3);
+ BSONObj response3 = responseBuilder3.obj();
+ ASSERT_FALSE(response3.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response3["opTime"].timestampValue()));
+ ASSERT_FALSE(response3["fresher"].Bool());
+ ASSERT_TRUE(response3["veto"].Bool());
+ ASSERT_EQUALS("I am already primary, h1:27017 can try again once I've stepped down",
+ response3["errmsg"].String());
+
+
+ // Test when someone else is primary.
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ getTopoCoord()._setCurrentPrimaryForTest(2);
+
+ BSONObjBuilder responseBuilder4;
+ Status status4 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder4, &status4);
+ ASSERT_OK(status4);
+ BSONObj response4 = responseBuilder4.obj();
+ ASSERT_FALSE(response4.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response4["opTime"].timestampValue()));
+ ASSERT_FALSE(response4["fresher"].Bool());
+ ASSERT_TRUE(response4["veto"].Bool());
+ ASSERT_EQUALS(
+ "h1:27017 is trying to elect itself but h2:27017 is already primary and more "
+ "up-to-date",
+ response4["errmsg"].String());
+
+
+ // Test trying to elect a node that is caught up but isn't the highest priority node.
+ heartbeatFromMember(HostAndPort("h1"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, staleOpTime);
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+ BSONObjBuilder responseBuilder5;
+ Status status5 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder5, &status5);
+ ASSERT_OK(status5);
+ BSONObj response5 = responseBuilder5.obj();
+ ASSERT_FALSE(response5.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response5["opTime"].timestampValue()));
+ ASSERT_FALSE(response5["fresher"].Bool());
+ ASSERT_TRUE(response5["veto"].Bool());
+ ASSERT(response5["errmsg"].String().find("h1:27017 has lower priority of 1 than") !=
+ std::string::npos)
+ << response5["errmsg"].String();
+
+    // Test trying to elect a node that isn't electable because it's down
+ args.id = 40;
+ args.who = HostAndPort("h3");
+
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+
+ BSONObjBuilder responseBuilder6;
+ Status status6 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder6, &status6);
+ ASSERT_OK(status6);
+ BSONObj response6 = responseBuilder6.obj();
+ ASSERT_FALSE(response6.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response6["opTime"].timestampValue()));
+ ASSERT_FALSE(response6["fresher"].Bool());
+ ASSERT_TRUE(response6["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response6["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response6["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's PRIMARY
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
+ ASSERT_EQUALS(3, getCurrentPrimaryIndex());
+
+ BSONObjBuilder responseBuilder7;
+ Status status7 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder7, &status7);
+ ASSERT_OK(status7);
+ BSONObj response7 = responseBuilder7.obj();
+ ASSERT_FALSE(response7.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response7["opTime"].timestampValue()));
+ ASSERT_FALSE(response7["fresher"].Bool());
+ ASSERT_TRUE(response7["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response7["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response7["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's STARTUP
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_STARTUP, ourOpTime);
+
+ BSONObjBuilder responseBuilder8;
+ Status status8 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder8, &status8);
+ ASSERT_OK(status8);
+ BSONObj response8 = responseBuilder8.obj();
+ ASSERT_FALSE(response8.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response8["opTime"].timestampValue()));
+ ASSERT_FALSE(response8["fresher"].Bool());
+ ASSERT_TRUE(response8["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response8["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response8["errmsg"].String();
+
+ // Test trying to elect a node that isn't electable because it's RECOVERING
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_RECOVERING, ourOpTime);
+
+ BSONObjBuilder responseBuilder9;
+ Status status9 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder9, &status9);
+ ASSERT_OK(status9);
+ BSONObj response9 = responseBuilder9.obj();
+ ASSERT_FALSE(response9.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response9["opTime"].timestampValue()));
+ ASSERT_FALSE(response9["fresher"].Bool());
+ ASSERT_TRUE(response9["veto"].Bool());
+ ASSERT_NE(std::string::npos,
+ response9["errmsg"].String().find(
+ "I don't think h3:27017 is electable because the member is not "
+ "currently a secondary"))
+ << response9["errmsg"].String();
+
+ // Test trying to elect a node that is fresher but lower priority than the existing primary
+ args.id = 30;
+ args.who = HostAndPort("h2");
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_PRIMARY, ourOpTime);
+ ASSERT_EQUALS(3, getCurrentPrimaryIndex());
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, freshestOpTime);
+
+ BSONObjBuilder responseBuilder10;
+ Status status10 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder10, &status10);
+ ASSERT_OK(status10);
+ BSONObj response10 = responseBuilder10.obj();
+ ASSERT_FALSE(response10.hasField("info"));
+ ASSERT_EQUALS(ourOpTime, OpTime(response10["opTime"].timestampValue()));
+ ASSERT_TRUE(response10["fresher"].Bool());
+ ASSERT_TRUE(response10["veto"].Bool());
+ ASSERT_TRUE(response10.hasField("errmsg"));
+
+
+ // Test trying to elect a valid node
+ args.id = 40;
+ args.who = HostAndPort("h3");
+
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, ourOpTime);
+
+ BSONObjBuilder responseBuilder11;
+ Status status11 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder11, &status11);
+ ASSERT_OK(status11);
+ BSONObj response11 = responseBuilder11.obj();
+ ASSERT_FALSE(response11.hasField("info")) << response11.toString();
+ ASSERT_EQUALS(ourOpTime, OpTime(response11["opTime"].timestampValue()));
+ ASSERT_FALSE(response11["fresher"].Bool()) << response11.toString();
+ ASSERT_FALSE(response11["veto"].Bool()) << response11.toString();
+ ASSERT_FALSE(response11.hasField("errmsg")) << response11.toString();
+
+ // Test with our id
+ args.id = 10;
+ BSONObjBuilder responseBuilder12;
+ Status status12 = internalErrorStatus;
+ getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder12, &status12);
+ ASSERT_EQUALS(ErrorCodes::BadValue, status12);
+ ASSERT_EQUALS(
+ "Received replSetFresh command from member with the same member ID as ourself: 10",
+ status12.reason());
+ ASSERT_TRUE(responseBuilder12.obj().isEmpty());
+}
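+
+// For reference, the replSetFresh response shape exercised above (fields inferred from this
+// test's assertions; values are illustrative):
+//   { "opTime" : Timestamp(10, 10),     // the responder's latest applied optime
+//     "fresher" : <bool>,               // set when the responder knows of fresher state
+//                                       // than the candidate's claimed optime
+//     "veto" : <bool>,                  // set when the candidate's election should be vetoed
+//     "errmsg" : "...",                 // reason for the veto, present when vetoed
+//     "info" : "config version stale" } // present only when the caller's config is older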
+
+class HeartbeatResponseTest : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
}
-
- TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownSelf) {
- // Confirm that action responses can come back from retries; in this, expect a StepDownSelf
- // action.
-
- // acknowledge the other member so that we see a majority
- HeartbeatResponseAction action = receiveDownHeartbeat(HostAndPort("host3"),
- "rs0",
- OpTime(100, 0));
- ASSERT_NO_ACTION(action.getAction());
-
- // make us PRIMARY
- makeSelfPrimary();
-
- ReplSetHeartbeatResponse electedMoreRecentlyResponse;
- electedMoreRecentlyResponse.noteReplSet();
- electedMoreRecentlyResponse.setSetName("rs0");
- electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
- electedMoreRecentlyResponse.setElectable(false);
- electedMoreRecentlyResponse.setElectionTime(OpTime(10,0));
- electedMoreRecentlyResponse.setVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 5000, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
- OpTime(0, 0)); // We've never applied anything.
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
- ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 7000), action.getNextHeartbeatStartDate());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+};
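+
+// A minimal sketch of how the tests below build on this fixture (the test name is
+// illustrative; the config comes from setUp() above):
+//
+//     TEST_F(HeartbeatResponseTest, ExampleUsage) {
+//         // Three members, heartbeatTimeoutSecs = 5, and we are host1:27017 (index 0).
+//         // No heartbeats have been processed yet, so no primary is known.
+//         ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+//     }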
+
+class HeartbeatResponseTestOneRetry : public HeartbeatResponseTest {
+public:
+ virtual void setUp() {
+ HeartbeatResponseTest::setUp();
+
+ // Bring up the node we are heartbeating.
+ _target = HostAndPort("host2", 27017);
+ Date_t _upRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T12:55Z"));
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> uppingRequest =
+ getTopoCoord().prepareHeartbeatRequest(_upRequestDate, "rs0", _target);
+ HeartbeatResponseAction upAction = getTopoCoord().processHeartbeatResponse(
+ _upRequestDate,
+ Milliseconds(0),
+ _target,
+ StatusWith<ReplSetHeartbeatResponse>(Status::OK()),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTestTwoRetries, DecideToStartElection) {
- // Confirm that action responses can come back from retries; in this, expect a StartElection
- // action.
-
- // acknowledge the other member so that we see a majority
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
- HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(action.getAction());
-
- // make sure we are electable
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- ReplSetHeartbeatResponse startElectionResponse;
- startElectionResponse.noteReplSet();
- startElectionResponse.setSetName("rs0");
- startElectionResponse.setState(MemberState::RS_SECONDARY);
- startElectionResponse.setElectable(true);
- startElectionResponse.setVersion(5);
- action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 5000, // Time is left.
- Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
- election);
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(Date_t(firstRequestDate() + 7000), action.getNextHeartbeatStartDate());
- }
- TEST_F(HeartbeatResponseTest, HeartbeatTimeoutSuppressesFirstRetry) {
- // Confirm that the topology coordinator does not schedule an immediate heartbeat retry if
- // the heartbeat timeout period expired before the initial request completed.
- HostAndPort target("host2", 27017);
- Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+ // Time of first request for this heartbeat period
+ _firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
- // Initial heartbeat request prepared, at t + 0.
+ // Initial heartbeat attempt prepared, at t + 0.
std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
- getTopoCoord().prepareHeartbeatRequest(firstRequestDate,
- "rs0",
- target);
+ getTopoCoord().prepareHeartbeatRequest(_firstRequestDate, "rs0", _target);
// 5 seconds to successfully complete the heartbeat before the timeout expires.
ASSERT_EQUALS(5000, request.second.total_milliseconds());
- // Initial heartbeat request fails at t + 5000ms
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate + 5000, // Entire heartbeat period elapsed; no retry allowed.
-                    Milliseconds(4990), // Spent 4.99 of the 5 seconds in the network.
- target,
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit,
- "Took too long"),
- OpTime(0, 0)); // We've never applied anything.
-
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat timed out, we'll retry in 2 seconds.
- ASSERT_EQUALS(Date_t(firstRequestDate + 7000), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestOneRetry, HeartbeatTimeoutSuppressesSecondRetry) {
- // Confirm that the topology coordinator does not schedule an second heartbeat retry if
- // the heartbeat timeout period expired before the first retry completed.
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 5010, // Entire heartbeat period elapsed; no retry allowed.
- Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network.
- target(),
- StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit,
- "Took too long"),
- OpTime(0, 0)); // We've never applied anything.
-
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat timed out, we'll retry in 2 seconds.
- ASSERT_EQUALS(Date_t(firstRequestDate() + 7010), action.getNextHeartbeatStartDate());
- }
-
- TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatThreeNonconsecutiveFailures) {
- // Confirm that the topology coordinator does not mark a node down on three
- // nonconsecutive heartbeat failures.
- ReplSetHeartbeatResponse response;
- response.noteReplSet();
- response.setSetName("rs0");
- response.setState(MemberState::RS_SECONDARY);
- response.setElectable(true);
- response.setVersion(5);
-
- // successful response (third response due to the two failures in setUp())
- HeartbeatResponseAction action =
- getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 4500,
- Milliseconds(400),
- target(),
- StatusWith<ReplSetHeartbeatResponse>(response),
- OpTime(0, 0)); // We've never applied anything.
+ // Initial heartbeat request fails at t + 4000ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ _firstRequestDate + 4000, // 4 seconds elapsed, retry allowed.
+ Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network.
+ _target,
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"),
+ OpTime(0, 0)); // We've never applied anything.
ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- // Because the heartbeat succeeded, we'll retry in 2 seconds.
- ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
-
- // request next heartbeat
- getTopoCoord().prepareHeartbeatRequest(firstRequestDate() + 6500, "rs0", target());
- // third failed response
- action = getTopoCoord().processHeartbeatResponse(
- firstRequestDate() + 7100,
- Milliseconds(400),
- target(),
- StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, "")),
- OpTime(0, 0)); // We've never applied anything.
+ // Because the heartbeat failed without timing out, we expect to retry immediately.
+ ASSERT_EQUALS(Date_t(_firstRequestDate + 4000), action.getNextHeartbeatStartDate());
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ // First heartbeat retry prepared, at t + 4000ms.
+ request = getTopoCoord().prepareHeartbeatRequest(_firstRequestDate + 4000, "rs0", _target);
+ // One second left to complete the heartbeat.
+ ASSERT_EQUALS(1000, request.second.total_milliseconds());
- // Ensure a third nonconsecutive heartbeat failure did not cause the node to be marked down
+ // Ensure a single failed heartbeat did not cause the node to be marked down
BSONObjBuilder statusBuilder;
- Status resultStatus(ErrorCodes::InternalError,
- "prepareStatusResponse didn't set result");
- getTopoCoord().prepareStatusResponse(cbData(),
- firstRequestDate() + 7000,
- 600,
- OpTime(100,0),
- &statusBuilder,
- &resultStatus);
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(
+ cbData(), _firstRequestDate + 4000, 10, OpTime(100, 0), &statusBuilder, &resultStatus);
ASSERT_OK(resultStatus);
BSONObj rsStatus = statusBuilder.obj();
std::vector<BSONElement> memberArray = rsStatus["members"].Array();
@@ -1859,2382 +1292,2970 @@ namespace {
ASSERT_EQUALS(1, member1Status["_id"].Int());
ASSERT_EQUALS(1, member1Status["health"].Double());
-
}
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataNewPrimary) {
- OpTime election = OpTime(5,0);
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ Date_t firstRequestDate() {
+ return _firstRequestDate;
}
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneOlder) {
- OpTime election = OpTime(5,0);
- OpTime election2 = OpTime(4,0);
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election2,
- election,
- lastOpTimeApplied);
- // second primary does not change primary index
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneNewer) {
- OpTime election = OpTime(4,0);
- OpTime election2 = OpTime(5,0);
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election2,
- election,
- lastOpTimeApplied);
- // second primary does not change primary index
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneOlder) {
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(OpTime(5,0));
-
- OpTime election = OpTime(4,0);
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction());
- ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityFreshNode) {
- // In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher
- // priority and similarly fresh node ("host3"). However, since the coordinator's node
- // (host1) is not the higher priority node, it takes no action.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(13,0);
- OpTime slightlyLessFreshLastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- slightlyLessFreshLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownSelfForHighPriorityFreshNode) {
- // In this test, the Topology coordinator becomes PRIMARY and then sees a higher priority
- // and equally fresh node ("host3"). As a result it responds with a StepDownSelf action.
- //
- // Despite having stepped down, we should remain electable, in order to dissuade lower
- // priority nodes from standing for election.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- OpTime election = OpTime(1000,0);
-
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(election);
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- election);
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
- ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
-
- // Process a heartbeat response to confirm that this node, which is no longer primary,
- // still tells other nodes that it is electable. This will stop lower priority nodes
- // from standing for election.
- ReplSetHeartbeatArgs hbArgs;
- hbArgs.setSetName("rs0");
- hbArgs.setProtocolVersion(1);
- hbArgs.setConfigVersion(6);
- hbArgs.setSenderId(1);
- hbArgs.setSenderHost(HostAndPort("host3", 27017));
- ReplSetHeartbeatResponse hbResp;
- ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(),
- hbArgs,
- "rs0",
- election,
- &hbResp));
- ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toBSON().toString();
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownSelfForHighPriorityStaleNode) {
- // In this test, the Topology coordinator becomes PRIMARY and then sees a higher priority
- // and stale node ("host3"). As a result it responds with NoAction.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- OpTime election = OpTime(1000,0);
- OpTime staleTime = OpTime(0,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(election);
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- staleTime,
- election);
- ASSERT_NO_ACTION(nextAction.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownPrimaryForHighPriorityStaleNode) {
- // In this test, the Topology coordinator sees a PRIMARY ("host2") and then sees a higher
- // priority and stale node ("host3"). As a result it responds with NoAction.
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "priority" << 3)) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(1000,0);
- OpTime stale = OpTime(0,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- election);
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- stale,
- election);
- ASSERT_NO_ACTION(nextAction.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneNewer) {
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(OpTime(2,0));
-
- OpTime election = OpTime(4,0);
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
- ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownNoMajority) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButNoPriority) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ HostAndPort target() {
+ return _target;
}
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmStarting) {
- setSelfMemberState(MemberState::RS_STARTUP);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
+private:
+ Date_t _firstRequestDate;
+ HostAndPort _target;
+};
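+
+// Timeline established by this fixture (taken from the assertions in setUp() above):
+//   t + 0ms    initial heartbeat request prepared with a 5000ms budget
+//   t + 4000ms request fails in-network (no timeout), so a retry is scheduled immediately
+//   t + 4000ms first retry prepared with the remaining 1000ms budget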
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmRecovering) {
- setSelfMemberState(MemberState::RS_RECOVERING);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+class HeartbeatResponseTestTwoRetries : public HeartbeatResponseTestOneRetry {
+public:
+ virtual void setUp() {
+ HeartbeatResponseTestOneRetry::setUp();
+ // First retry fails at t + 4500ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 4500, // 4.5 of the 5 seconds elapsed; could retry.
+ Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
+ // Because the first retry failed without timing out, we expect to retry immediately.
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 4500), action.getNextHeartbeatStartDate());
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIHaveStepdownWait) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // freeze node to set stepdown wait
- BSONObjBuilder response;
- getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
+ // Second retry prepared at t + 4500ms.
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(firstRequestDate() + 4500, "rs0", target());
+ // 500ms left to complete the heartbeat.
+ ASSERT_EQUALS(500, request.second.total_milliseconds());
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmArbiter) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" <<
- "arbiterOnly" << true) <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
+ // Ensure a second failed heartbeat did not cause the node to be marked down
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(
+ cbData(), firstRequestDate() + 4000, 10, OpTime(100, 0), &statusBuilder, &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ BSONObj member1Status = memberArray[1].Obj();
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS(1, member1Status["health"].Double());
}
+};
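+
+// Timeline extension added by this fixture (taken from the assertions in setUp() above):
+//   t + 4500ms first retry fails (NodeNotFound), so a second retry is scheduled immediately
+//   t + 4500ms second retry prepared with the remaining 500ms of the heartbeat period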
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajority) {
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(399,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+class HeartbeatResponseHighVerbosityTest : public HeartbeatResponseTest {
+public:
+ virtual void setUp() {
+ HeartbeatResponseTest::setUp();
+        // raise the verbosity to that of the most verbose log message we'd like to check for
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
}
- TEST_F(HeartbeatResponseTest, ElectionStartElectionWhileCandidate) {
- // In this test, the TopologyCoordinator goes through the steps of a successful election,
- // during which it receives a heartbeat that would normally trigger it to become a candidate
- // and respond with a StartElection HeartbeatResponseAction. However, since it is already in
- // candidate state, it responds with a NoAction HeartbeatResponseAction. Then finishes by
-        //  winning the election.
-
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. "host2" comes back, which would normally trigger election, but since the
- // TopologyCoordinator is already in candidate mode, does not.
- // 4. TopologyCoordinator concludes its freshness round successfully and wins the election.
-
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(130,0);
- OID round = OID::gen();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // see the downed node as SECONDARY and decide to take no action, but are still a candidate
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
-
- // normally this would trigger StartElection, but we are already a candidate
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
-
- // now win election and ensure _electionId and _electionTime are set properly
- getTopoCoord().processWinElection(round, election);
- ASSERT_EQUALS(round, getTopoCoord().getElectionId());
- ASSERT_EQUALS(election, getTopoCoord().getElectionTime());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ virtual void tearDown() {
+ HeartbeatResponseTest::tearDown();
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
}
+};
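+
+// The log-capture pattern the tests below rely on (helpers as used in this file; the
+// message text is illustrative):
+//
+//     startCapturingLogMessages();
+//     HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(...);
+//     stopCapturingLogMessages();
+//     ASSERT_EQUALS(1, countLogLinesContaining("some Debug(3)-level message"));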
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataNodeBelievesWeAreDown) {
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
+
+ ReplSetHeartbeatResponse believesWeAreDownResponse;
+ believesWeAreDownResponse.noteReplSet();
+ believesWeAreDownResponse.setSetName("rs0");
+ believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
+ believesWeAreDownResponse.setElectable(true);
+ believesWeAreDownResponse.noteStateDisagreement();
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host2"),
+ StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
+}
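+
+// As exercised above, noteStateDisagreement() flags the heartbeat response as coming from
+// a node that disagrees about our state; at Debug(3) verbosity that disagreement surfaces
+// as the "thinks that we are down" log line counted here.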
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataMemberNotInConfig) {
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host5"));
+
+ ReplSetHeartbeatResponse memberMissingResponse;
+ memberMissingResponse.noteReplSet();
+ memberMissingResponse.setSetName("rs0");
+ memberMissingResponse.setState(MemberState::RS_SECONDARY);
+ memberMissingResponse.setElectable(true);
+ memberMissingResponse.noteStateDisagreement();
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host5"),
+ StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config"));
+}
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) {
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
+
+ // construct a copy of the original config for log message checking later
+ // see HeartbeatResponseTest for the origin of the original config
+ ReplicaSetConfig originalConfig;
+ originalConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 5)));
+
+ ReplSetHeartbeatResponse sameConfigResponse;
+ sameConfigResponse.noteReplSet();
+ sameConfigResponse.setSetName("rs0");
+ sameConfigResponse.setState(MemberState::RS_SECONDARY);
+ sameConfigResponse.setElectable(true);
+ sameConfigResponse.noteStateDisagreement();
+ sameConfigResponse.setVersion(2);
+ sameConfigResponse.setConfig(originalConfig);
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host2"),
+ StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "Config from heartbeat response was "
+ "same as ours."));
+}
+
+TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataOldConfig) {
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ // request heartbeat
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2"));
+
+ ReplSetHeartbeatResponse believesWeAreDownResponse;
+ believesWeAreDownResponse.noteReplSet();
+ believesWeAreDownResponse.setSetName("rs0");
+ believesWeAreDownResponse.setState(MemberState::RS_SECONDARY);
+ believesWeAreDownResponse.setElectable(true);
+ believesWeAreDownResponse.noteStateDisagreement();
+ startCapturingLogMessages();
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ HostAndPort("host2"),
+ StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse),
+ lastOpTimeApplied);
+ stopCapturingLogMessages();
+ ASSERT_NO_ACTION(action.getAction());
+ ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down"));
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToReconfig) {
+ // Confirm that action responses can come back from retries; in this, expect a Reconfig
+ // action.
+ ReplicaSetConfig newConfig;
+ ASSERT_OK(newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 7 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017")
+ << BSON("_id" << 3 << "host"
+ << "host4:27017")) << "settings"
+ << BSON("heartbeatTimeoutSecs" << 5))));
+ ASSERT_OK(newConfig.validate());
+
+ ReplSetHeartbeatResponse reconfigResponse;
+ reconfigResponse.noteReplSet();
+ reconfigResponse.setSetName("rs0");
+ reconfigResponse.setState(MemberState::RS_SECONDARY);
+ reconfigResponse.setElectable(true);
+ reconfigResponse.setVersion(7);
+ reconfigResponse.setConfig(newConfig);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 4500, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(reconfigResponse),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::Reconfig, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
+}
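+
+// Note the next heartbeat start date of firstRequestDate() + 6500: the retry succeeded at
+// t + 4500ms, so rather than retrying immediately the coordinator waits out a full
+// 2-second heartbeat interval before the next request.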
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownRemotePrimary) {
+ // Confirm that action responses can come back from retries; in this, expect a
+ // StepDownRemotePrimary action.
+
+ // make self primary
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(OpTime(5, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(true);
+ electedMoreRecentlyResponse.setElectionTime(OpTime(3, 0));
+ electedMoreRecentlyResponse.setVersion(5);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 4500, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
+ ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToStepDownSelf) {
+    // Confirm that action responses can come back from retries; in this case, expect a
+    // StepDownSelf action.
+
+ // acknowledge the other member so that we see a majority
+ HeartbeatResponseAction action =
+ receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(100, 0));
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make us PRIMARY
+ makeSelfPrimary();
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(false);
+ electedMoreRecentlyResponse.setElectionTime(OpTime(10, 0));
+ electedMoreRecentlyResponse.setVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 4500, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
+ ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTestOneRetry, DecideToStartElection) {
+    // Confirm that action responses can come back from retries; in this case, expect a
+    // StartElection action.
+
+ // acknowledge the other member so that we see a majority
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+ HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make sure we are electable
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ ReplSetHeartbeatResponse startElectionResponse;
+ startElectionResponse.noteReplSet();
+ startElectionResponse.setSetName("rs0");
+ startElectionResponse.setState(MemberState::RS_SECONDARY);
+ startElectionResponse.setElectable(true);
+ startElectionResponse.setVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 4500, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
+ election);
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatRetriesAtMostTwice) {
+ // Confirm that the topology coordinator attempts to retry a failed heartbeat two times
+ // after initial failure, assuming that the heartbeat timeout (set to 5 seconds in the
+ // fixture) has not expired.
+ //
+ // Failed heartbeats propose taking no action, other than scheduling the next heartbeat. We
+ // can detect a retry vs the next regularly scheduled heartbeat because retries are
+ // scheduled immediately, while subsequent heartbeats are scheduled after the hard-coded
+ // heartbeat interval of 2 seconds.
+
+ // Second retry fails at t + 4800ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 4800, // 4.8 of the 5 seconds elapsed; could still retry.
+ Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ // Because this is the second retry, rather than retry again, we expect to wait for the
+ // heartbeat interval of 2 seconds to elapse.
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 6800), action.getNextHeartbeatStartDate());
+
+ // Ensure a third failed heartbeat caused the node to be marked down
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(
+ cbData(), firstRequestDate() + 4900, 10, OpTime(100, 0), &statusBuilder, &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ BSONObj member1Status = memberArray[1].Obj();
+
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS(0, member1Status["health"].Double());
+}
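+
+// Editor's sketch of the scheduling rule exercised above (hypothetical helper;
+// the real logic lives in TopologyCoordinatorImpl): retries fire immediately,
+// while a success or an exhausted retry budget waits the hard-coded 2-second
+// heartbeat interval.
+static Date_t sketchNextHeartbeatDate(Date_t responseDate, bool failed, int priorFailures) {
+    const bool retryImmediately = failed && priorFailures < 2;  // at most two retries
+    return retryImmediately ? responseDate : Date_t(responseDate + 2000);
+}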
+
+TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownRemotePrimary) {
+    // Confirm that action responses can come back from retries; in this case, expect a
+    // StepDownRemotePrimary action.
+
+ // make self primary
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(OpTime(5, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(true);
+ electedMoreRecentlyResponse.setElectionTime(OpTime(3, 0));
+ electedMoreRecentlyResponse.setVersion(5);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 5000, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction());
+ ASSERT_EQUALS(1, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 7000), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, DecideToStepDownSelf) {
+    // Confirm that action responses can come back from retries; in this case, expect a
+    // StepDownSelf action.
+
+ // acknowledge the other member so that we see a majority
+ HeartbeatResponseAction action =
+ receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(100, 0));
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make us PRIMARY
+ makeSelfPrimary();
+
+ ReplSetHeartbeatResponse electedMoreRecentlyResponse;
+ electedMoreRecentlyResponse.noteReplSet();
+ electedMoreRecentlyResponse.setSetName("rs0");
+ electedMoreRecentlyResponse.setState(MemberState::RS_PRIMARY);
+ electedMoreRecentlyResponse.setElectable(false);
+ electedMoreRecentlyResponse.setElectionTime(OpTime(10, 0));
+ electedMoreRecentlyResponse.setVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 5000, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse),
+ OpTime(0, 0)); // We've never applied anything.
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction());
+ ASSERT_EQUALS(0, action.getPrimaryConfigIndex());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 7000), action.getNextHeartbeatStartDate());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, DecideToStartElection) {
+    // Confirm that action responses can come back from retries; in this case, expect a
+    // StartElection action.
+
+ // acknowledge the other member so that we see a majority
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+ HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(action.getAction());
+
+ // make sure we are electable
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ ReplSetHeartbeatResponse startElectionResponse;
+ startElectionResponse.noteReplSet();
+ startElectionResponse.setSetName("rs0");
+ startElectionResponse.setState(MemberState::RS_SECONDARY);
+ startElectionResponse.setElectable(true);
+ startElectionResponse.setVersion(5);
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 5000, // Time is left.
+ Milliseconds(400), // Spent 0.4 of the 0.5 second in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(startElectionResponse),
+ election);
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 7000), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTest, HeartbeatTimeoutSuppressesFirstRetry) {
+ // Confirm that the topology coordinator does not schedule an immediate heartbeat retry if
+ // the heartbeat timeout period expired before the initial request completed.
+
+ HostAndPort target("host2", 27017);
+ Date_t firstRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T13:00Z"));
+
+ // Initial heartbeat request prepared, at t + 0.
+ std::pair<ReplSetHeartbeatArgs, Milliseconds> request =
+ getTopoCoord().prepareHeartbeatRequest(firstRequestDate, "rs0", target);
+ // 5 seconds to successfully complete the heartbeat before the timeout expires.
+ ASSERT_EQUALS(5000, request.second.total_milliseconds());
+
+ // Initial heartbeat request fails at t + 5000ms
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate + 5000, // Entire heartbeat period elapsed; no retry allowed.
+        Milliseconds(4990),  // Spent 4.99 of the 5 seconds in the network.
+ target,
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"),
+ OpTime(0, 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+    // Because the heartbeat timed out, the next heartbeat is scheduled 2 seconds out.
+ ASSERT_EQUALS(Date_t(firstRequestDate + 7000), action.getNextHeartbeatStartDate());
+}
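+
+// Editor's sketch (hypothetical names): the suppression asserted above and in
+// the next test is a single predicate on elapsed time since the first request.
+static bool sketchMayRetryHeartbeat(Date_t firstRequest, Date_t responseDate, Milliseconds timeout) {
+    // No retry is scheduled once the heartbeat timeout (5 seconds in these
+    // fixtures) has fully elapsed; the regular 2-second cadence resumes instead.
+    return static_cast<long long>(responseDate - firstRequest) < timeout.total_milliseconds();
+}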
+
+TEST_F(HeartbeatResponseTestOneRetry, HeartbeatTimeoutSuppressesSecondRetry) {
+    // Confirm that the topology coordinator does not schedule a second heartbeat retry if
+ // the heartbeat timeout period expired before the first retry completed.
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 5010, // Entire heartbeat period elapsed; no retry allowed.
+ Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network.
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"),
+ OpTime(0, 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+    // Because the heartbeat timed out, the next heartbeat is scheduled 2 seconds out.
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 7010), action.getNextHeartbeatStartDate());
+}
+
+TEST_F(HeartbeatResponseTestTwoRetries, HeartbeatThreeNonconsecutiveFailures) {
+ // Confirm that the topology coordinator does not mark a node down on three
+ // nonconsecutive heartbeat failures.
+ ReplSetHeartbeatResponse response;
+ response.noteReplSet();
+ response.setSetName("rs0");
+ response.setState(MemberState::RS_SECONDARY);
+ response.setElectable(true);
+ response.setVersion(5);
+
+ // successful response (third response due to the two failures in setUp())
+ HeartbeatResponseAction action =
+ getTopoCoord().processHeartbeatResponse(firstRequestDate() + 4500,
+ Milliseconds(400),
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(response),
+ OpTime(0, 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+    // Because the heartbeat succeeded, the next heartbeat is scheduled 2 seconds out.
+ ASSERT_EQUALS(Date_t(firstRequestDate() + 6500), action.getNextHeartbeatStartDate());
+
+ // request next heartbeat
+ getTopoCoord().prepareHeartbeatRequest(firstRequestDate() + 6500, "rs0", target());
+ // third failed response
+ action = getTopoCoord().processHeartbeatResponse(
+ firstRequestDate() + 7100,
+ Milliseconds(400),
+ target(),
+ StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, "")),
+ OpTime(0, 0)); // We've never applied anything.
+
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+
+ // Ensure a third nonconsecutive heartbeat failure did not cause the node to be marked down
+ BSONObjBuilder statusBuilder;
+ Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result");
+ getTopoCoord().prepareStatusResponse(
+ cbData(), firstRequestDate() + 7000, 600, OpTime(100, 0), &statusBuilder, &resultStatus);
+ ASSERT_OK(resultStatus);
+ BSONObj rsStatus = statusBuilder.obj();
+ std::vector<BSONElement> memberArray = rsStatus["members"].Array();
+ BSONObj member1Status = memberArray[1].Obj();
+
+ ASSERT_EQUALS(1, member1Status["_id"].Int());
+ ASSERT_EQUALS(1, member1Status["health"].Double());
+}
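+
+// Editor's note, as code (illustrative): the two rs-status checks in the tests
+// above pin down when a member's health flips in the status document.
+static double sketchMemberHealth(int consecutiveFailures) {
+    // Three *consecutive* failures (the initial attempt plus both retries)
+    // mark a member down (health 0.0); any success in between resets the count.
+    return consecutiveFailures >= 3 ? 0.0 : 1.0;
+}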
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataNewPrimary) {
+ OpTime election = OpTime(5, 0);
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneOlder) {
+ OpTime election = OpTime(5, 0);
+ OpTime election2 = OpTime(4, 0);
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election2,
+ election,
+ lastOpTimeApplied);
+ // second primary does not change primary index
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesNewOneNewer) {
+ OpTime election = OpTime(4, 0);
+ OpTime election2 = OpTime(5, 0);
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election2,
+ election,
+ lastOpTimeApplied);
+ // second primary does not change primary index
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneOlder) {
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(OpTime(5, 0));
+
+ OpTime election = OpTime(4, 0);
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction());
+ ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityFreshNode) {
+    // In this test, the TopologyCoordinator sees a PRIMARY ("host2") and then sees a higher
+ // priority and similarly fresh node ("host3"). However, since the coordinator's node
+ // (host1) is not the higher priority node, it takes no action.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(13, 0);
+ OpTime slightlyLessFreshLastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ slightlyLessFreshLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownSelfForHighPriorityFreshNode) {
+    // In this test, the TopologyCoordinator becomes PRIMARY and then sees a higher priority
+ // and equally fresh node ("host3"). As a result it responds with a StepDownSelf action.
+ //
+ // Despite having stepped down, we should remain electable, in order to dissuade lower
+ // priority nodes from standing for election.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ OpTime election = OpTime(1000, 0);
+
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(election);
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election, election);
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
+ ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+
+ // Process a heartbeat response to confirm that this node, which is no longer primary,
+ // still tells other nodes that it is electable. This will stop lower priority nodes
+ // from standing for election.
+ ReplSetHeartbeatArgs hbArgs;
+ hbArgs.setSetName("rs0");
+ hbArgs.setProtocolVersion(1);
+ hbArgs.setConfigVersion(6);
+ hbArgs.setSenderId(1);
+ hbArgs.setSenderHost(HostAndPort("host3", 27017));
+ ReplSetHeartbeatResponse hbResp;
+ ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", election, &hbResp));
+ ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toBSON().toString();
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownSelfForHighPriorityStaleNode) {
+    // In this test, the TopologyCoordinator becomes PRIMARY and then sees a higher priority
+ // and stale node ("host3"). As a result it responds with NoAction.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ OpTime election = OpTime(1000, 0);
+ OpTime staleTime = OpTime(0, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(election);
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, staleTime, election);
+ ASSERT_NO_ACTION(nextAction.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataDoNotStepDownPrimaryForHighPriorityStaleNode) {
+    // In this test, the TopologyCoordinator sees a PRIMARY ("host2") and then sees a higher
+ // priority and stale node ("host3"). As a result it responds with NoAction.
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "priority" << 3))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(1000, 0);
+ OpTime stale = OpTime(0, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election, election);
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, stale, election);
+ ASSERT_NO_ACTION(nextAction.getAction());
+}
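+
+// Editor's summary of the four priority tests above as one illustrative
+// predicate (hypothetical helper): does this node owe a StepDownSelf?
+static bool sketchYieldToHigherPriorityNode(bool selfIsPrimary, bool remoteIsFresh) {
+    // Only a primary steps itself down for a higher-priority node, and only
+    // when that node is fresh enough to catch up; a stale node, or a scenario
+    // where some other node is primary, produces NoAction here.
+    return selfIsPrimary && remoteIsFresh;
+}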
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataTwoPrimariesIncludingMeNewOneNewer) {
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(OpTime(2, 0));
+
+ OpTime election = OpTime(4, 0);
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
+ ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- TEST_F(HeartbeatResponseTest, ElectionVoteForAnotherNodeBeforeFreshnessReturns) {
- // In this test, the TopologyCoordinator goes through the steps of an election. However,
- // before its freshness round ends, it receives a fresh command followed by an elect command
- // from another node, both of which it responds positively to. The TopologyCoordinator's
- // freshness round then concludes successfully, but it fails to vote for itself, since it
- // recently voted for another node.
-
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
- // 4. "host3" sends an elect command, which the TopologyCoordinator responds to positively.
- // 5. The TopologyCoordinator's concludes its freshness round successfully.
- // 6. The TopologyCoordinator loses the election.
-
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(100,0);
- OpTime fresherOpApplied = OpTime(200,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- OpTime originalElectionTime = getTopoCoord().getElectionTime();
- OID originalElectionId = getTopoCoord().getElectionId();
- // prepare an incoming fresh command
- ReplicationCoordinator::ReplSetFreshArgs freshArgs;
- freshArgs.setName = "rs0";
- freshArgs.cfgver = 5;
- freshArgs.id = 2;
- freshArgs.who = HostAndPort("host3");
- freshArgs.opTime = fresherOpApplied;
-
- BSONObjBuilder freshResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareFreshResponse(
- freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
- BSONObj response = freshResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(lastOpTimeApplied, OpTime(response["opTime"].timestampValue()));
- ASSERT_FALSE(response["fresher"].trueValue());
- ASSERT_FALSE(response["veto"].trueValue());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- // make sure incoming fresh commands do not change electionTime and electionId
- ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
- ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
-
- // an elect command comes in
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- OID round = OID::gen();
- electArgs.set = "rs0";
- electArgs.round = round;
- electArgs.cfgver = 5;
- electArgs.whoid = 2;
-
- BSONObjBuilder electResponseBuilder;
- result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(), &electResponseBuilder, &result);
- stopCapturingLogMessages();
- response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting yea for host3:27017 (2)"));
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- // make sure incoming elect commands do not change electionTime and electionId
- ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
- ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
-
- // receive a heartbeat indicating the other node was elected
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(2, getCurrentPrimaryIndex());
- // make sure seeing a new primary does not change electionTime and electionId
- ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
- ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
-
- // now lose election and ensure _electionTime and _electionId are 0'd out
- getTopoCoord().processLoseElection();
- ASSERT_EQUALS(OID(), getTopoCoord().getElectionId());
- ASSERT_EQUALS(OpTime(0,0), getTopoCoord().getElectionTime());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(2, getCurrentPrimaryIndex());
- }
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+}
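+
+// Editor's sketch of the two-primaries rule in the tests above (hypothetical
+// helper): the more recently elected primary prevails.
+static bool sketchSelfStepsDown(OpTime selfElectionTime, OpTime remoteElectionTime) {
+    // The primary with the older election time steps down: StepDownSelf when
+    // ours is older, StepDownRemotePrimary when the remote's is.
+    return selfElectionTime < remoteElectionTime;
+}
+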
- TEST_F(HeartbeatResponseTest, ElectionRespondToFreshBeforeOurFreshnessReturns) {
- // In this test, the TopologyCoordinator goes through the steps of an election. However,
- // before its freshness round ends, the TopologyCoordinator receives a fresh command from
- // another node, which it responds positively to. Its freshness then ends successfully and
- // it wins the election. The other node's elect command then comes in and is responded to
- // negatively, maintaining the TopologyCoordinator's PRIMARY state.
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownNoMajority) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
- // 4. The TopologyCoordinator concludes its freshness round successfully and wins
- // the election.
- // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(100,0);
- OpTime fresherLastOpTimeApplied = OpTime(200,0);
- OID round = OID::gen();
- OID remoteRound = OID::gen();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // prepare an incoming fresh command
- ReplicationCoordinator::ReplSetFreshArgs freshArgs;
- freshArgs.setName = "rs0";
- freshArgs.cfgver = 5;
- freshArgs.id = 2;
- freshArgs.who = HostAndPort("host3");
- freshArgs.opTime = fresherLastOpTimeApplied;
-
- BSONObjBuilder freshResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareFreshResponse(
- freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
- BSONObj response = freshResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(lastOpTimeApplied, OpTime(response["opTime"].timestampValue()));
- ASSERT_FALSE(response["fresher"].trueValue());
- ASSERT_FALSE(response["veto"].trueValue());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
- // now win election and ensure _electionId and _electionTime are set properly
- getTopoCoord().processWinElection(round, election);
- ASSERT_EQUALS(round, getTopoCoord().getElectionId());
- ASSERT_EQUALS(election, getTopoCoord().getElectionTime());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // an elect command comes in
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- electArgs.set = "rs0";
- electArgs.round = remoteRound;
- electArgs.cfgver = 5;
- electArgs.whoid = 2;
-
- BSONObjBuilder electResponseBuilder;
- result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(), &electResponseBuilder, &result);
- stopCapturingLogMessages();
- response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(remoteRound, response["round"].OID());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- }
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButNoPriority) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmStarting) {
+ setSelfMemberState(MemberState::RS_STARTUP);
+
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmRecovering) {
+ setSelfMemberState(MemberState::RS_RECOVERING);
+
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- TEST_F(HeartbeatResponseTest, ElectionCompleteElectionThenReceiveFresh) {
- // In this test, the TopologyCoordinator goes through the steps of an election. After
- // being successfully elected, a fresher node sends a fresh command, which the
- // TopologyCoordinator responds positively to. The fresher node then sends an elect command,
- // which the Topology coordinator negatively to since the TopologyCoordinator just elected
- // itself.
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
- // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
- // 2. "host2" goes down, triggering an election.
- // 3. The TopologyCoordinator concludes its freshness round successfully and wins
- // the election.
- // 4. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
- // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIHaveStepdownWait) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
- setSelfMemberState(MemberState::RS_SECONDARY);
- now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
- // else some Date_t math goes horribly awry
-
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(100,0);
- OpTime fresherLastOpTimeApplied = OpTime(200,0);
- OID round = OID::gen();
- OID remoteRound = OID::gen();
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // candidate time!
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // now voteForSelf as though we received all our fresh responses
- ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
- // now win election
- getTopoCoord().processWinElection(round, election);
- ASSERT_EQUALS(0, getTopoCoord().getCurrentPrimaryIndex());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
-
- // prepare an incoming fresh command
- ReplicationCoordinator::ReplSetFreshArgs freshArgs;
- freshArgs.setName = "rs0";
- freshArgs.cfgver = 5;
- freshArgs.id = 2;
- freshArgs.who = HostAndPort("host3");
- freshArgs.opTime = fresherLastOpTimeApplied;
-
- BSONObjBuilder freshResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareFreshResponse(
- freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
- BSONObj response = freshResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(lastOpTimeApplied, OpTime(response["opTime"].timestampValue()));
- ASSERT_FALSE(response["fresher"].trueValue());
- ASSERT_TRUE(response["veto"].trueValue()) << response["errmsg"];
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // an elect command comes in
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- electArgs.set = "rs0";
- electArgs.round = remoteRound;
- electArgs.cfgver = 5;
- electArgs.whoid = 2;
-
- BSONObjBuilder electResponseBuilder;
- result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(), &electResponseBuilder, &result);
- stopCapturingLogMessages();
- response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(remoteRound, response["round"].OID());
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
- }
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityOfVotersUp) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017" << "votes" << 0) <<
- BSON("_id" << 3 << "host" << "host4:27017" << "votes" << 0) <<
- BSON("_id" << 4 << "host" << "host5:27017" << "votes" << 0) <<
- BSON("_id" << 5 << "host" << "host6:27017" << "votes" << 0) <<
- BSON("_id" << 6 << "host" << "host7:27017")) <<
- "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // freeze node to set stepdown wait
+ BSONObjBuilder response;
+ getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityButIAmArbiter) {
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajority) {
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(399, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
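+
+// Editor's recap of the "primary down" family of tests above as one
+// illustrative predicate (hypothetical helper; the real checks live in the
+// topology coordinator):
+static bool sketchShouldStandForElection(bool majorityVisible,
+                                         bool selfIsElectableSecondary,
+                                         bool stepdownWaitActive) {
+    // StartElection requires a visible majority of voters plus an electable
+    // SECONDARY self: not an arbiter, not STARTUP/RECOVERING, not priority 0,
+    // and not frozen by a pending stepdown wait.
+    return majorityVisible && selfIsElectableSecondary && !stepdownWaitActive;
+}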
+
+TEST_F(HeartbeatResponseTest, ElectionStartElectionWhileCandidate) {
+ // In this test, the TopologyCoordinator goes through the steps of a successful election,
+ // during which it receives a heartbeat that would normally trigger it to become a candidate
+ // and respond with a StartElection HeartbeatResponseAction. However, since it is already in
+    // candidate state, it responds with a NoAction HeartbeatResponseAction, and then finishes
+    // by winning the election.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. "host2" comes back, which would normally trigger election, but since the
+ // TopologyCoordinator is already in candidate mode, does not.
+ // 4. TopologyCoordinator concludes its freshness round successfully and wins the election.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(130, 0);
+ OID round = OID::gen();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+    // see the downed node as SECONDARY and decide to take no action, while remaining a candidate
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+
+ // normally this would trigger StartElection, but we are already a candidate
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
+
+ // now win election and ensure _electionId and _electionTime are set properly
+ getTopoCoord().processWinElection(round, election);
+ ASSERT_EQUALS(round, getTopoCoord().getElectionId());
+ ASSERT_EQUALS(election, getTopoCoord().getElectionTime());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, ElectionVoteForAnotherNodeBeforeFreshnessReturns) {
+ // In this test, the TopologyCoordinator goes through the steps of an election. However,
+ // before its freshness round ends, it receives a fresh command followed by an elect command
+ // from another node, both of which it responds positively to. The TopologyCoordinator's
+ // freshness round then concludes successfully, but it fails to vote for itself, since it
+ // recently voted for another node.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
+ // 4. "host3" sends an elect command, which the TopologyCoordinator responds to positively.
+    // 5. The TopologyCoordinator concludes its freshness round successfully.
+ // 6. The TopologyCoordinator loses the election.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(100, 0);
+ OpTime fresherOpApplied = OpTime(200, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ OpTime originalElectionTime = getTopoCoord().getElectionTime();
+ OID originalElectionId = getTopoCoord().getElectionId();
+ // prepare an incoming fresh command
+ ReplicationCoordinator::ReplSetFreshArgs freshArgs;
+ freshArgs.setName = "rs0";
+ freshArgs.cfgver = 5;
+ freshArgs.id = 2;
+ freshArgs.who = HostAndPort("host3");
+ freshArgs.opTime = fresherOpApplied;
+
+ BSONObjBuilder freshResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareFreshResponse(
+ freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
+ BSONObj response = freshResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(lastOpTimeApplied, OpTime(response["opTime"].timestampValue()));
+ ASSERT_FALSE(response["fresher"].trueValue());
+ ASSERT_FALSE(response["veto"].trueValue());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ // make sure incoming fresh commands do not change electionTime and electionId
+ ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
+ ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
+
+ // an elect command comes in
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ OID round = OID::gen();
+ electArgs.set = "rs0";
+ electArgs.round = round;
+ electArgs.cfgver = 5;
+ electArgs.whoid = 2;
+
+ BSONObjBuilder electResponseBuilder;
+ result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(), &electResponseBuilder, &result);
+ stopCapturingLogMessages();
+ response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("voting yea for host3:27017 (2)"));
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ // make sure incoming elect commands do not change electionTime and electionId
+ ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
+ ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
+
+ // receive a heartbeat indicating the other node was elected
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(2, getCurrentPrimaryIndex());
+ // make sure seeing a new primary does not change electionTime and electionId
+ ASSERT_EQUALS(originalElectionTime, getTopoCoord().getElectionTime());
+ ASSERT_EQUALS(originalElectionId, getTopoCoord().getElectionId());
+
+ // now lose election and ensure _electionTime and _electionId are 0'd out
+ getTopoCoord().processLoseElection();
+ ASSERT_EQUALS(OID(), getTopoCoord().getElectionId());
+ ASSERT_EQUALS(OpTime(0, 0), getTopoCoord().getElectionTime());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(2, getCurrentPrimaryIndex());
+}
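+
+// Editor's sketch of the vote lease that makes voteForMyself() fail above.
+// Names are hypothetical; the 30000ms added to now() in these tests suggests
+// LastVote::leaseTime is 30 seconds, but treat that as an assumption.
+static bool sketchCanVoteForSelf(Date_t now, Date_t lastVoteDate, long long leaseMillis) {
+    // Having just voted "yea" for host3, this node cannot cast another vote
+    // (including for itself) until the lease expires.
+    return now - lastVoteDate >= static_cast<unsigned long long>(leaseMillis);
+}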
+
+TEST_F(HeartbeatResponseTest, ElectionRespondToFreshBeforeOurFreshnessReturns) {
+ // In this test, the TopologyCoordinator goes through the steps of an election. However,
+ // before its freshness round ends, the TopologyCoordinator receives a fresh command from
+ // another node, which it responds positively to. Its freshness then ends successfully and
+ // it wins the election. The other node's elect command then comes in and is responded to
+ // negatively, maintaining the TopologyCoordinator's PRIMARY state.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. "host3" sends a fresh command, which the TopologyCoordinator responds to positively.
+ // 4. The TopologyCoordinator concludes its freshness round successfully and wins
+ // the election.
+ // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(100, 0);
+ OpTime fresherLastOpTimeApplied = OpTime(200, 0);
+ OID round = OID::gen();
+ OID remoteRound = OID::gen();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // prepare an incoming fresh command
+ ReplicationCoordinator::ReplSetFreshArgs freshArgs;
+ freshArgs.setName = "rs0";
+ freshArgs.cfgver = 5;
+ freshArgs.id = 2;
+ freshArgs.who = HostAndPort("host3");
+ freshArgs.opTime = fresherLastOpTimeApplied;
+
+ BSONObjBuilder freshResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareFreshResponse(
+ freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
+ BSONObj response = freshResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(lastOpTimeApplied, OpTime(response["opTime"].timestampValue()));
+ ASSERT_FALSE(response["fresher"].trueValue());
+ ASSERT_FALSE(response["veto"].trueValue());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
+ // now win election and ensure _electionId and _electionTime are set properly
+ getTopoCoord().processWinElection(round, election);
+ ASSERT_EQUALS(round, getTopoCoord().getElectionId());
+ ASSERT_EQUALS(election, getTopoCoord().getElectionTime());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // an elect command comes in
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ electArgs.set = "rs0";
+ electArgs.round = remoteRound;
+ electArgs.cfgver = 5;
+ electArgs.whoid = 2;
+
+ BSONObjBuilder electResponseBuilder;
+ result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(), &electResponseBuilder, &result);
+ stopCapturingLogMessages();
+ response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(remoteRound, response["round"].OID());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, ElectionCompleteElectionThenReceiveFresh) {
+ // In this test, the TopologyCoordinator goes through the steps of an election. After
+    // being successfully elected, it receives a fresh command from a fresher node, which it
+    // vetoes since it now considers itself primary. The fresher node then sends an elect
+    // command, which the TopologyCoordinator responds to negatively, since it has just
+    // elected itself.
+
+ // 1. All nodes heartbeat to indicate that they are up and that "host2" is PRIMARY.
+ // 2. "host2" goes down, triggering an election.
+ // 3. The TopologyCoordinator concludes its freshness round successfully and wins
+ // the election.
+    // 4. "host3" sends a fresh command, which the TopologyCoordinator vetoes (it is now
+    //    primary).
+ // 5. "host3" sends an elect command, which the TopologyCoordinator responds to negatively.
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ now() += 30000; // we need to be more than LastVote::leaseTime from the start of time or
+ // else some Date_t math goes horribly awry
+
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(100, 0);
+ OpTime fresherLastOpTimeApplied = OpTime(200, 0);
+ OID round = OID::gen();
+ OID remoteRound = OID::gen();
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // candidate time!
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // now voteForSelf as though we received all our fresh responses
+ ASSERT_TRUE(getTopoCoord().voteForMyself(now()++));
+ // now win election
+ getTopoCoord().processWinElection(round, election);
+ ASSERT_EQUALS(0, getTopoCoord().getCurrentPrimaryIndex());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+
+ // prepare an incoming fresh command
+ ReplicationCoordinator::ReplSetFreshArgs freshArgs;
+ freshArgs.setName = "rs0";
+ freshArgs.cfgver = 5;
+ freshArgs.id = 2;
+ freshArgs.who = HostAndPort("host3");
+ freshArgs.opTime = fresherLastOpTimeApplied;
+
+ BSONObjBuilder freshResponseBuilder;
+    Status result = Status(ErrorCodes::InternalError, "status not set by prepareFreshResponse");
+ getTopoCoord().prepareFreshResponse(
+ freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result);
+ BSONObj response = freshResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(lastOpTimeApplied, OpTime(response["opTime"].timestampValue()));
+ ASSERT_FALSE(response["fresher"].trueValue());
+ ASSERT_TRUE(response["veto"].trueValue()) << response["errmsg"];
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
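+    // Unlike the previous test, veto is now true: we are not fresher than host3, but we
+    // just won the election and consider ourselves primary, so we veto its candidacy.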
+
+ // an elect command comes in
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ electArgs.set = "rs0";
+ electArgs.round = remoteRound;
+ electArgs.cfgver = 5;
+ electArgs.whoid = 2;
+
+ BSONObjBuilder electResponseBuilder;
+ result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(), &electResponseBuilder, &result);
+ stopCapturingLogMessages();
+ response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(remoteRound, response["round"].OID());
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataPrimaryDownMajorityOfVotersUp) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"
+ << "votes" << 0)
+ << BSON("_id" << 3 << "host"
+ << "host4:27017"
+ << "votes" << 0) << BSON("_id" << 4 << "host"
+ << "host5:27017"
+ << "votes" << 0)
+ << BSON("_id" << 5 << "host"
+ << "host6:27017"
+ << "votes" << 0) << BSON("_id" << 6 << "host"
+ << "host7:27017"))
+ << "settings" << BSON("heartbeatTimeoutSecs" << 5)),
+ 0);
+
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ OpTime election = OpTime(400, 0);
+ OpTime lastOpTimeApplied = OpTime(300, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+
+    // make sure all non-voting nodes are down; that way we do not have a majority of members
+    // up, but we do have a majority of votes, since one of the two other voting members is up
+    // and so are we
+ nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host4"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host5"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveUpHeartbeat(HostAndPort("host7"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
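+    // Tally so far: only 3 of 7 members are up (self, host2, host7), yet all three votes
+    // are still reachable. Downing host2 next leaves 2 of 3 votes up, still a voting
+    // majority, so an election can be triggered.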
+
+ nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRelinquishPrimaryDueToNodeDisappearing) {
+ // become PRIMARY
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ makeSelfPrimary(OpTime(2, 0));
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ // become aware of other nodes
+ heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0));
+ heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0));
+ heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+ heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+
+ // lose that awareness and be sure we are going to stepdown
+ HeartbeatResponseAction nextAction =
+ receiveDownHeartbeat(HostAndPort("host2"), "rs0", OpTime(100, 0));
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(100, 0));
+ ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
+ ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
+ // Doesn't actually do the stepdown until stepDownIfPending is called
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(0, getCurrentPrimaryIndex());
+
+ ASSERT_TRUE(getTopoCoord().stepDownIfPending());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+}
+
+TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRemoteDoesNotExist) {
+ OpTime election = OpTime(5, 0);
+ OpTime lastOpTimeApplied = OpTime(3, 0);
+
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host9"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ election,
+ lastOpTimeApplied);
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+}
+
+class PrepareElectResponseTest : public TopoCoordTest {
+public:
+ PrepareElectResponseTest()
+ : now(0),
+ round(OID::gen()),
+ cbData(NULL, ReplicationExecutor::CallbackHandle(), Status::OK()) {}
+
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 10 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself")
+ << BSON("_id" << 1 << "host"
+ << "h1") << BSON("_id" << 2 << "host"
+ << "h2"
+ << "priority" << 10)
+ << BSON("_id" << 3 << "host"
+ << "h3"
+ << "priority" << 10))),
0);
-
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- OpTime election = OpTime(400,0);
- OpTime lastOpTimeApplied = OpTime(300,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
-
- // make sure all non-voting nodes are down, that way we do not have a majority of nodes
- // but do have a majority of votes since one of two voting members is up and so are we
- nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host4"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host5"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveUpHeartbeat(HostAndPort("host7"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_EQUALS(HeartbeatResponseAction::StartElection, nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRelinquishPrimaryDueToNodeDisappearing) {
- // become PRIMARY
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- makeSelfPrimary(OpTime(2,0));
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- // become aware of other nodes
- heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(1,0));
- heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, OpTime(1,0));
- heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
- heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
-
- // lose that awareness and be sure we are going to stepdown
- HeartbeatResponseAction nextAction = receiveDownHeartbeat(HostAndPort("host2"),
- "rs0",
- OpTime(100, 0));
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(100, 0));
- ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction());
- ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex());
- // Doesn't actually do the stepdown until stepDownIfPending is called
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(0, getCurrentPrimaryIndex());
-
- ASSERT_TRUE(getTopoCoord().stepDownIfPending());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- }
-
- TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataRemoteDoesNotExist) {
- OpTime election = OpTime(5,0);
- OpTime lastOpTimeApplied = OpTime(3,0);
-
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host9"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- election,
- lastOpTimeApplied);
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- }
-
- class PrepareElectResponseTest : public TopoCoordTest {
- public:
-
- PrepareElectResponseTest() :
- now(0),
- round(OID::gen()),
- cbData(NULL, ReplicationExecutor::CallbackHandle(), Status::OK()) {}
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 10 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself") <<
- BSON("_id" << 1 << "host" << "h1") <<
- BSON("_id" << 2 <<
- "host" << "h2" <<
- "priority" << 10) <<
- BSON("_id" << 3 <<
- "host" << "h3" <<
- "priority" << 10))),
- 0);
- }
-
- protected:
- Date_t now;
- OID round;
- ReplicationExecutor::CallbackData cbData;
- };
-
- TEST_F(PrepareElectResponseTest, ElectResponseIncorrectReplSetName) {
- // Test with incorrect replset name
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "fakeset";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(0, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1,
- countLogLinesContaining("received an elect request for 'fakeset' but our "
- "set name is 'rs0'"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.set = "rs0";
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseOurConfigStale) {
- // Test with us having a stale config version
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 20;
- args.whoid = 1;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(0, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1,
- countLogLinesContaining("not voting because our config version is stale"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.cfgver = 10;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseTheirConfigStale) {
- // Test with them having a stale config version
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 5;
- args.whoid = 1;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1,
- countLogLinesContaining("received stale config version # during election"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.cfgver = 10;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseNonExistentNode) {
- // Test with a non-existent node
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 99;
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("couldn't find member with id 99"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.whoid = 1;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseWeArePrimary) {
- // Test when we are already primary
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- getTopoCoord()._setCurrentPrimaryForTest(0);
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("I am already primary"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- getTopoCoord()._setCurrentPrimaryForTest(-1);
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseSomeoneElseIsPrimary) {
- // Test when someone else is already primary
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
- getTopoCoord()._setCurrentPrimaryForTest(2);
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("h2:27017 is already primary"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- getTopoCoord()._setCurrentPrimaryForTest(-1);
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseNotHighestPriority) {
- // Test trying to elect someone who isn't the highest priority node
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, jsTime());
-
- BSONObjBuilder responseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(-10000, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("h1:27017 has lower priority than h3:27017"));
-
- // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would)
- args.whoid = 3;
- BSONObjBuilder responseBuilder2;
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_EQUALS(1, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseHighestPriorityOfLiveNodes) {
- // Test trying to elect someone who isn't the highest priority node, but all higher nodes
- // are down
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 1;
-
- receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
- receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
-
- BSONObjBuilder responseBuilder;
- Status result = Status::OK();
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
- stopCapturingLogMessages();
- BSONObj response = responseBuilder.obj();
- ASSERT_EQUALS(1, response["vote"].Int());
- ASSERT_EQUALS(round, response["round"].OID());
- }
-
- TEST_F(PrepareElectResponseTest, ElectResponseValidVotes) {
- // Test a valid vote
- ReplicationCoordinator::ReplSetElectArgs args;
- args.set = "rs0";
- args.round = round;
- args.cfgver = 10;
- args.whoid = 2;
- now = 100;
-
- BSONObjBuilder responseBuilder1;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder1, &result);
- stopCapturingLogMessages();
- BSONObj response1 = responseBuilder1.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response1["vote"].Int());
- ASSERT_EQUALS(round, response1["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h2:27017 (2)"));
-
- // Test what would be a valid vote except that we already voted too recently
- args.whoid = 3;
-
- BSONObjBuilder responseBuilder2;
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now, OpTime(), &responseBuilder2, &result);
- stopCapturingLogMessages();
- BSONObj response2 = responseBuilder2.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(0, response2["vote"].Int());
- ASSERT_EQUALS(round, response2["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting no for h3:27017; "
- "voted for h2:27017 0 secs ago"));
-
- // Test that after enough time passes the same vote can proceed
- now += 30 * 1000 + 1; // just over 30 seconds later
-
- BSONObjBuilder responseBuilder3;
- startCapturingLogMessages();
- getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder3, &result);
- stopCapturingLogMessages();
- BSONObj response3 = responseBuilder3.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response3["vote"].Int());
- ASSERT_EQUALS(round, response3["round"].OID());
- ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h3:27017 (3)"));
- }
-
- TEST_F(TopoCoordTest, ElectResponseNotInConfig) {
- ReplicationCoordinator::ReplSetElectArgs args;
- BSONObjBuilder response;
- Status status = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareElectResponse(args, now(), OpTime(), &response, &status);
- ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
- ASSERT_EQUALS("Cannot participate in election because not initialized", status.reason());
- }
-
- class PrepareFreezeResponseTest : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017"))),
- 0);
- }
-
- BSONObj prepareFreezeResponse(int duration) {
- BSONObjBuilder response;
- startCapturingLogMessages();
- getTopoCoord().prepareFreezeResponse(now()++, duration, &response);
- stopCapturingLogMessages();
- return response.obj();
- }
- };
-
- TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozen) {
- BSONObj response = prepareFreezeResponse(0);
- ASSERT_EQUALS("unfreezing", response["info"].String());
- ASSERT_EQUALS(1, countLogLinesContaining("replSet info 'unfreezing'"));
- // 1 instead of 0 because it assigns to "now" in this case
- ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
}
- TEST_F(PrepareFreezeResponseTest, FreezeForOneSecond) {
- BSONObj response = prepareFreezeResponse(1);
- ASSERT_EQUALS("you really want to freeze for only 1 second?",
- response["warning"].String());
- ASSERT_EQUALS(1, countLogLinesContaining("replSet info 'freezing' for 1 seconds"));
- // 1001 because "now" was incremented once during initialization + 1000 ms wait
- ASSERT_EQUALS(1001LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForManySeconds) {
- BSONObj response = prepareFreezeResponse(20);
- ASSERT_TRUE(response.isEmpty());
- ASSERT_EQUALS(1, countLogLinesContaining("replSet info 'freezing' for 20 seconds"));
- // 20001 because "now" was incremented once during initialization + 20000 ms wait
- ASSERT_EQUALS(20001LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozenWhilePrimary) {
- makeSelfPrimary();
- BSONObj response = prepareFreezeResponse(0);
- ASSERT_EQUALS("unfreezing", response["info"].String());
- // doesn't mention being primary in this case for some reason
- ASSERT_EQUALS(0, countLogLinesContaining(
- "replSet info received freeze command but we are primary"));
- // 1 instead of 0 because it assigns to "now" in this case
- ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForOneSecondWhilePrimary) {
- makeSelfPrimary();
- BSONObj response = prepareFreezeResponse(1);
- ASSERT_EQUALS("you really want to freeze for only 1 second?",
- response["warning"].String());
- ASSERT_EQUALS(1, countLogLinesContaining(
- "replSet info received freeze command but we are primary"));
- ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(PrepareFreezeResponseTest, FreezeForManySecondsWhilePrimary) {
- makeSelfPrimary();
- BSONObj response = prepareFreezeResponse(20);
- ASSERT_TRUE(response.isEmpty());
- ASSERT_EQUALS(1, countLogLinesContaining(
- "replSet info received freeze command but we are primary"));
- ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
- }
-
- TEST_F(TopoCoordTest, UnfreezeWhileLoneNode) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 5 <<
- "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "host1:27017"))),
+protected:
+ Date_t now;
+ OID round;
+ ReplicationExecutor::CallbackData cbData;
+};
+
+TEST_F(PrepareElectResponseTest, ElectResponseIncorrectReplSetName) {
+ // Test with incorrect replset name
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "fakeset";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(0, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "received an elect request for 'fakeset' but our "
+ "set name is 'rs0'"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.set = "rs0";
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
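+// Note the graded vote values across the tests in this fixture: a plain nay is vote:0
+// (wrong set name, our config stale, or a too-recent prior yea), while vote:-10000 is a
+// veto (their config stale, unknown member id, a primary already present, or a
+// higher-priority candidate available).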
+
+TEST_F(PrepareElectResponseTest, ElectResponseOurConfigStale) {
+ // Test with us having a stale config version
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 20;
+ args.whoid = 1;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(0, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("not voting because our config version is stale"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.cfgver = 10;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseTheirConfigStale) {
+ // Test with them having a stale config version
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 5;
+ args.whoid = 1;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("received stale config version # during election"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.cfgver = 10;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseNonExistentNode) {
+ // Test with a non-existent node
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 99;
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("couldn't find member with id 99"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.whoid = 1;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseWeArePrimary) {
+ // Test when we are already primary
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ getTopoCoord()._setCurrentPrimaryForTest(0);
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("I am already primary"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ getTopoCoord()._setCurrentPrimaryForTest(-1);
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseSomeoneElseIsPrimary) {
+ // Test when someone else is already primary
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+ getTopoCoord()._setCurrentPrimaryForTest(2);
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("h2:27017 is already primary"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ getTopoCoord()._setCurrentPrimaryForTest(-1);
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseNotHighestPriority) {
+ // Test trying to elect someone who isn't the highest priority node
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, jsTime());
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(-10000, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("h1:27017 has lower priority than h3:27017"));
+
+    // Make sure nay votes do not prevent subsequent yeas (the way a yea vote would)
+ args.whoid = 3;
+ BSONObjBuilder responseBuilder2;
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result);
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_EQUALS(1, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseHighestPriorityOfLiveNodes) {
+ // Test trying to elect someone who isn't the highest priority node, but all higher nodes
+ // are down
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 1;
+
+ receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime());
+ receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime());
+
+ BSONObjBuilder responseBuilder;
+ Status result = Status::OK();
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder, &result);
+ stopCapturingLogMessages();
+ BSONObj response = responseBuilder.obj();
+ ASSERT_EQUALS(1, response["vote"].Int());
+ ASSERT_EQUALS(round, response["round"].OID());
+}
+
+TEST_F(PrepareElectResponseTest, ElectResponseValidVotes) {
+ // Test a valid vote
+ ReplicationCoordinator::ReplSetElectArgs args;
+ args.set = "rs0";
+ args.round = round;
+ args.cfgver = 10;
+ args.whoid = 2;
+ now = 100;
+
+ BSONObjBuilder responseBuilder1;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now += 60000, OpTime(), &responseBuilder1, &result);
+ stopCapturingLogMessages();
+ BSONObj response1 = responseBuilder1.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response1["vote"].Int());
+ ASSERT_EQUALS(round, response1["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h2:27017 (2)"));
+
+ // Test what would be a valid vote except that we already voted too recently
+ args.whoid = 3;
+
+ BSONObjBuilder responseBuilder2;
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now, OpTime(), &responseBuilder2, &result);
+ stopCapturingLogMessages();
+ BSONObj response2 = responseBuilder2.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(0, response2["vote"].Int());
+ ASSERT_EQUALS(round, response2["round"].OID());
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "voting no for h3:27017; "
+ "voted for h2:27017 0 secs ago"));
+
+ // Test that after enough time passes the same vote can proceed
+ now += 30 * 1000 + 1; // just over 30 seconds later
+
+ BSONObjBuilder responseBuilder3;
+ startCapturingLogMessages();
+ getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder3, &result);
+ stopCapturingLogMessages();
+ BSONObj response3 = responseBuilder3.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response3["vote"].Int());
+ ASSERT_EQUALS(round, response3["round"].OID());
+ ASSERT_EQUALS(1, countLogLinesContaining("voting yea for h3:27017 (3)"));
+}
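+// The three responses above trace the vote lease: a yea for h2 starts the lease, an
+// immediate request for h3 gets a nay (vote:0) while the lease is still active, and once
+// just over 30 seconds pass the lease has expired and h3 gets its yea.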
+
+TEST_F(TopoCoordTest, ElectResponseNotInConfig) {
+ ReplicationCoordinator::ReplSetElectArgs args;
+ BSONObjBuilder response;
+ Status status = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareElectResponse(args, now(), OpTime(), &response, &status);
+ ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status);
+ ASSERT_EQUALS("Cannot participate in election because not initialized", status.reason());
+}
+
+class PrepareFreezeResponseTest : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017"))),
0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- BSONObjBuilder response;
- getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
- ASSERT(response.obj().isEmpty());
- BSONObjBuilder response2;
- getTopoCoord().prepareFreezeResponse(now()++, 0, &response2);
- ASSERT_EQUALS("unfreezing", response2.obj()["info"].String());
- ASSERT(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
}
- class ShutdownInProgressTest : public TopoCoordTest {
- public:
-
- ShutdownInProgressTest() :
- ourCbData(NULL,
- ReplicationExecutor::CallbackHandle(),
- Status(ErrorCodes::CallbackCanceled, "")) {}
-
- virtual ReplicationExecutor::CallbackData cbData() { return ourCbData; }
-
- private:
- ReplicationExecutor::CallbackData ourCbData;
- };
-
- TEST_F(ShutdownInProgressTest, ShutdownInProgressWhenCallbackCanceledSyncFrom) {
- Status result = Status::OK();
+ BSONObj prepareFreezeResponse(int duration) {
BSONObjBuilder response;
- getTopoCoord().prepareSyncFromResponse(cbData(),
- HostAndPort("host2:27017"),
- OpTime(0,0),
- &response,
- &result);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
- ASSERT_TRUE(response.obj().isEmpty());
-
- }
-
- TEST_F(ShutdownInProgressTest, ShutDownInProgressWhenCallbackCanceledStatus) {
- Status result = Status::OK();
- BSONObjBuilder response;
- getTopoCoord().prepareStatusResponse(cbData(),
- Date_t(0),
- 0,
- OpTime(0,0),
- &response,
- &result);
- ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
- ASSERT_TRUE(response.obj().isEmpty());
- }
-
- class PrepareHeartbeatResponseTest : public TopoCoordTest {
- public:
-
- virtual void setUp() {
- TopoCoordTest::setUp();
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- }
-
- void prepareHeartbeatResponse(const ReplSetHeartbeatArgs& args,
- OpTime lastOpApplied,
- ReplSetHeartbeatResponse* response,
- Status* result) {
- *result = getTopoCoord().prepareHeartbeatResponse(now()++,
- args,
- "rs0",
- lastOpApplied,
- response);
- }
-
- };
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadProtocolVersion) {
- // set up args with bad protocol version
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(3);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT_EQUALS("replset: incompatible replset protocol version: 3", result.reason());
- ASSERT_EQUALS("", response.getHbMsg());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseFromSelf) {
- // set up args with incorrect replset name
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs0");
- args.setSenderId(10);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_EQUALS(ErrorCodes::BadValue, result);
- ASSERT(result.reason().find("from member with the same member ID as our self")) <<
- "Actual string was \"" << result.reason() << '"';
- ASSERT_EQUALS("", response.getHbMsg());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadSetName) {
- // set up args with incorrect replset name
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs1");
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
startCapturingLogMessages();
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
+ getTopoCoord().prepareFreezeResponse(now()++, duration, &response);
stopCapturingLogMessages();
- ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result);
- ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \"" <<
- result.reason() << '"';
- ASSERT_EQUALS(1,
- countLogLinesContaining("replSet set names do not match, ours: rs0; remote "
- "node's: rs1"));
- ASSERT_TRUE(response.isMismatched());
- ASSERT_EQUALS("", response.getHbMsg());
+ return response.obj();
}
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDMissing) {
- // set up args without a senderID
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs0");
- args.setConfigVersion(1);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(0,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDNotInConfig) {
- // set up args with a senderID which is not present in our config
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setSetName("rs0");
- args.setConfigVersion(1);
- args.setSenderId(2);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(0,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
+};
+
+TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozen) {
+ BSONObj response = prepareFreezeResponse(0);
+ ASSERT_EQUALS("unfreezing", response["info"].String());
+ ASSERT_EQUALS(1, countLogLinesContaining("replSet info 'unfreezing'"));
+ // 1 instead of 0 because it assigns to "now" in this case
+ ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForOneSecond) {
+ BSONObj response = prepareFreezeResponse(1);
+ ASSERT_EQUALS("you really want to freeze for only 1 second?", response["warning"].String());
+ ASSERT_EQUALS(1, countLogLinesContaining("replSet info 'freezing' for 1 seconds"));
+ // 1001 because "now" was incremented once during initialization + 1000 ms wait
+ ASSERT_EQUALS(1001LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForManySeconds) {
+ BSONObj response = prepareFreezeResponse(20);
+ ASSERT_TRUE(response.isEmpty());
+ ASSERT_EQUALS(1, countLogLinesContaining("replSet info 'freezing' for 20 seconds"));
+ // 20001 because "now" was incremented once during initialization + 20000 ms wait
+ ASSERT_EQUALS(20001LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, UnfreezeEvenWhenNotFrozenWhilePrimary) {
+ makeSelfPrimary();
+ BSONObj response = prepareFreezeResponse(0);
+ ASSERT_EQUALS("unfreezing", response["info"].String());
+ // doesn't mention being primary in this case for some reason
+ ASSERT_EQUALS(
+ 0, countLogLinesContaining("replSet info received freeze command but we are primary"));
+ // 1 instead of 0 because it assigns to "now" in this case
+ ASSERT_EQUALS(1LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForOneSecondWhilePrimary) {
+ makeSelfPrimary();
+ BSONObj response = prepareFreezeResponse(1);
+ ASSERT_EQUALS("you really want to freeze for only 1 second?", response["warning"].String());
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("replSet info received freeze command but we are primary"));
+ ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
+}
+
+TEST_F(PrepareFreezeResponseTest, FreezeForManySecondsWhilePrimary) {
+ makeSelfPrimary();
+ BSONObj response = prepareFreezeResponse(20);
+ ASSERT_TRUE(response.isEmpty());
+ ASSERT_EQUALS(
+ 1, countLogLinesContaining("replSet info received freeze command but we are primary"));
+ ASSERT_EQUALS(0LL, getTopoCoord().getStepDownTime().asInt64());
+}
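+// Taken together, the freeze tests pin down the replSetFreeze contract: a duration of 0
+// unfreezes (info: "unfreezing"), 1 second freezes but draws a warning, and longer
+// durations freeze silently for the requested time; a primary logs that it received the
+// command but does not adjust its step-down time.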
+
+TEST_F(TopoCoordTest, UnfreezeWhileLoneNode) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 5 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ BSONObjBuilder response;
+ getTopoCoord().prepareFreezeResponse(now()++, 20, &response);
+ ASSERT(response.obj().isEmpty());
+ BSONObjBuilder response2;
+ getTopoCoord().prepareFreezeResponse(now()++, 0, &response2);
+ ASSERT_EQUALS("unfreezing", response2.obj()["info"].String());
+ ASSERT(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
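+// For a lone electable node the freeze itself behaves the same way, but unfreezing
+// returns the node directly to candidacy, since there is no one else to elect.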
+
+class ShutdownInProgressTest : public TopoCoordTest {
+public:
+ ShutdownInProgressTest()
+ : ourCbData(NULL,
+ ReplicationExecutor::CallbackHandle(),
+ Status(ErrorCodes::CallbackCanceled, "")) {}
+
+ virtual ReplicationExecutor::CallbackData cbData() {
+ return ourCbData;
}
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionLow) {
- // set up args with a config version lower than ours
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(0);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_OK(result);
- ASSERT_TRUE(response.hasConfig());
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(0,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionHigh) {
- // set up args with a config version higher than ours
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(10);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.hasConfig());
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(0,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderDown) {
- // set up args with sender down from our perspective
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(0,0), &response, &result);
- ASSERT_OK(result);
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(0,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- ASSERT_TRUE(response.isStateDisagreement());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderUp) {
- // set up args and acknowledge sender
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(100,0), &response, &result);
- ASSERT_OK(result);
- // this change to true because we can now see a majority, unlike in the previous cases
- ASSERT_TRUE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(100,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- }
-
- TEST_F(TopoCoordTest, PrepareHeartbeatResponseNoConfigYet) {
- // set up args and acknowledge sender
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- // prepare response and check the results
- Status result = getTopoCoord().prepareHeartbeatResponse(now()++,
- args,
- "rs0",
- OpTime(0,0),
- &response);
- ASSERT_OK(result);
- // this change to true because we can now see a majority, unlike in the previous cases
- ASSERT_FALSE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
- ASSERT_EQUALS(OpTime(0,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("", response.getReplicaSetName());
- ASSERT_EQUALS(-2, response.getVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseAsPrimary) {
- makeSelfPrimary(OpTime(10,0));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
-
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(11,0), &response, &result);
- ASSERT_OK(result);
- // electable because we are already primary
- ASSERT_TRUE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
- ASSERT_EQUALS(OpTime(11,0), response.getOpTime());
- ASSERT_EQUALS(OpTime(10,0), response.getElectionTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- ASSERT_EQUALS("", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- }
-
- TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseWithSyncSource) {
- // get a sync source
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
- heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0,0));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1,0));
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1,0));
- getTopoCoord().chooseNewSyncSource(now()++, OpTime(0,0));
-
- // set up args
- ReplSetHeartbeatArgs args;
- args.setProtocolVersion(1);
- args.setConfigVersion(1);
- args.setSetName("rs0");
- args.setSenderId(20);
- ReplSetHeartbeatResponse response;
- Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
-
- // prepare response and check the results
- prepareHeartbeatResponse(args, OpTime(100,0), &response, &result);
- ASSERT_OK(result);
- ASSERT_TRUE(response.isElectable());
- ASSERT_TRUE(response.isReplSet());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(100,0), response.getOpTime());
- ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
- // changed to a syncing message because our sync source changed recently
- ASSERT_EQUALS("syncing from: h2:27017", response.getHbMsg());
- ASSERT_EQUALS("rs0", response.getReplicaSetName());
- ASSERT_EQUALS(1, response.getVersion());
- ASSERT_EQUALS(HostAndPort("h2").toString(), response.getSyncingTo());
- }
-
- TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneNode) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself"))),
- 0);
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- // if we are the only node, we should become a candidate when we transition to SECONDARY
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, CandidateWhenLoneSecondaryNodeReconfig) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- ReplicaSetConfig cfg;
- cfg.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself" << "priority" << 0))));
- getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
-
- // we should become a candidate when we reconfig to become electable
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- }
-
- TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneUnelectableNode) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- ReplicaSetConfig cfg;
- cfg.initialize(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "hself" << "priority" << 0))));
-
- getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- // despite being the only node, we are unelectable, so we should not become a candidate
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigToBeAddedToTheSet) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- // config to be absent from the set
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- -1);
- // should become removed since we are not in the set
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
-
- // reconfig to add to set
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- // having been added to the config, we should no longer be REMOVED and should enter STARTUP2
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSet) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
- // reconfig to remove self
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- -1);
- // should become removed since we are no longer in the set
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSetAsPrimary) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017"))),
+private:
+ ReplicationExecutor::CallbackData ourCbData;
+};
+
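+// The two tests below exercise the canceled-callback path: given a canceled
+// ReplicationExecutor::CallbackData from the executor, each prepare*Response method is
+// expected to report ErrorCodes::ShutdownInProgress and leave the response builder empty.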
+TEST_F(ShutdownInProgressTest, ShutdownInProgressWhenCallbackCanceledSyncFrom) {
+ Status result = Status::OK();
+ BSONObjBuilder response;
+ getTopoCoord().prepareSyncFromResponse(
+ cbData(), HostAndPort("host2:27017"), OpTime(0, 0), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
+ ASSERT_TRUE(response.obj().isEmpty());
+}
+
+TEST_F(ShutdownInProgressTest, ShutdownInProgressWhenCallbackCanceledStatus) {
+ Status result = Status::OK();
+ BSONObjBuilder response;
+ getTopoCoord().prepareStatusResponse(cbData(), Date_t(0), 0, OpTime(0, 0), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, result);
+ ASSERT_TRUE(response.obj().isEmpty());
+}
+
+class PrepareHeartbeatResponseTest : public TopoCoordTest {
+public:
+ virtual void setUp() {
+ TopoCoordTest::setUp();
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
0);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // win election and become primary
- getTopoCoord().processWinElection(OID::gen(), OpTime(0,0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // reconfig to remove self
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- -1);
- // should become removed since we are no longer in the set even though we were primary
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigCanNoLongerBePrimary) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017"))),
- 0);
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // win election and become primary
- getTopoCoord().processWinElection(OID::gen(), OpTime(0,0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // now lose primary due to loss of electability
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigContinueToBePrimary) {
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017"))),
- 0);
-
- ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
- getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
- ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
-
- // win election and become primary
- getTopoCoord().processWinElection(OID::gen(), OpTime(0,0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // Now reconfig in ways that leave us electable and ensure we are still the primary.
- // Add hosts
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0,
- Date_t(-1),
- OpTime(10,0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
-
- // Change priorities and tags
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017" << "priority" << 10) <<
- BSON("_id" << 1 <<
- "host" << "host2:27017" <<
- "priority" << 5 <<
- "tags" << BSON("dc" << "NA" << "rack" << "rack1")))),
- 0,
- Date_t(-1),
- OpTime(10,0));
- ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, ReconfigKeepSecondary) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 1 << "host" << "host1:27017") <<
- BSON("_id" << 2 << "host" << "host2:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
setSelfMemberState(MemberState::RS_SECONDARY);
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
-
- // reconfig and stay secondary
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
- ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
- ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
}
- TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndResponse) {
- OpTime election = OpTime(14,0);
- OpTime lastOpTimeApplied = OpTime(13,0);
-
- // all three members up and secondaries
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // now request from host3 and receive after host2 has been removed via reconfig
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 2 << "host" << "host3:27017"))),
- 0);
-
- ReplSetHeartbeatResponse hb;
- hb.initialize(BSON("ok" << 1 <<
- "v" << 1 <<
- "state" << MemberState::RS_PRIMARY));
- hb.setOpTime(lastOpTimeApplied);
- hb.setElectionTime(election);
- StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(now()++,
- Milliseconds(0),
- HostAndPort("host3"),
- hbResponse,
- lastOpTimeApplied);
-
- // now primary should be host3, index 1, and we should perform NoAction in response
- ASSERT_EQUALS(1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(action.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndResponse) {
- OpTime election = OpTime(14,0);
- OpTime lastOpTimeApplied = OpTime(13,0);
-
- // all three members up and secondaries
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_PRIMARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // now request from host3 and receive after host2 has been removed via reconfig
- getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 2 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "host1:27017") <<
- BSON("_id" << 1 << "host" << "host2:27017"))),
- 0);
-
- ReplSetHeartbeatResponse hb;
- hb.initialize(BSON("ok" << 1 <<
- "v" << 1 <<
- "state" << MemberState::RS_PRIMARY));
- hb.setOpTime(lastOpTimeApplied);
- hb.setElectionTime(election);
- StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
- HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(now()++,
- Milliseconds(0),
- HostAndPort("host3"),
- hbResponse,
- lastOpTimeApplied);
-
- // primary should not be set and we should perform NoAction in response
- ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
- ASSERT_NO_ACTION(action.getAction());
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberNotInConfig) {
- // In this test, the TopologyCoordinator should tell us to change sync sources away from
- // "host4" since "host4" is absent from the config
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host4"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberHasYetToHeartbeat) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" since we do not yet have a heartbeat (and as a result do not yet have an optime)
- // for "host2"
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherHappierMemberExists) {
- // In this test, the TopologyCoordinator should tell us to change sync sources away from
- // "host2" and to "host3" since "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(4,0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(3005,0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- startCapturingLogMessages();
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsBlackListed) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" is blacklisted
- // Then, confirm that unblacklisting only works if time has passed the blacklist time.
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(400,0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(3005,0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- getTopoCoord().blacklistSyncSource(HostAndPort("host3"), now() + 100);
-
- // set up complete, time for actual check
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
-
- // unblacklist with too early a time (node should remain blacklisted)
- getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + 90);
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
-
- // unblacklist and it should succeed
- getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + 100);
- startCapturingLogMessages();
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsDown) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" is down
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(400,0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(3005,0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsNotReadable) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" is in a non-readable mode (RS_ROLLBACK)
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(4,0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(3005,0);
-
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_ROLLBACK,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexes) {
- // In this test, the TopologyCoordinator should not tell us to change sync sources away from
- // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
- // "host3", since "host3" does not build indexes
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(4,0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(3005,0);
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 6 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself") <<
- BSON("_id" << 1 << "host" << "host2") <<
- BSON("_id" << 2 << "host" << "host3" <<
- "buildIndexes" << false << "priority" << 0))),
- 0);
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- }
-
- TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexesNorDoWe) {
- // In this test, the TopologyCoordinator should tell us to change sync sources away from
- // "host2" and to "host3" despite "host3" not building indexes because we do not build
- // indexes either and "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
- OpTime election = OpTime(0,0);
- OpTime lastOpTimeApplied = OpTime(4,0);
- // ahead by more than maxSyncSourceLagSecs (30)
- OpTime fresherLastOpTimeApplied = OpTime(3005,0);
-
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 7 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 0 << "host" << "hself" <<
- "buildIndexes" << false << "priority" << 0) <<
- BSON("_id" << 1 << "host" << "host2") <<
- BSON("_id" << 2 << "host" << "host3" <<
- "buildIndexes" << false << "priority" << 0))),
- 0);
- HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- lastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
- nextAction = receiveUpHeartbeat(HostAndPort("host3"),
- "rs0",
- MemberState::RS_SECONDARY,
- election,
- fresherLastOpTimeApplied,
- lastOpTimeApplied);
- ASSERT_NO_ACTION(nextAction.getAction());
-
- // set up complete, time for actual check
- startCapturingLogMessages();
- ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
- }
-
- TEST_F(TopoCoordTest, CheckShouldStandForElectionWithPrimary) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(1,0));
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(0,0)));
- }
-
- TEST_F(TopoCoordTest, CheckShouldStandForElectionNotCloseEnoughToLastOptime) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
-
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(10000,0));
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(100,0)));
- }
-
- TEST_F(TopoCoordTest, VoteForMyselfFailsWhileNotCandidate) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
- }
-
- TEST_F(TopoCoordTest, GetMemberStateArbiter) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself" << "arbiterOnly" << true) <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- ASSERT_EQUALS(MemberState::RS_ARBITER, getTopoCoord().getMemberState().s);
- }
-
- TEST_F(TopoCoordTest, UnelectableIfAbsentFromConfig) {
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- startCapturingLogMessages();
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(10,0)));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("not a member of a valid replica set config"));
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
- }
-
- TEST_F(TopoCoordTest, UnelectableIfVotedRecently) {
- updateConfig(BSON("_id" << "rs0" <<
- "version" << 1 <<
- "members" << BSON_ARRAY(
- BSON("_id" << 10 << "host" << "hself") <<
- BSON("_id" << 20 << "host" << "h2") <<
- BSON("_id" << 30 << "host" << "h3"))),
- 0);
- setSelfMemberState(MemberState::RS_SECONDARY);
- heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(100,0));
-
- // vote for another node
- OID remoteRound = OID::gen();
- ReplicationCoordinator::ReplSetElectArgs electArgs;
- electArgs.set = "rs0";
- electArgs.round = remoteRound;
- electArgs.cfgver = 1;
- electArgs.whoid = 20;
-
- // need to be 30 secs beyond the start of time to pass last vote lease
- now() += 30*1000;
- BSONObjBuilder electResponseBuilder;
- Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
- getTopoCoord().prepareElectResponse(
- electArgs, now()++, OpTime(100,0), &electResponseBuilder, &result);
- BSONObj response = electResponseBuilder.obj();
- ASSERT_OK(result);
- ASSERT_EQUALS(1, response["vote"].Int());
- ASSERT_EQUALS(remoteRound, response["round"].OID());
-
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
- startCapturingLogMessages();
- ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(10,0)));
- stopCapturingLogMessages();
- ASSERT_EQUALS(1, countLogLinesContaining("I recently voted for "));
- logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+ void prepareHeartbeatResponse(const ReplSetHeartbeatArgs& args,
+ OpTime lastOpApplied,
+ ReplSetHeartbeatResponse* response,
+ Status* result) {
+ *result =
+ getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", lastOpApplied, response);
}
+};
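+
+// PrepareHeartbeatResponseTest installs a three-node config (hself=10, h2=20, h3=30) and routes
+// each test through the prepareHeartbeatResponse() helper above, which advances the fake clock
+// and forwards to TopologyCoordinator::prepareHeartbeatResponse() for set "rs0". A minimal usage
+// sketch, assuming the fixture defaults:
+//
+//     ReplSetHeartbeatArgs args;
+//     args.setProtocolVersion(1);
+//     args.setSetName("rs0");
+//     ReplSetHeartbeatResponse response;
+//     Status result(ErrorCodes::InternalError, "result not set");
+//     prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);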
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadProtocolVersion) {
+ // set up args with bad protocol version
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(3);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT_EQUALS("replset: incompatible replset protocol version: 3", result.reason());
+ ASSERT_EQUALS("", response.getHbMsg());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseFromSelf) {
+ // set up args with incorrect replset name
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(10);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_EQUALS(ErrorCodes::BadValue, result);
+ ASSERT(result.reason().find("from member with the same member ID as our self"))
+ << "Actual string was \"" << result.reason() << '"';
+ ASSERT_EQUALS("", response.getHbMsg());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseBadSetName) {
+ // set up args with incorrect replset name
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs1");
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ startCapturingLogMessages();
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result);
+ ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \""
+ << result.reason() << '"';
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ "replSet set names do not match, ours: rs0; remote "
+ "node's: rs1"));
+ ASSERT_TRUE(response.isMismatched());
+ ASSERT_EQUALS("", response.getHbMsg());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDMissing) {
+ // set up args without a senderID
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs0");
+ args.setConfigVersion(1);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(0, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderIDNotInConfig) {
+ // set up args with a senderID which is not present in our config
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setSetName("rs0");
+ args.setConfigVersion(1);
+ args.setSenderId(2);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(0, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionLow) {
+ // set up args with a config version lower than ours
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(0);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_TRUE(response.hasConfig());
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(0, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseConfigVersionHigh) {
+ // set up args with a config version higher than ours
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(10);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.hasConfig());
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(0, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+}
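+
+// Taken together, the two config-version tests above encode the config propagation rule: a
+// heartbeat from a node with an older config gets our full config attached (hasConfig() is
+// true), while a heartbeat claiming a newer config gets none, as we have nothing fresher.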
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderDown) {
+ // set up args with sender down from our perspective
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(0, 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(0, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+ ASSERT_TRUE(response.isStateDisagreement());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseSenderUp) {
+ // set up args and acknowledge sender
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(100, 0), &response, &result);
+ ASSERT_OK(result);
+ // this changed to true because we can now see a majority, unlike in the previous cases
+ ASSERT_TRUE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(100, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+}
+
+TEST_F(TopoCoordTest, PrepareHeartbeatResponseNoConfigYet) {
+ // set up args
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ // prepare response and check the results
+ Status result =
+ getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", OpTime(0, 0), &response);
+ ASSERT_OK(result);
+ // not electable because this node has no config and cannot see a majority
+ ASSERT_FALSE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
+ ASSERT_EQUALS(OpTime(0, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("", response.getReplicaSetName());
+ ASSERT_EQUALS(-2, response.getVersion());
+}
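+
+// Note: in the no-config case above, getVersion() returning -2 appears to serve as a sentinel
+// for "no replica set config installed yet", with the empty set name carrying the same signal.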
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseAsPrimary) {
+ makeSelfPrimary(OpTime(10, 0));
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(11, 0), &response, &result);
+ ASSERT_OK(result);
+ // electable because we are already primary
+ ASSERT_TRUE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(11, 0), response.getOpTime());
+ ASSERT_EQUALS(OpTime(10, 0), response.getElectionTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ ASSERT_EQUALS("", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+}
+
+TEST_F(PrepareHeartbeatResponseTest, PrepareHeartbeatResponseWithSyncSource) {
+ // get a sync source
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+ heartbeatFromMember(HostAndPort("h3"), "rs0", MemberState::RS_SECONDARY, OpTime(0, 0));
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0));
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(1, 0));
+ getTopoCoord().chooseNewSyncSource(now()++, OpTime(0, 0));
+
+ // set up args
+ ReplSetHeartbeatArgs args;
+ args.setProtocolVersion(1);
+ args.setConfigVersion(1);
+ args.setSetName("rs0");
+ args.setSenderId(20);
+ ReplSetHeartbeatResponse response;
+ Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result");
+
+ // prepare response and check the results
+ prepareHeartbeatResponse(args, OpTime(100, 0), &response, &result);
+ ASSERT_OK(result);
+ ASSERT_TRUE(response.isElectable());
+ ASSERT_TRUE(response.isReplSet());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
+ ASSERT_EQUALS(OpTime(100, 0), response.getOpTime());
+ ASSERT_EQUALS(Seconds(0).total_milliseconds(), response.getTime().total_milliseconds());
+ // changed to a syncing message because our sync source changed recently
+ ASSERT_EQUALS("syncing from: h2:27017", response.getHbMsg());
+ ASSERT_EQUALS("rs0", response.getReplicaSetName());
+ ASSERT_EQUALS(1, response.getVersion());
+ ASSERT_EQUALS(HostAndPort("h2").toString(), response.getSyncingTo());
+}
+
+TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneNode) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"))),
+ 0);
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // if we are the only node, we should become a candidate when we transition to SECONDARY
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, CandidateWhenLoneSecondaryNodeReconfig) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ ReplicaSetConfig cfg;
+ cfg.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"
+ << "priority" << 0))));
+ getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // we should become a candidate when we reconfig to become electable
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+}
+
+TEST_F(TopoCoordTest, SetFollowerSecondaryWhenLoneUnelectableNode) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ ReplicaSetConfig cfg;
+ cfg.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "hself"
+ << "priority" << 0))));
+
+ getTopoCoord().updateConfig(cfg, 0, now()++, OpTime());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // despite being the only node, we are unelectable, so we should not become a candidate
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigToBeAddedToTheSet) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ // config to be absent from the set
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ -1);
+ // should become removed since we are not in the set
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
+
+ // reconfig to add to set
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ // having been added to the config, we should no longer be REMOVED and should enter STARTUP2
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSet) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+
+ // reconfig to remove self
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ -1);
+ // should become removed since we are no longer in the set
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigToBeRemovedFromTheSetAsPrimary) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // win election and become primary
+ getTopoCoord().processWinElection(OID::gen(), OpTime(0, 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // reconfig to remove self
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host2:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ -1);
+ // should become removed since we are no longer in the set even though we were primary
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_REMOVED, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigCanNoLongerBePrimary) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // win election and become primary
+ getTopoCoord().processWinElection(OID::gen(), OpTime(0, 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // now lose primary due to loss of electability
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigContinueToBePrimary) {
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"))),
+ 0);
+
+ ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ getTopoCoord().setFollowerMode(MemberState::RS_SECONDARY);
+ ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole());
+
+ // win election and become primary
+ getTopoCoord().processWinElection(OID::gen(), OpTime(0, 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // Now reconfig in ways that leave us electable and ensure we are still the primary.
+ // Add hosts
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0,
+ Date_t(-1),
+ OpTime(10, 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+
+ // Change priorities and tags
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017"
+ << "priority" << 10)
+ << BSON("_id" << 1 << "host"
+ << "host2:27017"
+ << "priority" << 5 << "tags" << BSON("dc"
+ << "NA"
+ << "rack"
+ << "rack1")))),
+ 0,
+ Date_t(-1),
+ OpTime(10, 0));
+ ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, ReconfigKeepSecondary) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+
+ // reconfig and stay secondary
+ updateConfig(
+ BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017") << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+ ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole());
+ ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndResponse) {
+ OpTime election = OpTime(14, 0);
+ OpTime lastOpTimeApplied = OpTime(13, 0);
+
+ // all three members up and secondaries
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // now request from host3 and receive after host2 has been removed via reconfig
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host3:27017"))),
+ 0);
+
+ ReplSetHeartbeatResponse hb;
+ hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY));
+ hb.setOpTime(lastOpTimeApplied);
+ hb.setElectionTime(election);
+ StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied);
+
+ // now primary should be host3, index 1, and we should perform NoAction in response
+ ASSERT_EQUALS(1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(action.getAction());
+}
+
+TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndResponse) {
+ OpTime election = OpTime(14, 0);
+ OpTime lastOpTimeApplied = OpTime(13, 0);
+
+ // all three members up and secondaries
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_PRIMARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // now request from host3 and receive after host2 has been removed via reconfig
+ getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host3"));
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host1:27017")
+ << BSON("_id" << 1 << "host"
+ << "host2:27017"))),
+ 0);
+
+ ReplSetHeartbeatResponse hb;
+ hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY));
+ hb.setOpTime(lastOpTimeApplied);
+ hb.setElectionTime(election);
+ StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
+ HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
+ now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied);
+
+ // primary should not be set and we should perform NoAction in response
+ ASSERT_EQUALS(-1, getCurrentPrimaryIndex());
+ ASSERT_NO_ACTION(action.getAction());
+}
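+
+// The two reconfig-race tests above differ only in whether the responder survives the reconfig:
+// when host3 remains in the new config, its PRIMARY state is recorded at its new member index
+// (1); when it has been removed, the primary index stays -1. Neither case requires any action.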
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberNotInConfig) {
+ // In this test, the TopologyCoordinator should tell us to change sync sources away from
+ // "host4" since "host4" is absent from the config
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host4"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceMemberHasYetToHeartbeat) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" since we do not yet have a heartbeat (and as a result do not yet have an optime)
+ // for "host2"
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherHappierMemberExists) {
+ // In this test, the TopologyCoordinator should tell us to change sync sources away from
+ // "host2" and to "host3" since "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(4, 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(3005, 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ startCapturingLogMessages();
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+}
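
The passing case above reduces to a single lag rule: abandon the current source when some other member's applied optime is more than maxSyncSourceLagSecs (30 in these tests) ahead of it. A hedged sketch of just that comparison, with optimes reduced to seconds:

    #include <vector>

    const long long kMaxSyncSourceLagSecs = 30;  // the value these tests assume

    // True when any candidate is more than kMaxSyncSourceLagSecs ahead of the
    // current sync source, e.g. 3005 > 4 + 30 in the test above.
    bool sourceIsTooStale(long long sourceOpTimeSecs,
                          const std::vector<long long>& candidateOpTimeSecs) {
        for (long long candidate : candidateOpTimeSecs) {
            if (candidate > sourceOpTimeSecs + kMaxSyncSourceLagSecs)
                return true;
        }
        return false;
    }
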
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsBlackListed) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" is blacklisted
+ // Then, confirm that unblacklisting only works if time has passed the blacklist time.
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(400, 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(3005, 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ getTopoCoord().blacklistSyncSource(HostAndPort("host3"), now() + 100);
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+
+    // unblacklist with too early a time (node should remain blacklisted)
+ getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + 90);
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+
+ // unblacklist and it should succeed
+ getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + 100);
+ startCapturingLogMessages();
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+}
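
The blacklist behavior this test depends on is an expiry map: blacklistSyncSource records an "until" time, and unblacklistSyncSource drops the entry only once the supplied time has reached that expiry, which is why now() + 90 leaves the node blacklisted and now() + 100 frees it. A toy sketch, not the real TopologyCoordinator members:

    #include <map>
    #include <string>

    // Hypothetical expiry map keyed by host.
    std::map<std::string, long long> blacklist;

    void blacklistSyncSource(const std::string& host, long long until) {
        blacklist[host] = until;
    }

    // The entry is removed only once `when` has reached the stored expiry.
    void unblacklistSyncSource(const std::string& host, long long when) {
        auto it = blacklist.find(host);
        if (it != blacklist.end() && when >= it->second)
            blacklist.erase(it);
    }

    bool isBlacklisted(const std::string& host, long long now) {
        auto it = blacklist.find(host);
        return it != blacklist.end() && now < it->second;
    }
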
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsDown) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" is down
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(400, 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(3005, 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberIsNotReadable) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" is in a non-readable mode (RS_ROLLBACK)
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(4, 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(3005, 0);
+
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_ROLLBACK,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexes) {
+ // In this test, the TopologyCoordinator should not tell us to change sync sources away from
+ // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind
+ // "host3", since "host3" does not build indexes
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(4, 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(3005, 0);
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 6 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself")
+ << BSON("_id" << 1 << "host"
+ << "host2")
+ << BSON("_id" << 2 << "host"
+ << "host3"
+ << "buildIndexes" << false << "priority" << 0))),
+ 0);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+}
+
+TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceFresherMemberDoesNotBuildIndexesNorDoWe) {
+ // In this test, the TopologyCoordinator should tell us to change sync sources away from
+ // "host2" and to "host3" despite "host3" not building indexes because we do not build
+ // indexes either and "host2" is more than maxSyncSourceLagSecs(30) behind "host3"
+ OpTime election = OpTime(0, 0);
+ OpTime lastOpTimeApplied = OpTime(4, 0);
+ // ahead by more than maxSyncSourceLagSecs (30)
+ OpTime fresherLastOpTimeApplied = OpTime(3005, 0);
+
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 7 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "hself"
+ << "buildIndexes" << false << "priority" << 0)
+ << BSON("_id" << 1 << "host"
+ << "host2")
+ << BSON("_id" << 2 << "host"
+ << "host3"
+ << "buildIndexes" << false << "priority" << 0))),
+ 0);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ lastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+ nextAction = receiveUpHeartbeat(HostAndPort("host3"),
+ "rs0",
+ MemberState::RS_SECONDARY,
+ election,
+ fresherLastOpTimeApplied,
+ lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ startCapturingLogMessages();
+ ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), now()));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("changing sync target"));
+}
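
Taken together, the four preceding tests pin down which members even count as replacement candidates: a candidate must be up, in a readable state (RS_ROLLBACK is not), and must build indexes whenever we do; if we skip index builds ourselves, that last requirement is waived. A compact sketch of that filter under hypothetical types:

    // Hypothetical per-member snapshot used when filtering candidates.
    struct Candidate {
        bool up;             // false once a down heartbeat is received
        bool readable;       // false in RS_ROLLBACK and similar states
        bool buildsIndexes;  // from the member's config
    };

    bool isEligibleSyncSource(const Candidate& c, bool selfBuildsIndexes) {
        if (!c.up || !c.readable)
            return false;
        // The buildIndexes mismatch only disqualifies a candidate when we
        // build indexes ourselves (the ...NorDoWe test waives it).
        if (selfBuildsIndexes && !c.buildsIndexes)
            return false;
        return true;
    }
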
+
+TEST_F(TopoCoordTest, CheckShouldStandForElectionWithPrimary) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(1, 0));
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(0, 0)));
+}
+
+TEST_F(TopoCoordTest, CheckShouldStandForElectionNotCloseEnoughToLastOptime) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(10000, 0));
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(100, 0)));
+}
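
The staleness test above relies on a proximity rule: a node refuses to stand for election when its own last applied optime trails the freshest optime it has heard of by more than a fixed window (OpTime(100, 0) against OpTime(10000, 0) here). A sketch of the shape of that check; the window constant is an assumption for illustration, not a value taken from this commit:

    // Hypothetical proximity window; the real constant lives inside the
    // TopologyCoordinator.
    const long long kCloseEnoughSecs = 10;

    bool closeEnoughToStand(long long lastAppliedSecs, long long freshestKnownSecs) {
        return lastAppliedSecs + kCloseEnoughSecs >= freshestKnownSecs;
    }
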
+
+TEST_F(TopoCoordTest, VoteForMyselfFailsWhileNotCandidate) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(getTopoCoord().voteForMyself(now()++));
+}
+
+TEST_F(TopoCoordTest, GetMemberStateArbiter) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself"
+ << "arbiterOnly" << true)
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ ASSERT_EQUALS(MemberState::RS_ARBITER, getTopoCoord().getMemberState().s);
+}
+
+TEST_F(TopoCoordTest, UnelectableIfAbsentFromConfig) {
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ startCapturingLogMessages();
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(10, 0)));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("not a member of a valid replica set config"));
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+}
+
+TEST_F(TopoCoordTest, UnelectableIfVotedRecently) {
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 10 << "host"
+ << "hself")
+ << BSON("_id" << 20 << "host"
+ << "h2") << BSON("_id" << 30 << "host"
+ << "h3"))),
+ 0);
+ setSelfMemberState(MemberState::RS_SECONDARY);
+ heartbeatFromMember(HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(100, 0));
+
+ // vote for another node
+ OID remoteRound = OID::gen();
+ ReplicationCoordinator::ReplSetElectArgs electArgs;
+ electArgs.set = "rs0";
+ electArgs.round = remoteRound;
+ electArgs.cfgver = 1;
+ electArgs.whoid = 20;
+
+ // need to be 30 secs beyond the start of time to pass last vote lease
+ now() += 30 * 1000;
+ BSONObjBuilder electResponseBuilder;
+ Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse");
+ getTopoCoord().prepareElectResponse(
+ electArgs, now()++, OpTime(100, 0), &electResponseBuilder, &result);
+ BSONObj response = electResponseBuilder.obj();
+ ASSERT_OK(result);
+ ASSERT_EQUALS(1, response["vote"].Int());
+ ASSERT_EQUALS(remoteRound, response["round"].OID());
+
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Debug(3));
+ startCapturingLogMessages();
+ ASSERT_FALSE(getTopoCoord().checkShouldStandForElection(now()++, OpTime(10, 0)));
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("I recently voted for "));
+ logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogSeverity::Log());
+}
} // namespace
} // namespace repl
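
The last test exercises the vote lease: after granting its vote (vote == 1 in the elect response), a node refuses to stand for election until 30 seconds have passed, which is also why the test first advances the clock 30 seconds to get past the lease covering the start of time. A toy stand-in for that bookkeeping; the names are illustrative, the real state lives inside the TopologyCoordinator:

    #include <cstdint>

    const int64_t kVoteLeaseMillis = 30 * 1000;  // 30-second lease the test assumes

    struct VoteLease {
        int64_t lastVoteMillis = -1;  // -1 means no vote cast yet

        void recordVote(int64_t nowMillis) {
            lastVoteMillis = nowMillis;
        }
        bool mayStandForElection(int64_t nowMillis) const {
            return lastVoteMillis < 0 || nowMillis - lastVoteMillis >= kVoteLeaseMillis;
        }
    };
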
diff --git a/src/mongo/db/repl/update_position_args.cpp b/src/mongo/db/repl/update_position_args.cpp
index 78b08bfc483..3cf98b33173 100644
--- a/src/mongo/db/repl/update_position_args.cpp
+++ b/src/mongo/db/repl/update_position_args.cpp
@@ -39,108 +39,105 @@ namespace mongo {
namespace repl {
- UpdatePositionArgs::UpdateInfo::UpdateInfo(
- const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId)
- : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {}
+UpdatePositionArgs::UpdateInfo::UpdateInfo(const OID& anRid,
+ const OpTime& aTs,
+ long long aCfgver,
+ long long aMemberId)
+ : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {}
namespace {
- const std::string kCommandFieldName = "replSetUpdatePosition";
- const std::string kUpdateArrayFieldName = "optimes";
-
- const std::string kLegalUpdatePositionFieldNames[] = {
- kCommandFieldName,
- kUpdateArrayFieldName,
- };
-
- const std::string kMemberRIDFieldName = "_id";
- const std::string kMemberConfigFieldName = "config";
- const std::string kOpTimeFieldName = "optime";
- const std::string kMemberIdFieldName = "memberId";
- const std::string kConfigVersionFieldName = "cfgver";
-
- const std::string kLegalUpdateInfoFieldNames[] = {
- kMemberConfigFieldName,
- kMemberRIDFieldName,
- kOpTimeFieldName,
- kMemberIdFieldName,
- kConfigVersionFieldName,
- };
-
-} // namespace
+const std::string kCommandFieldName = "replSetUpdatePosition";
+const std::string kUpdateArrayFieldName = "optimes";
+
+const std::string kLegalUpdatePositionFieldNames[] = {
+ kCommandFieldName, kUpdateArrayFieldName,
+};
+
+const std::string kMemberRIDFieldName = "_id";
+const std::string kMemberConfigFieldName = "config";
+const std::string kOpTimeFieldName = "optime";
+const std::string kMemberIdFieldName = "memberId";
+const std::string kConfigVersionFieldName = "cfgver";
+
+const std::string kLegalUpdateInfoFieldNames[] = {
+ kMemberConfigFieldName,
+ kMemberRIDFieldName,
+ kOpTimeFieldName,
+ kMemberIdFieldName,
+ kConfigVersionFieldName,
+};
+
+} // namespace
+
+Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("UpdatePositionArgs", argsObj, kLegalUpdatePositionFieldNames);
+
+ if (!status.isOK())
+ return status;
+
+ // grab the array of changes
+ BSONElement updateArray;
+ status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
+ if (!status.isOK())
+ return status;
+
+ // now parse each array entry into an update
+ BSONObjIterator i(updateArray.Obj());
+ while (i.more()) {
+ BSONObj entry = i.next().Obj();
+ status = bsonCheckOnlyHasFields("UpdateInfoArgs", entry, kLegalUpdateInfoFieldNames);
+ if (!status.isOK())
+ return status;
- Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
- Status status = bsonCheckOnlyHasFields("UpdatePositionArgs",
- argsObj,
- kLegalUpdatePositionFieldNames);
+ OpTime ts;
+ status = bsonExtractOpTimeField(entry, kOpTimeFieldName, &ts);
+ if (!status.isOK())
+ return status;
+ // TODO(spencer): The following three fields are optional in 3.0, but should be made
+    // required or ignored in 3.2
+ long long cfgver;
+ status = bsonExtractIntegerFieldWithDefault(entry, kConfigVersionFieldName, -1, &cfgver);
if (!status.isOK())
return status;
- // grab the array of changes
- BSONElement updateArray;
- status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
+ OID rid;
+ status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid);
if (!status.isOK())
return status;
- // now parse each array entry into an update
- BSONObjIterator i(updateArray.Obj());
- while(i.more()) {
- BSONObj entry = i.next().Obj();
- status = bsonCheckOnlyHasFields("UpdateInfoArgs",
- entry,
- kLegalUpdateInfoFieldNames);
- if (!status.isOK())
- return status;
-
- OpTime ts;
- status = bsonExtractOpTimeField(entry, kOpTimeFieldName, &ts);
- if (!status.isOK())
- return status;
-
- // TODO(spencer): The following three fields are optional in 3.0, but should be made
- // required or ignored in 3.0
- long long cfgver;
- status = bsonExtractIntegerFieldWithDefault(entry, kConfigVersionFieldName, -1, &cfgver);
- if (!status.isOK())
- return status;
-
- OID rid;
- status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid);
- if (!status.isOK())
- return status;
-
- long long memberID;
- status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID);
- if (!status.isOK())
- return status;
-
- _updates.push_back(UpdateInfo(rid, ts, cfgver, memberID));
- }
+ long long memberID;
+ status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID);
+ if (!status.isOK())
+ return status;
- return Status::OK();
+ _updates.push_back(UpdateInfo(rid, ts, cfgver, memberID));
}
- BSONObj UpdatePositionArgs::toBSON() const {
- BSONObjBuilder builder;
- // add command name
- builder.append(kCommandFieldName, 1);
-
- // build array of updates
- if (!_updates.empty()) {
- BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName));
- for (UpdatePositionArgs::UpdateIterator update = updatesBegin();
- update != updatesEnd();
- ++update) {
- updateArray.append(BSON(kMemberRIDFieldName << update->rid <<
- kOpTimeFieldName << update->ts <<
- kConfigVersionFieldName << update->cfgver <<
- kMemberIdFieldName << update->memberId));
- }
- updateArray.doneFast();
+ return Status::OK();
+}
+
+BSONObj UpdatePositionArgs::toBSON() const {
+ BSONObjBuilder builder;
+ // add command name
+ builder.append(kCommandFieldName, 1);
+
+ // build array of updates
+ if (!_updates.empty()) {
+ BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName));
+ for (UpdatePositionArgs::UpdateIterator update = updatesBegin(); update != updatesEnd();
+ ++update) {
+ updateArray.append(BSON(kMemberRIDFieldName << update->rid << kOpTimeFieldName
+ << update->ts << kConfigVersionFieldName
+ << update->cfgver << kMemberIdFieldName
+ << update->memberId));
}
- return builder.obj();
+ updateArray.doneFast();
}
+ return builder.obj();
+}
} // namespace repl
} // namespace mongo
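
The rewritten initialize() parses a document of the shape { replSetUpdatePosition: 1, optimes: [ { _id, optime, cfgver, memberId }, ... ] }: unknown fields are rejected, optime is required, and cfgver and memberId default to -1. A self-contained sketch of that per-entry ordering with plain-C++ stand-ins for the BSON helpers (the _id/rid OID default is omitted for brevity):

    #include <map>
    #include <string>

    // Hypothetical stand-in for one "optimes" array entry.
    using Entry = std::map<std::string, long long>;

    struct Update {
        long long optime, cfgver, memberId;
    };

    bool parseEntry(const Entry& entry, Update* out) {
        auto ts = entry.find("optime");
        if (ts == entry.end())
            return false;  // optime is required, like bsonExtractOpTimeField
        out->optime = ts->second;
        // cfgver and memberId fall back to -1, mirroring
        // bsonExtractIntegerFieldWithDefault in the real parser.
        auto getOrDefault = [&entry](const char* name) {
            auto it = entry.find(name);
            return it == entry.end() ? -1LL : it->second;
        };
        out->cfgver = getOrDefault("cfgver");
        out->memberId = getOrDefault("memberId");
        return true;
    }
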
diff --git a/src/mongo/db/repl/update_position_args.h b/src/mongo/db/repl/update_position_args.h
index 9d9afebced2..a30b5b8029f 100644
--- a/src/mongo/db/repl/update_position_args.h
+++ b/src/mongo/db/repl/update_position_args.h
@@ -34,49 +34,54 @@
namespace mongo {
- class Status;
+class Status;
namespace repl {
- /**
- * Arguments to the handshake command.
- */
- class UpdatePositionArgs {
- public:
- struct UpdateInfo {
- UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId);
+/**
+ * Arguments to the replSetUpdatePosition command.
+ */
+class UpdatePositionArgs {
+public:
+ struct UpdateInfo {
+ UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId);
- OID rid;
- OpTime ts;
- long long cfgver;
- long long memberId;
- };
+ OID rid;
+ OpTime ts;
+ long long cfgver;
+ long long memberId;
+ };
- typedef std::vector<UpdateInfo>::const_iterator UpdateIterator;
+ typedef std::vector<UpdateInfo>::const_iterator UpdateIterator;
- /**
- * Initializes this UpdatePositionArgs from the contents of "argsObj".
- */
- Status initialize(const BSONObj& argsObj);
+ /**
+ * Initializes this UpdatePositionArgs from the contents of "argsObj".
+ */
+ Status initialize(const BSONObj& argsObj);
- /**
- * Gets a begin iterator over the UpdateInfos stored in this UpdatePositionArgs.
- */
- UpdateIterator updatesBegin() const { return _updates.begin(); }
+ /**
+ * Gets a begin iterator over the UpdateInfos stored in this UpdatePositionArgs.
+ */
+ UpdateIterator updatesBegin() const {
+ return _updates.begin();
+ }
- /**
- * Gets an end iterator over the UpdateInfos stored in this UpdatePositionArgs.
- */
- UpdateIterator updatesEnd() const { return _updates.end(); }
+ /**
+ * Gets an end iterator over the UpdateInfos stored in this UpdatePositionArgs.
+ */
+ UpdateIterator updatesEnd() const {
+ return _updates.end();
+ }
- /**
- * Returns a BSONified version of the object.
- * _updates is only included if it is not empty.
- */
- BSONObj toBSON() const;
- private:
- std::vector<UpdateInfo> _updates;
- };
+ /**
+ * Returns a BSONified version of the object.
+ * _updates is only included if it is not empty.
+ */
+ BSONObj toBSON() const;
+
+private:
+ std::vector<UpdateInfo> _updates;
+};
-} // namespace repl
-} // namespace mongo
+} // namespace repl
+} // namespace mongo
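
Since the header keeps the iterator-pair accessors, consuming parsed updates is a plain const-iterator walk. A short usage sketch against the declarations above (assumes the surrounding mongo headers when actually built):

    #include "mongo/db/repl/update_position_args.h"

    // Walks the UpdateInfos using the accessors declared in this header.
    void visitUpdates(const mongo::repl::UpdatePositionArgs& args) {
        for (mongo::repl::UpdatePositionArgs::UpdateIterator it = args.updatesBegin();
             it != args.updatesEnd();
             ++it) {
            long long memberId = it->memberId;  // fields come from UpdateInfo
            long long cfgver = it->cfgver;
            (void)memberId;
            (void)cfgver;
        }
    }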