diff options
author | Matthew Russotto <matthew.russotto@10gen.com> | 2017-05-04 15:18:56 -0400 |
---|---|---|
committer | Matthew Russotto <matthew.russotto@10gen.com> | 2017-05-04 15:18:56 -0400 |
commit | b2d70219b85a3462fb902618500b2f63f5f188b5 (patch) | |
tree | e6d7c3bfbfaec6a1dc0418672e41fdc1b6ac2956 | |
parent | 32dccaea13fda911c5367287a39290706380941e (diff) | |
download | mongo-b2d70219b85a3462fb902618500b2f63f5f188b5.tar.gz |
Revert "SERVER-26990 Unify tracking of secondary state between replication and topology coordinators"
This reverts commit 6adc71f6cf069803f9c1288aef88ffe0d21c6ffe.
-rw-r--r-- | src/mongo/db/repl/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/heartbeat_response_action.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/heartbeat_response_action.h | 14 | ||||
-rw-r--r-- | src/mongo/db/repl/member_heartbeat_data.cpp | 56 | ||||
-rw-r--r-- | src/mongo/db/repl/member_heartbeat_data.h | 137 | ||||
-rw-r--r-- | src/mongo/db/repl/repl_set_html_summary.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 548 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 137 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 103 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.h | 161 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl.cpp | 586 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl.h | 103 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl_test.cpp | 929 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp | 1020 |
16 files changed, 2164 insertions, 1655 deletions
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 56025019ef4..335cd86b51b 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -521,7 +521,6 @@ env.CppUnitTest( env.Library('topology_coordinator', [ 'heartbeat_response_action.cpp', - 'member_heartbeat_data.cpp', 'topology_coordinator.cpp', ], LIBDEPS=[ @@ -531,6 +530,7 @@ env.Library('topology_coordinator', env.Library('topology_coordinator_impl', [ + 'member_heartbeat_data.cpp', 'topology_coordinator_impl.cpp', ], LIBDEPS=[ diff --git a/src/mongo/db/repl/heartbeat_response_action.cpp b/src/mongo/db/repl/heartbeat_response_action.cpp index 7e787d42c96..97bfbd0c29a 100644 --- a/src/mongo/db/repl/heartbeat_response_action.cpp +++ b/src/mongo/db/repl/heartbeat_response_action.cpp @@ -75,9 +75,5 @@ void HeartbeatResponseAction::setNextHeartbeatStartDate(Date_t when) { _nextHeartbeatStartDate = when; } -void HeartbeatResponseAction::setAdvancedOpTime(bool advanced) { - _advancedOpTime = advanced; -} - } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/heartbeat_response_action.h b/src/mongo/db/repl/heartbeat_response_action.h index bb009600e5a..23c35d9c884 100644 --- a/src/mongo/db/repl/heartbeat_response_action.h +++ b/src/mongo/db/repl/heartbeat_response_action.h @@ -101,11 +101,6 @@ public: void setNextHeartbeatStartDate(Date_t when); /** - * Sets whether or not the heartbeat response advanced the member's opTime. - */ - void setAdvancedOpTime(bool advanced); - - /** * Gets the action type of this action. */ Action getAction() const { @@ -128,19 +123,10 @@ public: return _primaryIndex; } - /* - * Returns true if the heartbeat response resulting in our conception of the - * member's optime moving forward, so we need to recalculate lastCommittedOpTime. - */ - bool getAdvancedOpTime() const { - return _advancedOpTime; - } - private: Action _action; int _primaryIndex; Date_t _nextHeartbeatStartDate; - bool _advancedOpTime = false; }; } // namespace repl diff --git a/src/mongo/db/repl/member_heartbeat_data.cpp b/src/mongo/db/repl/member_heartbeat_data.cpp index 7b553a1e682..1b9b9ea3f13 100644 --- a/src/mongo/db/repl/member_heartbeat_data.cpp +++ b/src/mongo/db/repl/member_heartbeat_data.cpp @@ -39,14 +39,13 @@ namespace mongo { namespace repl { -MemberHeartbeatData::MemberHeartbeatData() - : _health(-1), _authIssue(false), _configIndex(-1), _isSelf(false) { +MemberHeartbeatData::MemberHeartbeatData() : _health(-1), _authIssue(false) { _lastResponse.setState(MemberState::RS_UNKNOWN); _lastResponse.setElectionTime(Timestamp()); _lastResponse.setAppliedOpTime(OpTime()); } -bool MemberHeartbeatData::setUpValues(Date_t now, +void MemberHeartbeatData::setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse&& hbResponse) { _health = 1; @@ -55,8 +54,6 @@ bool MemberHeartbeatData::setUpValues(Date_t now, } _authIssue = false; _lastHeartbeat = now; - _lastUpdate = now; - _lastUpdateStale = false; _updatedSinceRestart = true; if (!hbResponse.hasState()) { @@ -74,11 +71,7 @@ bool MemberHeartbeatData::setUpValues(Date_t now, << hbResponse.getState().toString() << rsLog; } - bool opTimeAdvanced = advanceLastAppliedOpTime(hbResponse.getAppliedOpTime(), now); - auto durableOpTime = hbResponse.hasDurableOpTime() ? hbResponse.getDurableOpTime() : OpTime(); - opTimeAdvanced = advanceLastDurableOpTime(durableOpTime, now) || opTimeAdvanced; _lastResponse = std::move(hbResponse); - return opTimeAdvanced; } void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeatMessage) { @@ -94,9 +87,6 @@ void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeat _lastResponse.setAppliedOpTime(OpTime()); _lastResponse.setHbMsg(heartbeatMessage); _lastResponse.setSyncingTo(HostAndPort()); - - // The _lastAppliedOpTime/_lastDurableOpTime fields don't get cleared merely by missing a - // heartbeat. } void MemberHeartbeatData::setAuthIssue(Date_t now) { @@ -114,47 +104,5 @@ void MemberHeartbeatData::setAuthIssue(Date_t now) { _lastResponse.setSyncingTo(HostAndPort()); } -void MemberHeartbeatData::setLastAppliedOpTime(OpTime opTime, Date_t now) { - _lastUpdate = now; - _lastUpdateStale = false; - _lastAppliedOpTime = opTime; -} - -void MemberHeartbeatData::setLastDurableOpTime(OpTime opTime, Date_t now) { - _lastUpdate = now; - _lastUpdateStale = false; - if (_lastAppliedOpTime < opTime) { - // TODO(russotto): We think this should never happen, rollback or no rollback. Make this an - // invariant and see what happens. - log() << "Durable progress (" << opTime << ") is ahead of the applied progress (" - << _lastAppliedOpTime << ". This is likely due to a " - "rollback." - << " memberid: " << _memberId << " rid: " << _rid << " host " - << _hostAndPort.toString() << " previous durable progress: " << _lastDurableOpTime; - } else { - _lastDurableOpTime = opTime; - } -} - -bool MemberHeartbeatData::advanceLastAppliedOpTime(OpTime opTime, Date_t now) { - _lastUpdate = now; - _lastUpdateStale = false; - if (_lastAppliedOpTime < opTime) { - setLastAppliedOpTime(opTime, now); - return true; - } - return false; -} - -bool MemberHeartbeatData::advanceLastDurableOpTime(OpTime opTime, Date_t now) { - _lastUpdate = now; - _lastUpdateStale = false; - if (_lastDurableOpTime < opTime) { - setLastDurableOpTime(opTime, now); - return true; - } - return false; -} - } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/member_heartbeat_data.h b/src/mongo/db/repl/member_heartbeat_data.h index 4122ff86ec2..f67a0a87757 100644 --- a/src/mongo/db/repl/member_heartbeat_data.h +++ b/src/mongo/db/repl/member_heartbeat_data.h @@ -68,10 +68,10 @@ public: const HostAndPort& getSyncSource() const { return _lastResponse.getSyncingTo(); } - OpTime getHeartbeatAppliedOpTime() const { + OpTime getAppliedOpTime() const { return _lastResponse.getAppliedOpTime(); } - OpTime getHeartbeatDurableOpTime() const { + OpTime getDurableOpTime() const { return _lastResponse.hasDurableOpTime() ? _lastResponse.getDurableOpTime() : OpTime(); } int getConfigVersion() const { @@ -105,49 +105,10 @@ public: return _health != 0; } - OpTime getLastAppliedOpTime() const { - return _lastAppliedOpTime; - } - - OpTime getLastDurableOpTime() const { - return _lastDurableOpTime; - } - - // When was the last time this data was updated via any means? - Date_t getLastUpdate() const { - return _lastUpdate; - } - // Was the last update stale as of the last check? - bool lastUpdateStale() const { - return _lastUpdateStale; - } - - // Index of this member in the replica set config member list. - int getConfigIndex() const { - return _configIndex; - } - - int getMemberId() const { - return _memberId; - } - - OID getRid() const { - return _rid; - } - - bool isSelf() const { - return _isSelf; - } - - HostAndPort getHostAndPort() const { - return _hostAndPort; - } - /** * Sets values in this object from the results of a successful heartbeat command. - * Returns whether or not the optimes advanced as a result of this heartbeat response. */ - bool setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse&& hbResponse); + void setUpValues(Date_t now, const HostAndPort& host, ReplSetHeartbeatResponse&& hbResponse); /** * Sets values in this object from the results of a erroring/failed heartbeat command. @@ -173,66 +134,6 @@ public: return _updatedSinceRestart; } - /** - * Sets the last applied op time (not the heartbeat applied op time) and updates the - * lastUpdate time. - */ - void setLastAppliedOpTime(OpTime opTime, Date_t now); - - /** - * Sets the last durable op time (not the heartbeat durable op time) - */ - void setLastDurableOpTime(OpTime opTime, Date_t now); - - /** - * Sets the last applied op time (not the heartbeat applied op time) iff the new optime is - * later than the current optime, and updates the lastUpdate time. Returns true if the - * optime was advanced. - */ - bool advanceLastAppliedOpTime(OpTime opTime, Date_t now); - - /** - * Sets the last durable op time (not the heartbeat applied op time) iff the new optime is - * later than the current optime, and updates the lastUpdate time. Returns true if the - * optime was advanced. - */ - bool advanceLastDurableOpTime(OpTime opTime, Date_t now); - - /* - * Indicates that this data is stale, based on _lastUpdateTime. - */ - void markLastUpdateStale() { - _lastUpdateStale = true; - } - - /* - * Updates the _lastUpdateTime and clears staleness without changing anything else. - */ - void updateLiveness(Date_t now) { - _lastUpdate = now; - _lastUpdateStale = false; - } - - void setConfigIndex(int configIndex) { - _configIndex = configIndex; - } - - void setIsSelf(bool isSelf) { - _isSelf = isSelf; - } - - void setHostAndPort(HostAndPort hostAndPort) { - _hostAndPort = hostAndPort; - } - - void setMemberId(int memberId) { - _memberId = memberId; - } - - void setRid(OID rid) { - _rid = rid; - } - private: // -1 = not checked yet, 0 = member is down/unreachable, 1 = member is up int _health; @@ -252,38 +153,6 @@ private: // Have we received heartbeats since the last restart? bool _updatedSinceRestart = false; - - // Last time we got any information about this member, whether heartbeat - // or replSetUpdatePosition. - Date_t _lastUpdate; - - // Set when lastUpdate time exceeds the election timeout. Implies that the member is down - // on the primary, but not the secondaries. - bool _lastUpdateStale = false; - - // Last known OpTime that the replica has applied and journaled to. - OpTime _lastDurableOpTime; - - // Last known OpTime that the replica has applied, whether journaled or unjournaled. - OpTime _lastAppliedOpTime; - - // TODO(russotto): Since memberHeartbeatData is kept in config order, _configIndex - // and _isSelf may not be necessary. - // Index of this member in the replica set configuration. - int _configIndex; - - // Is this the data for this member? - bool _isSelf; - - // This member's RID, used only in master/slave replication. - OID _rid; - - // This member's member ID. memberId and hostAndPort duplicate information in the - // configuration for replica sets, but are required to be here for master/slave replication. - int _memberId = -1; - - // Client address of this member. - HostAndPort _hostAndPort; }; } // namespace repl diff --git a/src/mongo/db/repl/repl_set_html_summary.cpp b/src/mongo/db/repl/repl_set_html_summary.cpp index d59a7c31649..14c2ff81b7d 100644 --- a/src/mongo/db/repl/repl_set_html_summary.cpp +++ b/src/mongo/db/repl/repl_set_html_summary.cpp @@ -187,7 +187,7 @@ const std::string ReplSetHtmlSummary::toHtmlString() const { // TODO(dannenberg): change timestamp to optime in V1 memberTable << td(memberHB.getLastHeartbeat() == Date_t() ? "?" - : memberHB.getHeartbeatAppliedOpTime().toString()); + : memberHB.getAppliedOpTime().toString()); } memberTable << _tr(); } @@ -201,7 +201,7 @@ const std::string ReplSetHtmlSummary::toHtmlString() const { const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex); if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) { - int lag = _hbData[_primaryIndex].getHeartbeatAppliedOpTime().getTimestamp().getSecs() - + int lag = _hbData[_primaryIndex].getAppliedOpTime().getTimestamp().getSecs() - _selfOptime.getTimestamp().getSecs(); s << tr("Lag: ", str::stream() << lag << " secs"); } diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 7f8af45852c..4eb100e6de7 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -136,6 +136,25 @@ BSONObj incrementConfigVersionByRandom(BSONObj config) { const Seconds kNoopWriterPeriod(10); } // namespace +BSONObj ReplicationCoordinatorImpl::SlaveInfo::toBSON() const { + BSONObjBuilder bo; + bo.append("id", memberId); + bo.append("rid", rid); + bo.append("host", hostAndPort.toString()); + bo.append("lastDurableOpTime", lastDurableOpTime.toBSON()); + bo.append("lastAppliedOpTime", lastAppliedOpTime.toBSON()); + if (self) + bo.append("self", true); + if (down) + bo.append("down", true); + bo.append("lastUpdated", lastUpdate); + return bo.obj(); +} + +std::string ReplicationCoordinatorImpl::SlaveInfo::toString() const { + return toBSON().toString(); +} + ReplicationCoordinatorImpl::Waiter::Waiter(OpTime _opTime, const WriteConcernOptions* _writeConcern) : opTime(std::move(_opTime)), writeConcern(_writeConcern) {} @@ -313,6 +332,11 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl( return; } + // Make sure there is always an entry in _slaveInfo for ourself. + SlaveInfo selfInfo; + selfInfo.self = true; + _slaveInfo.push_back(selfInfo); + _externalState->setupNoopWriter(kNoopWriterPeriod); } @@ -658,7 +682,7 @@ void ReplicationCoordinatorImpl::startup(OperationContext* opCtx) { fassert(18822, !_inShutdown); _setConfigState_inlock(kConfigStartingUp); _myRID = rid; - _topCoord->getMyMemberHeartbeatData()->setRid(rid); + _slaveInfo[_getMyIndexInSlaveInfo_inlock()].rid = rid; } if (!_settings.usingReplSets()) { @@ -894,7 +918,7 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx, lk.unlock(); OpTime firstOpTime = _externalState->onTransitionToPrimary(opCtx, isV1ElectionProtocol()); lk.lock(); - _topCoord->setFirstOpTimeOfMyTerm(firstOpTime); + _setFirstOpTimeOfMyTerm_inlock(firstOpTime); // Must calculate the commit level again because firstOpTimeOfMyTerm wasn't set when we logged // our election in onTransitionToPrimary(), above. @@ -923,13 +947,127 @@ void ReplicationCoordinatorImpl::signalUpstreamUpdater() { _externalState->forwardSlaveProgress(); } -void ReplicationCoordinatorImpl::_updateLastCommittedOpTimeAndWake_inlock() { +ReplicationCoordinatorImpl::SlaveInfo* ReplicationCoordinatorImpl::_findSlaveInfoByMemberID_inlock( + int memberId) { + for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) { + if (it->memberId == memberId) { + return &(*it); + } + } + return NULL; +} + +ReplicationCoordinatorImpl::SlaveInfo* ReplicationCoordinatorImpl::_findSlaveInfoByRID_inlock( + const OID& rid) { + for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) { + if (it->rid == rid) { + return &(*it); + } + } + return NULL; +} + +void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo) { + invariant(getReplicationMode() == modeMasterSlave); + _slaveInfo.push_back(slaveInfo); + _updateLastCommittedOpTime_inlock(); // Wake up any threads waiting for replication that now have their replication // check satisfied _wakeReadyWaiters_inlock(); } +void ReplicationCoordinatorImpl::_updateSlaveInfoAppliedOpTime_inlock(SlaveInfo* slaveInfo, + const OpTime& opTime) { + slaveInfo->lastAppliedOpTime = opTime; + slaveInfo->lastUpdate = _replExecutor->now(); + slaveInfo->down = false; + + _updateLastCommittedOpTime_inlock(); + // Wake up any threads waiting for replication that now have their replication + // check satisfied + _wakeReadyWaiters_inlock(); +} + +void ReplicationCoordinatorImpl::_updateSlaveInfoDurableOpTime_inlock(SlaveInfo* slaveInfo, + const OpTime& opTime) { + // lastAppliedOpTime cannot be behind lastDurableOpTime. + if (slaveInfo->lastAppliedOpTime < opTime) { + log() << "Durable progress (" << opTime << ") is ahead of the applied progress (" + << slaveInfo->lastAppliedOpTime << ". This is likely due to a " + "rollback. slaveInfo: " + << slaveInfo->toString(); + return; + } + slaveInfo->lastDurableOpTime = opTime; + slaveInfo->lastUpdate = _replExecutor->now(); + slaveInfo->down = false; + + _updateLastCommittedOpTime_inlock(); + // Wake up any threads waiting for replication that now have their replication + // check satisfied + _wakeReadyWaiters_inlock(); +} + +void ReplicationCoordinatorImpl::_updateSlaveInfoFromConfig_inlock() { + invariant(_settings.usingReplSets()); + + SlaveInfoVector oldSlaveInfos; + _slaveInfo.swap(oldSlaveInfos); + + if (_selfIndex == -1) { + // If we aren't in the config then the only data we care about is for ourself + for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin(); it != oldSlaveInfos.end(); + ++it) { + if (it->self) { + SlaveInfo slaveInfo = *it; + slaveInfo.memberId = -1; + _slaveInfo.push_back(slaveInfo); + return; + } + } + invariant(false); // There should always have been an entry for ourself + } + + for (int i = 0; i < _rsConfig.getNumMembers(); ++i) { + const MemberConfig& memberConfig = _rsConfig.getMemberAt(i); + int memberId = memberConfig.getId(); + const HostAndPort& memberHostAndPort = memberConfig.getHostAndPort(); + + SlaveInfo slaveInfo; + + // Check if the node existed with the same member ID and hostname in the old data + for (SlaveInfoVector::const_iterator it = oldSlaveInfos.begin(); it != oldSlaveInfos.end(); + ++it) { + if ((it->memberId == memberId && it->hostAndPort == memberHostAndPort) || + (i == _selfIndex && it->self)) { + slaveInfo = *it; + } + } + + // Make sure you have the most up-to-date info for member ID and hostAndPort. + slaveInfo.memberId = memberId; + slaveInfo.hostAndPort = memberHostAndPort; + _slaveInfo.push_back(slaveInfo); + } + invariant(static_cast<int>(_slaveInfo.size()) == _rsConfig.getNumMembers()); +} + +size_t ReplicationCoordinatorImpl::_getMyIndexInSlaveInfo_inlock() const { + if (getReplicationMode() == modeMasterSlave) { + // Self data always lives in the first entry in _slaveInfo for master/slave + return 0; + } else { + invariant(_settings.usingReplSets()); + if (_selfIndex == -1) { + invariant(_slaveInfo.size() == 1); + return 0; + } else { + return _selfIndex; + } + } +} + Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid, const Timestamp& ts) { stdx::unique_lock<stdx::mutex> lock(_mutex); massert(28576, @@ -940,14 +1078,17 @@ Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid, const T // term == -1 for master-slave OpTime opTime(ts, OpTime::kUninitializedTerm); - MemberHeartbeatData* memberHeartbeatData = _topCoord->findMemberHeartbeatDataByRid(rid); - if (memberHeartbeatData) { - memberHeartbeatData->advanceLastAppliedOpTime(opTime, _replExecutor->now()); + SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid); + if (slaveInfo) { + if (slaveInfo->lastAppliedOpTime < opTime) { + _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, opTime); + } } else { - auto* memberHeartbeatData = _topCoord->addSlaveMemberData(rid); - memberHeartbeatData->setLastAppliedOpTime(opTime, _replExecutor->now()); + SlaveInfo newSlaveInfo; + newSlaveInfo.rid = rid; + newSlaveInfo.lastAppliedOpTime = opTime; + _addSlaveInfo_inlock(newSlaveInfo); } - _updateLastCommittedOpTimeAndWake_inlock(); return Status::OK(); } @@ -1016,20 +1157,27 @@ void ReplicationCoordinatorImpl::_reportUpstream_inlock(stdx::unique_lock<stdx:: void ReplicationCoordinatorImpl::_setMyLastAppliedOpTime_inlock(const OpTime& opTime, bool isRollbackAllowed) { - auto* myMemberHeartbeatData = _topCoord->getMyMemberHeartbeatData(); - invariant(isRollbackAllowed || myMemberHeartbeatData->getLastAppliedOpTime() <= opTime); - myMemberHeartbeatData->setLastAppliedOpTime(opTime, _replExecutor->now()); - _updateLastCommittedOpTimeAndWake_inlock(); + SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()]; + invariant(isRollbackAllowed || mySlaveInfo->lastAppliedOpTime <= opTime); + _updateSlaveInfoAppliedOpTime_inlock(mySlaveInfo, opTime); + _opTimeWaiterList.signalAndRemoveIf_inlock( [opTime](Waiter* waiter) { return waiter->opTime <= opTime; }); } void ReplicationCoordinatorImpl::_setMyLastDurableOpTime_inlock(const OpTime& opTime, bool isRollbackAllowed) { - auto* myMemberHeartbeatData = _topCoord->getMyMemberHeartbeatData(); - invariant(isRollbackAllowed || myMemberHeartbeatData->getLastDurableOpTime() <= opTime); - myMemberHeartbeatData->setLastDurableOpTime(opTime, _replExecutor->now()); - _updateLastCommittedOpTimeAndWake_inlock(); + SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()]; + invariant(isRollbackAllowed || mySlaveInfo->lastDurableOpTime <= opTime); + // lastAppliedOpTime cannot be behind lastDurableOpTime. + if (mySlaveInfo->lastAppliedOpTime < opTime) { + log() << "My durable progress (" << opTime << ") is ahead of my applied progress (" + << mySlaveInfo->lastAppliedOpTime << ". This is likely due to a " + "rollback. slaveInfo: " + << mySlaveInfo->toString(); + return; + } + _updateSlaveInfoDurableOpTime_inlock(mySlaveInfo, opTime); } OpTime ReplicationCoordinatorImpl::getMyLastAppliedOpTime() const { @@ -1196,11 +1344,11 @@ Status ReplicationCoordinatorImpl::_waitUntilOpTimeForReadDeprecated( } OpTime ReplicationCoordinatorImpl::_getMyLastAppliedOpTime_inlock() const { - return _topCoord->getMyLastAppliedOpTime(); + return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].lastAppliedOpTime; } OpTime ReplicationCoordinatorImpl::_getMyLastDurableOpTime_inlock() const { - return _topCoord->getMyLastDurableOpTime(); + return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].lastDurableOpTime; } Status ReplicationCoordinatorImpl::setLastDurableOptime_forTest(long long cfgVer, @@ -1255,6 +1403,7 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock( << " in config with version " << args.cfgver << " has durably reached optime: " << args.ts; + SlaveInfo* slaveInfo = NULL; if (args.cfgver != _rsConfig.getConfigVersion()) { std::string errmsg = str::stream() << "Received replSetUpdatePosition for node with memberId " << args.memberId @@ -1265,8 +1414,8 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock( return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg); } - auto* memberHeartbeatData = _topCoord->findMemberHeartbeatDataByMemberId(args.memberId); - if (!memberHeartbeatData) { + slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId); + if (!slaveInfo) { invariant(!_rsConfig.findMemberByID(args.memberId)); std::string errmsg = str::stream() @@ -1276,22 +1425,25 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock( return Status(ErrorCodes::NodeNotFound, errmsg); } - invariant(args.memberId == memberHeartbeatData->getMemberId()); + invariant(args.memberId == slaveInfo->memberId); - LOG(3) << "Node with memberID " << args.memberId << " has durably applied operations through " - << memberHeartbeatData->getLastDurableOpTime() << " and has applied operations through " - << memberHeartbeatData->getLastAppliedOpTime() - << "; updating to new durable operation with timestamp " << args.ts; + LOG(3) << "Node with memberID " << args.memberId << " has durably applied operationss through " + << slaveInfo->lastDurableOpTime << " and has applied operations through " + << slaveInfo->lastAppliedOpTime << "; updating to new durable operation with timestamp " + << args.ts; - auto now(_replExecutor->now()); - bool advancedOpTime = memberHeartbeatData->advanceLastAppliedOpTime(args.ts, now); - advancedOpTime = memberHeartbeatData->advanceLastDurableOpTime(args.ts, now) || advancedOpTime; - - // Only update committed optime if the remote optimes increased. - if (advancedOpTime) { - _updateLastCommittedOpTimeAndWake_inlock(); + // Only update remote optimes if they increase. + if (slaveInfo->lastAppliedOpTime < args.ts) { + _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, args.ts); + } + if (slaveInfo->lastDurableOpTime < args.ts) { + _updateSlaveInfoDurableOpTime_inlock(slaveInfo, args.ts); } + + // Update liveness for this node. + slaveInfo->lastUpdate = _replExecutor->now(); + slaveInfo->down = false; _cancelAndRescheduleLivenessUpdate_inlock(args.memberId); return Status::OK(); } @@ -1323,6 +1475,7 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg << " has reached optime: " << args.appliedOpTime << " and is durable through: " << args.durableOpTime; + SlaveInfo* slaveInfo = NULL; if (args.cfgver != _rsConfig.getConfigVersion()) { std::string errmsg = str::stream() << "Received replSetUpdatePosition for node with memberId " << args.memberId @@ -1333,8 +1486,8 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg); } - auto* memberHeartbeatData = _topCoord->findMemberHeartbeatDataByMemberId(args.memberId); - if (!memberHeartbeatData) { + slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId); + if (!slaveInfo) { invariant(!_rsConfig.findMemberByID(args.memberId)); std::string errmsg = str::stream() @@ -1344,24 +1497,25 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg return Status(ErrorCodes::NodeNotFound, errmsg); } - invariant(args.memberId == memberHeartbeatData->getMemberId()); + invariant(args.memberId == slaveInfo->memberId); LOG(3) << "Node with memberID " << args.memberId << " currently has optime " - << memberHeartbeatData->getLastAppliedOpTime() << " durable through " - << memberHeartbeatData->getLastDurableOpTime() << "; updating to optime " - << args.appliedOpTime << " and durable through " << args.durableOpTime; - + << slaveInfo->lastAppliedOpTime << " durable through " << slaveInfo->lastDurableOpTime + << "; updating to optime " << args.appliedOpTime << " and durable through " + << args.durableOpTime; - auto now(_replExecutor->now()); - bool advancedOpTime = memberHeartbeatData->advanceLastAppliedOpTime(args.appliedOpTime, now); - advancedOpTime = - memberHeartbeatData->advanceLastDurableOpTime(args.durableOpTime, now) || advancedOpTime; - // Only update committed optime if the remote optimes increased. - if (advancedOpTime) { - _updateLastCommittedOpTimeAndWake_inlock(); + // Only update remote optimes if they increase. + if (slaveInfo->lastAppliedOpTime < args.appliedOpTime) { + _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, args.appliedOpTime); + } + if (slaveInfo->lastDurableOpTime < args.durableOpTime) { + _updateSlaveInfoDurableOpTime_inlock(slaveInfo, args.durableOpTime); } + // Update liveness for this node. + slaveInfo->lastUpdate = _replExecutor->now(); + slaveInfo->down = false; _cancelAndRescheduleLivenessUpdate_inlock(args.memberId); return Status::OK(); } @@ -1378,8 +1532,7 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock( const bool useDurableOpTime = writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL; if (writeConcern.wMode.empty()) { - return _topCoord->haveNumNodesReachedOpTime( - opTime, writeConcern.wNumNodes, useDurableOpTime); + return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes, useDurableOpTime); } StringData patternName; @@ -1412,7 +1565,53 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock( if (!tagPattern.isOK()) { return true; } - return _topCoord->haveTaggedNodesReachedOpTime(opTime, tagPattern.getValue(), useDurableOpTime); + return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue(), useDurableOpTime); +} + +bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& targetOpTime, + int numNodes, + bool durablyWritten) { + // Replication progress that is for some reason ahead of us should not allow us to + // satisfy a write concern if we aren't caught up ourselves. + OpTime myOpTime = + durablyWritten ? _getMyLastDurableOpTime_inlock() : _getMyLastAppliedOpTime_inlock(); + if (myOpTime < targetOpTime) { + return false; + } + + for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) { + const OpTime& slaveTime = durablyWritten ? it->lastDurableOpTime : it->lastAppliedOpTime; + if (slaveTime >= targetOpTime) { + --numNodes; + } + + if (numNodes <= 0) { + return true; + } + } + return false; +} + +bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock( + const OpTime& opTime, const ReplSetTagPattern& tagPattern, bool durablyWritten) { + ReplSetTagMatch matcher(tagPattern); + for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) { + const OpTime& slaveTime = durablyWritten ? it->lastDurableOpTime : it->lastAppliedOpTime; + if (slaveTime >= opTime) { + // This node has reached the desired optime, now we need to check if it is a part + // of the tagPattern. + const MemberConfig* memberConfig = _rsConfig.findMemberByID(it->memberId); + invariant(memberConfig); + for (MemberConfig::TagIterator it = memberConfig->tagsBegin(); + it != memberConfig->tagsEnd(); + ++it) { + if (matcher.update(*it)) { + return true; + } + } + } + } + return false; } ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication( @@ -1538,7 +1737,7 @@ Status ReplicationCoordinatorImpl::_awaitReplication_inlock( if (Command::testCommandsEnabled) { // log state of replica set on timeout to help with diagnosis. BSONObjBuilder progress; - _topCoord->fillMemberData(&progress); + _appendSlaveInfoData_inlock(&progress); log() << "Replication for failed WC: " << writeConcern.toBSON() << ", waitInfo: " << waiter << ", opID: " << opCtx->getOpID() << ", progress: " << progress.done(); @@ -1637,7 +1836,7 @@ bool ReplicationCoordinatorImpl::_tryToStepDown_inlock(const Date_t waitUntil, OpTime lastApplied = _getMyLastAppliedOpTime_inlock(); if (forceNow) { - return _topCoord->stepDown(stepDownUntil, forceNow); + return _topCoord->stepDown(stepDownUntil, forceNow, lastApplied); } auto tagStatus = _rsConfig.findCustomWriteMode(ReplSetConfig::kMajorityWriteConcernModeName); @@ -1645,8 +1844,8 @@ bool ReplicationCoordinatorImpl::_tryToStepDown_inlock(const Date_t waitUntil, // Check if a majority of nodes have reached the last applied optime // and there exist an electable node that has my last applied optime. - if (_topCoord->haveTaggedNodesReachedOpTime(lastApplied, tagStatus.getValue(), false) && - _topCoord->stepDown(stepDownUntil, forceNow)) { + if (_haveTaggedNodesReachedOpTime_inlock(lastApplied, tagStatus.getValue(), false) && + _topCoord->stepDown(stepDownUntil, forceNow, lastApplied)) { return true; } @@ -1862,9 +2061,57 @@ Status ReplicationCoordinatorImpl::resyncData(OperationContext* opCtx, bool wait StatusWith<BSONObj> ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand( ReplicationCoordinator::ReplSetUpdatePositionCommandStyle commandStyle) const { - stdx::lock_guard<stdx::mutex> lock(_mutex); - return _topCoord->prepareReplSetUpdatePositionCommand( - commandStyle, _getCurrentCommittedSnapshotOpTime_inlock()); + BSONObjBuilder cmdBuilder; + { + stdx::lock_guard<stdx::mutex> lock(_mutex); + invariant(_rsConfig.isInitialized()); + // Do not send updates if we have been removed from the config. + if (_selfIndex == -1) { + return Status(ErrorCodes::NodeNotFound, + "This node is not in the current replset configuration."); + } + cmdBuilder.append(UpdatePositionArgs::kCommandFieldName, 1); + // Create an array containing objects each live member connected to us and for ourself. + BSONArrayBuilder arrayBuilder(cmdBuilder.subarrayStart("optimes")); + for (const auto& slaveInfo : _slaveInfo) { + if (slaveInfo.lastAppliedOpTime.isNull()) { + // Don't include info on members we haven't heard from yet. + continue; + } + // Don't include members we think are down. + if (!slaveInfo.self && slaveInfo.down) { + continue; + } + + BSONObjBuilder entry(arrayBuilder.subobjStart()); + switch (commandStyle) { + case ReplSetUpdatePositionCommandStyle::kNewStyle: + slaveInfo.lastDurableOpTime.append(&entry, + UpdatePositionArgs::kDurableOpTimeFieldName); + slaveInfo.lastAppliedOpTime.append(&entry, + UpdatePositionArgs::kAppliedOpTimeFieldName); + break; + case ReplSetUpdatePositionCommandStyle::kOldStyle: + entry.append("_id", slaveInfo.rid); + if (isV1ElectionProtocol()) { + slaveInfo.lastDurableOpTime.append(&entry, "optime"); + } else { + entry.append("optime", slaveInfo.lastDurableOpTime.getTimestamp()); + } + break; + } + entry.append(UpdatePositionArgs::kMemberIdFieldName, slaveInfo.memberId); + entry.append(UpdatePositionArgs::kConfigVersionFieldName, _rsConfig.getConfigVersion()); + } + arrayBuilder.done(); + } + + // Add metadata to command. Old style parsing logic will reject the metadata. + if (commandStyle == ReplSetUpdatePositionCommandStyle::kNewStyle) { + stdx::lock_guard<stdx::mutex> lock(_mutex); + _prepareReplSetMetadata_inlock(OpTime(), &cmdBuilder); + } + return cmdBuilder.obj(); } Status ReplicationCoordinatorImpl::processReplSetGetStatus( @@ -1884,6 +2131,9 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus( TopologyCoordinator::ReplSetStatusArgs{ _replExecutor->now(), static_cast<unsigned>(time(0) - serverGlobalParams.started), + _getMyLastAppliedOpTime_inlock(), + _getMyLastDurableOpTime_inlock(), + _lastCommittedOpTime, _getCurrentCommittedSnapshotOpTime_inlock(), initialSyncProgress}, response, @@ -1913,7 +2163,34 @@ void ReplicationCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* respon void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) { stdx::lock_guard<stdx::mutex> lock(_mutex); - _topCoord->fillMemberData(result); + _appendSlaveInfoData_inlock(result); +} + +void ReplicationCoordinatorImpl::_appendSlaveInfoData_inlock(BSONObjBuilder* result) { + BSONArrayBuilder replicationProgress(result->subarrayStart("replicationProgress")); + { + for (SlaveInfoVector::const_iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end(); + ++itr) { + BSONObjBuilder entry(replicationProgress.subobjStart()); + entry.append("rid", itr->rid); + if (isV1ElectionProtocol()) { + BSONObjBuilder opTime(entry.subobjStart("optime")); + opTime.append("ts", itr->lastDurableOpTime.getTimestamp()); + opTime.append("term", itr->lastDurableOpTime.getTerm()); + opTime.done(); + } else { + entry.append("optime", itr->lastDurableOpTime.getTimestamp()); + } + entry.append("host", itr->hostAndPort.toString()); + if (getReplicationMode() == modeReplSet) { + if (_selfIndex == -1) { + continue; + } + invariant(itr->memberId >= 0); + entry.append("memberId", itr->memberId); + } + } + } } ReplSetConfig ReplicationCoordinatorImpl::getConfig() const { @@ -2002,7 +2279,8 @@ Status ReplicationCoordinatorImpl::processReplSetSyncFrom(OperationContext* opCt auto doResync = false; { stdx::lock_guard<stdx::mutex> lk(_mutex); - _topCoord->prepareSyncFromResponse(target, resultObj, &result); + auto opTime = _getMyLastAppliedOpTime_inlock(); + _topCoord->prepareSyncFromResponse(target, opTime, resultObj, &result); // If we are in the middle of an initial sync, do a resync. doResync = result.isOK() && _initialSyncer && _initialSyncer->isActive(); } @@ -2048,8 +2326,12 @@ Status ReplicationCoordinatorImpl::processHeartbeat(const ReplSetHeartbeatArgs& stdx::lock_guard<stdx::mutex> lk(_mutex); const Date_t now = _replExecutor->now(); - Status result = - _topCoord->prepareHeartbeatResponse(now, args, _settings.ourSetName(), response); + Status result = _topCoord->prepareHeartbeatResponse(now, + args, + _settings.ourSetName(), + _getMyLastAppliedOpTime_inlock(), + _getMyLastDurableOpTime_inlock(), + response); if ((result.isOK() || result == ErrorCodes::InvalidReplicaSetConfig) && _selfIndex < 0) { // If this node does not belong to the configuration it knows about, send heartbeats // back to any node that sends us a heartbeat, in case one of those remote nodes has @@ -2612,7 +2894,8 @@ Status ReplicationCoordinatorImpl::processReplSetFresh(const ReplSetFreshArgs& a BSONObjBuilder* resultObj) { stdx::lock_guard<stdx::mutex> lk(_mutex); Status result(ErrorCodes::InternalError, "didn't set status in prepareFreshResponse"); - _topCoord->prepareFreshResponse(args, _replExecutor->now(), resultObj, &result); + _topCoord->prepareFreshResponse( + args, _replExecutor->now(), _getMyLastAppliedOpTime_inlock(), resultObj, &result); return result; } @@ -2620,7 +2903,8 @@ Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& a BSONObjBuilder* responseObj) { stdx::lock_guard<stdx::mutex> lk(_mutex); Status result = Status(ErrorCodes::InternalError, "status not set by callback"); - _topCoord->prepareElectResponse(args, _replExecutor->now(), responseObj, &result); + _topCoord->prepareElectResponse( + args, _replExecutor->now(), _getMyLastAppliedOpTime_inlock(), responseObj, &result); return result; } @@ -2631,7 +2915,9 @@ ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(const ReplSetConfig& newC _cancelHeartbeats_inlock(); _setConfigState_inlock(kConfigSteady); - _topCoord->updateConfig(newConfig, myIndex, _replExecutor->now()); + // Must get this before changing our config. + OpTime myOptime = _getMyLastAppliedOpTime_inlock(); + _topCoord->updateConfig(newConfig, myIndex, _replExecutor->now(), myOptime); const ReplSetConfig oldConfig = _rsConfig; _rsConfig = newConfig; _protVersion.store(_rsConfig.getProtocolVersion()); @@ -2648,6 +2934,7 @@ ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(const ReplSetConfig& newC _cancelAndRescheduleElectionTimeout_inlock(); const PostMemberStateUpdateAction action = _updateMemberStateFromTopologyCoordinator_inlock(); + _updateSlaveInfoFromConfig_inlock(); if (_selfIndex >= 0) { // Don't send heartbeats if we're not in the config, if we get re-added one of the // nodes in the set will contact us. @@ -2734,20 +3021,24 @@ Status ReplicationCoordinatorImpl::processHandshake(OperationContext* opCtx, const HandshakeArgs& handshake) { LOG(2) << "Received handshake " << handshake.toBSON(); - stdx::lock_guard<stdx::mutex> lock(_mutex); + stdx::unique_lock<stdx::mutex> lock(_mutex); if (getReplicationMode() != modeMasterSlave) { return Status(ErrorCodes::IllegalOperation, "The handshake command is only used for master/slave replication"); } - auto* memberHeartbeatData = _topCoord->findMemberHeartbeatDataByRid(handshake.getRid()); - if (memberHeartbeatData) { + SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(handshake.getRid()); + if (slaveInfo) { return Status::OK(); // nothing to do } - memberHeartbeatData = _topCoord->addSlaveMemberData(handshake.getRid()); - memberHeartbeatData->setHostAndPort(_externalState->getClientHostAndPort(opCtx)); + SlaveInfo newSlaveInfo; + newSlaveInfo.rid = handshake.getRid(); + newSlaveInfo.memberId = -1; + newSlaveInfo.hostAndPort = _externalState->getClientHostAndPort(opCtx); + // Don't call _addSlaveInfo_inlock as that would wake sleepers unnecessarily. + _slaveInfo.push_back(newSlaveInfo); return Status::OK(); } @@ -2763,10 +3054,26 @@ bool ReplicationCoordinatorImpl::buildsIndexes() { std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op, bool durablyWritten) { + std::vector<HostAndPort> hosts; stdx::lock_guard<stdx::mutex> lk(_mutex); - /* skip self in master-slave mode because our own HostAndPort is unknown */ - const bool skipSelf = getReplicationMode() == modeMasterSlave; - return _topCoord->getHostsWrittenTo(op, durablyWritten, skipSelf); + for (size_t i = 0; i < _slaveInfo.size(); ++i) { + const SlaveInfo& slaveInfo = _slaveInfo[i]; + if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) { + // Master-slave doesn't know the HostAndPort for itself at this point. + continue; + } + + if (durablyWritten) { + if (slaveInfo.lastDurableOpTime < op) { + continue; + } + } else if (slaveInfo.lastAppliedOpTime < op) { + continue; + } + + hosts.push_back(slaveInfo.hostAndPort); + } + return hosts; } std::vector<HostAndPort> ReplicationCoordinatorImpl::getOtherNodesInReplSet() const { @@ -2905,14 +3212,42 @@ bool ReplicationCoordinatorImpl::shouldChangeSyncSource( const rpc::ReplSetMetadata& replMetadata, boost::optional<rpc::OplogQueryMetadata> oqMetadata) { stdx::lock_guard<stdx::mutex> lock(_mutex); - return _topCoord->shouldChangeSyncSource( - currentSource, replMetadata, oqMetadata, _replExecutor->now()); + return _topCoord->shouldChangeSyncSource(currentSource, + _getMyLastAppliedOpTime_inlock(), + replMetadata, + oqMetadata, + _replExecutor->now()); } void ReplicationCoordinatorImpl::_updateLastCommittedOpTime_inlock() { - if (_topCoord->updateLastCommittedOpTime()) { - _updateCommitPoint_inlock(); + if (!_getMemberState_inlock().primary() || _topCoord->isStepDownPending()) { + return; } + + std::vector<OpTime> votingNodesOpTimes; + + // Whether we use the applied or durable OpTime for the commit point is decided here. + const bool useDurableOpTime = getWriteConcernMajorityShouldJournal_inlock(); + + for (const auto& sI : _slaveInfo) { + auto memberConfig = _rsConfig.findMemberByID(sI.memberId); + invariant(memberConfig); + if (memberConfig->isVoter()) { + const auto opTime = useDurableOpTime ? sI.lastDurableOpTime : sI.lastAppliedOpTime; + votingNodesOpTimes.push_back(opTime); + } + } + + invariant(votingNodesOpTimes.size() > 0); + if (votingNodesOpTimes.size() < static_cast<unsigned long>(_rsConfig.getWriteMajority())) { + return; + } + std::sort(votingNodesOpTimes.begin(), votingNodesOpTimes.end()); + + // need the majority to have this OpTime + OpTime committedOpTime = + votingNodesOpTimes[votingNodesOpTimes.size() - _rsConfig.getWriteMajority()]; + _advanceCommitPoint_inlock(committedOpTime); } void ReplicationCoordinatorImpl::advanceCommitPoint(const OpTime& committedOpTime) { @@ -2921,17 +3256,28 @@ void ReplicationCoordinatorImpl::advanceCommitPoint(const OpTime& committedOpTim } void ReplicationCoordinatorImpl::_advanceCommitPoint_inlock(const OpTime& committedOpTime) { - if (_topCoord->advanceLastCommittedOpTime(committedOpTime)) { - if (_getMemberState_inlock().arbiter()) { - _setMyLastAppliedOpTime_inlock(committedOpTime, false); - } + if (committedOpTime == _lastCommittedOpTime) { + return; // Hasn't changed, so ignore it. + } else if (committedOpTime < _lastCommittedOpTime) { + LOG(1) << "Ignoring older committed snapshot optime: " << committedOpTime + << ", currentCommittedOpTime: " << _lastCommittedOpTime; + return; // This may have come from an out-of-order heartbeat. Ignore it. + } + + // This check is performed to ensure primaries do not commit an OpTime from a previous term. + if (_getMemberState_inlock().primary() && committedOpTime < _firstOpTimeOfMyTerm) { + LOG(1) << "Ignoring older committed snapshot from before I became primary, optime: " + << committedOpTime << ", firstOpTimeOfMyTerm: " << _firstOpTimeOfMyTerm; + return; + } - _updateCommitPoint_inlock(); + if (_getMemberState_inlock().arbiter()) { + _setMyLastAppliedOpTime_inlock(committedOpTime, false); } -} -void ReplicationCoordinatorImpl::_updateCommitPoint_inlock() { - auto committedOpTime = _topCoord->getLastCommittedOpTime(); + LOG(2) << "Updating _lastCommittedOpTime to " << committedOpTime; + _lastCommittedOpTime = committedOpTime; + _externalState->notifyOplogMetadataWaiters(); auto maxSnapshotForOpTime = SnapshotInfo{committedOpTime, SnapshotName::max()}; @@ -2957,9 +3303,13 @@ void ReplicationCoordinatorImpl::_updateCommitPoint_inlock() { } } +void ReplicationCoordinatorImpl::_setFirstOpTimeOfMyTerm_inlock(const OpTime& newOpTime) { + _firstOpTimeOfMyTerm = newOpTime; +} + OpTime ReplicationCoordinatorImpl::getLastCommittedOpTime() const { stdx::unique_lock<stdx::mutex> lk(_mutex); - return _topCoord->getLastCommittedOpTime(); + return _lastCommittedOpTime; } Status ReplicationCoordinatorImpl::processReplSetRequestVotes( @@ -2976,7 +3326,7 @@ Status ReplicationCoordinatorImpl::processReplSetRequestVotes( { stdx::lock_guard<stdx::mutex> lk(_mutex); - _topCoord->processReplSetRequestVotes(args, response); + _topCoord->processReplSetRequestVotes(args, response, _getMyLastAppliedOpTime_inlock()); } if (!args.isADryRun() && response->getVoteGranted()) { @@ -3025,13 +3375,16 @@ void ReplicationCoordinatorImpl::_prepareReplSetMetadata_inlock(const OpTime& la BSONObjBuilder* builder) const { OpTime lastVisibleOpTime = std::max(lastOpTimeFromClient, _getCurrentCommittedSnapshotOpTime_inlock()); - auto metadata = _topCoord->prepareReplSetMetadata(lastVisibleOpTime); + auto metadata = _topCoord->prepareReplSetMetadata(lastVisibleOpTime, _lastCommittedOpTime); metadata.writeToMetadata(builder); } void ReplicationCoordinatorImpl::_prepareOplogQueryMetadata_inlock(int rbid, BSONObjBuilder* builder) const { - _topCoord->prepareOplogQueryMetadata(rbid).writeToMetadata(builder); + OpTime lastAppliedOpTime = _getMyLastAppliedOpTime_inlock(); + auto metadata = + _topCoord->prepareOplogQueryMetadata(_lastCommittedOpTime, lastAppliedOpTime, rbid); + metadata.writeToMetadata(builder); } bool ReplicationCoordinatorImpl::isV1ElectionProtocol() const { @@ -3061,7 +3414,12 @@ Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgs auto senderHost(args.getSenderHost()); const Date_t now = _replExecutor->now(); - result = _topCoord->prepareHeartbeatResponseV1(now, args, _settings.ourSetName(), response); + result = _topCoord->prepareHeartbeatResponseV1(now, + args, + _settings.ourSetName(), + _getMyLastAppliedOpTime_inlock(), + _getMyLastDurableOpTime_inlock(), + response); if ((result.isOK() || result == ErrorCodes::InvalidReplicaSetConfig) && _selfIndex < 0) { // If this node does not belong to the configuration it knows about, send heartbeats @@ -3081,12 +3439,12 @@ Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgs } } else if (result.isOK()) { // Update liveness for sending node. - auto* memberHeartbeatData = - _topCoord->findMemberHeartbeatDataByMemberId(args.getSenderId()); - if (!memberHeartbeatData) { + auto slaveInfo = _findSlaveInfoByMemberID_inlock(args.getSenderId()); + if (!slaveInfo) { return result; } - memberHeartbeatData->updateLiveness(_replExecutor->now()); + slaveInfo->lastUpdate = _replExecutor->now(); + slaveInfo->down = false; } return result; } @@ -3217,7 +3575,7 @@ void ReplicationCoordinatorImpl::createSnapshot(OperationContext* opCtx, _externalState->createSnapshot(opCtx, name); auto snapshotInfo = SnapshotInfo{timeOfSnapshot, name}; - if (timeOfSnapshot <= _topCoord->getLastCommittedOpTime()) { + if (timeOfSnapshot <= _lastCommittedOpTime) { // This snapshot is ready to be marked as committed. invariant(_uncommittedSnapshots.empty()); _updateCommittedSnapshot_inlock(snapshotInfo); @@ -3239,7 +3597,7 @@ void ReplicationCoordinatorImpl::createSnapshot(OperationContext* opCtx, void ReplicationCoordinatorImpl::_updateCommittedSnapshot_inlock( SnapshotInfo newCommittedSnapshot) { invariant(!newCommittedSnapshot.opTime.isNull()); - invariant(newCommittedSnapshot.opTime <= _topCoord->getLastCommittedOpTime()); + invariant(newCommittedSnapshot.opTime <= _lastCommittedOpTime); if (_currentCommittedSnapshot) { invariant(newCommittedSnapshot.opTime >= _currentCommittedSnapshot->opTime); invariant(newCommittedSnapshot.name > _currentCommittedSnapshot->name); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index c09c831ee94..a2a049cd75e 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -535,6 +535,28 @@ private: std::vector<WaiterType> _list; }; + // Struct that holds information about nodes in this replication group, mainly used for + // tracking replication progress for write concern satisfaction. + struct SlaveInfo { + // Our last known OpTime that this slave has applied and journaled to. + OpTime lastDurableOpTime; + // Our last known OpTime that this slave has applied, whether journaled or unjournaled. + OpTime lastAppliedOpTime; + HostAndPort hostAndPort; // Client address of the slave. + int memberId = + -1; // Id of the node in the replica set config, or -1 if we're not a replSet. + OID rid; // RID of the node. + bool self = false; // Whether this SlaveInfo stores the information about ourself + Date_t lastUpdate = + Date_t::max(); // The last time we heard from this node; used for liveness detection + bool down = false; // Indicator set when lastUpdate time exceeds the election timeout. + + BSONObj toBSON() const; + std::string toString() const; + }; + + typedef std::vector<SlaveInfo> SlaveInfoVector; + typedef std::vector<executor::TaskExecutor::CallbackHandle> HeartbeatHandles; // The state and logic of primary catchup. @@ -575,6 +597,50 @@ private: std::unique_ptr<CallbackWaiter> _waiter; }; + /** + * Appends a "replicationProgress" section with data for each member in set. + */ + void _appendSlaveInfoData_inlock(BSONObjBuilder* result); + + /** + * Looks up the SlaveInfo in _slaveInfo associated with the given RID and returns a pointer + * to it, or returns NULL if there is no SlaveInfo with the given RID. + */ + SlaveInfo* _findSlaveInfoByRID_inlock(const OID& rid); + + /** + * Looks up the SlaveInfo in _slaveInfo associated with the given member ID and returns a + * pointer to it, or returns NULL if there is no SlaveInfo with the given member ID. + */ + SlaveInfo* _findSlaveInfoByMemberID_inlock(int memberID); + + /** + * Adds the given SlaveInfo to _slaveInfo and wakes up any threads waiting for replication + * that now have their write concern satisfied. Only valid to call in master/slave setups. + */ + void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo); + + /** + * Updates the durableOpTime field on the item in _slaveInfo pointed to by 'slaveInfo' with the + * given OpTime 'opTime' and wakes up any threads waiting for replication that now have their + * write concern satisfied. + */ + void _updateSlaveInfoDurableOpTime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime); + + /** + * Updates the appliedOpTime field on the item in _slaveInfo pointed to by 'slaveInfo' with the + * given OpTime 'opTime' and wakes up any threads waiting for replication that now have their + * write concern satisfied. + */ + void _updateSlaveInfoAppliedOpTime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime); + + /** + * Returns the index into _slaveInfo where data corresponding to ourself is stored. + * For more info on the rules about how we know where our entry is, see the comment for + * _slaveInfo. + */ + size_t _getMyIndexInSlaveInfo_inlock() const; + void _resetMyLastOpTimes_inlock(); /** @@ -598,6 +664,14 @@ private: Status _validateReadConcern(OperationContext* opCtx, const ReadConcernArgs& readConcern); /** + * Helper method that removes entries from _slaveInfo if they correspond to a node + * with a member ID that is not in the current replica set config. Will always leave an + * entry for ourself at the beginning of _slaveInfo, even if we aren't present in the + * config. + */ + void _updateSlaveInfoFromConfig_inlock(); + + /** * Helper to update our saved config, cancel any pending heartbeats, and kick off sending * new heartbeats based on the new config. * @@ -608,6 +682,12 @@ private: int myIndex); /** + * Updates the last committed OpTime to be "committedOpTime" if it is more recent than the + * current last committed OpTime. + */ + void _advanceCommitPoint_inlock(const OpTime& committedOpTime); + + /** * Helper to wake waiters in _replicationWaiterList that are doneWaitingForReplication. */ void _wakeReadyWaiters_inlock(); @@ -639,6 +719,21 @@ private: SnapshotName minSnapshot, const WriteConcernOptions& writeConcern); + /** + * Helper for _doneWaitingForReplication_inlock that takes an integer write concern. + * "durablyWritten" indicates whether the operation has to be durably applied. + */ + bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes, bool durablyWritten); + + /** + * Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a + * named write concern mode. + * "durablyWritten" indicates whether the operation has to be durably applied. + */ + bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime, + const ReplSetTagPattern& tagPattern, + bool durablyWritten); + Status _checkIfWriteConcernCanBeSatisfied_inlock(const WriteConcernOptions& writeConcern) const; /** @@ -953,30 +1048,17 @@ private: stdx::unique_lock<stdx::mutex> lock); /** - * Updates the last committed OpTime to be "committedOpTime" if it is more recent than the - * current last committed OpTime. - */ - void _advanceCommitPoint_inlock(const OpTime& committedOpTime); - - /** - * Helper for advanceCommitPoint and updateLastCommittedOpTime. Notifies external waiters - * waiting on oplog metadata changes (not read or write concerns) of a change in - * lastCommittedOpTime and updates our committed snapshot. - */ - void _updateCommitPoint_inlock(); - - /** - * Scan the memberHeartbeatData and determine the highest OplogEntry present on a majority of - * servers; set the topology coordinator's lastCommittedOpTime to this new entry, - * if greater than the current entry. + * Scan the SlaveInfoVector and determine the highest OplogEntry present on a majority of + * servers; set _lastCommittedOpTime to this new entry, if greater than the current entry. */ void _updateLastCommittedOpTime_inlock(); /** - * Updates the topology coordinator's lastCommittedOpTime based on member state information - * and wakes up any threads waiting for replication that now have their write concern satisfied. + * This is used to set a floor of "newOpTime" on the OpTimes we will consider committed. + * This prevents entries from before our election from counting as committed in our view, + * until our election (the "newOpTime" op) has been committed. */ - void _updateLastCommittedOpTimeAndWake_inlock(); + void _setFirstOpTimeOfMyTerm_inlock(const OpTime& newOpTime); /** * Callback that attempts to set the current term in topology coordinator and @@ -1220,6 +1302,16 @@ private: // Election ID of the last election that resulted in this node becoming primary. OID _electionId; // (M) + // Vector containing known information about each member (such as replication + // progress and member ID) in our replica set or each member replicating from + // us in a master-slave deployment. In master/slave, the first entry is + // guaranteed to correspond to ourself. In replica sets where we don't have a + // valid config or are in state REMOVED then the vector will be a single element + // just with info about ourself. In replica sets with a valid config the elements + // will be in the same order as the members in the replica set config, thus + // the entry for ourself will be at _thisMemberConfigIndex. + SlaveInfoVector _slaveInfo; // (M) + // Used to signal threads waiting for changes to _memberState. stdx::condition_variable _memberStateChange; // (M) @@ -1283,6 +1375,13 @@ private: // _canAcceptNonLocalWrites, its value is only meaningful on replica set secondaries. AtomicUInt32 _canServeNonLocalReads; // (S) + // OpTime of the latest committed operation. Matches the concurrency level of _slaveInfo. + OpTime _lastCommittedOpTime; // (M) + + // OpTime representing our transition to PRIMARY and the start of our term. + // _lastCommittedOpTime cannot be set to an earlier OpTime. + OpTime _firstOpTimeOfMyTerm; // (M) + // ReplicationProcess used to hold information related to the replication and application of // operations from the sync source. ReplicationProcess* const _replicationProcess; // (PS) diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp index 322c84246e6..be7d6ea79d7 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp @@ -278,11 +278,12 @@ void ReplicationCoordinatorImpl::_recoverFromElectionTie( stdx::unique_lock<stdx::mutex> lk(_mutex); auto now = _replExecutor->now(); - const auto status = _topCoord->checkShouldStandForElection(now); + auto lastOpApplied = _getMyLastAppliedOpTime_inlock(); + const auto status = _topCoord->checkShouldStandForElection(now, lastOpApplied); if (!status.isOK()) { LOG(2) << "ReplicationCoordinatorImpl::_recoverFromElectionTie -- " << status.reason(); } else { - fassertStatusOK(28817, _topCoord->becomeCandidateIfElectable(now, false)); + fassertStatusOK(28817, _topCoord->becomeCandidateIfElectable(now, lastOpApplied, false)); _startElectSelf_inlock(); } } diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp index 4d42efe75eb..cf543852022 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp @@ -275,10 +275,16 @@ void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long originalTerm) // Mark all nodes that responded to our vote request as up to avoid immediately // relinquishing primary. Date_t now = _replExecutor->now(); - _topCoord->resetMemberTimeouts(now, _voteRequester->getResponders()); + const unordered_set<HostAndPort> liveNodes = _voteRequester->getResponders(); + for (auto& nodeInfo : _slaveInfo) { + if (liveNodes.count(nodeInfo.hostAndPort)) { + nodeInfo.down = false; + nodeInfo.lastUpdate = now; + } + } // Prevent last committed optime from updating until we finish draining. - _topCoord->setFirstOpTimeOfMyTerm( + _setFirstOpTimeOfMyTerm_inlock( OpTime(Timestamp(std::numeric_limits<int>::max(), 0), std::numeric_limits<int>::max())); _voteRequester.reset(); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index d5974aeded0..99b39be285f 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -39,7 +39,6 @@ #include "mongo/db/repl/elect_cmd_runner.h" #include "mongo/db/repl/freshness_checker.h" #include "mongo/db/repl/heartbeat_response_action.h" -#include "mongo/db/repl/member_heartbeat_data.h" #include "mongo/db/repl/repl_set_config_checks.h" #include "mongo/db/repl/repl_set_heartbeat_args.h" #include "mongo/db/repl/repl_set_heartbeat_args_v1.h" @@ -192,6 +191,7 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( } } const Date_t now = _replExecutor->now(); + const OpTime lastApplied = _getMyLastAppliedOpTime_inlock(); Milliseconds networkTime(0); StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse); @@ -213,14 +213,21 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus); } - HeartbeatResponseAction action = - _topCoord->processHeartbeatResponse(now, networkTime, target, hbStatusResponse); + HeartbeatResponseAction action = _topCoord->processHeartbeatResponse( + now, networkTime, target, hbStatusResponse, lastApplied); if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() && - hbStatusResponse.getValue().hasState() && - hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY && - action.getAdvancedOpTime()) { - _updateLastCommittedOpTimeAndWake_inlock(); + targetIndex >= 0 && hbStatusResponse.getValue().hasState() && + hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) { + ReplSetHeartbeatResponse hbResp = hbStatusResponse.getValue(); + if (hbResp.hasAppliedOpTime()) { + if (hbResp.getConfigVersion() == _rsConfig.getConfigVersion()) { + _updateOpTimesFromHeartbeat_inlock( + targetIndex, + hbResp.hasDurableOpTime() ? hbResp.getDurableOpTime() : OpTime(), + hbResp.getAppliedOpTime()); + } + } } // Wake the stepdown waiter when our updated OpTime allows it to finish stepping down. @@ -237,6 +244,21 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( _handleHeartbeatResponseAction_inlock(action, hbStatusResponse, std::move(lk)); } +void ReplicationCoordinatorImpl::_updateOpTimesFromHeartbeat_inlock(int targetIndex, + const OpTime& durableOpTime, + const OpTime& appliedOpTime) { + invariant(_selfIndex >= 0); + invariant(targetIndex >= 0); + + SlaveInfo& slaveInfo = _slaveInfo[targetIndex]; + if (appliedOpTime > slaveInfo.lastAppliedOpTime) { + _updateSlaveInfoAppliedOpTime_inlock(&slaveInfo, appliedOpTime); + } + if (durableOpTime > slaveInfo.lastDurableOpTime) { + _updateSlaveInfoDurableOpTime_inlock(&slaveInfo, durableOpTime); + } +} + stdx::unique_lock<stdx::mutex> ReplicationCoordinatorImpl::_handleHeartbeatResponseAction_inlock( const HeartbeatResponseAction& action, const StatusWith<ReplSetHeartbeatResponse>& responseStatus, @@ -651,7 +673,10 @@ void ReplicationCoordinatorImpl::_startHeartbeats_inlock() { _topCoord->restartHeartbeats(); if (isV1ElectionProtocol()) { - _topCoord->resetAllMemberTimeouts(_replExecutor->now()); + for (auto&& slaveInfo : _slaveInfo) { + slaveInfo.lastUpdate = _replExecutor->now(); + slaveInfo.down = false; + } _scheduleNextLivenessUpdate_inlock(); } } @@ -671,12 +696,37 @@ void ReplicationCoordinatorImpl::_handleLivenessTimeout( } // Scan liveness table for problems and mark nodes as down by calling into topocoord. - HeartbeatResponseAction action = _topCoord->checkMemberTimeouts(_replExecutor->now()); - // Don't mind potential asynchronous stepdown as this is the last step of - // liveness check. - lk = _handleHeartbeatResponseAction_inlock( - action, makeStatusWith<ReplSetHeartbeatResponse>(), std::move(lk)); + auto now(_replExecutor->now()); + for (auto&& slaveInfo : _slaveInfo) { + if (slaveInfo.self) { + continue; + } + if (slaveInfo.down) { + continue; + } + if (now - slaveInfo.lastUpdate >= _rsConfig.getElectionTimeoutPeriod()) { + int memberIndex = _rsConfig.findMemberIndexByConfigId(slaveInfo.memberId); + if (memberIndex == -1) { + continue; + } + + slaveInfo.down = true; + + if (_memberState.primary()) { + // Only adjust hbdata if we are primary, since only the primary has a full view + // of the entire cluster. + // Secondaries might not see other secondaries in the cluster if they are not + // downstream. + HeartbeatResponseAction action = + _topCoord->setMemberAsDown(now, memberIndex, _getMyLastDurableOpTime_inlock()); + // Don't mind potential asynchronous stepdown as this is the last step of + // liveness check. + lk = _handleHeartbeatResponseAction_inlock( + action, makeStatusWith<ReplSetHeartbeatResponse>(), std::move(lk)); + } + } + } _scheduleNextLivenessUpdate_inlock(); } @@ -686,10 +736,23 @@ void ReplicationCoordinatorImpl::_scheduleNextLivenessUpdate_inlock() { } // Scan liveness table for earliest date; schedule a run at (that date plus election // timeout). - Date_t earliestDate; - int earliestMemberId; - std::tie(earliestMemberId, earliestDate) = _topCoord->getStalestLiveMember(); - + Date_t earliestDate = Date_t::max(); + int earliestMemberId = -1; + for (auto&& slaveInfo : _slaveInfo) { + if (slaveInfo.self) { + continue; + } + if (slaveInfo.down) { + // Already down. + continue; + } + LOG(3) << "slaveinfo lastupdate is: " << slaveInfo.lastUpdate; + if (earliestDate > slaveInfo.lastUpdate) { + earliestDate = slaveInfo.lastUpdate; + earliestMemberId = slaveInfo.memberId; + } + } + LOG(3) << "earliest member " << earliestMemberId << " date: " << earliestDate; if (earliestMemberId == -1 || earliestDate == Date_t::max()) { _earliestMemberId = -1; // Nobody here but us. @@ -793,8 +856,10 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(StartElectionV1Reas } } - const auto status = _topCoord->becomeCandidateIfElectable( - _replExecutor->now(), reason == StartElectionV1Reason::kPriorityTakeover); + const auto status = + _topCoord->becomeCandidateIfElectable(_replExecutor->now(), + _getMyLastAppliedOpTime_inlock(), + reason == StartElectionV1Reason::kPriorityTakeover); if (!status.isOK()) { switch (reason) { case StartElectionV1Reason::kElectionTimeout: diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index b85753c819a..d45117847cb 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -45,7 +45,6 @@ class Timestamp; namespace repl { class HeartbeatResponseAction; -class MemberHeartbeatData; class OpTime; class ReplSetHeartbeatArgs; class ReplSetConfig; @@ -170,6 +169,7 @@ public: * TODO (SERVER-27668): Make OplogQueryMetadata non-optional in mongodb 3.8. */ virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, + const OpTime& myLastOpTime, const rpc::ReplSetMetadata& replMetadata, boost::optional<rpc::OplogQueryMetadata> oqMetadata, Date_t now) const = 0; @@ -199,31 +199,6 @@ public: virtual void setFollowerMode(MemberState::MS newMode) = 0; /** - * Scan the memberHeartbeatData and determine the highest OplogEntry present on a majority of - * servers; set _lastCommittedOpTime to this new entry. - * Returns true if the _lastCommittedOpTime was changed. - */ - virtual bool updateLastCommittedOpTime() = 0; - - /** - * Updates _lastCommittedOpTime to be "committedOpTime" if it is more recent than the - * current last committed OpTime. Returns true if _lastCommittedOpTime is changed. - */ - virtual bool advanceLastCommittedOpTime(const OpTime& committedOpTime) = 0; - - /** - * Returns the OpTime of the latest majority-committed op known to this server. - */ - virtual OpTime getLastCommittedOpTime() const = 0; - - /** - * This is used to set a floor of "newOpTime" on the OpTimes we will consider committed. - * This prevents entries from before our election from counting as committed in our view, - * until our election (the "newOpTime" op) has been committed. - */ - virtual void setFirstOpTimeOfMyTerm(const OpTime& newOpTime) = 0; - - /** * Adjusts the maintenance mode count by "inc". * * It is an error to call this method if getRole() does not return Role::follower. @@ -239,18 +214,21 @@ public: // produces a reply to a replSetSyncFrom command virtual void prepareSyncFromResponse(const HostAndPort& target, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result) = 0; // produce a reply to a replSetFresh command virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args, Date_t now, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result) = 0; // produce a reply to a received electCmd virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args, Date_t now, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result) = 0; @@ -258,17 +236,24 @@ public: virtual Status prepareHeartbeatResponse(Date_t now, const ReplSetHeartbeatArgs& args, const std::string& ourSetName, + const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) = 0; // produce a reply to a V1 heartbeat virtual Status prepareHeartbeatResponseV1(Date_t now, const ReplSetHeartbeatArgsV1& args, const std::string& ourSetName, + const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) = 0; struct ReplSetStatusArgs { Date_t now; unsigned selfUptime; + const OpTime& lastOpApplied; + const OpTime& lastOpDurable; + const OpTime& lastCommittedOpTime; const OpTime& readConcernMajorityOpTime; const BSONObj& initialSyncStatus; }; @@ -278,18 +263,10 @@ public: BSONObjBuilder* response, Status* result) = 0; - // Produce a replSetUpdatePosition command to be sent to the node's sync source. - virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand( - ReplicationCoordinator::ReplSetUpdatePositionCommandStyle commandStyle, - OpTime currentCommittedSnapshotOpTime) const = 0; - // produce a reply to an ismaster request. It is only valid to call this if we are a // replset. virtual void fillIsMasterForReplSet(IsMasterResponse* response) = 0; - // Produce member data for the serverStatus command and diagnostic logging. - virtual void fillMemberData(BSONObjBuilder* result) = 0; - enum class PrepareFreezeResponseResult { kNoAction, kElectSelf }; /** @@ -317,7 +294,10 @@ public: * newConfig.isInitialized() should be true, though implementations may accept * configurations where this is not true, for testing purposes. */ - virtual void updateConfig(const ReplSetConfig& newConfig, int selfIndex, Date_t now) = 0; + virtual void updateConfig(const ReplSetConfig& newConfig, + int selfIndex, + Date_t now, + const OpTime& lastOpApplied) = 0; /** * Prepares a heartbeat request appropriate for sending to "target", assuming the @@ -366,99 +346,16 @@ public: Date_t now, Milliseconds networkRoundTripTime, const HostAndPort& target, - const StatusWith<ReplSetHeartbeatResponse>& hbResponse) = 0; - - /** - * Returns whether or not at least 'numNodes' have reached the given opTime. - * "durablyWritten" indicates whether the operation has to be durably applied. - */ - virtual bool haveNumNodesReachedOpTime(const OpTime& opTime, - int numNodes, - bool durablyWritten) = 0; - - /** - * Returns whether or not at least one node matching the tagPattern has reached - * the given opTime. - * "durablyWritten" indicates whether the operation has to be durably applied. - */ - virtual bool haveTaggedNodesReachedOpTime(const OpTime& opTime, - const ReplSetTagPattern& tagPattern, - bool durablyWritten) = 0; - - /** - * Returns a vector of members that have applied the operation with OpTime 'op'. - * "durablyWritten" indicates whether the operation has to be durably applied. - * "skipSelf" means to exclude this node whether or not the op has been applied. - */ - virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, - bool durablyWritten, - bool skipSelf) = 0; + const StatusWith<ReplSetHeartbeatResponse>& hbResponse, + const OpTime& myLastOpApplied) = 0; /** * Marks a member has down from our persepctive and returns a HeartbeatResponseAction, which * will be StepDownSelf if we can no longer see a majority of the nodes. */ - virtual HeartbeatResponseAction setMemberAsDown(Date_t now, const int memberIndex) = 0; - - /** - * Goes through the memberHeartbeatData and determines which member that is currently live - * has the stalest (earliest) last update time. Returns (-1, Date_t::max()) if there are - * no other members. - */ - virtual std::pair<int, Date_t> getStalestLiveMember() const = 0; - - /** - * Go through the memberHeartbeatData, and mark nodes which haven't been updated - * recently (within an election timeout) as "down". Returns a HeartbeatResponseAction, which - * will be StepDownSelf if we can no longer see a majority of the nodes, otherwise NoAction. - */ - virtual HeartbeatResponseAction checkMemberTimeouts(Date_t now) = 0; - - /** - * Set all nodes in memberHeartbeatData to not stale with a lastUpdate of "now". - */ - virtual void resetAllMemberTimeouts(Date_t now) = 0; - - /** - * Set all nodes in memberHeartbeatData that are present in member_set - * to not stale with a lastUpdate of "now". - */ - virtual void resetMemberTimeouts(Date_t now, - const stdx::unordered_set<HostAndPort>& member_set) = 0; - - /* - * Returns the last optime that this node has applied, whether journaled or unjournaled. - */ - virtual OpTime getMyLastAppliedOpTime() const = 0; - - /* - * Returns the last optime that this node has applied, whether journaled or unjournaled. - */ - virtual OpTime getMyLastDurableOpTime() const = 0; - - /* - * Returns information we have on the state of this node. - */ - virtual MemberHeartbeatData* getMyMemberHeartbeatData() = 0; - - /* - * Returns information we have on the state of the node identified by memberId. Returns - * nullptr if memberId is not found in the configuration. - */ - virtual MemberHeartbeatData* findMemberHeartbeatDataByMemberId(const int memberId) = 0; - - /* - * Returns information we have on the state of the node identified by rid. Returns - * nullptr if rid is not found in the heartbeat data. This method is used only for - * master/slave replication. - */ - virtual MemberHeartbeatData* findMemberHeartbeatDataByRid(const OID rid) = 0; - - /* - * Adds and returns a memberHeartbeatData entry for the given RID. - * Used only in master/slave mode. - */ - virtual MemberHeartbeatData* addSlaveMemberData(const OID rid) = 0; + virtual HeartbeatResponseAction setMemberAsDown(Date_t now, + const int memberIndex, + const OpTime& myLastOpApplied) = 0; /** * If getRole() == Role::candidate and this node has not voted too recently, updates the @@ -515,7 +412,7 @@ public: * * NOTE: It is illegal to call this method if the node is not a primary. */ - virtual bool stepDown(Date_t until, bool force) = 0; + virtual bool stepDown(Date_t until, bool force, const OpTime& lastOpApplied) = 0; /** * Sometimes a request to step down comes in (like via a heartbeat), but we don't have the @@ -536,7 +433,7 @@ public: * Considers whether or not this node should stand for election, and returns true * if the node has transitioned to candidate role as a result of the call. */ - virtual Status checkShouldStandForElection(Date_t now) const = 0; + virtual Status checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) const = 0; /** * Set the outgoing heartbeat message from self @@ -547,13 +444,16 @@ public: * Prepares a ReplSetMetadata object describing the current term, primary, and lastOp * information. */ - virtual rpc::ReplSetMetadata prepareReplSetMetadata(const OpTime& lastVisibleOpTime) const = 0; + virtual rpc::ReplSetMetadata prepareReplSetMetadata( + const OpTime& lastVisibleOpTime, const OpTime& lastCommittedOpTime) const = 0; /** * Prepares an OplogQueryMetadata object describing the current sync source, rbid, primary, * lastOpApplied, and lastOpCommitted. */ - virtual rpc::OplogQueryMetadata prepareOplogQueryMetadata(int rbid) const = 0; + virtual rpc::OplogQueryMetadata prepareOplogQueryMetadata(const OpTime& lastCommittedOpTime, + const OpTime& lastAppliedOpTime, + int rbid) const = 0; /** * Writes into 'output' all the information needed to generate a summary of the current @@ -565,7 +465,8 @@ public: * Prepares a ReplSetRequestVotesResponse. */ virtual void processReplSetRequestVotes(const ReplSetRequestVotesArgs& args, - ReplSetRequestVotesResponse* response) = 0; + ReplSetRequestVotesResponse* response, + const OpTime& lastAppliedOpTime) = 0; /** * Loads an initial LastVote document, which was read from local storage. @@ -587,7 +488,9 @@ public: /** * Transitions to the candidate role if the node is electable. */ - virtual Status becomeCandidateIfElectable(const Date_t now, bool isPriorityTakeover) = 0; + virtual Status becomeCandidateIfElectable(const Date_t now, + const OpTime& lastOpApplied, + bool isPriorityTakeover) = 0; /** * Updates the storage engine read committed support in the TopologyCoordinator options after diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp index ee305f3da38..a3bcbfaa8c0 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl.cpp @@ -46,7 +46,6 @@ #include "mongo/db/repl/repl_set_html_summary.h" #include "mongo/db/repl/repl_set_request_votes_args.h" #include "mongo/db/repl/rslog.h" -#include "mongo/db/repl/update_position_args.h" #include "mongo/db/server_parameters.h" #include "mongo/rpc/metadata/oplog_query_metadata.h" #include "mongo/rpc/metadata/repl_set_metadata.h" @@ -60,6 +59,7 @@ namespace mongo { namespace repl { using std::vector; + const Seconds TopologyCoordinatorImpl::VoteLease::leaseTime = Seconds(30); // Controls how caught up in replication a secondary with higher priority than the current primary @@ -144,9 +144,6 @@ TopologyCoordinatorImpl::TopologyCoordinatorImpl(Options options) _maintenanceModeCalls(0), _followerMode(MemberState::RS_STARTUP2) { invariant(getMemberState() == MemberState::RS_STARTUP); - // Need an entry for self in the memberHearbeatData. - _hbdata.emplace_back(); - _hbdata.back().setIsSelf(true); } TopologyCoordinator::Role TopologyCoordinatorImpl::getRole() const { @@ -230,7 +227,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, // Find primary's oplog time. Reject sync candidates that are more than // _options.maxSyncSourceLagSecs seconds behind. if (_currentPrimaryIndex != -1) { - OpTime primaryOpTime = _hbdata.at(_currentPrimaryIndex).getHeartbeatAppliedOpTime(); + OpTime primaryOpTime = _hbdata.at(_currentPrimaryIndex).getAppliedOpTime(); // Check if primaryOpTime is still close to 0 because we haven't received // our first heartbeat from a new primary yet. @@ -292,10 +289,10 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, continue; } // Candidates cannot be excessively behind. - if (it->getHeartbeatAppliedOpTime() < oldestSyncOpTime) { + if (it->getAppliedOpTime() < oldestSyncOpTime) { LOG(2) << "Cannot select sync source because it is too far behind." << "Latest optime of sync candidate " << itMemberConfig.getHostAndPort() - << ": " << it->getHeartbeatAppliedOpTime() + << ": " << it->getAppliedOpTime() << ", oldest acceptable optime: " << oldestSyncOpTime; continue; } @@ -315,12 +312,12 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, } } // only consider candidates that are ahead of where we are - if (it->getHeartbeatAppliedOpTime() <= lastOpTimeFetched) { + if (it->getAppliedOpTime() <= lastOpTimeFetched) { LOG(1) << "Cannot select sync source equal to or behind our last fetched optime. " << "My last fetched oplog optime: " << lastOpTimeFetched.toBSON() << ", latest oplog optime of sync candidate " << itMemberConfig.getHostAndPort() << ": " - << it->getHeartbeatAppliedOpTime().toBSON(); + << it->getAppliedOpTime().toBSON(); continue; } // Candidate cannot be more latent than anything we've already considered. @@ -395,6 +392,7 @@ void TopologyCoordinatorImpl::clearSyncSourceBlacklist() { } void TopologyCoordinatorImpl::prepareSyncFromResponse(const HostAndPort& target, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result) { response->append("syncFromRequested", target.toString()); @@ -467,10 +465,9 @@ void TopologyCoordinatorImpl::prepareSyncFromResponse(const HostAndPort& target, str::stream() << "I cannot reach the requested member: " << target.toString()); return; } - const OpTime lastOpApplied = getMyLastAppliedOpTime(); - if (hbdata.getHeartbeatAppliedOpTime().getSecs() + 10 < lastOpApplied.getSecs()) { + if (hbdata.getAppliedOpTime().getSecs() + 10 < lastOpApplied.getSecs()) { warning() << "attempting to sync from " << target << ", but its latest opTime is " - << hbdata.getHeartbeatAppliedOpTime().getSecs() << " and ours is " + << hbdata.getAppliedOpTime().getSecs() << " and ours is " << lastOpApplied.getSecs() << " so this may not work"; response->append("warning", str::stream() << "requested member \"" << target.toString() @@ -490,6 +487,7 @@ void TopologyCoordinatorImpl::prepareSyncFromResponse(const HostAndPort& target, void TopologyCoordinatorImpl::prepareFreshResponse( const ReplicationCoordinator::ReplSetFreshArgs& args, const Date_t now, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result) { if (_rsConfig.getProtocolVersion() != 0) { @@ -523,7 +521,6 @@ void TopologyCoordinatorImpl::prepareFreshResponse( } bool weAreFresher = false; - const OpTime lastOpApplied = getMyLastAppliedOpTime(); if (_rsConfig.getConfigVersion() > args.cfgver) { log() << "replSet member " << args.who << " is not yet aware its cfg version " << args.cfgver << " is stale"; @@ -531,7 +528,7 @@ void TopologyCoordinatorImpl::prepareFreshResponse( weAreFresher = true; } // check not only our own optime, but any other member we can reach - else if (OpTime(args.opTime, _term) < _latestKnownOpTime()) { + else if (OpTime(args.opTime, _term) < _latestKnownOpTime(lastOpApplied)) { weAreFresher = true; } response->appendDate("opTime", @@ -539,7 +536,7 @@ void TopologyCoordinatorImpl::prepareFreshResponse( response->append("fresher", weAreFresher); std::string errmsg; - bool doVeto = _shouldVetoMember(args, now, &errmsg); + bool doVeto = _shouldVetoMember(args, now, lastOpApplied, &errmsg); response->append("veto", doVeto); if (doVeto) { response->append("errmsg", errmsg); @@ -550,6 +547,7 @@ void TopologyCoordinatorImpl::prepareFreshResponse( bool TopologyCoordinatorImpl::_shouldVetoMember( const ReplicationCoordinator::ReplSetFreshArgs& args, const Date_t& now, + const OpTime& lastOpApplied, std::string* errmsg) const { if (_rsConfig.getConfigVersion() < args.cfgver) { // We are stale; do not veto. @@ -559,14 +557,14 @@ bool TopologyCoordinatorImpl::_shouldVetoMember( const unsigned int memberID = args.id; const int hopefulIndex = _getMemberIndex(memberID); invariant(hopefulIndex != _selfIndex); - const int highestPriorityIndex = _getHighestPriorityElectableIndex(now); + const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied); if (hopefulIndex == -1) { *errmsg = str::stream() << "replSet couldn't find member with id " << memberID; return true; } - const OpTime lastOpApplied = getMyLastAppliedOpTime(); - if (_iAmPrimary() && lastOpApplied >= _hbdata.at(hopefulIndex).getHeartbeatAppliedOpTime()) { + + if (_iAmPrimary() && lastOpApplied >= _hbdata.at(hopefulIndex).getAppliedOpTime()) { // hbinfo is not updated for ourself, so if we are primary we have to check the // primary's last optime separately *errmsg = str::stream() << "I am already primary, " @@ -576,8 +574,8 @@ bool TopologyCoordinatorImpl::_shouldVetoMember( } if (_currentPrimaryIndex != -1 && (hopefulIndex != _currentPrimaryIndex) && - (_hbdata.at(_currentPrimaryIndex).getHeartbeatAppliedOpTime() >= - _hbdata.at(hopefulIndex).getHeartbeatAppliedOpTime())) { + (_hbdata.at(_currentPrimaryIndex).getAppliedOpTime() >= + _hbdata.at(hopefulIndex).getAppliedOpTime())) { // other members might be aware of more up-to-date nodes *errmsg = str::stream() << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString() @@ -600,7 +598,7 @@ bool TopologyCoordinatorImpl::_shouldVetoMember( } } - UnelectableReasonMask reason = _getUnelectableReason(hopefulIndex); + UnelectableReasonMask reason = _getUnelectableReason(hopefulIndex, lastOpApplied); reason &= ~RefusesToStand; if (reason) { *errmsg = str::stream() << "I don't think " @@ -617,6 +615,7 @@ bool TopologyCoordinatorImpl::_shouldVetoMember( void TopologyCoordinatorImpl::prepareElectResponse( const ReplicationCoordinator::ReplSetElectArgs& args, const Date_t now, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result) { if (_rsConfig.getProtocolVersion() != 0) { @@ -632,7 +631,7 @@ void TopologyCoordinatorImpl::prepareElectResponse( } const long long myver = _rsConfig.getConfigVersion(); - const int highestPriorityIndex = _getHighestPriorityElectableIndex(now); + const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied); const MemberConfig* primary = _currentPrimaryMember(); const MemberConfig* hopeful = _rsConfig.findMemberByID(args.whoid); @@ -694,6 +693,8 @@ void TopologyCoordinatorImpl::prepareElectResponse( Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now, const ReplSetHeartbeatArgs& args, const std::string& ourSetName, + const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) { if (args.getProtocolVersion() != 1) { return Status(ErrorCodes::BadValue, @@ -738,11 +739,8 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now, response->setElectionTime(_electionTime); } - const OpTime lastOpApplied = getMyLastAppliedOpTime(); - const OpTime lastOpDurable = getMyLastDurableOpTime(); - // Are we electable - response->setElectable(!_getMyUnelectableReason(now, false)); + response->setElectable(!_getMyUnelectableReason(now, lastOpApplied, false)); // Heartbeat status message response->setHbMsg(_getHbmsg(now)); @@ -790,6 +788,8 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now, Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now, const ReplSetHeartbeatArgsV1& args, const std::string& ourSetName, + const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) { // Verify that replica set names match const std::string rshb = args.getSetName(); @@ -825,8 +825,6 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now, response->setElectionTime(_electionTime); } - const OpTime lastOpApplied = getMyLastAppliedOpTime(); - const OpTime lastOpDurable = getMyLastDurableOpTime(); response->setAppliedOpTime(lastOpApplied); response->setDurableOpTime(lastOpDurable); @@ -953,7 +951,8 @@ HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse( Date_t now, Milliseconds networkRoundTripTime, const HostAndPort& target, - const StatusWith<ReplSetHeartbeatResponse>& hbResponse) { + const StatusWith<ReplSetHeartbeatResponse>& hbResponse, + const OpTime& myLastOpApplied) { const MemberState originalState = getMemberState(); PingStats& hbStats = _pings[target]; invariant(hbStats.getLastHeartbeatStartDate() != Date_t()); @@ -1033,14 +1032,6 @@ HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse( nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate); return nextAction; } - // If we're not in the config, we don't need to respond to heartbeats. - if (_selfIndex == -1) { - LOG(1) << "Could not find ourself in current config so ignoring heartbeat from " << target - << " -- current config: " << _rsConfig.toBSON(); - HeartbeatResponseAction nextAction = HeartbeatResponseAction::makeNoAction(); - nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate); - return nextAction; - } const int memberIndex = _rsConfig.findMemberIndexByHostAndPort(target); if (memberIndex == -1) { LOG(1) << "Could not find " << target << " in current config so ignoring --" @@ -1055,7 +1046,6 @@ HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse( MemberHeartbeatData& hbData = _hbdata.at(memberIndex); const MemberConfig member = _rsConfig.getMemberAt(memberIndex); - bool advancedOpTime = false; if (!hbResponse.isOK()) { if (isUnauthorized) { LOG(1) << "setAuthIssue: heartbeat response failed due to authentication" @@ -1077,120 +1067,30 @@ HeartbeatResponseAction TopologyCoordinatorImpl::processHeartbeatResponse( ReplSetHeartbeatResponse hbr = std::move(hbResponse.getValue()); LOG(3) << "setUpValues: heartbeat response good for member _id:" << member.getId() << ", msg: " << hbr.getHbMsg(); - advancedOpTime = hbData.setUpValues(now, member.getHostAndPort(), std::move(hbr)); + hbData.setUpValues(now, member.getHostAndPort(), std::move(hbr)); } HeartbeatResponseAction nextAction; if (_rsConfig.getProtocolVersion() == 0) { - nextAction = _updatePrimaryFromHBData(memberIndex, originalState, now); + nextAction = _updatePrimaryFromHBData(memberIndex, originalState, now, myLastOpApplied); } else { - nextAction = _updatePrimaryFromHBDataV1(memberIndex, originalState, now); + nextAction = _updatePrimaryFromHBDataV1(memberIndex, originalState, now, myLastOpApplied); } nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate); - nextAction.setAdvancedOpTime(advancedOpTime); return nextAction; } -bool TopologyCoordinatorImpl::haveNumNodesReachedOpTime(const OpTime& targetOpTime, - int numNodes, - bool durablyWritten) { - // Replication progress that is for some reason ahead of us should not allow us to - // satisfy a write concern if we aren't caught up ourselves. - OpTime myOpTime = durablyWritten ? getMyLastDurableOpTime() : getMyLastAppliedOpTime(); - if (myOpTime < targetOpTime) { - return false; - } - - for (auto&& memberHeartbeatData : _hbdata) { - const OpTime& memberOpTime = durablyWritten ? memberHeartbeatData.getLastDurableOpTime() - : memberHeartbeatData.getLastAppliedOpTime(); - if (memberOpTime >= targetOpTime) { - --numNodes; - } - - if (numNodes <= 0) { - return true; - } - } - return false; -} - -bool TopologyCoordinatorImpl::haveTaggedNodesReachedOpTime(const OpTime& opTime, - const ReplSetTagPattern& tagPattern, - bool durablyWritten) { - ReplSetTagMatch matcher(tagPattern); - for (auto&& memberHeartbeatData : _hbdata) { - const OpTime& memberOpTime = durablyWritten ? memberHeartbeatData.getLastDurableOpTime() - : memberHeartbeatData.getLastAppliedOpTime(); - if (memberOpTime >= opTime) { - // This node has reached the desired optime, now we need to check if it is a part - // of the tagPattern. - int memberIndex = memberHeartbeatData.getConfigIndex(); - invariant(memberIndex >= 0); - const MemberConfig& memberConfig = _rsConfig.getMemberAt(memberIndex); - for (MemberConfig::TagIterator it = memberConfig.tagsBegin(); - it != memberConfig.tagsEnd(); - ++it) { - if (matcher.update(*it)) { - return true; - } - } - } - } - return false; -} - -HeartbeatResponseAction TopologyCoordinatorImpl::checkMemberTimeouts(Date_t now) { - HeartbeatResponseAction result = HeartbeatResponseAction::makeNoAction(); - for (int memberIndex = 0; memberIndex < static_cast<int>(_hbdata.size()); memberIndex++) { - auto& memberHeartbeatData = _hbdata[memberIndex]; - if (!memberHeartbeatData.isSelf() && !memberHeartbeatData.lastUpdateStale() && - now - memberHeartbeatData.getLastUpdate() >= _rsConfig.getElectionTimeoutPeriod()) { - memberHeartbeatData.markLastUpdateStale(); - if (getMemberState().primary()) { - HeartbeatResponseAction action = setMemberAsDown(now, memberIndex); - if (action.getAction() != HeartbeatResponseAction::NoAction) { - invariant(action.getAction() == HeartbeatResponseAction::StepDownSelf); - result = action; - } - } - } - } - return result; -} - -std::vector<HostAndPort> TopologyCoordinatorImpl::getHostsWrittenTo(const OpTime& op, - bool durablyWritten, - bool skipSelf) { - std::vector<HostAndPort> hosts; - for (const auto& memberHeartbeatData : _hbdata) { - if (skipSelf && memberHeartbeatData.isSelf()) { - continue; - } - - if (durablyWritten) { - if (memberHeartbeatData.getLastDurableOpTime() < op) { - continue; - } - } else if (memberHeartbeatData.getLastAppliedOpTime() < op) { - continue; - } - - hosts.push_back(memberHeartbeatData.getHostAndPort()); - } - return hosts; -} - HeartbeatResponseAction TopologyCoordinatorImpl::setMemberAsDown(Date_t now, - const int memberIndex) { + const int memberIndex, + const OpTime& myLastOpApplied) { invariant(memberIndex != _selfIndex); invariant(memberIndex != -1); invariant(_currentPrimaryIndex == _selfIndex); MemberHeartbeatData& hbData = _hbdata.at(memberIndex); hbData.setDownValues(now, "no response within election timeout period"); - if (CannotSeeMajority & _getMyUnelectableReason(now, false)) { + if (CannotSeeMajority & _getMyUnelectableReason(now, myLastOpApplied, false)) { if (_stepDownPending) { return HeartbeatResponseAction::makeNoAction(); } @@ -1202,79 +1102,11 @@ HeartbeatResponseAction TopologyCoordinatorImpl::setMemberAsDown(Date_t now, return HeartbeatResponseAction::makeNoAction(); } -std::pair<int, Date_t> TopologyCoordinatorImpl::getStalestLiveMember() const { - Date_t earliestDate = Date_t::max(); - int earliestMemberId = -1; - for (const auto& memberHeartbeatData : _hbdata) { - if (memberHeartbeatData.isSelf()) { - continue; - } - if (memberHeartbeatData.lastUpdateStale()) { - // Already stale. - continue; - } - LOG(3) << "memberHeartbeatData lastupdate is: " << memberHeartbeatData.getLastUpdate(); - if (earliestDate > memberHeartbeatData.getLastUpdate()) { - earliestDate = memberHeartbeatData.getLastUpdate(); - earliestMemberId = memberHeartbeatData.getMemberId(); - } - } - LOG(3) << "stalest member " << earliestMemberId << " date: " << earliestDate; - return std::make_pair(earliestMemberId, earliestDate); -} - -void TopologyCoordinatorImpl::resetAllMemberTimeouts(Date_t now) { - for (auto&& memberHeartbeatData : _hbdata) - memberHeartbeatData.updateLiveness(now); -} - -void TopologyCoordinatorImpl::resetMemberTimeouts( - Date_t now, const stdx::unordered_set<HostAndPort>& member_set) { - for (auto&& memberHeartbeatData : _hbdata) { - if (member_set.count(memberHeartbeatData.getHostAndPort())) - memberHeartbeatData.updateLiveness(now); - } -} - -OpTime TopologyCoordinatorImpl::getMyLastAppliedOpTime() const { - return _selfMemberHeartbeatData().getLastAppliedOpTime(); -} - -OpTime TopologyCoordinatorImpl::getMyLastDurableOpTime() const { - return _selfMemberHeartbeatData().getLastDurableOpTime(); -} - -MemberHeartbeatData* TopologyCoordinatorImpl::getMyMemberHeartbeatData() { - return &_hbdata[_selfMemberHeartbeatDataIndex()]; -} - -MemberHeartbeatData* TopologyCoordinatorImpl::findMemberHeartbeatDataByMemberId( - const int memberId) { - const int memberIndex = _getMemberIndex(memberId); - if (memberIndex >= 0) - return &_hbdata[memberIndex]; - return nullptr; -} - -MemberHeartbeatData* TopologyCoordinatorImpl::findMemberHeartbeatDataByRid(const OID rid) { - for (auto& memberHeartbeatData : _hbdata) { - if (memberHeartbeatData.getRid() == rid) - return &memberHeartbeatData; - } - return nullptr; -} - -MemberHeartbeatData* TopologyCoordinatorImpl::addSlaveMemberData(const OID rid) { - invariant(!_hbdata.empty()); // Must always have our own entry first. - invariant(!_rsConfig.isInitialized()); // Used only for master-slave. - _hbdata.emplace_back(); - auto* result = &_hbdata.back(); - result->setRid(rid); - return result; -} - HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBDataV1( - int updatedConfigIndex, const MemberState& originalState, Date_t now) { + int updatedConfigIndex, + const MemberState& originalState, + Date_t now, + const OpTime& lastOpApplied) { // // Updates the local notion of which remote node, if any is primary. // Start the priority takeover process if we are eligible. @@ -1329,7 +1161,10 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBDataV1( } HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData( - int updatedConfigIndex, const MemberState& originalState, Date_t now) { + int updatedConfigIndex, + const MemberState& originalState, + Date_t now, + const OpTime& lastOpApplied) { // This method has two interrelated responsibilities, performed in two phases. // // First, it updates the local notion of which remote node, if any is primary. In the @@ -1364,17 +1199,17 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData( // have them/me stepdown. if (_currentPrimaryIndex != -1) { // check if we should ask the primary (possibly ourselves) to step down - const int highestPriorityIndex = _getHighestPriorityElectableIndex(now); + const int highestPriorityIndex = _getHighestPriorityElectableIndex(now, lastOpApplied); if (highestPriorityIndex != -1) { const MemberConfig& currentPrimaryMember = _rsConfig.getMemberAt(_currentPrimaryIndex); const MemberConfig& highestPriorityMember = _rsConfig.getMemberAt(highestPriorityIndex); const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex - ? getMyLastAppliedOpTime() - : _hbdata.at(highestPriorityIndex).getHeartbeatAppliedOpTime(); + ? lastOpApplied + : _hbdata.at(highestPriorityIndex).getAppliedOpTime(); if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) && - _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime)) { - const OpTime latestOpTime = _latestKnownOpTime(); + _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime, lastOpApplied)) { + const OpTime latestOpTime = _latestKnownOpTime(lastOpApplied); if (_iAmPrimary()) { if (_stepDownPending) { @@ -1478,7 +1313,7 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData( // If we are primary, check if we can still see majority of the set; // stepdown if we can't. if (_iAmPrimary()) { - if (CannotSeeMajority & _getMyUnelectableReason(now, false)) { + if (CannotSeeMajority & _getMyUnelectableReason(now, lastOpApplied, false)) { if (_stepDownPending) { return HeartbeatResponseAction::makeNoAction(); } @@ -1507,17 +1342,18 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData( } // At this point, there is no primary anywhere. Check to see if we should become a candidate. - const auto status = checkShouldStandForElection(now); + const auto status = checkShouldStandForElection(now, lastOpApplied); if (!status.isOK()) { // NOTE: This log line is checked in unit test(s). LOG(2) << "TopologyCoordinatorImpl::_updatePrimaryFromHBData - " << status.reason(); return HeartbeatResponseAction::makeNoAction(); } - fassertStatusOK(28816, becomeCandidateIfElectable(now, false)); + fassertStatusOK(28816, becomeCandidateIfElectable(now, lastOpApplied, false)); return HeartbeatResponseAction::makeElectAction(); } -Status TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now) const { +Status TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now, + const OpTime& lastOpApplied) const { if (_currentPrimaryIndex != -1) { return {ErrorCodes::NodeNotElectable, "Not standing for election since there is a Primary"}; } @@ -1527,15 +1363,16 @@ Status TopologyCoordinatorImpl::checkShouldStandForElection(Date_t now) const { return {ErrorCodes::NodeNotElectable, "Not standing for election again; already candidate"}; } - const UnelectableReasonMask unelectableReason = _getMyUnelectableReason(now, false); + const UnelectableReasonMask unelectableReason = + _getMyUnelectableReason(now, lastOpApplied, false); if (NotCloseEnoughToLatestOptime & unelectableReason) { return {ErrorCodes::NodeNotElectable, str::stream() << "Not standing for election because " << _getUnelectableReasonString(unelectableReason) << "; my last optime is " - << getMyLastAppliedOpTime().toString() + << lastOpApplied.toString() << " and the newest is " - << _latestKnownOpTime().toString()}; + << _latestKnownOpTime(lastOpApplied).toString()}; } if (unelectableReason) { return {ErrorCodes::NodeNotElectable, @@ -1590,14 +1427,16 @@ bool TopologyCoordinatorImpl::_canSeeHealthyPrimaryOfEqualOrGreaterPriority( return false; } -bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime) const { - const OpTime latestKnownOpTime = _latestKnownOpTime(); +bool TopologyCoordinatorImpl::_isOpTimeCloseEnoughToLatestToElect( + const OpTime& otherOpTime, const OpTime& ourLastOpApplied) const { + const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied); // Use addition instead of subtraction to avoid overflow. return otherOpTime.getSecs() + 10 >= (latestKnownOpTime.getSecs()); } -bool TopologyCoordinatorImpl::_amIFreshEnoughForPriorityTakeover() const { - const OpTime latestKnownOpTime = _latestKnownOpTime(); +bool TopologyCoordinatorImpl::_amIFreshEnoughForPriorityTakeover( + const OpTime& ourLastOpApplied) const { + const OpTime latestKnownOpTime = _latestKnownOpTime(ourLastOpApplied); // Rules are: // - If the terms don't match, we don't call for priority takeover. @@ -1610,7 +1449,6 @@ bool TopologyCoordinatorImpl::_amIFreshEnoughForPriorityTakeover() const { // component of all future oplog entries generated will be the same, until real world time // passes the timestamp component of the last oplog entry. - const OpTime ourLastOpApplied = getMyLastAppliedOpTime(); if (ourLastOpApplied.getTerm() != latestKnownOpTime.getTerm()) { return false; } @@ -1632,15 +1470,15 @@ bool TopologyCoordinatorImpl::_iAmPrimary() const { return false; } -OpTime TopologyCoordinatorImpl::_latestKnownOpTime() const { - OpTime latest = getMyLastAppliedOpTime(); +OpTime TopologyCoordinatorImpl::_latestKnownOpTime(const OpTime& ourLastOpApplied) const { + OpTime latest = ourLastOpApplied; + for (std::vector<MemberHeartbeatData>::const_iterator it = _hbdata.begin(); it != _hbdata.end(); ++it) { - // Ignore self - // TODO(russotto): Simplify when heartbeat and spanning tree times are combined. - if (it->isSelf()) { + if (indexOfIterator(_hbdata, it) == _selfIndex) { continue; } + // Ignore down members if (!it->up()) { continue; @@ -1650,7 +1488,7 @@ OpTime TopologyCoordinatorImpl::_latestKnownOpTime() const { continue; } - OpTime optime = it->getHeartbeatAppliedOpTime(); + OpTime optime = it->getAppliedOpTime(); if (optime > latest) { latest = optime; @@ -1672,12 +1510,13 @@ bool TopologyCoordinatorImpl::_isMemberHigherPriority(int memberOneIndex, _rsConfig.getMemberAt(memberTwoIndex).getPriority(); } -int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(Date_t now) const { +int TopologyCoordinatorImpl::_getHighestPriorityElectableIndex(Date_t now, + const OpTime& lastOpApplied) const { int maxIndex = -1; for (int currentIndex = 0; currentIndex < _rsConfig.getNumMembers(); currentIndex++) { UnelectableReasonMask reason = currentIndex == _selfIndex - ? _getMyUnelectableReason(now, false) - : _getUnelectableReason(currentIndex); + ? _getMyUnelectableReason(now, lastOpApplied, false) + : _getUnelectableReason(currentIndex, lastOpApplied); if (None == reason && _isMemberHigherPriority(currentIndex, maxIndex)) { maxIndex = currentIndex; } @@ -1713,7 +1552,7 @@ void TopologyCoordinatorImpl::changeMemberState_forTest(const MemberState& newMe } break; case MemberState::RS_STARTUP: - updateConfig(ReplSetConfig(), -1, Date_t()); + updateConfig(ReplSetConfig(), -1, Date_t(), OpTime()); break; default: severe() << "Cannot switch to state " << newMemberState; @@ -1738,7 +1577,7 @@ void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) { ReplSetHeartbeatResponse hbResponse; hbResponse.setState(MemberState::RS_PRIMARY); hbResponse.setElectionTime(Timestamp()); - hbResponse.setAppliedOpTime(_hbdata.at(primaryIndex).getHeartbeatAppliedOpTime()); + hbResponse.setAppliedOpTime(_hbdata.at(primaryIndex).getAppliedOpTime()); hbResponse.setSyncingTo(HostAndPort()); hbResponse.setHbMsg(""); _hbdata.at(primaryIndex) @@ -1764,8 +1603,8 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplSetStatusArgs& rsS vector<BSONObj> membersOut; const MemberState myState = getMemberState(); const Date_t now = rsStatusArgs.now; - const OpTime lastOpApplied = getMyLastAppliedOpTime(); - const OpTime lastOpDurable = getMyLastDurableOpTime(); + const OpTime& lastOpApplied = rsStatusArgs.lastOpApplied; + const OpTime& lastOpDurable = rsStatusArgs.lastOpDurable; const BSONObj& initialSyncStatus = rsStatusArgs.initialSyncStatus; if (_selfIndex == -1) { @@ -1849,19 +1688,16 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplSetStatusArgs& rsS it->getUpSince() != Date_t() ? durationCount<Seconds>(now - it->getUpSince()) : 0)); bb.append("uptime", uptime); if (!itConfig.isArbiter()) { + appendOpTime(&bb, "optime", it->getAppliedOpTime(), _rsConfig.getProtocolVersion()); appendOpTime( - &bb, "optime", it->getHeartbeatAppliedOpTime(), _rsConfig.getProtocolVersion()); - appendOpTime(&bb, - "optimeDurable", - it->getHeartbeatDurableOpTime(), - _rsConfig.getProtocolVersion()); + &bb, "optimeDurable", it->getDurableOpTime(), _rsConfig.getProtocolVersion()); - bb.appendDate("optimeDate", - Date_t::fromDurationSinceEpoch( - Seconds(it->getHeartbeatAppliedOpTime().getSecs()))); - bb.appendDate("optimeDurableDate", - Date_t::fromDurationSinceEpoch( - Seconds(it->getHeartbeatDurableOpTime().getSecs()))); + bb.appendDate( + "optimeDate", + Date_t::fromDurationSinceEpoch(Seconds(it->getAppliedOpTime().getSecs()))); + bb.appendDate( + "optimeDurableDate", + Date_t::fromDurationSinceEpoch(Seconds(it->getDurableOpTime().getSecs()))); } bb.appendDate("lastHeartbeat", it->getLastHeartbeat()); bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv()); @@ -1913,7 +1749,7 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplSetStatusArgs& rsS // New optimes, to hold them all. BSONObjBuilder optimes; - _lastCommittedOpTime.append(&optimes, "lastCommittedOpTime"); + rsStatusArgs.lastCommittedOpTime.append(&optimes, "lastCommittedOpTime"); if (!rsStatusArgs.readConcernMajorityOpTime.isNull()) { rsStatusArgs.readConcernMajorityOpTime.append(&optimes, "readConcernMajorityOpTime"); } @@ -1930,84 +1766,6 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplSetStatusArgs& rsS *result = Status::OK(); } -StatusWith<BSONObj> TopologyCoordinatorImpl::prepareReplSetUpdatePositionCommand( - ReplicationCoordinator::ReplSetUpdatePositionCommandStyle commandStyle, - OpTime currentCommittedSnapshotOpTime) const { - BSONObjBuilder cmdBuilder; - invariant(_rsConfig.isInitialized()); - // Do not send updates if we have been removed from the config. - if (_selfIndex == -1) { - return Status(ErrorCodes::NodeNotFound, - "This node is not in the current replset configuration."); - } - cmdBuilder.append(UpdatePositionArgs::kCommandFieldName, 1); - // Create an array containing objects each live member connected to us and for ourself. - BSONArrayBuilder arrayBuilder(cmdBuilder.subarrayStart("optimes")); - for (const auto& memberHeartbeatData : _hbdata) { - if (memberHeartbeatData.getLastAppliedOpTime().isNull()) { - // Don't include info on members we haven't heard from yet. - continue; - } - // Don't include members we think are down. - if (!memberHeartbeatData.isSelf() && memberHeartbeatData.lastUpdateStale()) { - continue; - } - - BSONObjBuilder entry(arrayBuilder.subobjStart()); - switch (commandStyle) { - case ReplicationCoordinator::ReplSetUpdatePositionCommandStyle::kNewStyle: - memberHeartbeatData.getLastDurableOpTime().append( - &entry, UpdatePositionArgs::kDurableOpTimeFieldName); - memberHeartbeatData.getLastAppliedOpTime().append( - &entry, UpdatePositionArgs::kAppliedOpTimeFieldName); - break; - case ReplicationCoordinator::ReplSetUpdatePositionCommandStyle::kOldStyle: - entry.append("_id", memberHeartbeatData.getRid()); - if (_rsConfig.getProtocolVersion() == 1) { - memberHeartbeatData.getLastDurableOpTime().append(&entry, "optime"); - } else { - entry.append("optime", - memberHeartbeatData.getLastDurableOpTime().getTimestamp()); - } - break; - } - entry.append(UpdatePositionArgs::kMemberIdFieldName, memberHeartbeatData.getMemberId()); - entry.append(UpdatePositionArgs::kConfigVersionFieldName, _rsConfig.getConfigVersion()); - } - arrayBuilder.done(); - - // Add metadata to command. Old style parsing logic will reject the metadata. - if (commandStyle == ReplicationCoordinator::ReplSetUpdatePositionCommandStyle::kNewStyle) { - prepareReplSetMetadata(currentCommittedSnapshotOpTime).writeToMetadata(&cmdBuilder); - } - return cmdBuilder.obj(); -} - -void TopologyCoordinatorImpl::fillMemberData(BSONObjBuilder* result) { - BSONArrayBuilder replicationProgress(result->subarrayStart("replicationProgress")); - { - for (const auto& memberHeartbeatData : _hbdata) { - BSONObjBuilder entry(replicationProgress.subobjStart()); - entry.append("rid", memberHeartbeatData.getRid()); - const auto lastDurableOpTime = memberHeartbeatData.getLastDurableOpTime(); - if (_rsConfig.getProtocolVersion() == 1) { - BSONObjBuilder opTime(entry.subobjStart("optime")); - opTime.append("ts", lastDurableOpTime.getTimestamp()); - opTime.append("term", lastDurableOpTime.getTerm()); - opTime.done(); - } else { - entry.append("optime", lastDurableOpTime.getTimestamp()); - } - entry.append("host", memberHeartbeatData.getHostAndPort().toString()); - if (_selfIndex >= 0) { - const int memberId = memberHeartbeatData.getMemberId(); - invariant(memberId >= 0); - entry.append("memberId", memberId); - } - } - } -} - void TopologyCoordinatorImpl::fillIsMasterForReplSet(IsMasterResponse* response) { const MemberState myState = getMemberState(); if (!_rsConfig.isInitialized()) { @@ -2161,37 +1919,24 @@ void TopologyCoordinatorImpl::_updateHeartbeatDataForReconfig(const ReplSetConfi for (ReplSetConfig::MemberIterator it = newConfig.membersBegin(); it != newConfig.membersEnd(); ++it, ++index) { const MemberConfig& newMemberConfig = *it; - MemberHeartbeatData newHeartbeatData; - for (auto&& oldMemberHeartbeatData : oldHeartbeats) { - if ((oldMemberHeartbeatData.getMemberId() == newMemberConfig.getId() && - oldMemberHeartbeatData.getHostAndPort() == newMemberConfig.getHostAndPort()) || - (index == selfIndex && oldMemberHeartbeatData.isSelf())) { - // This member existed in the old config with the same member ID and - // HostAndPort, so copy its heartbeat data over. - newHeartbeatData = oldMemberHeartbeatData; - break; - } - } - newHeartbeatData.setConfigIndex(index); - newHeartbeatData.setIsSelf(index == selfIndex); - newHeartbeatData.setHostAndPort(newMemberConfig.getHostAndPort()); - newHeartbeatData.setMemberId(newMemberConfig.getId()); - _hbdata.push_back(newHeartbeatData); - } - if (selfIndex < 0) { - // It's necessary to have self member data even if self isn't in the configuration. - // We don't need data for the other nodes (which no longer know about us, or soon won't) - _hbdata.clear(); - MemberHeartbeatData newHeartbeatData; - for (auto&& oldMemberHeartbeatData : oldHeartbeats) { - if (oldMemberHeartbeatData.isSelf()) { - newHeartbeatData = oldMemberHeartbeatData; - break; + // TODO: C++11: use emplace_back() + if (index == selfIndex) { + // Insert placeholder for ourself, though we will never consult it. + _hbdata.push_back(MemberHeartbeatData()); + } else { + MemberHeartbeatData newHeartbeatData; + for (int oldIndex = 0; oldIndex < _rsConfig.getNumMembers(); ++oldIndex) { + const MemberConfig& oldMemberConfig = _rsConfig.getMemberAt(oldIndex); + if (oldMemberConfig.getId() == newMemberConfig.getId() && + oldMemberConfig.getHostAndPort() == newMemberConfig.getHostAndPort()) { + // This member existed in the old config with the same member ID and + // HostAndPort, so copy its heartbeat data over. + newHeartbeatData = oldHeartbeats[oldIndex]; + break; + } } + _hbdata.push_back(newHeartbeatData); } - newHeartbeatData.setConfigIndex(-1); - newHeartbeatData.setIsSelf(true); - _hbdata.push_back(newHeartbeatData); } } @@ -2199,7 +1944,8 @@ void TopologyCoordinatorImpl::_updateHeartbeatDataForReconfig(const ReplSetConfi // that reflect the new config. void TopologyCoordinatorImpl::updateConfig(const ReplSetConfig& newConfig, int selfIndex, - Date_t now) { + Date_t now, + const OpTime& lastOpApplied) { invariant(_role != Role::candidate); invariant(selfIndex < newConfig.getNumMembers()); @@ -2262,21 +2008,8 @@ const MemberConfig& TopologyCoordinatorImpl::_selfConfig() const { return _rsConfig.getMemberAt(_selfIndex); } -const MemberHeartbeatData& TopologyCoordinatorImpl::_selfMemberHeartbeatData() const { - return _hbdata[_selfMemberHeartbeatDataIndex()]; -} - -const int TopologyCoordinatorImpl::_selfMemberHeartbeatDataIndex() const { - invariant(!_hbdata.empty()); - if (_selfIndex >= 0) - return _selfIndex; - // In master-slave mode, the first entry is for self. If there is no config - // or we're not in the config, the first-and-only entry should be for self. - return 0; -} - TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnelectableReason( - int index) const { + int index, const OpTime& lastOpApplied) const { invariant(index != _selfIndex); const MemberConfig& memberConfig = _rsConfig.getMemberAt(index); const MemberHeartbeatData& hbData = _hbdata.at(index); @@ -2291,7 +2024,7 @@ TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnel result |= NotSecondary; } if (_rsConfig.getProtocolVersion() == 0 && - !_isOpTimeCloseEnoughToLatestToElect(hbData.getHeartbeatAppliedOpTime())) { + !_isOpTimeCloseEnoughToLatestToElect(hbData.getAppliedOpTime(), lastOpApplied)) { result |= NotCloseEnoughToLatestOptime; } if (hbData.up() && hbData.isUnelectable()) { @@ -2302,9 +2035,8 @@ TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnel } TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUnelectableReason( - const Date_t now, bool isPriorityTakeover) const { + const Date_t now, const OpTime& lastApplied, bool isPriorityTakeover) const { UnelectableReasonMask result = None; - const OpTime lastApplied = getMyLastAppliedOpTime(); if (lastApplied.isNull()) { result |= NoData; } @@ -2337,13 +2069,13 @@ TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getMyUn _voteLease.when + VoteLease::leaseTime >= now) { result |= VotedTooRecently; } - if (!_isOpTimeCloseEnoughToLatestToElect(lastApplied)) { + if (!_isOpTimeCloseEnoughToLatestToElect(lastApplied, lastApplied)) { result |= NotCloseEnoughToLatestOptime; } } else { // Election rules only for protocol version 1. invariant(_rsConfig.getProtocolVersion() == 1); - if (isPriorityTakeover && !_amIFreshEnoughForPriorityTakeover()) { + if (isPriorityTakeover && !_amIFreshEnoughForPriorityTakeover(lastApplied)) { result |= NotCloseEnoughToLatestForPriorityTakeover; } } @@ -2560,7 +2292,7 @@ void TopologyCoordinatorImpl::processLoseElection() { } } -bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force) { +bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, const OpTime& lastOpApplied) { // force==true overrides all other checks. if (force) { @@ -2571,14 +2303,13 @@ bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force) { // We already checked in ReplicationCoordinator that a majority of nodes are caught up. // Here we must check that we also have at least one caught up node that is electable. - const OpTime lastOpApplied = getMyLastAppliedOpTime(); for (int memberIndex = 0; memberIndex < _rsConfig.getNumMembers(); memberIndex++) { // ignore your self if (memberIndex == _selfIndex) { continue; } - UnelectableReasonMask reason = _getUnelectableReason(memberIndex); - if (!reason && _hbdata.at(memberIndex).getHeartbeatAppliedOpTime() >= lastOpApplied) { + UnelectableReasonMask reason = _getUnelectableReason(memberIndex, lastOpApplied); + if (!reason && _hbdata.at(memberIndex).getAppliedOpTime() >= lastOpApplied) { // Found a caught up and electable node, succeed with step down. _stepDownUntil = until; _stepDownSelfAndReplaceWith(-1); @@ -2657,67 +2388,6 @@ void TopologyCoordinatorImpl::_stepDownSelfAndReplaceWith(int newPrimary) { _stepDownPending = false; } -bool TopologyCoordinatorImpl::updateLastCommittedOpTime() { - if (!getMemberState().primary() || isStepDownPending()) { - return false; - } - - // Whether we use the applied or durable OpTime for the commit point is decided here. - const bool useDurableOpTime = _rsConfig.getWriteConcernMajorityShouldJournal(); - - std::vector<OpTime> votingNodesOpTimes; - for (const auto& memberHeartbeatData : _hbdata) { - int memberIndex = memberHeartbeatData.getConfigIndex(); - invariant(memberIndex >= 0); - const auto& memberConfig = _rsConfig.getMemberAt(memberIndex); - if (memberConfig.isVoter()) { - const auto opTime = useDurableOpTime ? memberHeartbeatData.getLastDurableOpTime() - : memberHeartbeatData.getLastAppliedOpTime(); - votingNodesOpTimes.push_back(opTime); - } - } - - invariant(votingNodesOpTimes.size() > 0); - if (votingNodesOpTimes.size() < static_cast<unsigned long>(_rsConfig.getWriteMajority())) { - return false; - } - std::sort(votingNodesOpTimes.begin(), votingNodesOpTimes.end()); - - // need the majority to have this OpTime - OpTime committedOpTime = - votingNodesOpTimes[votingNodesOpTimes.size() - _rsConfig.getWriteMajority()]; - return advanceLastCommittedOpTime(committedOpTime); -} - -bool TopologyCoordinatorImpl::advanceLastCommittedOpTime(const OpTime& committedOpTime) { - if (committedOpTime == _lastCommittedOpTime) { - return false; // Hasn't changed, so ignore it. - } else if (committedOpTime < _lastCommittedOpTime) { - LOG(1) << "Ignoring older committed snapshot optime: " << committedOpTime - << ", currentCommittedOpTime: " << _lastCommittedOpTime; - return false; // This may have come from an out-of-order heartbeat. Ignore it. - } - - // This check is performed to ensure primaries do not commit an OpTime from a previous term. - if (getMemberState().primary() && committedOpTime < _firstOpTimeOfMyTerm) { - LOG(1) << "Ignoring older committed snapshot from before I became primary, optime: " - << committedOpTime << ", firstOpTimeOfMyTerm: " << _firstOpTimeOfMyTerm; - return false; - } - - LOG(2) << "Updating _lastCommittedOpTime to " << committedOpTime; - _lastCommittedOpTime = committedOpTime; - return true; -} - -OpTime TopologyCoordinatorImpl::getLastCommittedOpTime() const { - return _lastCommittedOpTime; -} - -void TopologyCoordinatorImpl::setFirstOpTimeOfMyTerm(const OpTime& newOpTime) { - _firstOpTimeOfMyTerm = newOpTime; -} - void TopologyCoordinatorImpl::adjustMaintenanceCountBy(int inc) { invariant(_role == Role::follower); _maintenanceModeCalls += inc; @@ -2755,6 +2425,7 @@ long long TopologyCoordinatorImpl::getTerm() { // replset. Passing metadata is unnecessary. bool TopologyCoordinatorImpl::shouldChangeSyncSource( const HostAndPort& currentSource, + const OpTime& myLastOpTime, const rpc::ReplSetMetadata& replMetadata, boost::optional<rpc::OplogQueryMetadata> oqMetadata, Date_t now) const { @@ -2798,12 +2469,12 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource( int primaryIndex = -1; if (oqMetadata) { currentSourceOpTime = std::max(oqMetadata->getLastOpApplied(), - _hbdata.at(currentSourceIndex).getHeartbeatAppliedOpTime()); + _hbdata.at(currentSourceIndex).getAppliedOpTime()); syncSourceIndex = oqMetadata->getSyncSourceIndex(); primaryIndex = oqMetadata->getPrimaryIndex(); } else { currentSourceOpTime = std::max(replMetadata.getLastOpVisible(), - _hbdata.at(currentSourceIndex).getHeartbeatAppliedOpTime()); + _hbdata.at(currentSourceIndex).getAppliedOpTime()); syncSourceIndex = replMetadata.getSyncSourceIndex(); primaryIndex = replMetadata.getPrimaryIndex(); } @@ -2816,7 +2487,6 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource( // Change sync source if they are not ahead of us, and don't have a sync source, // unless they are primary. - const OpTime myLastOpTime = getMyLastAppliedOpTime(); if (_rsConfig.getProtocolVersion() == 1 && syncSourceIndex == -1 && currentSourceOpTime <= myLastOpTime && primaryIndex != currentSourceIndex) { std::stringstream logMessage; @@ -2850,14 +2520,13 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource( if (it->up() && (candidateConfig.isVoter() || !_selfConfig().isVoter()) && (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) && it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) && - goalSecs < it->getHeartbeatAppliedOpTime().getSecs()) { + goalSecs < it->getAppliedOpTime().getSecs()) { log() << "Choosing new sync source because the most recent OpTime of our sync " "source, " << currentSource << ", is " << currentSourceOpTime.toString() << " which is more than " << _options.maxSyncSourceLagSecs << " behind member " << candidateConfig.getHostAndPort().toString() - << " whose most recent OpTime is " - << it->getHeartbeatAppliedOpTime().toString(); + << " whose most recent OpTime is " << it->getAppliedOpTime().toString(); invariant(itIndex != _selfIndex); return true; } @@ -2868,9 +2537,9 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource( } rpc::ReplSetMetadata TopologyCoordinatorImpl::prepareReplSetMetadata( - const OpTime& lastVisibleOpTime) const { + const OpTime& lastVisibleOpTime, const OpTime& lastCommittedOpTime) const { return rpc::ReplSetMetadata(_term, - _lastCommittedOpTime, + lastCommittedOpTime, lastVisibleOpTime, _rsConfig.getConfigVersion(), _rsConfig.getReplicaSetId(), @@ -2878,9 +2547,10 @@ rpc::ReplSetMetadata TopologyCoordinatorImpl::prepareReplSetMetadata( _rsConfig.findMemberIndexByHostAndPort(getSyncSourceAddress())); } -rpc::OplogQueryMetadata TopologyCoordinatorImpl::prepareOplogQueryMetadata(int rbid) const { - return rpc::OplogQueryMetadata(_lastCommittedOpTime, - getMyLastAppliedOpTime(), +rpc::OplogQueryMetadata TopologyCoordinatorImpl::prepareOplogQueryMetadata( + const OpTime& lastCommittedOpTime, const OpTime& lastAppliedOpTime, int rbid) const { + return rpc::OplogQueryMetadata(lastCommittedOpTime, + lastAppliedOpTime, rbid, _currentPrimaryIndex, _rsConfig.findMemberIndexByHostAndPort(getSyncSourceAddress())); @@ -2896,7 +2566,8 @@ void TopologyCoordinatorImpl::summarizeAsHtml(ReplSetHtmlSummary* output) { } void TopologyCoordinatorImpl::processReplSetRequestVotes(const ReplSetRequestVotesArgs& args, - ReplSetRequestVotesResponse* response) { + ReplSetRequestVotesResponse* response, + const OpTime& lastAppliedOpTime) { response->setTerm(_term); if (args.getTerm() < _term) { @@ -2908,7 +2579,7 @@ void TopologyCoordinatorImpl::processReplSetRequestVotes(const ReplSetRequestVot } else if (args.getSetName() != _rsConfig.getReplSetName()) { response->setVoteGranted(false); response->setReason("candidate's set name differs from mine"); - } else if (args.getLastDurableOpTime() < getMyLastAppliedOpTime()) { + } else if (args.getLastDurableOpTime() < lastAppliedOpTime) { response->setVoteGranted(false); response->setReason("candidate's data is staler than mine"); } else if (!args.isADryRun() && _lastVote.getTerm() == args.getTerm()) { @@ -2941,6 +2612,7 @@ void TopologyCoordinatorImpl::setPrimaryIndex(long long primaryIndex) { } Status TopologyCoordinatorImpl::becomeCandidateIfElectable(const Date_t now, + const OpTime& lastOpApplied, bool isPriorityTakeover) { if (_role == Role::leader) { return {ErrorCodes::NodeNotElectable, "Not standing for election again; already primary"}; @@ -2951,7 +2623,7 @@ Status TopologyCoordinatorImpl::becomeCandidateIfElectable(const Date_t now, } const UnelectableReasonMask unelectableReason = - _getMyUnelectableReason(now, isPriorityTakeover); + _getMyUnelectableReason(now, lastOpApplied, isPriorityTakeover); if (unelectableReason) { return {ErrorCodes::NodeNotElectable, str::stream() << "Not standing for election because " @@ -2992,8 +2664,8 @@ boost::optional<OpTime> TopologyCoordinatorImpl::latestKnownOpTimeSinceHeartbeat if (!peer.up()) { continue; } - if (peer.getHeartbeatAppliedOpTime() > latest) { - latest = peer.getHeartbeatAppliedOpTime(); + if (peer.getAppliedOpTime() > latest) { + latest = peer.getAppliedOpTime(); } } return latest; diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h index 16ab196842f..49771ff1728 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.h +++ b/src/mongo/db/repl/topology_coordinator_impl.h @@ -157,49 +157,51 @@ public: virtual void unblacklistSyncSource(const HostAndPort& host, Date_t now); virtual void clearSyncSourceBlacklist(); virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, + const OpTime& myLastOpTime, const rpc::ReplSetMetadata& replMetadata, boost::optional<rpc::OplogQueryMetadata> oqMetadata, Date_t now) const; virtual bool becomeCandidateIfStepdownPeriodOverAndSingleNodeSet(Date_t now); virtual void setElectionSleepUntil(Date_t newTime); virtual void setFollowerMode(MemberState::MS newMode); - virtual bool updateLastCommittedOpTime(); - virtual bool advanceLastCommittedOpTime(const OpTime& committedOpTime); - virtual OpTime getLastCommittedOpTime() const; - virtual void setFirstOpTimeOfMyTerm(const OpTime& newOpTime); virtual void adjustMaintenanceCountBy(int inc); virtual void prepareSyncFromResponse(const HostAndPort& target, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result); virtual void prepareFreshResponse(const ReplicationCoordinator::ReplSetFreshArgs& args, Date_t now, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result); virtual void prepareElectResponse(const ReplicationCoordinator::ReplSetElectArgs& args, Date_t now, + const OpTime& lastOpApplied, BSONObjBuilder* response, Status* result); virtual Status prepareHeartbeatResponse(Date_t now, const ReplSetHeartbeatArgs& args, const std::string& ourSetName, + const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response); virtual Status prepareHeartbeatResponseV1(Date_t now, const ReplSetHeartbeatArgsV1& args, const std::string& ourSetName, + const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response); virtual void prepareStatusResponse(const ReplSetStatusArgs& rsStatusArgs, BSONObjBuilder* response, Status* result); - virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand( - ReplicationCoordinator::ReplSetUpdatePositionCommandStyle commandStyle, - OpTime currentCommittedSnapshotOpTime) const; - virtual void fillIsMasterForReplSet(IsMasterResponse* response); - virtual void fillMemberData(BSONObjBuilder* result); virtual StatusWith<PrepareFreezeResponseResult> prepareFreezeResponse(Date_t now, int secs, BSONObjBuilder* response); - virtual void updateConfig(const ReplSetConfig& newConfig, int selfIndex, Date_t now); + virtual void updateConfig(const ReplSetConfig& newConfig, + int selfIndex, + Date_t now, + const OpTime& lastOpApplied); virtual std::pair<ReplSetHeartbeatArgs, Milliseconds> prepareHeartbeatRequest( Date_t now, const std::string& ourSetName, const HostAndPort& target); virtual std::pair<ReplSetHeartbeatArgsV1, Milliseconds> prepareHeartbeatRequestV1( @@ -208,46 +210,37 @@ public: Date_t now, Milliseconds networkRoundTripTime, const HostAndPort& target, - const StatusWith<ReplSetHeartbeatResponse>& hbResponse); + const StatusWith<ReplSetHeartbeatResponse>& hbResponse, + const OpTime& myLastOpApplied); virtual bool voteForMyself(Date_t now); virtual void setElectionInfo(OID electionId, Timestamp electionOpTime); virtual void processWinElection(OID electionId, Timestamp electionOpTime); virtual void processLoseElection(); - virtual Status checkShouldStandForElection(Date_t now) const; + virtual Status checkShouldStandForElection(Date_t now, const OpTime& lastOpApplied) const; virtual void setMyHeartbeatMessage(const Date_t now, const std::string& message); - virtual bool stepDown(Date_t until, bool force); + virtual bool stepDown(Date_t until, bool force, const OpTime& lastOpApplied); virtual bool stepDownIfPending(); virtual bool isStepDownPending() const; virtual Date_t getStepDownTime() const; - virtual rpc::ReplSetMetadata prepareReplSetMetadata(const OpTime& lastVisibleOpTime) const; - virtual rpc::OplogQueryMetadata prepareOplogQueryMetadata(int rbid) const; + virtual rpc::ReplSetMetadata prepareReplSetMetadata(const OpTime& lastVisibleOpTime, + const OpTime& lastCommitttedOpTime) const; + virtual rpc::OplogQueryMetadata prepareOplogQueryMetadata(const OpTime& lastCommittedOpTime, + const OpTime& lastAppliedOpTime, + int rbid) const; virtual void processReplSetRequestVotes(const ReplSetRequestVotesArgs& args, - ReplSetRequestVotesResponse* response); + ReplSetRequestVotesResponse* response, + const OpTime& lastAppliedOpTime); virtual void summarizeAsHtml(ReplSetHtmlSummary* output); virtual void loadLastVote(const LastVote& lastVote); virtual void voteForMyselfV1(); virtual void prepareForStepDown(); virtual void setPrimaryIndex(long long primaryIndex); - virtual bool haveNumNodesReachedOpTime(const OpTime& opTime, int numNodes, bool durablyWritten); - virtual bool haveTaggedNodesReachedOpTime(const OpTime& opTime, - const ReplSetTagPattern& tagPattern, - bool durablyWritten); - virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, - bool durablyWritten, - bool skipSelf); - virtual HeartbeatResponseAction setMemberAsDown(Date_t now, const int memberIndex); - virtual std::pair<int, Date_t> getStalestLiveMember() const; - virtual HeartbeatResponseAction checkMemberTimeouts(Date_t now); - virtual void resetAllMemberTimeouts(Date_t now); - virtual void resetMemberTimeouts(Date_t now, - const stdx::unordered_set<HostAndPort>& member_set); - virtual OpTime getMyLastAppliedOpTime() const; - virtual OpTime getMyLastDurableOpTime() const; - virtual MemberHeartbeatData* getMyMemberHeartbeatData(); - virtual MemberHeartbeatData* findMemberHeartbeatDataByMemberId(const int memberId); - virtual MemberHeartbeatData* findMemberHeartbeatDataByRid(const OID rid); - virtual MemberHeartbeatData* addSlaveMemberData(const OID rid); - virtual Status becomeCandidateIfElectable(const Date_t now, bool isPriorityTakeover); + virtual HeartbeatResponseAction setMemberAsDown(Date_t now, + const int memberIndex, + const OpTime& myLastOpApplied); + virtual Status becomeCandidateIfElectable(const Date_t now, + const OpTime& lastOpApplied, + bool isPriorityTakeover); virtual void setStorageEngineSupportsReadCommitted(bool supported); virtual void restartHeartbeats(); @@ -304,10 +297,12 @@ private: // Returns the current "ping" value for the given member by their address Milliseconds _getPing(const HostAndPort& host); - // Determines if we will veto the member specified by "args.id". + // Determines if we will veto the member specified by "args.id", given that the last op + // we have applied locally is "lastOpApplied". // If we veto, the errmsg will be filled in with a reason bool _shouldVetoMember(const ReplicationCoordinator::ReplSetFreshArgs& args, const Date_t& now, + const OpTime& lastOpApplied, std::string* errmsg) const; // Returns the index of the member with the matching id, or -1 if none match. @@ -322,16 +317,19 @@ private: // Is otherOpTime close enough (within 10 seconds) to the latest known optime to qualify // for an election - bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime) const; + bool _isOpTimeCloseEnoughToLatestToElect(const OpTime& otherOpTime, + const OpTime& ourLastOpApplied) const; // Is our optime close enough to the latest known optime to call for a priority takeover. - bool _amIFreshEnoughForPriorityTakeover() const; + bool _amIFreshEnoughForPriorityTakeover(const OpTime& ourLastOpApplied) const; // Returns reason why "self" member is unelectable - UnelectableReasonMask _getMyUnelectableReason(const Date_t now, bool isPriorityTakeover) const; + UnelectableReasonMask _getMyUnelectableReason(const Date_t now, + const OpTime& lastOpApplied, + bool isPriorityTakeover) const; // Returns reason why memberIndex is unelectable - UnelectableReasonMask _getUnelectableReason(int memberIndex) const; + UnelectableReasonMask _getUnelectableReason(int memberIndex, const OpTime& lastOpApplied) const; // Returns the nice text of why the node is unelectable std::string _getUnelectableReasonString(UnelectableReasonMask ur) const; @@ -340,10 +338,10 @@ private: bool _iAmPrimary() const; // Scans through all members that are 'up' and return the latest known optime. - OpTime _latestKnownOpTime() const; + OpTime _latestKnownOpTime(const OpTime& ourLastOpApplied) const; // Scans the electable set and returns the highest priority member index - int _getHighestPriorityElectableIndex(Date_t now) const; + int _getHighestPriorityElectableIndex(Date_t now, const OpTime& lastOpApplied) const; // Returns true if "one" member is higher priority than "two" member bool _isMemberHigherPriority(int memberOneIndex, int memberTwoIndex) const; @@ -351,12 +349,6 @@ private: // Helper shortcut to self config const MemberConfig& _selfConfig() const; - // Helper shortcut to self member data - const MemberHeartbeatData& _selfMemberHeartbeatData() const; - - // Index of self member in member heartbeat data. - const int _selfMemberHeartbeatDataIndex() const; - // Returns NULL if there is no primary, or the MemberConfig* for the current primary const MemberConfig* _currentPrimaryMember() const; @@ -368,10 +360,12 @@ private: */ HeartbeatResponseAction _updatePrimaryFromHBData(int updatedConfigIndex, const MemberState& originalState, - Date_t now); + Date_t now, + const OpTime& lastOpApplied); HeartbeatResponseAction _updatePrimaryFromHBDataV1(int updatedConfigIndex, const MemberState& originalState, - Date_t now); + Date_t now, + const OpTime& lastOpApplied); /** * Updates _hbdata based on the newConfig, ensuring that every member in the newConfig @@ -447,13 +441,6 @@ private: // a new term. Date_t _electionSleepUntil; - // OpTime of the latest committed operation. - OpTime _lastCommittedOpTime; - - // OpTime representing our transition to PRIMARY and the start of our term. - // _lastCommittedOpTime cannot be set to an earlier OpTime. - OpTime _firstOpTimeOfMyTerm; - // The number of calls we have had to enter maintenance mode int _maintenanceModeCalls; diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp index 29b09922cc5..0cac80e1f52 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp @@ -139,11 +139,11 @@ protected: _selfIndex = selfIndex; if (now == Date_t::fromMillisSinceEpoch(-1)) { - getTopoCoord().updateConfig(config, selfIndex, _now); + getTopoCoord().updateConfig(config, selfIndex, _now, lastOp); _now += Milliseconds(1); } else { invariant(now > _now); - getTopoCoord().updateConfig(config, selfIndex, now); + getTopoCoord().updateConfig(config, selfIndex, now, lastOp); _now = now + Milliseconds(1); } @@ -171,13 +171,15 @@ protected: const std::string& setName, MemberState memberState, const OpTime& electionTime, - const OpTime& lastOpTimeSender) { + const OpTime& lastOpTimeSender, + const OpTime& lastOpTimeReceiver) { return _receiveHeartbeatHelper(Status::OK(), member, setName, memberState, electionTime.getTimestamp(), lastOpTimeSender, + lastOpTimeReceiver, Milliseconds(1)); } @@ -195,6 +197,7 @@ protected: MemberState::RS_UNKNOWN, Timestamp(), OpTime(), + lastOpTimeReceiver, roundTripTime); } @@ -209,6 +212,7 @@ protected: memberState, Timestamp(), lastOpTimeSender, + OpTime(), roundTripTime); } @@ -219,6 +223,7 @@ private: MemberState memberState, Timestamp electionTime, const OpTime& lastOpTimeSender, + const OpTime& lastOpTimeReceiver, Milliseconds roundTripTime) { ReplSetHeartbeatResponse hb; hb.setConfigVersion(1); @@ -233,7 +238,8 @@ private: getTopoCoord().prepareHeartbeatRequest(now(), setName, member); now() += roundTripTime; - return getTopoCoord().processHeartbeatResponse(now(), roundTripTime, member, hbResponse); + return getTopoCoord().processHeartbeatResponse( + now(), roundTripTime, member, hbResponse, lastOpTimeReceiver); } private: @@ -728,9 +734,9 @@ TEST_F(TopoCoordTest, ChooseRequestedSyncSourceOnlyTheFirstTimeAfterTheSyncSourc // force should cause shouldChangeSyncSource() to return true // even if the currentSource is the force target ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("h2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("h2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("h3"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("h3"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); getTopoCoord().chooseNewSyncSource( now()++, OpTime(), TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress()); @@ -920,8 +926,12 @@ TEST_F(TopoCoordTest, NodeChangesToRecoveringWhenOnlyUnauthorizedNodesAreUp) { ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s); // Having an auth error but with another node up should bring us out of RECOVERING - HeartbeatResponseAction action = receiveUpHeartbeat( - HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(), OpTime(Timestamp(2, 0), 0)); + HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("h2"), + "rs0", + MemberState::RS_SECONDARY, + OpTime(), + OpTime(Timestamp(2, 0), 0), + OpTime(Timestamp(2, 0), 0)); ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s); // Test that the heartbeat that brings us from RECOVERING to SECONDARY doesn't initiate // an election (SERVER-17164) @@ -950,16 +960,22 @@ TEST_F(TopoCoordTest, NodeDoesNotActOnHeartbeatsWhenAbsentFromConfig) { } TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunPriorToHavingAConfig) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; // if we do not have an index in the config, we should get ErrorCodes::NotSecondary - getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NotSecondary, result); ASSERT_EQUALS("Removed and uninitialized nodes do not sync", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstArbiter) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -978,12 +994,15 @@ TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstArbiter) { << "h1"))), 0); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NotSecondary, result); ASSERT_EQUALS("arbiters don't sync", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstPrimary) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1019,13 +1038,16 @@ TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstPrimary) { makeSelfPrimary(); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); getTopoCoord()._setCurrentPrimaryForTest(0); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h3"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h3"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NotSecondary, result); ASSERT_EQUALS("primaries don't sync", result.reason()); ASSERT_EQUALS("h3:27017", response.obj()["syncFromRequested"].String()); } TEST_F(TopoCoordTest, NodeReturnsNodeNotFoundWhenSyncFromRequestsANodeNotInConfig) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1057,12 +1079,16 @@ TEST_F(TopoCoordTest, NodeReturnsNodeNotFoundWhenSyncFromRequestsANodeNotInConfi 0); setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().prepareSyncFromResponse(HostAndPort("fakemember"), &response, &result); + getTopoCoord().prepareSyncFromResponse( + HostAndPort("fakemember"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NodeNotFound, result); ASSERT_EQUALS("Could not find member \"fakemember:27017\" in replica set", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsSelf) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1095,12 +1121,15 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsSelf) { setSelfMemberState(MemberState::RS_SECONDARY); // Try to sync from self - getTopoCoord().prepareSyncFromResponse(HostAndPort("hself"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("hself"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("I cannot sync from myself", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsArbiter) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1134,12 +1163,15 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsArbiter) { // Try to sync from an arbiter - getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("Cannot sync from \"h1:27017\" because it is an arbiter", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsAnIndexNonbuilder) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1172,13 +1204,16 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsAnIndexNonbui setSelfMemberState(MemberState::RS_SECONDARY); // Try to sync from a node that doesn't build indexes - getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it does not build indexes", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsHostUnreachableWhenSyncFromRequestsADownNode) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1213,7 +1248,7 @@ TEST_F(TopoCoordTest, NodeReturnsHostUnreachableWhenSyncFromRequestsADownNode) { // Try to sync from a member that is down receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h4"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h4"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::HostUnreachable, result); ASSERT_EQUALS("I cannot reach the requested member: h4:27017", result.reason()); } @@ -1252,13 +1287,12 @@ TEST_F(TopoCoordTest, ChooseRequestedNodeWhenSyncFromRequestsAStaleNode) { << "h6"))), 0); setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); // Sync successfully from a member that is stale heartbeatFromMember( HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, staleOpTime, Milliseconds(100)); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), ourOpTime, &response, &result); ASSERT_OK(result); ASSERT_EQUALS("requested member \"h5:27017\" is more than 10 seconds behind us", response.obj()["warning"].String()); @@ -1306,7 +1340,7 @@ TEST_F(TopoCoordTest, ChooseRequestedNodeWhenSyncFromRequestsAValidNode) { heartbeatFromMember( HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), ourOpTime, &response, &result); ASSERT_OK(result); BSONObj responseObj = response.obj(); ASSERT_FALSE(responseObj.hasField("warning")); @@ -1355,7 +1389,7 @@ TEST_F(TopoCoordTest, HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); // node goes down between forceSync and chooseNewSyncSource - getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), ourOpTime, &response, &result); BSONObj responseObj = response.obj(); ASSERT_FALSE(responseObj.hasField("warning")); receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime()); @@ -1402,7 +1436,7 @@ TEST_F(TopoCoordTest, NodeReturnsUnauthorizedWhenSyncFromRequestsANodeWeAreNotAu // Try to sync from a member that is unauth'd receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime(), ErrorCodes::Unauthorized); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), ourOpTime, &response, &result); ASSERT_NOT_OK(result); ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code()); ASSERT_EQUALS("not authorized to communicate with h5:27017", result.reason()); @@ -1423,7 +1457,7 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenAskedToSyncFromANonVoterAsAVo "]}"), 0); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it is not a voter", result.reason()); } @@ -1469,7 +1503,7 @@ TEST_F(TopoCoordTest, heartbeatFromMember( HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), ourOpTime, &response, &result); ASSERT_OK(result); BSONObj responseObj = response.obj(); ASSERT_FALSE(responseObj.hasField("warning")); @@ -1482,7 +1516,7 @@ TEST_F(TopoCoordTest, HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); // Sync successfully from another up-to-date member. - getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), &response2, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), ourOpTime, &response2, &result); BSONObj response2Obj = response2.obj(); ASSERT_FALSE(response2Obj.hasField("warning")); ASSERT_EQUALS(HostAndPort("h5").toString(), response2Obj["prevSyncTarget"].String()); @@ -1504,7 +1538,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { Seconds uptimeSecs(10); Date_t curTime = heartbeatTime + uptimeSecs; Timestamp electionTime(1, 2); - OpTime oplogProgress(Timestamp(3, 4), 2); + OpTime oplogProgress(Timestamp(3, 4), 0); OpTime oplogDurable(Timestamp(3, 4), 1); OpTime lastCommittedOpTime(Timestamp(2, 3), -1); OpTime readConcernMajorityOpTime(Timestamp(4, 5), -1); @@ -1535,7 +1569,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { HostAndPort member = HostAndPort("test0:1234"); getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(1), setName, member); getTopoCoord().processHeartbeatResponse( - startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood); + startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood, OpTime()); getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(3), setName, member); Date_t timeoutTime = startupTime + Milliseconds(3) + ReplSetConfig::kDefaultHeartbeatTimeoutPeriod; @@ -1544,16 +1578,13 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, "")); getTopoCoord().processHeartbeatResponse( - timeoutTime, Milliseconds(5000), member, hbResponseDown); + timeoutTime, Milliseconds(5000), member, hbResponseDown, OpTime()); member = HostAndPort("test1:1234"); getTopoCoord().prepareHeartbeatRequest(startupTime + Milliseconds(2), setName, member); getTopoCoord().processHeartbeatResponse( - heartbeatTime, Milliseconds(4000), member, hbResponseGood); + heartbeatTime, Milliseconds(4000), member, hbResponseGood, OpTime()); makeSelfPrimary(); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(oplogProgress, startupTime); - getTopoCoord().getMyMemberHeartbeatData()->setLastDurableOpTime(oplogDurable, startupTime); - getTopoCoord().advanceLastCommittedOpTime(lastCommittedOpTime); // Now node 0 is down, node 1 is up, and for node 2 we have no heartbeat data yet. BSONObjBuilder statusBuilder; @@ -1562,6 +1593,9 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { TopologyCoordinator::ReplSetStatusArgs{ curTime, static_cast<unsigned>(durationCount<Seconds>(uptimeSecs)), + oplogProgress, + oplogDurable, + lastCommittedOpTime, readConcernMajorityOpTime, BSONObj()}, &statusBuilder, @@ -1677,6 +1711,9 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidReplicaSetConfigInResponseToGetStatusWhe TopologyCoordinator::ReplSetStatusArgs{ curTime, static_cast<unsigned>(durationCount<Seconds>(uptimeSecs)), + oplogProgress, + oplogProgress, + OpTime(), OpTime(), BSONObj()}, &statusBuilder, @@ -1695,7 +1732,7 @@ TEST_F(TopoCoordTest, NodeReturnsReplicaSetNotFoundWhenFreshnessIsCheckedPriorTo // if we do not have an index in the config, we should get ErrorCodes::ReplicaSetNotFound BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status); ASSERT_EQUALS("Cannot participate in elections because not initialized", status.reason()); ASSERT_TRUE(responseBuilder.obj().isEmpty()); @@ -1734,7 +1771,7 @@ TEST_F(TopoCoordTest, BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status); ASSERT_TRUE(responseBuilder.obj().isEmpty()); } @@ -1775,8 +1812,7 @@ TEST_F(TopoCoordTest, NodeReturnsFresherWhenFreshnessIsCheckedWithStaleConfigVer BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_EQUALS("config version stale", response["info"].String()); @@ -1822,8 +1858,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedWithAMemberWhoIsNotInTheConfig) BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_EQUALS(ourOpTime.getTimestamp(), Timestamp(response["opTime"].timestampValue())); @@ -1871,9 +1906,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedWhilePrimary) { BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -1924,8 +1957,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedWhilePrimaryExists) { BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -1978,8 +2010,7 @@ TEST_F(TopoCoordTest, NodeReturnsNotFreshestWhenFreshnessIsCheckedByALowPriority BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -2032,8 +2063,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedByANodeWeBelieveToBeDown) { BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -2087,8 +2117,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedByANodeThatIsPrimary) { BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -2140,8 +2169,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedByANodeThatIsInStartup) { BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -2193,8 +2221,7 @@ TEST_F(TopoCoordTest, VetoWhenFreshnessIsCheckedByANodeThatIsRecovering) { BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -2248,8 +2275,7 @@ TEST_F(TopoCoordTest, BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")); @@ -2299,8 +2325,7 @@ TEST_F(TopoCoordTest, RespondPositivelyWhenFreshnessIsCheckedByAnElectableNode) BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_OK(status); BSONObj response = responseBuilder.obj(); ASSERT_FALSE(response.hasField("info")) << response.toString(); @@ -2346,8 +2371,7 @@ TEST_F(TopoCoordTest, NodeReturnsBadValueWhenFreshnessIsCheckedByANodeWithOurID) BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareFreshResponse(args, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(args, Date_t(), ourOpTime, &responseBuilder, &status); ASSERT_EQUALS(ErrorCodes::BadValue, status); ASSERT_EQUALS( "Received replSetFresh command from member with the same member ID as ourself: 10", @@ -2367,8 +2391,12 @@ TEST_F(TopoCoordTest, HeartbeatFrequencyShouldBeHalfElectionTimeoutWhenArbiter) Date_t requestDate = now(); std::pair<ReplSetHeartbeatArgs, Milliseconds> uppingRequest = getTopoCoord().prepareHeartbeatRequest(requestDate, "myset", target); - auto action = getTopoCoord().processHeartbeatResponse( - requestDate, Milliseconds(0), target, makeStatusWith<ReplSetHeartbeatResponse>()); + auto action = + getTopoCoord().processHeartbeatResponse(requestDate, + Milliseconds(0), + target, + makeStatusWith<ReplSetHeartbeatResponse>(), + OpTime(Timestamp(0, 0), 0)); Date_t expected(now() + Milliseconds(2500)); ASSERT_EQUALS(expected, action.getNextHeartbeatStartDate()); } @@ -2408,7 +2436,8 @@ public: _upRequestDate, Milliseconds(0), _target, - makeStatusWith<ReplSetHeartbeatResponse>()); // We've never applied anything. + makeStatusWith<ReplSetHeartbeatResponse>(), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -2427,7 +2456,8 @@ public: _firstRequestDate + Seconds(4), // 4 seconds elapsed, retry allowed. Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network. _target, - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -2444,8 +2474,13 @@ public: BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - _firstRequestDate + Milliseconds(4000), 10, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{_firstRequestDate + Milliseconds(4000), + 10, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -2485,7 +2520,8 @@ public: // could retry. Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); // Because the first retry failed without timing out, we expect to retry immediately. @@ -2502,8 +2538,13 @@ public: BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - firstRequestDate() + Seconds(4), 10, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{firstRequestDate() + Seconds(4), + 10, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -2532,6 +2573,8 @@ public: TEST_F(HeartbeatResponseHighVerbosityTest, LogMessageAndTakeNoActionWhenReceivingAHeartbeatResponseFromANodeThatBelievesWeAreDown) { + OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); + // request heartbeat std::pair<ReplSetHeartbeatArgs, Milliseconds> request = getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2")); @@ -2547,7 +2590,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse)); + StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down")); @@ -2555,6 +2599,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, TEST_F(HeartbeatResponseHighVerbosityTest, LogMessageAndTakeNoActionWhenReceivingAHeartbeatResponseFromANodeThatIsNotInConfig) { + OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); + // request heartbeat std::pair<ReplSetHeartbeatArgs, Milliseconds> request = getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host5")); @@ -2570,7 +2616,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host5"), - StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse)); + StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config")); @@ -2578,6 +2625,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, // TODO(dannenberg) figure out why this test is useful TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) { + OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); + // request heartbeat std::pair<ReplSetHeartbeatArgs, Milliseconds> request = getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2")); @@ -2612,7 +2661,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) { now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse)); + StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("Config from heartbeat response was same as ours.")); @@ -2620,6 +2670,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataSameConfig) { // TODO(dannenberg) change the name and functionality of this to match what this claims it is TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataOldConfig) { + OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); + // request heartbeat std::pair<ReplSetHeartbeatArgs, Milliseconds> request = getTopoCoord().prepareHeartbeatRequest(now()++, "rs0", HostAndPort("host2")); @@ -2635,7 +2687,8 @@ TEST_F(HeartbeatResponseHighVerbosityTest, UpdateHeartbeatDataOldConfig) { now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse)); + StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down")); @@ -2673,7 +2726,8 @@ TEST_F(HeartbeatResponseTestOneRetry, ReconfigWhenHeartbeatResponseContainsAConf firstRequestDate() + Milliseconds(4500), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(reconfigResponse)); + StatusWith<ReplSetHeartbeatResponse>(reconfigResponse), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::Reconfig, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate()); @@ -2699,7 +2753,8 @@ TEST_F(HeartbeatResponseTestOneRetry, StepDownRemotePrimaryWhenWeWereElectedMore firstRequestDate() + Milliseconds(4500), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse)); + StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse), + OpTime()); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction()); ASSERT_EQUALS(1, action.getPrimaryConfigIndex()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate()); @@ -2728,7 +2783,8 @@ TEST_F(HeartbeatResponseTestOneRetry, StepDownSelfWhenRemoteNodeWasElectedMoreRe firstRequestDate() + Milliseconds(4500), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse)); + StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction()); ASSERT_EQUALS(0, action.getPrimaryConfigIndex()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate()); @@ -2749,14 +2805,16 @@ TEST_F(HeartbeatResponseTestOneRetry, // acknowledge the other member so that we see a majority OpTime election = OpTime(Timestamp(400, 0), 0); OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction action = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(action.getAction()); // make sure we are electable setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); ReplSetHeartbeatResponse startElectionResponse; startElectionResponse.noteReplSet(); @@ -2768,7 +2826,8 @@ TEST_F(HeartbeatResponseTestOneRetry, firstRequestDate() + Milliseconds(4500), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(startElectionResponse)); + StatusWith<ReplSetHeartbeatResponse>(startElectionResponse), + election); ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate()); @@ -2790,7 +2849,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, NodeDoesNotRetryHeartbeatsAfterFailingTw // could still retry. Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); // Because this is the second retry, rather than retry again, we expect to wait for the @@ -2801,8 +2861,13 @@ TEST_F(HeartbeatResponseTestTwoRetries, NodeDoesNotRetryHeartbeatsAfterFailingTw BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - firstRequestDate() + Milliseconds(4900), 10, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{firstRequestDate() + Milliseconds(4900), + 10, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -2846,7 +2911,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, ReconfigWhenHeartbeatResponseContainsACo firstRequestDate() + Milliseconds(4500), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(reconfigResponse)); + StatusWith<ReplSetHeartbeatResponse>(reconfigResponse), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::Reconfig, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(6500), action.getNextHeartbeatStartDate()); @@ -2872,7 +2938,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, StepDownRemotePrimaryWhenWeWereElectedMo firstRequestDate() + Milliseconds(5000), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse)); + StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse), + OpTime()); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, action.getAction()); ASSERT_EQUALS(1, action.getPrimaryConfigIndex()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate()); @@ -2901,7 +2968,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, StepDownSelfWhenRemoteNodeWasElectedMore firstRequestDate() + Milliseconds(5000), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse)); + StatusWith<ReplSetHeartbeatResponse>(electedMoreRecentlyResponse), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, action.getAction()); ASSERT_EQUALS(0, action.getPrimaryConfigIndex()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate()); @@ -2922,14 +2990,16 @@ TEST_F(HeartbeatResponseTestTwoRetries, // acknowledge the other member so that we see a majority OpTime election = OpTime(Timestamp(400, 0), 0); OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction action = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(action.getAction()); // make sure we are electable setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); ReplSetHeartbeatResponse startElectionResponse; startElectionResponse.noteReplSet(); @@ -2941,7 +3011,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, firstRequestDate() + Milliseconds(5000), // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(startElectionResponse)); + StatusWith<ReplSetHeartbeatResponse>(startElectionResponse), + election); ASSERT_EQUALS(HeartbeatResponseAction::StartElection, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); ASSERT_EQUALS(firstRequestDate() + Milliseconds(7000), action.getNextHeartbeatStartDate()); @@ -2966,7 +3037,8 @@ TEST_F(HeartbeatResponseTest, NodeDoesNotRetryHeartbeatIfTheFirstFailureTakesThe // no retry allowed. Milliseconds(4990), // Spent 4.99 of the 5 seconds in the network. target, - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -2983,7 +3055,8 @@ TEST_F(HeartbeatResponseTestOneRetry, // no retry allowed. Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3003,11 +3076,12 @@ TEST_F(HeartbeatResponseTestTwoRetries, response.setConfigVersion(5); // successful response (third response due to the two failures in setUp()) - HeartbeatResponseAction action = - getTopoCoord().processHeartbeatResponse(firstRequestDate() + Milliseconds(4500), - Milliseconds(400), - target(), - StatusWith<ReplSetHeartbeatResponse>(response)); + HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( + firstRequestDate() + Milliseconds(4500), + Milliseconds(400), + target(), + StatusWith<ReplSetHeartbeatResponse>(response), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3022,7 +3096,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, firstRequestDate() + Milliseconds(7100), Milliseconds(400), target(), - StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""})); + StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""}), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3031,8 +3106,13 @@ TEST_F(HeartbeatResponseTestTwoRetries, BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - firstRequestDate() + Milliseconds(7000), 600, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{firstRequestDate() + Milliseconds(7000), + 600, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -3049,9 +3129,12 @@ TEST_F(HeartbeatResponseTest, UpdatePrimaryIndexWhenAHeartbeatMakesNodeAwareOfAN OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3064,14 +3147,21 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election2, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election2, + election, + lastOpTimeApplied); // second primary does not change primary index ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); @@ -3085,14 +3175,21 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election2, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election2, + election, + lastOpTimeApplied); // second primary does not change primary index ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); @@ -3108,9 +3205,12 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); ASSERT_EQUALS(HeartbeatResponseAction::StepDownRemotePrimary, nextAction.getAction()); ASSERT_EQUALS(1, nextAction.getPrimaryConfigIndex()); @@ -3145,15 +3245,20 @@ TEST_F(HeartbeatResponseTest, UpdateHeartbeatDataStepDownPrimaryForHighPriorityF OpTime slightlyLessFreshLastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); nextAction = receiveUpHeartbeat(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, - slightlyLessFreshLastOpTimeApplied); + slightlyLessFreshLastOpTimeApplied, + lastOpTimeApplied); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction()); } @@ -3189,7 +3294,7 @@ TEST_F( ASSERT_EQUALS(0, getCurrentPrimaryIndex()); HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election, election); ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction()); ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex()); @@ -3203,9 +3308,8 @@ TEST_F( hbArgs.setSenderId(1); hbArgs.setSenderHost(HostAndPort("host3", 27017)); ReplSetHeartbeatResponse hbResp; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); - getTopoCoord().getMyMemberHeartbeatData()->setLastDurableOpTime(election, Date_t()); - ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", &hbResp)); + ASSERT_OK( + getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", election, election, &hbResp)); ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toString(); } @@ -3236,9 +3340,8 @@ TEST_F(HeartbeatResponseTest, makeSelfPrimary(election.getTimestamp()); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, staleTime); + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, staleTime, election); ASSERT_NO_ACTION(nextAction.getAction()); } @@ -3268,13 +3371,12 @@ TEST_F(HeartbeatResponseTest, OpTime stale = OpTime(); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election, election); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = - receiveUpHeartbeat(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, stale); + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, stale, election); ASSERT_NO_ACTION(nextAction.getAction()); } @@ -3286,9 +3388,12 @@ TEST_F(HeartbeatResponseTest, StepDownSelfWhenRemoteNodeWasElectedMoreRecently) OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(HeartbeatResponseAction::StepDownSelf, nextAction.getAction()); ASSERT_EQUALS(0, nextAction.getPrimaryConfigIndex()); // Doesn't actually do the stepdown until stepDownIfPending is called @@ -3308,8 +3413,12 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -3342,13 +3451,21 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -3366,13 +3483,21 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); @@ -3389,8 +3514,12 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -3408,13 +3537,21 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // freeze node to set stepdown wait @@ -3449,13 +3586,21 @@ TEST_F(HeartbeatResponseTest, OpTime election = OpTime(Timestamp(400, 0), 0); OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -3472,14 +3617,21 @@ TEST_F(HeartbeatResponseTest, StartElectionWhenPrimaryIsMarkedDownAndWeAreElecta OpTime lastOpTimeApplied = OpTime(Timestamp(399, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); @@ -3510,14 +3662,21 @@ TEST_F(HeartbeatResponseTest, NodeDoesNotStartElectionWhileAlreadyCandidate) { OID round = OID::gen(); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // candidate time! @@ -3527,8 +3686,12 @@ TEST_F(HeartbeatResponseTest, NodeDoesNotStartElectionWhileAlreadyCandidate) { ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); // see the downed node as SECONDARY and decide to take no action, but are still a candidate - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); // normally this would trigger StartElection, but we are already a candidate @@ -3569,14 +3732,21 @@ TEST_F(HeartbeatResponseTest, LoseElectionWhenVotingForAnotherNodeWhileRunningTh OpTime fresherOpApplied = OpTime(Timestamp(200, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // candidate time! @@ -3597,8 +3767,8 @@ TEST_F(HeartbeatResponseTest, LoseElectionWhenVotingForAnotherNodeWhileRunningTh BSONObjBuilder freshResponseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - getTopoCoord().prepareFreshResponse(freshArgs, now()++, &freshResponseBuilder, &result); + getTopoCoord().prepareFreshResponse( + freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result); BSONObj response = freshResponseBuilder.obj(); ASSERT_OK(result); ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue())); @@ -3620,7 +3790,8 @@ TEST_F(HeartbeatResponseTest, LoseElectionWhenVotingForAnotherNodeWhileRunningTh BSONObjBuilder electResponseBuilder; result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(electArgs, now()++, &electResponseBuilder, &result); + getTopoCoord().prepareElectResponse( + electArgs, now()++, OpTime(), &electResponseBuilder, &result); stopCapturingLogMessages(); response = electResponseBuilder.obj(); ASSERT_OK(result); @@ -3636,8 +3807,12 @@ TEST_F(HeartbeatResponseTest, LoseElectionWhenVotingForAnotherNodeWhileRunningTh ASSERT_FALSE(getTopoCoord().voteForMyself(now()++)); // receive a heartbeat indicating the other node was elected - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(2, getCurrentPrimaryIndex()); // make sure seeing a new primary does not change electionTime and electionId @@ -3678,14 +3853,21 @@ TEST_F(HeartbeatResponseTest, OID remoteRound = OID::gen(); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // candidate time! @@ -3704,8 +3886,8 @@ TEST_F(HeartbeatResponseTest, BSONObjBuilder freshResponseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - getTopoCoord().prepareFreshResponse(freshArgs, now()++, &freshResponseBuilder, &result); + getTopoCoord().prepareFreshResponse( + freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result); BSONObj response = freshResponseBuilder.obj(); ASSERT_OK(result); ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue())); @@ -3732,7 +3914,8 @@ TEST_F(HeartbeatResponseTest, BSONObjBuilder electResponseBuilder; result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(electArgs, now()++, &electResponseBuilder, &result); + getTopoCoord().prepareElectResponse( + electArgs, now()++, OpTime(), &electResponseBuilder, &result); stopCapturingLogMessages(); response = electResponseBuilder.obj(); ASSERT_OK(result); @@ -3768,14 +3951,21 @@ TEST_F(HeartbeatResponseTest, OID remoteRound = OID::gen(); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // candidate time! @@ -3801,8 +3991,8 @@ TEST_F(HeartbeatResponseTest, BSONObjBuilder freshResponseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - getTopoCoord().prepareFreshResponse(freshArgs, now()++, &freshResponseBuilder, &result); + getTopoCoord().prepareFreshResponse( + freshArgs, now()++, lastOpTimeApplied, &freshResponseBuilder, &result); BSONObj response = freshResponseBuilder.obj(); ASSERT_OK(result); ASSERT_EQUALS(lastOpTimeApplied.getTimestamp(), Timestamp(response["opTime"].timestampValue())); @@ -3821,7 +4011,8 @@ TEST_F(HeartbeatResponseTest, BSONObjBuilder electResponseBuilder; result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(electArgs, now()++, &electResponseBuilder, &result); + getTopoCoord().prepareElectResponse( + electArgs, now()++, OpTime(), &electResponseBuilder, &result); stopCapturingLogMessages(); response = electResponseBuilder.obj(); ASSERT_OK(result); @@ -3878,9 +4069,12 @@ TEST_F(HeartbeatResponseTest, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -3894,8 +4088,12 @@ TEST_F(HeartbeatResponseTest, ASSERT_NO_ACTION(nextAction.getAction()); nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host7"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host7"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); @@ -3978,7 +4176,8 @@ TEST_F(PrepareElectResponseTest, RespondNegativelyWhenElectCommandHasTheWrongRep BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -3991,7 +4190,7 @@ TEST_F(PrepareElectResponseTest, RespondNegativelyWhenElectCommandHasTheWrongRep // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) args.set = "rs0"; BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4008,7 +4207,8 @@ TEST_F(PrepareElectResponseTest, RespondNegativelyWhenElectCommandHasANewerConfi BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -4019,7 +4219,7 @@ TEST_F(PrepareElectResponseTest, RespondNegativelyWhenElectCommandHasANewerConfi // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) args.cfgver = 10; BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4036,7 +4236,8 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandHasAnOlderConfi BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -4047,7 +4248,7 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandHasAnOlderConfi // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) args.cfgver = 10; BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4064,7 +4265,8 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandHasANonExistent BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -4075,7 +4277,7 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandHasANonExistent // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) args.whoid = 1; BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4094,7 +4296,8 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandIsReceivedByPri BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -4105,7 +4308,7 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandIsReceivedByPri // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) getTopoCoord()._setCurrentPrimaryForTest(-1); BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4123,7 +4326,8 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandIsReceivedWhile BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -4134,7 +4338,7 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenElectCommandIsReceivedWhile // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) getTopoCoord()._setCurrentPrimaryForTest(-1); BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4153,7 +4357,8 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenAHigherPriorityNodeExistsDu BSONObjBuilder responseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_OK(result); @@ -4164,7 +4369,7 @@ TEST_F(PrepareElectResponseTest, RespondWithAVetoWhenAHigherPriorityNodeExistsDu // Make sure nay votes, do not prevent subsequent yeas (the way a yea vote would) args.whoid = 3; BSONObjBuilder responseBuilder2; - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder2, &result); BSONObj response2 = responseBuilder2.obj(); ASSERT_EQUALS(1, response2["vote"].Int()); ASSERT_EQUALS(round, response2["round"].OID()); @@ -4185,7 +4390,8 @@ TEST_F(PrepareElectResponseTest, RespondPositivelyWhenElectCommandComesFromHighe BSONObjBuilder responseBuilder; Status result = Status::OK(); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder, &result); stopCapturingLogMessages(); BSONObj response = responseBuilder.obj(); ASSERT_EQUALS(1, response["vote"].Int()); @@ -4205,7 +4411,8 @@ TEST_F(PrepareElectResponseTest, BSONObjBuilder responseBuilder1; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now += Seconds(60), &responseBuilder1, &result); + getTopoCoord().prepareElectResponse( + args, now += Seconds(60), OpTime(), &responseBuilder1, &result); stopCapturingLogMessages(); BSONObj response1 = responseBuilder1.obj(); ASSERT_OK(result); @@ -4218,7 +4425,7 @@ TEST_F(PrepareElectResponseTest, BSONObjBuilder responseBuilder2; startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now, &responseBuilder2, &result); + getTopoCoord().prepareElectResponse(args, now, OpTime(), &responseBuilder2, &result); stopCapturingLogMessages(); BSONObj response2 = responseBuilder2.obj(); ASSERT_OK(result); @@ -4233,7 +4440,7 @@ TEST_F(PrepareElectResponseTest, BSONObjBuilder responseBuilder3; startCapturingLogMessages(); - getTopoCoord().prepareElectResponse(args, now++, &responseBuilder3, &result); + getTopoCoord().prepareElectResponse(args, now++, OpTime(), &responseBuilder3, &result); stopCapturingLogMessages(); BSONObj response3 = responseBuilder3.obj(); ASSERT_OK(result); @@ -4269,7 +4476,7 @@ TEST_F(TopoCoordTest, NodeReturnsReplicaSetNotFoundWhenReceivingElectCommandWhil ReplicationCoordinator::ReplSetElectArgs args; BSONObjBuilder response; Status status = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); - getTopoCoord().prepareElectResponse(args, now(), &response, &status); + getTopoCoord().prepareElectResponse(args, now(), OpTime(), &response, &status); ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status); ASSERT_EQUALS("Cannot participate in election because not initialized", status.reason()); } @@ -4278,7 +4485,7 @@ TEST_F(TopoCoordTest, NodeReturnsReplicaSetNotFoundWhenReceivingElectCommandWhil ReplicationCoordinator::ReplSetElectArgs args; BSONObjBuilder response; Status status = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); - getTopoCoord().prepareElectResponse(args, now(), &response, &status); + getTopoCoord().prepareElectResponse(args, now(), OpTime(), &response, &status); ASSERT_EQUALS(ErrorCodes::ReplicaSetNotFound, status); ASSERT_EQUALS("Cannot participate in election because not initialized", status.reason()); } @@ -4434,9 +4641,8 @@ public: OpTime lastOpApplied, ReplSetHeartbeatResponse* response, Status* result) { - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpApplied, Date_t()); - getTopoCoord().getMyMemberHeartbeatData()->setLastDurableOpTime(lastOpApplied, Date_t()); - *result = getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", response); + *result = getTopoCoord().prepareHeartbeatResponse( + now()++, args, "rs0", lastOpApplied, lastOpApplied, response); } }; @@ -4649,7 +4855,8 @@ TEST_F(TopoCoordTest, SetConfigVersionToNegativeTwoInHeartbeatResponseWhenNoConf args.setSenderId(20); ReplSetHeartbeatResponse response; // prepare response and check the results - Status result = getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", &response); + Status result = getTopoCoord().prepareHeartbeatResponse( + now()++, args, "rs0", OpTime(), OpTime(), &response); ASSERT_OK(result); ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); @@ -4758,7 +4965,7 @@ TEST_F(TopoCoordTest, BecomeCandidateWhenReconfigToBeElectableInSingleNodeSet) { << "hself" << "priority" << 0)))); - getTopoCoord().updateConfig(cfg, 0, now()++); + getTopoCoord().updateConfig(cfg, 0, now()++, OpTime()); ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s); ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); @@ -4793,7 +5000,7 @@ TEST_F(TopoCoordTest, NodeDoesNotBecomeCandidateWhenBecomingSecondaryInSingleNod << "priority" << 0)))); - getTopoCoord().updateConfig(cfg, 0, now()++); + getTopoCoord().updateConfig(cfg, 0, now()++, OpTime()); ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s); // despite being the only node, we are unelectable, so we should not become a candidate @@ -5058,12 +5265,20 @@ TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndRepsonse) { // all three members up and secondaries setSelfMemberState(MemberState::RS_SECONDARY); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // now request from host3 and receive after host2 has been removed via reconfig @@ -5086,7 +5301,7 @@ TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndRepsonse) { hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( - now()++, Milliseconds(0), HostAndPort("host3"), hbResponse); + now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied); // now primary should be host3, index 1, and we should perform NoAction in response ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -5101,12 +5316,20 @@ TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndRepso // all three members up and secondaries setSelfMemberState(MemberState::RS_SECONDARY); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // now request from host3 and receive after host2 has been removed via reconfig @@ -5129,7 +5352,7 @@ TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndRepso hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( - now()++, Milliseconds(0), HostAndPort("host3"), hbResponse); + now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied); // primary should not be set and we should perform NoAction in response ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); @@ -5141,7 +5364,7 @@ TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceWhenMemberNotInConfig) { // "host4" since "host4" is absent from the config of version 10. ReplSetMetadata metadata(0, OpTime(), OpTime(), 10, OID(), -1, -1); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host4"), metadata, makeOplogQueryMetadata(), now())); + HostAndPort("host4"), OpTime(), metadata, makeOplogQueryMetadata(), now())); } TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceWhenMemberHasYetToHeartbeatUs) { @@ -5149,7 +5372,7 @@ TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceWhenMemberHasYetToHeartbeatU // "host2" since we do not yet have a heartbeat (and as a result do not yet have an optime) // for "host2" ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); } TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenNodeIsFreshByHeartbeatButNotMetadata) { @@ -5161,17 +5384,26 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenNodeIsFreshByHeartbea // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied), now())); @@ -5180,8 +5412,11 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenNodeIsFreshByHeartbea // set up complete, time for actual check startCapturingLogMessages(); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(0, countLogLinesContaining("Choosing new sync source")); } @@ -5195,18 +5430,27 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenNodeIsStaleByHeartbea // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_FALSE( getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(fresherLastOpTimeApplied), now())); @@ -5215,8 +5459,12 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenNodeIsStaleByHeartbea // set up complete, time for actual check startCapturingLogMessages(); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(fresherLastOpTimeApplied), boost::none, now())); + ASSERT_FALSE( + getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(fresherLastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(0, countLogLinesContaining("Choosing new sync source")); } @@ -5229,18 +5477,26 @@ TEST_F(HeartbeatResponseTest, ShouldChangeSyncSourceWhenFresherMemberExists) { // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } @@ -5255,29 +5511,37 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhileFresherMemberIsBlack // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); getTopoCoord().blacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100)); // set up complete, time for actual check ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); // unblacklist with too early a time (node should remained blacklisted) getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(90)); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); // unblacklist and it should succeed getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100)); startCapturingLogMessages(); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } @@ -5291,19 +5555,27 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenFresherMemberIsDown) // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); } TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenFresherMemberIsNotReadable) { @@ -5315,17 +5587,25 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenFresherMemberIsNotRea // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_ROLLBACK, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_ROLLBACK, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); } TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenFresherMemberDoesNotBuildIndexes) { @@ -5353,16 +5633,24 @@ TEST_F(HeartbeatResponseTest, ShouldNotChangeSyncSourceWhenFresherMemberDoesNotB << "priority" << 0))), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); } TEST_F(HeartbeatResponseTest, @@ -5395,17 +5683,25 @@ TEST_F(HeartbeatResponseTest, << "priority" << 0))), 0); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } @@ -5427,7 +5723,7 @@ TEST_F(TopoCoordTest, ShouldNotStandForElectionWhileAwareOfPrimary) { heartbeatFromMember( HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(Timestamp(1, 0), 0)); - const auto status = getTopoCoord().checkShouldStandForElection(now()++); + const auto status = getTopoCoord().checkShouldStandForElection(now()++, OpTime()); ASSERT_EQ(ErrorCodes::NodeNotElectable, status); ASSERT_STRING_CONTAINS(status.reason(), "there is a Primary"); } @@ -5449,7 +5745,8 @@ TEST_F(TopoCoordTest, ShouldNotStandForElectionWhileTooStale) { heartbeatFromMember( HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(10000, 0), 0)); - const auto status = getTopoCoord().checkShouldStandForElection(now()++); + const auto status = + getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(100, 0), 0)); ASSERT_EQ(ErrorCodes::NodeNotElectable, status); ASSERT_STRING_CONTAINS(status.reason(), "my last optime is"); } @@ -5490,7 +5787,8 @@ TEST_F(TopoCoordTest, NodeReturnsArbiterWhenGetMemberStateRunsAgainstArbiter) { } TEST_F(TopoCoordTest, ShouldNotStandForElectionWhileRemovedFromTheConfig) { - const auto status = getTopoCoord().checkShouldStandForElection(now()++); + const auto status = + getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(10, 0), 0)); ASSERT_EQ(ErrorCodes::NodeNotElectable, status); ASSERT_STRING_CONTAINS(status.reason(), "not a member of a valid replica set config"); } @@ -5524,14 +5822,16 @@ TEST_F(TopoCoordTest, ShouldNotStandForElectionWhenAPositiveResponseWasGivenInTh now() += Seconds(30); BSONObjBuilder electResponseBuilder; Status result = Status(ErrorCodes::InternalError, "status not set by prepareElectResponse"); - getTopoCoord().prepareElectResponse(electArgs, now()++, &electResponseBuilder, &result); + getTopoCoord().prepareElectResponse( + electArgs, now()++, OpTime(Timestamp(100, 0), 0), &electResponseBuilder, &result); BSONObj response = electResponseBuilder.obj(); ASSERT_OK(result); std::cout << response; ASSERT_EQUALS(1, response["vote"].Int()); ASSERT_EQUALS(remoteRound, response["round"].OID()); - const auto status = getTopoCoord().checkShouldStandForElection(now()++); + const auto status = + getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(10, 0), 0)); ASSERT_EQ(ErrorCodes::NodeNotElectable, status); ASSERT_STRING_CONTAINS(status.reason(), "I recently voted for "); } @@ -5563,8 +5863,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVotesToTwoDifferentNodesInTheSameTerm) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_TRUE(response.getVoteGranted()); @@ -5583,7 +5884,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVotesToTwoDifferentNodesInTheSameTerm) { ReplSetRequestVotesResponse response2; // different candidate same term, should be a problem - getTopoCoord().processReplSetRequestVotes(args2, &response2); + getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime); ASSERT_EQUALS("already voted for another candidate this term", response2.getReason()); ASSERT_FALSE(response2.getVoteGranted()); } @@ -5618,8 +5919,9 @@ TEST_F(TopoCoordTest, DryRunVoteRequestShouldNotPreventSubsequentDryRunsForThatT << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_TRUE(response.getVoteGranted()); @@ -5640,7 +5942,7 @@ TEST_F(TopoCoordTest, DryRunVoteRequestShouldNotPreventSubsequentDryRunsForThatT << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response2; - getTopoCoord().processReplSetRequestVotes(args2, &response2); + getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime); ASSERT_EQUALS("", response2.getReason()); ASSERT_TRUE(response2.getVoteGranted()); } @@ -5675,8 +5977,9 @@ TEST_F(TopoCoordTest, VoteRequestShouldNotPreventDryRunsForThatTerm) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_TRUE(response.getVoteGranted()); @@ -5697,7 +6000,7 @@ TEST_F(TopoCoordTest, VoteRequestShouldNotPreventDryRunsForThatTerm) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response2; - getTopoCoord().processReplSetRequestVotes(args2, &response2); + getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime); ASSERT_EQUALS("already voted for another candidate this term", response2.getReason()); ASSERT_FALSE(response2.getVoteGranted()); } @@ -5730,8 +6033,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenReplSetNameDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's set name differs from mine", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -5764,8 +6068,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenConfigVersionDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's config version differs from mine", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -5810,8 +6115,9 @@ TEST_F(TopoCoordTest, ArbiterDoesNotGrantVoteWhenItCanSeeAHealthyPrimaryOfEqualO << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("can see a healthy primary of equal or greater priority", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -5848,8 +6154,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's term is lower than mine", response.getReason()); ASSERT_EQUALS(2, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -5884,10 +6191,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenOpTimeIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0}; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime({Timestamp(20, 0), 0}, - Date_t()); - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime2); ASSERT_EQUALS("candidate's data is staler than mine", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -5923,8 +6229,10 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenReplSetNameDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -5945,7 +6253,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenReplSetNameDoesNotMatch) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's set name differs from mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -5982,8 +6290,10 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenConfigVersionDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -6004,7 +6314,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenConfigVersionDoesNotMatch) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's config version differs from mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -6041,8 +6351,10 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenTermIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -6062,7 +6374,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenTermIsStale) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's term is lower than mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -6099,8 +6411,10 @@ TEST_F(TopoCoordTest, GrantDryRunVoteEvenWhenTermHasBeenSeen) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -6121,7 +6435,7 @@ TEST_F(TopoCoordTest, GrantDryRunVoteEvenWhenTermHasBeenSeen) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_TRUE(response.getVoteGranted()); @@ -6158,8 +6472,10 @@ TEST_F(TopoCoordTest, DoNotGrantDryRunVoteWhenOpTimeIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -6179,10 +6495,9 @@ TEST_F(TopoCoordTest, DoNotGrantDryRunVoteWhenOpTimeIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0}; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime({Timestamp(20, 0), 0}, - Date_t()); - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime2); ASSERT_EQUALS("candidate's data is staler than mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp index 60445499198..f1026f6110c 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp @@ -129,7 +129,10 @@ protected: // Update config and set selfIndex // If "now" is passed in, set _now to now+1 - void updateConfig(BSONObj cfg, int selfIndex, Date_t now = Date_t::fromMillisSinceEpoch(-1)) { + void updateConfig(BSONObj cfg, + int selfIndex, + Date_t now = Date_t::fromMillisSinceEpoch(-1), + const OpTime& lastOp = OpTime()) { ReplSetConfig config; // Use Protocol version 1 by default. ASSERT_OK(config.initialize(cfg, true)); @@ -138,11 +141,11 @@ protected: _selfIndex = selfIndex; if (now == Date_t::fromMillisSinceEpoch(-1)) { - getTopoCoord().updateConfig(config, selfIndex, _now); + getTopoCoord().updateConfig(config, selfIndex, _now, lastOp); _now += Milliseconds(1); } else { invariant(now > _now); - getTopoCoord().updateConfig(config, selfIndex, now); + getTopoCoord().updateConfig(config, selfIndex, now, lastOp); _now = now + Milliseconds(1); } @@ -175,19 +178,22 @@ protected: const std::string& setName, MemberState memberState, const OpTime& electionTime, - const OpTime& lastOpTimeSender) { + const OpTime& lastOpTimeSender, + const OpTime& lastOpTimeReceiver) { return _receiveHeartbeatHelper(Status::OK(), member, setName, memberState, electionTime.getTimestamp(), lastOpTimeSender, + lastOpTimeReceiver, Milliseconds(1)); } HeartbeatResponseAction receiveDownHeartbeat( const HostAndPort& member, const std::string& setName, + const OpTime& lastOpTimeReceiver, ErrorCodes::Error errcode = ErrorCodes::HostUnreachable) { // timed out heartbeat to mark a node as down @@ -198,6 +204,7 @@ protected: MemberState::RS_UNKNOWN, Timestamp(), OpTime(), + lastOpTimeReceiver, roundTripTime); } @@ -212,6 +219,7 @@ protected: memberState, Timestamp(), lastOpTimeSender, + OpTime(), roundTripTime); } @@ -222,6 +230,7 @@ private: MemberState memberState, Timestamp electionTime, const OpTime& lastOpTimeSender, + const OpTime& lastOpTimeReceiver, Milliseconds roundTripTime) { ReplSetHeartbeatResponse hb; hb.setConfigVersion(1); @@ -237,7 +246,8 @@ private: getTopoCoord().prepareHeartbeatRequestV1(now(), setName, member); now() += roundTripTime; - return getTopoCoord().processHeartbeatResponse(now(), roundTripTime, member, hbResponse); + return getTopoCoord().processHeartbeatResponse( + now(), roundTripTime, member, hbResponse, lastOpTimeReceiver); } private: @@ -316,7 +326,7 @@ TEST_F(TopoCoordTest, NodeReturnsSecondaryWithMostRecentDataAsSyncSource) { ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress()); // h3 goes down - receiveDownHeartbeat(HostAndPort("h3"), "rs0"); + receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime()); getTopoCoord().chooseNewSyncSource( now()++, OpTime(), TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress()); @@ -469,33 +479,33 @@ TEST_F(TopoCoordTest, NodeReturnsClosestValidSyncSourceAsSyncSource) { ASSERT_EQUALS(HostAndPort("h4"), getTopoCoord().getSyncSourceAddress()); // h4 goes down; should choose h1 - receiveDownHeartbeat(HostAndPort("h4"), "rs0"); + receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime()); getTopoCoord().chooseNewSyncSource( now()++, lastOpTimeWeApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress()); // Primary and h1 go down; should choose h6 - receiveDownHeartbeat(HostAndPort("h1"), "rs0"); - receiveDownHeartbeat(HostAndPort("hprimary"), "rs0"); + receiveDownHeartbeat(HostAndPort("h1"), "rs0", OpTime()); + receiveDownHeartbeat(HostAndPort("hprimary"), "rs0", OpTime()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); getTopoCoord().chooseNewSyncSource( now()++, lastOpTimeWeApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h6"), getTopoCoord().getSyncSourceAddress()); // h6 goes down; should choose h5 - receiveDownHeartbeat(HostAndPort("h6"), "rs0"); + receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime()); getTopoCoord().chooseNewSyncSource( now()++, lastOpTimeWeApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h5"), getTopoCoord().getSyncSourceAddress()); // h5 goes down; should choose h3 - receiveDownHeartbeat(HostAndPort("h5"), "rs0"); + receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime()); getTopoCoord().chooseNewSyncSource( now()++, lastOpTimeWeApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h3"), getTopoCoord().getSyncSourceAddress()); // h3 goes down; no sync source candidates remain - receiveDownHeartbeat(HostAndPort("h3"), "rs0"); + receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime()); getTopoCoord().chooseNewSyncSource( now()++, lastOpTimeWeApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT(getTopoCoord().getSyncSourceAddress().empty()); @@ -546,7 +556,7 @@ TEST_F(TopoCoordTest, NodeWontChooseSyncSourceFromOlderTerm) { ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress()); // h1 goes down; no sync source candidates remain - receiveDownHeartbeat(HostAndPort("h1"), "rs0"); + receiveDownHeartbeat(HostAndPort("h1"), "rs0", OpTime()); getTopoCoord().chooseNewSyncSource( now()++, lastOpTimeWeApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT(getTopoCoord().getSyncSourceAddress().empty()); @@ -772,14 +782,20 @@ TEST_F(TopoCoordTest, ChooseRequestedSyncSourceOnlyTheFirstTimeAfterTheSyncSourc getTopoCoord().setForceSyncSourceIndex(1); // force should cause shouldChangeSyncSource() to return true // even if the currentSource is the force target + ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h2"), + OpTime(), + makeReplSetMetadata(), + makeOplogQueryMetadata(oldOpTime), + now())); + ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("h3"), + OpTime(), + makeReplSetMetadata(), + makeOplogQueryMetadata(newOpTime), + now())); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("h2"), makeReplSetMetadata(), makeOplogQueryMetadata(oldOpTime), now())); - ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("h3"), makeReplSetMetadata(), makeOplogQueryMetadata(newOpTime), now())); - ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("h2"), makeReplSetMetadata(oldOpTime), boost::none, now())); + HostAndPort("h2"), OpTime(), makeReplSetMetadata(oldOpTime), boost::none, now())); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("h3"), makeReplSetMetadata(newOpTime), boost::none, now())); + HostAndPort("h3"), OpTime(), makeReplSetMetadata(newOpTime), boost::none, now())); getTopoCoord().chooseNewSyncSource( now()++, OpTime(), TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h2"), getTopoCoord().getSyncSourceAddress()); @@ -949,8 +965,8 @@ TEST_F(TopoCoordTest, NodeChangesToRecoveringWhenOnlyUnauthorizedNodesAreUp) { // Good state setup done // Mark nodes down, ensure that we have no source and are secondary - receiveDownHeartbeat(HostAndPort("h2"), "rs0", ErrorCodes::NetworkTimeout); - receiveDownHeartbeat(HostAndPort("h3"), "rs0", ErrorCodes::NetworkTimeout); + receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::NetworkTimeout); + receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::NetworkTimeout); ASSERT_TRUE(getTopoCoord() .chooseNewSyncSource(now()++, OpTime(), @@ -959,8 +975,8 @@ TEST_F(TopoCoordTest, NodeChangesToRecoveringWhenOnlyUnauthorizedNodesAreUp) { ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s); // Mark nodes down + unauth, ensure that we have no source and are secondary - receiveDownHeartbeat(HostAndPort("h2"), "rs0", ErrorCodes::Unauthorized); - receiveDownHeartbeat(HostAndPort("h3"), "rs0", ErrorCodes::Unauthorized); + receiveDownHeartbeat(HostAndPort("h2"), "rs0", OpTime(), ErrorCodes::Unauthorized); + receiveDownHeartbeat(HostAndPort("h3"), "rs0", OpTime(), ErrorCodes::Unauthorized); ASSERT_TRUE(getTopoCoord() .chooseNewSyncSource(now()++, OpTime(), @@ -969,10 +985,12 @@ TEST_F(TopoCoordTest, NodeChangesToRecoveringWhenOnlyUnauthorizedNodesAreUp) { ASSERT_EQUALS(MemberState::RS_RECOVERING, getTopoCoord().getMemberState().s); // Having an auth error but with another node up should bring us out of RECOVERING - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(OpTime(Timestamp(2, 0), 0), - Date_t()); - HeartbeatResponseAction action = receiveUpHeartbeat( - HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(), OpTime(Timestamp(2, 0), 0)); + HeartbeatResponseAction action = receiveUpHeartbeat(HostAndPort("h2"), + "rs0", + MemberState::RS_SECONDARY, + OpTime(), + OpTime(Timestamp(2, 0), 0), + OpTime(Timestamp(2, 0), 0)); ASSERT_EQUALS(MemberState::RS_SECONDARY, getTopoCoord().getMemberState().s); // Test that the heartbeat that brings us from RECOVERING to SECONDARY doesn't initiate // an election (SERVER-17164) @@ -1001,16 +1019,22 @@ TEST_F(TopoCoordTest, NodeDoesNotActOnHeartbeatsWhenAbsentFromConfig) { } TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunPriorToHavingAConfig) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; // if we do not have an index in the config, we should get ErrorCodes::NotSecondary - getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NotSecondary, result); ASSERT_EQUALS("Removed and uninitialized nodes do not sync", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstArbiter) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1029,12 +1053,15 @@ TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstArbiter) { << "h1"))), 0); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NotSecondary, result); ASSERT_EQUALS("arbiters don't sync", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstPrimary) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1070,13 +1097,16 @@ TEST_F(TopoCoordTest, NodeReturnsNotSecondaryWhenSyncFromIsRunAgainstPrimary) { makeSelfPrimary(); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); getTopoCoord()._setCurrentPrimaryForTest(0); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h3"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h3"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NotSecondary, result); ASSERT_EQUALS("primaries don't sync", result.reason()); ASSERT_EQUALS("h3:27017", response.obj()["syncFromRequested"].String()); } TEST_F(TopoCoordTest, NodeReturnsNodeNotFoundWhenSyncFromRequestsANodeNotInConfig) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1108,12 +1138,16 @@ TEST_F(TopoCoordTest, NodeReturnsNodeNotFoundWhenSyncFromRequestsANodeNotInConfi 0); setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().prepareSyncFromResponse(HostAndPort("fakemember"), &response, &result); + getTopoCoord().prepareSyncFromResponse( + HostAndPort("fakemember"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::NodeNotFound, result); ASSERT_EQUALS("Could not find member \"fakemember:27017\" in replica set", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsSelf) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1146,12 +1180,15 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsSelf) { setSelfMemberState(MemberState::RS_SECONDARY); // Try to sync from self - getTopoCoord().prepareSyncFromResponse(HostAndPort("hself"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("hself"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("I cannot sync from myself", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsArbiter) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1185,12 +1222,15 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsArbiter) { // Try to sync from an arbiter - getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h1"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("Cannot sync from \"h1:27017\" because it is an arbiter", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsAnIndexNonbuilder) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1223,13 +1263,16 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenSyncFromRequestsAnIndexNonbui setSelfMemberState(MemberState::RS_SECONDARY); // Try to sync from a node that doesn't build indexes - getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it does not build indexes", result.reason()); } TEST_F(TopoCoordTest, NodeReturnsHostUnreachableWhenSyncFromRequestsADownNode) { + OpTime staleOpTime(Timestamp(1, 1), 0); + OpTime ourOpTime(Timestamp(staleOpTime.getSecs() + 11, 1), 0); + Status result = Status::OK(); BSONObjBuilder response; @@ -1262,9 +1305,9 @@ TEST_F(TopoCoordTest, NodeReturnsHostUnreachableWhenSyncFromRequestsADownNode) { setSelfMemberState(MemberState::RS_SECONDARY); // Try to sync from a member that is down - receiveDownHeartbeat(HostAndPort("h4"), "rs0"); + receiveDownHeartbeat(HostAndPort("h4"), "rs0", OpTime()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h4"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h4"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::HostUnreachable, result); ASSERT_EQUALS("I cannot reach the requested member: h4:27017", result.reason()); } @@ -1308,8 +1351,7 @@ TEST_F(TopoCoordTest, ChooseRequestedNodeWhenSyncFromRequestsAStaleNode) { heartbeatFromMember( HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, staleOpTime, Milliseconds(100)); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), ourOpTime, &response, &result); ASSERT_OK(result); ASSERT_EQUALS("requested member \"h5:27017\" is more than 10 seconds behind us", response.obj()["warning"].String()); @@ -1357,8 +1399,7 @@ TEST_F(TopoCoordTest, ChooseRequestedNodeWhenSyncFromRequestsAValidNode) { heartbeatFromMember( HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), ourOpTime, &response, &result); ASSERT_OK(result); BSONObj responseObj = response.obj(); ASSERT_FALSE(responseObj.hasField("warning")); @@ -1407,11 +1448,10 @@ TEST_F(TopoCoordTest, HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); // node goes down between forceSync and chooseNewSyncSource - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), ourOpTime, &response, &result); BSONObj responseObj = response.obj(); ASSERT_FALSE(responseObj.hasField("warning")); - receiveDownHeartbeat(HostAndPort("h6"), "rs0"); + receiveDownHeartbeat(HostAndPort("h6"), "rs0", OpTime()); HostAndPort syncSource = getTopoCoord().chooseNewSyncSource( now()++, OpTime(), TopologyCoordinator::ChainingPreference::kUseConfiguration); ASSERT_EQUALS(HostAndPort("h6"), syncSource); @@ -1453,10 +1493,9 @@ TEST_F(TopoCoordTest, NodeReturnsUnauthorizedWhenSyncFromRequestsANodeWeAreNotAu setSelfMemberState(MemberState::RS_SECONDARY); // Try to sync from a member that is unauth'd - receiveDownHeartbeat(HostAndPort("h5"), "rs0", ErrorCodes::Unauthorized); + receiveDownHeartbeat(HostAndPort("h5"), "rs0", OpTime(), ErrorCodes::Unauthorized); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), ourOpTime, &response, &result); ASSERT_NOT_OK(result); ASSERT_EQUALS(ErrorCodes::Unauthorized, result.code()); ASSERT_EQUALS("not authorized to communicate with h5:27017", result.reason()); @@ -1469,7 +1508,6 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenAskedToSyncFromANonVoterAsAVo Status result = Status::OK(); BSONObjBuilder response; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); // Test trying to sync from another node updateConfig(fromjson("{_id:'rs0', version:1, members:[" "{_id:0, host:'self'}," @@ -1478,7 +1516,7 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidOptionsWhenAskedToSyncFromANonVoterAsAVo "]}"), 0); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h2"), ourOpTime, &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidOptions, result); ASSERT_EQUALS("Cannot sync from \"h2:27017\" because it is not a voter", result.reason()); } @@ -1524,8 +1562,7 @@ TEST_F(TopoCoordTest, heartbeatFromMember( HostAndPort("h5"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(ourOpTime, Date_t()); - getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), &response, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h5"), ourOpTime, &response, &result); ASSERT_OK(result); BSONObj responseObj = response.obj(); ASSERT_FALSE(responseObj.hasField("warning")); @@ -1538,7 +1575,7 @@ TEST_F(TopoCoordTest, HostAndPort("h6"), "rs0", MemberState::RS_SECONDARY, ourOpTime, Milliseconds(100)); // Sync successfully from another up-to-date member. - getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), &response2, &result); + getTopoCoord().prepareSyncFromResponse(HostAndPort("h6"), ourOpTime, &response2, &result); BSONObj response2Obj = response2.obj(); ASSERT_FALSE(response2Obj.hasField("warning")); ASSERT_EQUALS(HostAndPort("h5").toString(), response2Obj["prevSyncTarget"].String()); @@ -1592,7 +1629,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { HostAndPort member = HostAndPort("test0:1234"); getTopoCoord().prepareHeartbeatRequestV1(startupTime + Milliseconds(1), setName, member); getTopoCoord().processHeartbeatResponse( - startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood); + startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood, OpTime()); getTopoCoord().prepareHeartbeatRequestV1(startupTime + Milliseconds(3), setName, member); Date_t timeoutTime = startupTime + Milliseconds(3) + ReplSetConfig::kDefaultHeartbeatTimeoutPeriod; @@ -1601,16 +1638,13 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, "")); getTopoCoord().processHeartbeatResponse( - timeoutTime, Milliseconds(5000), member, hbResponseDown); + timeoutTime, Milliseconds(5000), member, hbResponseDown, OpTime()); member = HostAndPort("test1:1234"); getTopoCoord().prepareHeartbeatRequestV1(startupTime + Milliseconds(2), setName, member); getTopoCoord().processHeartbeatResponse( - heartbeatTime, Milliseconds(4000), member, hbResponseGood); + heartbeatTime, Milliseconds(4000), member, hbResponseGood, OpTime()); makeSelfPrimary(); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(oplogProgress, startupTime); - getTopoCoord().getMyMemberHeartbeatData()->setLastDurableOpTime(oplogDurable, startupTime); - getTopoCoord().advanceLastCommittedOpTime(lastCommittedOpTime); // Now node 0 is down, node 1 is up, and for node 2 we have no heartbeat data yet. BSONObjBuilder statusBuilder; @@ -1619,6 +1653,9 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { TopologyCoordinator::ReplSetStatusArgs{ curTime, static_cast<unsigned>(durationCount<Seconds>(uptimeSecs)), + oplogProgress, + oplogDurable, + lastCommittedOpTime, readConcernMajorityOpTime, initialSyncStatus}, &statusBuilder, @@ -1734,6 +1771,9 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidReplicaSetConfigInResponseToGetStatusWhe TopologyCoordinator::ReplSetStatusArgs{ curTime, static_cast<unsigned>(durationCount<Seconds>(uptimeSecs)), + oplogProgress, + oplogProgress, + OpTime(), OpTime(), BSONObj()}, &statusBuilder, @@ -1754,8 +1794,12 @@ TEST_F(TopoCoordTest, HeartbeatFrequencyShouldBeHalfElectionTimeoutWhenArbiter) Date_t requestDate = now(); std::pair<ReplSetHeartbeatArgs, Milliseconds> uppingRequest = getTopoCoord().prepareHeartbeatRequest(requestDate, "myset", target); - auto action = getTopoCoord().processHeartbeatResponse( - requestDate, Milliseconds(0), target, makeStatusWith<ReplSetHeartbeatResponse>()); + auto action = + getTopoCoord().processHeartbeatResponse(requestDate, + Milliseconds(0), + target, + makeStatusWith<ReplSetHeartbeatResponse>(), + OpTime(Timestamp(0, 0), 0)); Date_t expected(now() + Milliseconds(2500)); ASSERT_EQUALS(expected, action.getNextHeartbeatStartDate()); } @@ -1782,9 +1826,11 @@ public: } void prepareHeartbeatResponseV1(const ReplSetHeartbeatArgsV1& args, + OpTime lastOpApplied, ReplSetHeartbeatResponse* response, Status* result) { - *result = getTopoCoord().prepareHeartbeatResponseV1(now()++, args, "rs0", response); + *result = getTopoCoord().prepareHeartbeatResponseV1( + now()++, args, "rs0", lastOpApplied, lastOpApplied, response); } }; @@ -1797,7 +1843,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); startCapturingLogMessages(); - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); stopCapturingLogMessages(); ASSERT_EQUALS(ErrorCodes::InconsistentReplicaSetNames, result); ASSERT(result.reason().find("repl set names do not match")) << "Actual string was \"" @@ -1829,7 +1875,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, args.setSenderId(20); ReplSetHeartbeatResponse response; Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, result); ASSERT(result.reason().find("replica set configuration is invalid or does not include us")) << "Actual string was \"" << result.reason() << '"'; @@ -1844,7 +1890,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, NodeReturnsBadValueWhenAHeartbeatRequestI args.setSenderId(10); ReplSetHeartbeatResponse response; Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); ASSERT_EQUALS(ErrorCodes::BadValue, result); ASSERT(result.reason().find("from member with the same member ID as our self")) << "Actual string was \"" << result.reason() << '"'; @@ -1859,7 +1905,8 @@ TEST_F(TopoCoordTest, SetConfigVersionToNegativeTwoInHeartbeatResponseWhenNoConf args.setSenderId(20); ReplSetHeartbeatResponse response; // prepare response and check the results - Status result = getTopoCoord().prepareHeartbeatResponseV1(now()++, args, "rs0", &response); + Status result = getTopoCoord().prepareHeartbeatResponseV1( + now()++, args, "rs0", OpTime(), OpTime(), &response); ASSERT_OK(result); // this change to true because we can now see a majority, unlike in the previous cases ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -1880,7 +1927,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); // prepare response and check the results - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); ASSERT_OK(result); ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); @@ -1900,7 +1947,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); // prepare response and check the results - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); ASSERT_OK(result); ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); @@ -1920,7 +1967,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); // prepare response and check the results - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); ASSERT_OK(result); ASSERT_TRUE(response.hasConfig()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -1941,7 +1988,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); // prepare response and check the results - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(), &response, &result); ASSERT_OK(result); ASSERT_FALSE(response.hasConfig()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -1962,10 +2009,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, SetStatePrimaryInHeartbeatResponseWhenPri Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); // prepare response and check the results - OpTime lastOpTime(Timestamp(11, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTime, Date_t()); - getTopoCoord().getMyMemberHeartbeatData()->setLastDurableOpTime(lastOpTime, Date_t()); - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(Timestamp(11, 0), 0), &response, &result); ASSERT_OK(result); ASSERT_FALSE(response.hasConfig()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -1998,10 +2042,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, Status result(ErrorCodes::InternalError, "prepareHeartbeatResponse didn't set result"); // prepare response and check the results - OpTime lastOpTime(Timestamp(100, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTime, Date_t()); - getTopoCoord().getMyMemberHeartbeatData()->setLastDurableOpTime(lastOpTime, Date_t()); - prepareHeartbeatResponseV1(args, &response, &result); + prepareHeartbeatResponseV1(args, OpTime(Timestamp(100, 0), 0), &response, &result); ASSERT_OK(result); ASSERT_FALSE(response.hasConfig()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -2046,7 +2087,7 @@ TEST_F(TopoCoordTest, BecomeCandidateWhenReconfigToBeElectableInSingleNodeSet) { << "hself" << "priority" << 0)))); - getTopoCoord().updateConfig(cfg, 0, now()++); + getTopoCoord().updateConfig(cfg, 0, now()++, OpTime()); ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s); ASSERT_FALSE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); @@ -2081,7 +2122,7 @@ TEST_F(TopoCoordTest, NodeDoesNotBecomeCandidateWhenBecomingSecondaryInSingleNod << "priority" << 0)))); - getTopoCoord().updateConfig(cfg, 0, now()++); + getTopoCoord().updateConfig(cfg, 0, now()++, OpTime()); ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s); // despite being the only node, we are unelectable, so we should not become a candidate @@ -2274,7 +2315,8 @@ TEST_F(TopoCoordTest, NodeMaintainsPrimaryStateAcrossReconfigIfNodeRemainsElecta << BSON("_id" << 2 << "host" << "host3:27017"))), 0, - Date_t::fromMillisSinceEpoch(-1)); + Date_t::fromMillisSinceEpoch(-1), + OpTime(Timestamp(10, 0), 0)); ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole()); ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s); @@ -2298,7 +2340,8 @@ TEST_F(TopoCoordTest, NodeMaintainsPrimaryStateAcrossReconfigIfNodeRemainsElecta << "rack" << "rack1")))), 0, - Date_t::fromMillisSinceEpoch(-1)); + Date_t::fromMillisSinceEpoch(-1), + OpTime(Timestamp(10, 0), 0)); ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole()); ASSERT_EQUALS(MemberState::RS_PRIMARY, getTopoCoord().getMemberState().s); } @@ -2353,7 +2396,7 @@ TEST_F(TopoCoordTest, ShouldNotStandForElectionWhileAwareOfPrimary) { heartbeatFromMember( HostAndPort("h2"), "rs0", MemberState::RS_PRIMARY, OpTime(Timestamp(1, 0), 0)); - ASSERT_NOT_OK(getTopoCoord().checkShouldStandForElection(now()++)); + ASSERT_NOT_OK(getTopoCoord().checkShouldStandForElection(now()++, OpTime())); } TEST_F(TopoCoordTest, ShouldStandForElectionDespiteNotCloseEnoughToLastOptime) { @@ -2371,11 +2414,9 @@ TEST_F(TopoCoordTest, ShouldStandForElectionDespiteNotCloseEnoughToLastOptime) { 0); setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(OpTime(Timestamp(100, 0), 0), - Date_t()); heartbeatFromMember( HostAndPort("h2"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(10000, 0), 0)); - ASSERT_OK(getTopoCoord().checkShouldStandForElection(now()++)); + ASSERT_OK(getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(100, 0), 0))); } TEST_F(TopoCoordTest, VoteForMyselfFailsWhileNotCandidate) { @@ -2414,7 +2455,8 @@ TEST_F(TopoCoordTest, NodeReturnsArbiterWhenGetMemberStateRunsAgainstArbiter) { } TEST_F(TopoCoordTest, ShouldNotStandForElectionWhileRemovedFromTheConfig) { - const auto status = getTopoCoord().checkShouldStandForElection(now()++); + const auto status = + getTopoCoord().checkShouldStandForElection(now()++, OpTime(Timestamp(10, 0), 0)); ASSERT_NOT_OK(status); ASSERT_STRING_CONTAINS(status.reason(), "not a member of a valid replica set config"); } @@ -2446,8 +2488,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVotesToTwoDifferentNodesInTheSameTerm) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_TRUE(response.getVoteGranted()); @@ -2466,7 +2509,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVotesToTwoDifferentNodesInTheSameTerm) { ReplSetRequestVotesResponse response2; // different candidate same term, should be a problem - getTopoCoord().processReplSetRequestVotes(args2, &response2); + getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime); ASSERT_EQUALS("already voted for another candidate this term", response2.getReason()); ASSERT_FALSE(response2.getVoteGranted()); } @@ -2501,8 +2544,9 @@ TEST_F(TopoCoordTest, DryRunVoteRequestShouldNotPreventSubsequentDryRunsForThatT << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_TRUE(response.getVoteGranted()); @@ -2523,7 +2567,7 @@ TEST_F(TopoCoordTest, DryRunVoteRequestShouldNotPreventSubsequentDryRunsForThatT << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response2; - getTopoCoord().processReplSetRequestVotes(args2, &response2); + getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime); ASSERT_EQUALS("", response2.getReason()); ASSERT_TRUE(response2.getVoteGranted()); @@ -2544,7 +2588,7 @@ TEST_F(TopoCoordTest, DryRunVoteRequestShouldNotPreventSubsequentDryRunsForThatT << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response3; - getTopoCoord().processReplSetRequestVotes(args3, &response3); + getTopoCoord().processReplSetRequestVotes(args3, &response3, lastAppliedOpTime); ASSERT_EQUALS("", response3.getReason()); ASSERT_TRUE(response3.getVoteGranted()); @@ -2565,7 +2609,7 @@ TEST_F(TopoCoordTest, DryRunVoteRequestShouldNotPreventSubsequentDryRunsForThatT << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response4; - getTopoCoord().processReplSetRequestVotes(args4, &response4); + getTopoCoord().processReplSetRequestVotes(args4, &response4, lastAppliedOpTime); ASSERT_EQUALS("already voted for another candidate this term", response4.getReason()); ASSERT_FALSE(response4.getVoteGranted()); } @@ -2600,8 +2644,9 @@ TEST_F(TopoCoordTest, VoteRequestShouldNotPreventDryRunsForThatTerm) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_TRUE(response.getVoteGranted()); @@ -2622,7 +2667,7 @@ TEST_F(TopoCoordTest, VoteRequestShouldNotPreventDryRunsForThatTerm) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response2; - getTopoCoord().processReplSetRequestVotes(args2, &response2); + getTopoCoord().processReplSetRequestVotes(args2, &response2, lastAppliedOpTime); ASSERT_EQUALS("already voted for another candidate this term", response2.getReason()); ASSERT_FALSE(response2.getVoteGranted()); } @@ -2655,8 +2700,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenReplSetNameDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's set name differs from mine", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -2689,8 +2735,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenConfigVersionDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's config version differs from mine", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -2727,8 +2774,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's term is lower than mine", response.getReason()); ASSERT_EQUALS(2, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -2763,10 +2811,9 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenOpTimeIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0}; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime({Timestamp(20, 0), 0}, - Date_t()); - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime2); ASSERT_EQUALS("candidate's data is staler than mine", response.getReason()); ASSERT_FALSE(response.getVoteGranted()); } @@ -2802,8 +2849,10 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenReplSetNameDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -2824,7 +2873,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenReplSetNameDoesNotMatch) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's set name differs from mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -2861,8 +2910,10 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenConfigVersionDoesNotMatch) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -2883,7 +2934,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenConfigVersionDoesNotMatch) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's config version differs from mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -2920,8 +2971,10 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenTermIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -2941,7 +2994,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenTermIsStale) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("candidate's term is lower than mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -2978,8 +3031,10 @@ TEST_F(TopoCoordTest, GrantDryRunVoteEvenWhenTermHasBeenSeen) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -3000,7 +3055,7 @@ TEST_F(TopoCoordTest, GrantDryRunVoteEvenWhenTermHasBeenSeen) { << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime); ASSERT_EQUALS("", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_TRUE(response.getVoteGranted()); @@ -3037,8 +3092,10 @@ TEST_F(TopoCoordTest, DoNotGrantDryRunVoteWhenOpTimeIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse responseForRealVote; + OpTime lastAppliedOpTime; - getTopoCoord().processReplSetRequestVotes(argsForRealVote, &responseForRealVote); + getTopoCoord().processReplSetRequestVotes( + argsForRealVote, &responseForRealVote, lastAppliedOpTime); ASSERT_EQUALS("", responseForRealVote.getReason()); ASSERT_TRUE(responseForRealVote.getVoteGranted()); @@ -3058,10 +3115,9 @@ TEST_F(TopoCoordTest, DoNotGrantDryRunVoteWhenOpTimeIsStale) { << "lastCommittedOp" << BSON("ts" << Timestamp(10, 0) << "term" << 0LL))); ReplSetRequestVotesResponse response; + OpTime lastAppliedOpTime2 = {Timestamp(20, 0), 0}; - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime({Timestamp(20, 0), 0}, - Date_t()); - getTopoCoord().processReplSetRequestVotes(args, &response); + getTopoCoord().processReplSetRequestVotes(args, &response, lastAppliedOpTime2); ASSERT_EQUALS("candidate's data is staler than mine", response.getReason()); ASSERT_EQUALS(1, response.getTerm()); ASSERT_FALSE(response.getVoteGranted()); @@ -3177,17 +3233,25 @@ TEST_F(HeartbeatResponseTestV1, << "priority" << 0))), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied), now())); @@ -3196,8 +3260,11 @@ TEST_F(HeartbeatResponseTestV1, // set up complete, time for actual check startCapturingLogMessages(); - ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied), boost::none, now())); + ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } @@ -3208,7 +3275,7 @@ TEST_F(HeartbeatResponseTestV1, NodeReturnsBadValueWhenProcessingPV0ElectionComm Status internalErrorStatus(ErrorCodes::InternalError, "didn't set status"); BSONObjBuilder responseBuilder; Status status = internalErrorStatus; - getTopoCoord().prepareFreshResponse(freshArgs, Date_t(), &responseBuilder, &status); + getTopoCoord().prepareFreshResponse(freshArgs, Date_t(), OpTime(), &responseBuilder, &status); ASSERT_EQUALS(ErrorCodes::BadValue, status); ASSERT_EQUALS("replset: incompatible replset protocol version: 1", status.reason()); ASSERT_TRUE(responseBuilder.obj().isEmpty()); @@ -3216,7 +3283,8 @@ TEST_F(HeartbeatResponseTestV1, NodeReturnsBadValueWhenProcessingPV0ElectionComm BSONObjBuilder electResponseBuilder; ReplicationCoordinator::ReplSetElectArgs electArgs; status = internalErrorStatus; - getTopoCoord().prepareElectResponse(electArgs, Date_t(), &electResponseBuilder, &status); + getTopoCoord().prepareElectResponse( + electArgs, Date_t(), OpTime(), &electResponseBuilder, &status); ASSERT_EQUALS(ErrorCodes::BadValue, status); ASSERT_EQUALS("replset: incompatible replset protocol version: 1", status.reason()); ASSERT_TRUE(electResponseBuilder.obj().isEmpty()); @@ -3229,51 +3297,76 @@ TEST_F(HeartbeatResponseTestV1, OpTime election = OpTime(); OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // Show we like host2 while it is primary. ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied, 1), now())); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied, 1), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, + makeReplSetMetadata(lastOpTimeApplied, 1), + boost::none, + now())); // Show that we also like host2 while it has a sync source. - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_FALSE( getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied, 2, 2), now())); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied, 2, 2), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, + makeReplSetMetadata(lastOpTimeApplied, 2, 2), + boost::none, + now())); // Show that we do not like it when it is not PRIMARY and lacks a sync source and lacks progress // beyond our own. - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied), now())); - ASSERT(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied), boost::none, now())); + ASSERT(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); // Sometimes the heartbeat is stale and the metadata says it's the primary. Trust the metadata. ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( HostAndPort("host2"), + lastOpTimeApplied, makeReplSetMetadata(), makeOplogQueryMetadata( lastOpTimeApplied, 1 /* host2 is primary */, -1 /* no sync source */), now())); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( HostAndPort("host2"), + lastOpTimeApplied, makeReplSetMetadata(lastOpTimeApplied, 1 /* host2 is primary */, -1 /* no sync source */), boost::none, now())); @@ -3284,15 +3377,21 @@ TEST_F(HeartbeatResponseTestV1, "rs0", MemberState::RS_SECONDARY, election, + newerThanLastOpTimeApplied, newerThanLastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_FALSE( getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, makeReplSetMetadata(), makeOplogQueryMetadata(newerThanLastOpTimeApplied), now())); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(newerThanLastOpTimeApplied), boost::none, now())); + ASSERT_FALSE( + getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + lastOpTimeApplied, + makeReplSetMetadata(newerThanLastOpTimeApplied), + boost::none, + now())); } TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberIsDown) { @@ -3300,38 +3399,40 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberIsDown // "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind // "host3", since "host3" is down OpTime election = OpTime(); - // Our last op time applied must be behind host2, or we'll hit the case where we change - // sync sources due to the sync source being behind, without a sync source, and not primary. OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0); - OpTime syncSourceOpTime = OpTime(Timestamp(400, 1), 0); // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - // while the host is up, we should want to change to its sync source - ASSERT(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), - makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), - now())); - // set up complete, time for actual check - nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), + makeOplogQueryMetadata(lastOpTimeApplied), now())); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(syncSourceOpTime), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); } TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhileFresherMemberIsBlackListed) { @@ -3340,53 +3441,69 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhileFresherMemberIsBla // "host3", since "host3" is blacklisted // Then, confirm that unblacklisting only works if time has passed the blacklist time. OpTime election = OpTime(); - // Our last op time applied must be behind host2, or we'll hit the case where we change - // sync sources due to the sync source being behind, without a sync source, and not primary. OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0); - OpTime syncSourceOpTime = OpTime(Timestamp(400, 1), 0); // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); getTopoCoord().blacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100)); // set up complete, time for actual check ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), + makeOplogQueryMetadata(lastOpTimeApplied), + now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, now())); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(syncSourceOpTime), boost::none, now())); // unblacklist with too early a time (node should remained blacklisted) getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(90)); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), + makeOplogQueryMetadata(lastOpTimeApplied), + now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, now())); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(syncSourceOpTime), boost::none, now())); // unblacklist and it should succeed getTopoCoord().unblacklistSyncSource(HostAndPort("host3"), now() + Milliseconds(100)); startCapturingLogMessages(); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), + makeOplogQueryMetadata(lastOpTimeApplied), now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); startCapturingLogMessages(); - ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(syncSourceOpTime), boost::none, now())); + ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } @@ -3400,18 +3517,26 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceIfNodeIsFreshByHeartbea // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied), now())); @@ -3419,8 +3544,11 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceIfNodeIsFreshByHeartbea ASSERT_EQUALS(0, countLogLinesContaining("Choosing new sync source")); startCapturingLogMessages(); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(0, countLogLinesContaining("Choosing new sync source")); } @@ -3434,19 +3562,27 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceIfNodeIsStaleByHeartbea // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_FALSE( getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(fresherLastOpTimeApplied), now())); @@ -3454,8 +3590,12 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceIfNodeIsStaleByHeartbea ASSERT_EQUALS(0, countLogLinesContaining("Choosing new sync source")); // set up complete, time for actual check startCapturingLogMessages(); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(fresherLastOpTimeApplied), boost::none, now())); + ASSERT_FALSE( + getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(fresherLastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(0, countLogLinesContaining("Choosing new sync source")); } @@ -3468,18 +3608,26 @@ TEST_F(HeartbeatResponseTestV1, ShouldChangeSyncSourceWhenFresherMemberExists) { // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check startCapturingLogMessages(); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(lastOpTimeApplied), now())); @@ -3488,8 +3636,11 @@ TEST_F(HeartbeatResponseTestV1, ShouldChangeSyncSourceWhenFresherMemberExists) { // set up complete, time for actual check startCapturingLogMessages(); - ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(lastOpTimeApplied), boost::none, now())); + ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); stopCapturingLogMessages(); ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } @@ -3498,7 +3649,7 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenMemberHasYetToHeart // In this test, the TopologyCoordinator should not tell us to change sync sources away from // "host2" since we do not use the member's heartbeatdata in pv1. ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); + HostAndPort("host2"), OpTime(), makeReplSetMetadata(), makeOplogQueryMetadata(), now())); } TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenMemberNotInConfig) { @@ -3506,7 +3657,7 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenMemberNotInConfig) // "host4" since "host4" is absent from the config of version 10. ReplSetMetadata replMetadata(0, OpTime(), OpTime(), 10, OID(), -1, -1); ASSERT_TRUE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host4"), replMetadata, makeOplogQueryMetadata(), now())); + HostAndPort("host4"), OpTime(), replMetadata, makeOplogQueryMetadata(), now())); } // TODO(dannenberg) figure out what this is trying to test.. @@ -3517,13 +3668,20 @@ TEST_F(HeartbeatResponseTestV1, ReconfigNodeRemovedBetweenHeartbeatRequestAndRep // all three members up and secondaries setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // now request from host3 and receive after host2 has been removed via reconfig @@ -3548,7 +3706,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigNodeRemovedBetweenHeartbeatRequestAndRep hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( - now()++, Milliseconds(0), HostAndPort("host3"), hbResponse); + now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied); // primary should not be set and we should perform NoAction in response ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); @@ -3563,13 +3721,20 @@ TEST_F(HeartbeatResponseTestV1, ReconfigBetweenHeartbeatRequestAndRepsonse) { // all three members up and secondaries setSelfMemberState(MemberState::RS_SECONDARY); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // now request from host3 and receive after host2 has been removed via reconfig @@ -3593,9 +3758,8 @@ TEST_F(HeartbeatResponseTestV1, ReconfigBetweenHeartbeatRequestAndRepsonse) { hb.setDurableOpTime(lastOpTimeApplied); hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( - now()++, Milliseconds(0), HostAndPort("host3"), hbResponse); + now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, lastOpTimeApplied); // now primary should be host3, index 1, and we should perform NoAction in response ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -3607,9 +3771,12 @@ TEST_F(HeartbeatResponseTestV1, NodeDoesNotUpdateHeartbeatDataIfNodeIsAbsentFrom OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host9"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host9"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3631,9 +3798,10 @@ TEST_F(HeartbeatResponseTestV1, RelinquishPrimaryWhenMajorityOfVotersIsNoLongerV // Lose that awareness, but we are not going to step down, because stepdown only // depends on liveness. - HeartbeatResponseAction nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + HeartbeatResponseAction nextAction = + receiveDownHeartbeat(HostAndPort("host2"), "rs0", OpTime(Timestamp(100, 0), 0)); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", OpTime(Timestamp(100, 0), 0)); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole()); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); @@ -3667,9 +3835,12 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(HeartbeatResponseAction::PriorityTakeover, nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); } @@ -3704,15 +3875,22 @@ TEST_F(HeartbeatResponseTestV1, UpdateHeartbeatDataTermPreventsPriorityTakeover) // Host 2 is the current primary in term 1. getTopoCoord().updateTerm(1, now()); ASSERT_EQUALS(getTopoCoord().getTerm(), 1); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(HeartbeatResponseAction::PriorityTakeover, nextAction.getAction()); ASSERT_EQUALS(2, getCurrentPrimaryIndex()); // Heartbeat from a secondary node shouldn't schedule a priority takeover. - nextAction = receiveUpHeartbeat( - HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host1"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(2, getCurrentPrimaryIndex()); @@ -3723,8 +3901,12 @@ TEST_F(HeartbeatResponseTestV1, UpdateHeartbeatDataTermPreventsPriorityTakeover) // This heartbeat shouldn't schedule priority takeover, because the current primary // host 1 is not in my term. - nextAction = receiveUpHeartbeat( - HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host1"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction()); ASSERT_EQUALS(2, getCurrentPrimaryIndex()); } @@ -3778,32 +3960,39 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); // Make sure all non-voting nodes are down, that way we do not have a majority of nodes // but do have a majority of votes since one of two voting members is up and so are we. - nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host3"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host4"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host4"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host5"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host5"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host6"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host7"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host7"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); // We are electable now. - ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(now(), false)); + ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(now(), lastOpTimeApplied, false)); ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); } @@ -3814,21 +4003,28 @@ TEST_F(HeartbeatResponseTestV1, ScheduleElectionWhenPrimaryIsMarkedDownAndWeAreE OpTime lastOpTimeApplied = OpTime(Timestamp(399, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); // We are electable now. - ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(now(), false)); + ASSERT_OK(getTopoCoord().becomeCandidateIfElectable(now(), lastOpTimeApplied, false)); ASSERT_TRUE(TopologyCoordinator::Role::candidate == getTopoCoord().getRole()); } @@ -3854,18 +4050,25 @@ TEST_F(HeartbeatResponseTestV1, OpTime election = OpTime(Timestamp(400, 0), 0); OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3879,21 +4082,28 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // Freeze node to set stepdown wait. BSONObjBuilder response; getTopoCoord().prepareFreezeResponse(now()++, 20, &response); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3907,13 +4117,16 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3927,17 +4140,24 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3968,18 +4188,25 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -3993,13 +4220,16 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(300, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4014,16 +4244,24 @@ TEST_F(HeartbeatResponseTestV1, NodeDoesNotStepDownSelfWhenRemoteNodeWasElectedM OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - receiveUpHeartbeat(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + election, + lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole()); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); // If the other PRIMARY falls down, this node should set its primaryIndex to itself. - nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0"); + nextAction = receiveDownHeartbeat(HostAndPort("host2"), "rs0", lastOpTimeApplied); ASSERT_TRUE(TopologyCoordinator::Role::leader == getTopoCoord().getRole()); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); @@ -4058,13 +4296,12 @@ TEST_F(HeartbeatResponseTestV1, OpTime stale = OpTime(); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election, election); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); - nextAction = - receiveUpHeartbeat(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, stale); + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, stale, election); ASSERT_NO_ACTION(nextAction.getAction()); } @@ -4098,9 +4335,8 @@ TEST_F(HeartbeatResponseTestV1, makeSelfPrimary(election.getTimestamp()); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, staleTime); + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, staleTime, election); ASSERT_NO_ACTION(nextAction.getAction()); } @@ -4134,9 +4370,8 @@ TEST_F(HeartbeatResponseTestV1, makeSelfPrimary(election.getTimestamp()); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(election, Date_t()); HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election); + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, election, election); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_EQUALS(-1, nextAction.getPrimaryConfigIndex()); } @@ -4171,16 +4406,20 @@ TEST_F(HeartbeatResponseTestV1, OpTime slightlyLessFreshLastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); nextAction = receiveUpHeartbeat(HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, - slightlyLessFreshLastOpTimeApplied); + slightlyLessFreshLastOpTimeApplied, + lastOpTimeApplied); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, nextAction.getAction()); } @@ -4192,9 +4431,12 @@ TEST_F(HeartbeatResponseTestV1, NodeDoesNotStepDownSelfWhenRemoteNodeWasElectedL OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(0, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); } @@ -4206,14 +4448,21 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election2, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election2, + election, + lastOpTimeApplied); // Second primary does not change primary index. ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); @@ -4227,14 +4476,21 @@ TEST_F(HeartbeatResponseTestV1, OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_PRIMARY, election2, election); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_PRIMARY, + election2, + election, + lastOpTimeApplied); // Second primary does not change primary index. ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); @@ -4246,9 +4502,12 @@ TEST_F(HeartbeatResponseTestV1, UpdatePrimaryIndexWhenAHeartbeatMakesNodeAwareOf OpTime lastOpTimeApplied = OpTime(Timestamp(3, 0), 0); ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, election); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_PRIMARY, + election, + election, + lastOpTimeApplied); ASSERT_EQUALS(1, getCurrentPrimaryIndex()); ASSERT_NO_ACTION(nextAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4273,7 +4532,8 @@ TEST_F(HeartbeatResponseTestV1, NodeDoesNotRetryHeartbeatIfTheFirstFailureTakesT // no retry allowed. Milliseconds(4990), // Spent 4.99 of the 5 seconds in the network. target, - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4288,10 +4548,7 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberDoesNo // from "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind // "host3", since "host3" does not build indexes OpTime election = OpTime(); - // Our last op time applied must be behind host2, or we'll hit the case where we change - // sync sources due to the sync source being behind, without a sync source, and not primary. - OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0); - OpTime syncSourceOpTime = OpTime(Timestamp(400, 1), 0); + OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0); // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); @@ -4313,22 +4570,33 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberDoesNo << "protocolVersion" << 1), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_SECONDARY, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), + makeOplogQueryMetadata(lastOpTimeApplied), now())); // set up complete, time for actual check - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(syncSourceOpTime), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); } TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberIsNotReadable) { @@ -4336,31 +4604,39 @@ TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberIsNotR // from "host2" and to "host3" despite "host2" being more than maxSyncSourceLagSecs(30) behind // "host3", since "host3" is in a non-readable mode (RS_ROLLBACK) OpTime election = OpTime(); - // Our last op time applied must be behind host2, or we'll hit the case where we change - // sync sources due to the sync source being behind, without a sync source, and not primary. - OpTime lastOpTimeApplied = OpTime(Timestamp(400, 0), 0); - OpTime syncSourceOpTime = OpTime(Timestamp(400, 1), 0); + OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0); // ahead by more than maxSyncSourceLagSecs (30) OpTime fresherLastOpTimeApplied = OpTime(Timestamp(3005, 0), 0); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); - HeartbeatResponseAction nextAction = receiveUpHeartbeat( - HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, syncSourceOpTime); + HeartbeatResponseAction nextAction = receiveUpHeartbeat(HostAndPort("host2"), + "rs0", + MemberState::RS_SECONDARY, + election, + lastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - nextAction = receiveUpHeartbeat( - HostAndPort("host3"), "rs0", MemberState::RS_ROLLBACK, election, fresherLastOpTimeApplied); + nextAction = receiveUpHeartbeat(HostAndPort("host3"), + "rs0", + MemberState::RS_ROLLBACK, + election, + fresherLastOpTimeApplied, + lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // set up complete, time for actual check ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), makeReplSetMetadata(), - makeOplogQueryMetadata(syncSourceOpTime), + makeOplogQueryMetadata(lastOpTimeApplied), now())); // set up complete, time for actual check - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), makeReplSetMetadata(syncSourceOpTime), boost::none, now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), + OpTime(), + makeReplSetMetadata(lastOpTimeApplied), + boost::none, + now())); } class HeartbeatResponseTestOneRetryV1 : public HeartbeatResponseTestV1 { @@ -4374,7 +4650,11 @@ public: std::pair<ReplSetHeartbeatArgsV1, Milliseconds> uppingRequest = getTopoCoord().prepareHeartbeatRequestV1(_upRequestDate, "rs0", _target); HeartbeatResponseAction upAction = getTopoCoord().processHeartbeatResponse( - _upRequestDate, Milliseconds(0), _target, makeStatusWith<ReplSetHeartbeatResponse>()); + _upRequestDate, + Milliseconds(0), + _target, + makeStatusWith<ReplSetHeartbeatResponse>(), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4393,8 +4673,8 @@ public: _firstRequestDate + Seconds(4), // 4 seconds elapsed, retry allowed. Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network. _target, - StatusWith<ReplSetHeartbeatResponse>( - ErrorCodes::ExceededTimeLimit, "Took too long")); // We've never applied anything. + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4411,8 +4691,13 @@ public: BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - _firstRequestDate + Milliseconds(4000), 10, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{_firstRequestDate + Milliseconds(4000), + 10, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -4451,8 +4736,8 @@ TEST_F(HeartbeatResponseTestOneRetryV1, // no retry allowed. Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, - "Took too long")); // We've never applied anything. + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4472,7 +4757,8 @@ public: // could retry. Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); // Because the first retry failed without timing out, we expect to retry immediately. @@ -4489,8 +4775,13 @@ public: BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - firstRequestDate() + Seconds(4), 10, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{firstRequestDate() + Seconds(4), + 10, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -4519,7 +4810,8 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, NodeDoesNotRetryHeartbeatsAfterFailing // could still retry. Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?")); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); // Because this is the second retry, rather than retry again, we expect to wait for half @@ -4532,8 +4824,13 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, NodeDoesNotRetryHeartbeatsAfterFailing BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - firstRequestDate() + Milliseconds(4900), 10, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{firstRequestDate() + Milliseconds(4900), + 10, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -4556,11 +4853,12 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, HeartbeatThreeNonconsecutiveFailures) response.setConfigVersion(5); // successful response (third response due to the two failures in setUp()) - HeartbeatResponseAction action = - getTopoCoord().processHeartbeatResponse(firstRequestDate() + Milliseconds(4500), - Milliseconds(400), - target(), - StatusWith<ReplSetHeartbeatResponse>(response)); + HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( + firstRequestDate() + Milliseconds(4500), + Milliseconds(400), + target(), + StatusWith<ReplSetHeartbeatResponse>(response), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4577,7 +4875,8 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, HeartbeatThreeNonconsecutiveFailures) firstRequestDate() + Milliseconds(7100), Milliseconds(400), target(), - StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""})); + StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""}), + OpTime(Timestamp(0, 0), 0)); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::follower == getTopoCoord().getRole()); @@ -4586,8 +4885,13 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, HeartbeatThreeNonconsecutiveFailures) BSONObjBuilder statusBuilder; Status resultStatus(ErrorCodes::InternalError, "prepareStatusResponse didn't set result"); getTopoCoord().prepareStatusResponse( - TopologyCoordinator::ReplSetStatusArgs{ - firstRequestDate() + Milliseconds(7000), 600, OpTime(), BSONObj()}, + TopologyCoordinator::ReplSetStatusArgs{firstRequestDate() + Milliseconds(7000), + 600, + OpTime(Timestamp(100, 0), 0), + OpTime(Timestamp(100, 0), 0), + OpTime(), + OpTime(), + BSONObj()}, &statusBuilder, &resultStatus); ASSERT_OK(resultStatus); @@ -4628,12 +4932,12 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, UpdateHeartbeatDataOldConfig) { believesWeAreDownResponse.setElectable(true); believesWeAreDownResponse.noteStateDisagreement(); startCapturingLogMessages(); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse)); + StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down")); @@ -4675,12 +4979,12 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, UpdateHeartbeatDataSameConfig) { sameConfigResponse.setConfigVersion(2); sameConfigResponse.setConfig(originalConfig); startCapturingLogMessages(); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse)); + StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, @@ -4703,12 +5007,12 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, memberMissingResponse.setElectable(true); memberMissingResponse.noteStateDisagreement(); startCapturingLogMessages(); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host5"), - StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse)); + StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config")); @@ -4729,12 +5033,12 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, believesWeAreDownResponse.setElectable(true); believesWeAreDownResponse.noteStateDisagreement(); startCapturingLogMessages(); - getTopoCoord().getMyMemberHeartbeatData()->setLastAppliedOpTime(lastOpTimeApplied, Date_t()); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse)); + StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse), + lastOpTimeApplied); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down")); |