diff options
author | Tess Avitabile <tess.avitabile@mongodb.com> | 2019-03-19 12:57:06 -0400 |
---|---|---|
committer | Tess Avitabile <tess.avitabile@mongodb.com> | 2019-03-22 16:29:30 -0400 |
commit | 3038797f87b9e355ff5151777b8474e57adb419c (patch) | |
tree | c9cc61edd9638beab9891687e994757c7e8bf093 | |
parent | 91c069aaf7057d31a751840c1fe0da2928487afb (diff) | |
download | mongo-3038797f87b9e355ff5151777b8474e57adb419c.tar.gz |
Revert "SERVER-33248 Allow choosing a sync source that we are up to date with if it has a higher lastOpCommitted"
This reverts commit 5df9e94b0c4840680d1d17fcf2f04412cf6d70cf.
17 files changed, 150 insertions, 319 deletions
diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml index 6ada79e5ca0..8eaa08c3f36 100644 --- a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml @@ -136,6 +136,10 @@ selector: - jstests/core/explain_upsert.js # The `dbstats` command builds in-memory structures that are not causally consistent. - jstests/core/dbstats.js + # TODO SERVER-33248: Remove this test from the blacklist. The secondary can get stuck without a + # sync source if the RestartCatalog command kills its oplog fetching cursor, which blocks + # secondary majority reads. + - jstests/core/restart_catalog.js exclude_with_any_tags: - assumes_against_mongod_not_mongos ## diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml index 1a94d618226..bbe744b2286 100644 --- a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml +++ b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml @@ -154,6 +154,10 @@ selector: - jstests/core/explain_upsert.js # The `dbstats` command builds in-memory structures that are not causally consistent. - jstests/core/dbstats.js + # TODO SERVER-33248: Remove this test from the blacklist. The secondary can get stuck without a + # sync source if the RestartCatalog command kills its oplog fetching cursor, which blocks + # secondary majority reads. + - jstests/core/restart_catalog.js # These include operations the root user auth'd on the test database is not authorized to perform, # e.g. dropping or creating system collections. diff --git a/buildscripts/resmokeconfig/suites/sharded_causally_consistent_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/sharded_causally_consistent_jscore_passthrough.yml index b3ae37b1873..aa24ddbc2ef 100644 --- a/buildscripts/resmokeconfig/suites/sharded_causally_consistent_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/sharded_causally_consistent_jscore_passthrough.yml @@ -176,6 +176,10 @@ selector: - jstests/core/orf.js #explain.executionStats is not CC # The `dbstats` command builds in-memory structures that are not causally consistent. - jstests/core/dbstats.js + # TODO SERVER-33248: Remove this test from the blacklist. The secondary can get stuck without a + # sync source if the RestartCatalog command kills its oplog fetching cursor, which blocks + # secondary majority reads. + - jstests/core/restart_catalog.js exclude_with_any_tags: # Tests tagged with the following will fail because they assume collections are not sharded. - assumes_no_implicit_collection_creation_after_drop diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py index 5f5e1568e4a..eaaf7463594 100644 --- a/buildscripts/resmokelib/testing/fixtures/replicaset.py +++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py @@ -273,13 +273,21 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst primary_client = self.nodes[0].mongo_client() self.auth(primary_client, self.auth_options) - # All nodes must be in primary/secondary state prior to this point. Perform a majority - # write to ensure there is a committed operation on the set. The commit point will - # propagate to all members and trigger a stable checkpoint on all persisted storage engines - # nodes. + # Algorithm precondition: All nodes must be in primary/secondary state. + # + # 1) Perform a majority write. This will guarantee the primary updates its commit point + # to the value of this write. + # + # 2) Perform a second write. This will guarantee that all nodes will update their commit + # point to a time that is >= the previous write. That will trigger a stable checkpoint + # on all nodes. + # TODO(SERVER-33248): Remove this block. We should not need to prod the replica set to + # advance the commit point if the commit point being lagged is sufficient to choose a + # sync source. admin = primary_client.get_database( "admin", write_concern=pymongo.write_concern.WriteConcern(w="majority")) - admin.command("appendOplogNote", data={"await_stable_recovery_timestamp": 1}) + admin.command("appendOplogNote", data={"await_stable_checkpoint": 1}) + admin.command("appendOplogNote", data={"await_stable_checkpoint": 2}) for node in self.nodes: self.logger.info("Waiting for node on port %d to have a stable checkpoint.", node.port) diff --git a/jstests/libs/override_methods/check_uuids_consistent_across_cluster.js b/jstests/libs/override_methods/check_uuids_consistent_across_cluster.js index 6412b894ebd..0a05e19b25c 100644 --- a/jstests/libs/override_methods/check_uuids_consistent_across_cluster.js +++ b/jstests/libs/override_methods/check_uuids_consistent_across_cluster.js @@ -47,6 +47,11 @@ ShardingTest.prototype.checkUUIDsConsistentAcrossCluster = function() { continue; } var rs = this._rs[i].test; + // The noop writer needs to be enabled in case a sync source isn't set, so that + // awaitLastOpCommitted() is guaranteed to finish. + // SERVER-33248 for reference. + rs.getPrimary().adminCommand({setParameter: 1, periodicNoopIntervalSecs: 1}); + rs.getPrimary().adminCommand({setParameter: 1, writePeriodicNoops: true}); var keyFile = this._otherParams.keyFile; if (keyFile) { authutil.asCluster(rs.nodes, keyFile, function() { diff --git a/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js b/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js index a9a5bc022b7..7bcb38f93a0 100644 --- a/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js +++ b/jstests/noPassthrough/readConcern_atClusterTime_snapshot_selection.js @@ -29,10 +29,20 @@ // Create the collection and insert one document. Get the op time of the write. let res = assert.commandWorked(primaryDB.runCommand( {insert: collName, documents: [{_id: "before"}], writeConcern: {w: "majority"}})); - const clusterTimePrimaryBefore = res.opTime.ts; + let clusterTimePrimaryBefore; // Wait for the majority commit point on 'secondaryDB0' to include the {_id: "before"} write. assert.soonNoExcept(function() { + // Without a consistent stream of writes, secondary majority reads are not guaranteed + // to complete, since the commit point being stale is not sufficient to establish a sync + // source. + // TODO (SERVER-33248): Remove this write and increase the maxTimeMS on the read. + res = assert.commandWorked(primaryDB.runCommand( + {insert: "otherColl", documents: [{a: 1}], writeConcern: {w: "majority"}})); + assert(res.hasOwnProperty("opTime"), tojson(res)); + assert(res.opTime.hasOwnProperty("ts"), tojson(res)); + clusterTimePrimaryBefore = res.opTime.ts; + return assert .commandWorked(secondaryDB0.runCommand( {find: collName, readConcern: {level: "majority"}, maxTimeMS: 10000})) diff --git a/jstests/replsets/dbhash_at_cluster_time.js b/jstests/replsets/dbhash_at_cluster_time.js index 8c52ab741db..27f4e0f79db 100644 --- a/jstests/replsets/dbhash_at_cluster_time.js +++ b/jstests/replsets/dbhash_at_cluster_time.js @@ -19,6 +19,17 @@ const db = session.getDatabase("test"); let txnNumber = 0; + // We force 'secondary' to sync from 'primary' using the "forceSyncSourceCandidate" failpoint to + // ensure that an intermittent connectivity issue doesn't lead to the secondary not advancing + // its belief of the majority commit point. This avoids any complications that would arise due + // to SERVER-33248. + assert.commandWorked(secondary.adminCommand({ + configureFailPoint: "forceSyncSourceCandidate", + mode: "alwaysOn", + data: {hostAndPort: primary.host} + })); + rst.awaitSyncSource(secondary, primary); + // We also prevent all nodes in the replica set from advancing oldest_timestamp. This ensures // that the snapshot associated with 'clusterTime' is retained for the duration of this test. rst.nodes.forEach(conn => { diff --git a/jstests/replsets/libs/secondary_reads_test.js b/jstests/replsets/libs/secondary_reads_test.js index da22f9b73b5..2ca1117b6ed 100644 --- a/jstests/replsets/libs/secondary_reads_test.js +++ b/jstests/replsets/libs/secondary_reads_test.js @@ -24,7 +24,9 @@ function SecondaryReadsTest(name = "secondary_reads_test") { * two-node replica set running with the latest version. */ function performStandardSetup() { - let replSet = new ReplSetTest({name, nodes: 2}); + // TODO: Periodic noop writes can be removed once SERVER-33248 is complete. + let replSet = new ReplSetTest( + {name, nodes: 2, nodeOptions: {setParameter: {writePeriodicNoops: true}}}); replSet.startSet(); const nodes = replSet.nodeList(); diff --git a/src/mongo/db/repl/member_data.cpp b/src/mongo/db/repl/member_data.cpp index 83890cc2a51..50e2725ca81 100644 --- a/src/mongo/db/repl/member_data.cpp +++ b/src/mongo/db/repl/member_data.cpp @@ -47,9 +47,7 @@ MemberData::MemberData() : _health(-1), _authIssue(false), _configIndex(-1), _is _lastResponse.setAppliedOpTime(OpTime()); } -bool MemberData::setUpValues(Date_t now, - ReplSetHeartbeatResponse&& hbResponse, - OpTime lastOpCommitted) { +bool MemberData::setUpValues(Date_t now, ReplSetHeartbeatResponse&& hbResponse) { _health = 1; if (_upSince == Date_t()) { _upSince = now; @@ -60,10 +58,6 @@ bool MemberData::setUpValues(Date_t now, _lastUpdateStale = false; _updatedSinceRestart = true; - if (!lastOpCommitted.isNull()) { - _lastOpCommitted = lastOpCommitted; - } - if (!hbResponse.hasState()) { hbResponse.setState(MemberState::RS_UNKNOWN); } diff --git a/src/mongo/db/repl/member_data.h b/src/mongo/db/repl/member_data.h index ab4352704bb..5e1af9ac22e 100644 --- a/src/mongo/db/repl/member_data.h +++ b/src/mongo/db/repl/member_data.h @@ -76,9 +76,6 @@ public: OpTime getHeartbeatDurableOpTime() const { return _lastResponse.hasDurableOpTime() ? _lastResponse.getDurableOpTime() : OpTime(); } - OpTime getHeartbeatLastOpCommitted() const { - return _lastOpCommitted; - } int getConfigVersion() const { return _lastResponse.getConfigVersion(); } @@ -146,10 +143,9 @@ public: /** * Sets values in this object from the results of a successful heartbeat command. - * 'lastOpCommitted' should be extracted from the heartbeat metadata. * Returns whether or not the optimes advanced as a result of this heartbeat response. */ - bool setUpValues(Date_t now, ReplSetHeartbeatResponse&& hbResponse, OpTime lastOpCommitted); + bool setUpValues(Date_t now, ReplSetHeartbeatResponse&& hbResponse); /** * Sets values in this object from the results of a erroring/failed heartbeat command. @@ -265,11 +261,6 @@ private: // Last known OpTime that the replica has applied, whether journaled or unjournaled. OpTime _lastAppliedOpTime; - // OpTime of the most recently committed op of which the node was aware, extracted from the - // heartbeat metadata. Note that only arbiters should update their knowledge of the commit point - // from heartbeat data. - OpTime _lastOpCommitted; - // TODO(russotto): Since memberData is kept in config order, _configIndex // and _isSelf may not be necessary. // Index of this member in the replica set configuration. diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp index 73808edf3b8..ce499fd0c89 100644 --- a/src/mongo/db/repl/oplog_fetcher.cpp +++ b/src/mongo/db/repl/oplog_fetcher.cpp @@ -119,11 +119,8 @@ BSONObj makeMetadataObject(bool isV1ElectionProtocol) { * Checks the first batch of results from query. * 'documents' are the first batch of results returned from tailing the remote oplog. * 'lastFetched' optime and hash should be consistent with the predicate in the query. - * 'lastOpCommitted' is the OpTime of the most recently committed op of which this node is aware. * 'remoteLastOpApplied' is the last OpTime applied on the sync source. This is optional for * compatibility with 3.4 servers that do not send OplogQueryMetadata. - * 'remoteLastOpCommitted' is the OpTime of the most recently committed op of which the sync source - * is aware. * 'requiredRBID' is a RollbackID received when we chose the sync source that we use here to * guarantee we have not rolled back since we confirmed the sync source had our minValid. * 'remoteRBID' is a RollbackId for the sync source returned in this oplog query. This is optional @@ -132,17 +129,14 @@ BSONObj makeMetadataObject(bool isV1ElectionProtocol) { * oplog to be ahead of ours. If false, the sync source's oplog is allowed to be at the same point * as ours, but still cannot be behind ours. * - * TODO (SERVER-27668): Make remoteLastOpApplied, remoteLastOpCommitted, and remoteRBID - * non-optional. + * TODO (SERVER-27668): Make remoteLastOpApplied and remoteRBID non-optional in mongodb 3.8. * * Returns OplogStartMissing if we cannot find the optime of the last fetched operation in * the remote oplog. */ Status checkRemoteOplogStart(const Fetcher::Documents& documents, OpTimeWithHash lastFetched, - OpTime lastOpCommitted, boost::optional<OpTime> remoteLastOpApplied, - boost::optional<OpTime> remoteLastOpCommitted, int requiredRBID, boost::optional<int> remoteRBID, bool requireFresherSyncSource) { @@ -166,49 +160,34 @@ Status checkRemoteOplogStart(const Fetcher::Documents& documents, } } - // The sync source could be behind us if it rolled back after we selected it. We could have - // failed to detect the rollback if it occurred between sync source selection (when we check the - // candidate is ahead of us) and sync source resolution (when we got 'requiredRBID'). If the - // sync source is now behind us, choose a new sync source to prevent going into rollback. - if (remoteLastOpApplied && (*remoteLastOpApplied < lastFetched.opTime)) { + // The SyncSourceResolver never checks that the sync source candidate is actually ahead of + // us. Rather than have it check there with an extra network roundtrip, we check here. + if (requireFresherSyncSource && remoteLastOpApplied && + (*remoteLastOpApplied <= lastFetched.opTime)) { return Status(ErrorCodes::InvalidSyncSource, str::stream() << "Sync source's last applied OpTime " << remoteLastOpApplied->toString() - << " is older than our last fetched OpTime " + << " is not greater than our last fetched OpTime " << lastFetched.opTime.toString() << ". Choosing new sync source."); - } - - // If 'requireFresherSyncSource' is true, we must check that the sync source's - // lastApplied/lastOpCommitted is ahead of us to prevent forming a cycle. Although we check for - // this condition in sync source selection, if an undetected rollback occurred between sync - // source selection and sync source resolution, this condition may no longer hold. - // 'requireFresherSyncSource' is false for initial sync, since no other node can sync off an - // initial syncing node, so we do not need to check for cycles. In addition, it would be - // problematic to check this condition for initial sync, since the 'lastFetched' OpTime will - // almost always equal the 'remoteLastApplied', since we fetch the sync source's last applied - // OpTime to determine where to start our OplogFetcher. - if (requireFresherSyncSource && remoteLastOpApplied && remoteLastOpCommitted && - std::tie(*remoteLastOpApplied, *remoteLastOpCommitted) <= - std::tie(lastFetched.opTime, lastOpCommitted)) { + } else if (remoteLastOpApplied && (*remoteLastOpApplied < lastFetched.opTime)) { + // In initial sync, the lastFetched OpTime will almost always equal the remoteLastOpApplied + // since we fetch the sync source's last applied OpTime to determine where to start our + // OplogFetcher. This is fine since no other node can sync off of an initial syncing node + // and thus cannot form a sync source cycle. To account for this, we must relax the + // constraint on our sync source being fresher. return Status(ErrorCodes::InvalidSyncSource, - str::stream() - << "Sync source cannot be behind me, and if I am up-to-date with the " - "sync source, it must have a higher lastOpCommitted. " - << "My last fetched oplog optime: " - << lastFetched.opTime.toString() - << ", latest oplog optime of sync source: " - << remoteLastOpApplied->toString() - << ", my lastOpCommitted: " - << lastOpCommitted.toString() - << ", lastOpCommitted of sync source: " - << remoteLastOpCommitted->toString()); + str::stream() << "Sync source's last applied OpTime " + << remoteLastOpApplied->toString() + << " is older than our last fetched OpTime " + << lastFetched.opTime.toString() + << ". Choosing new sync source."); } - // At this point we know that our sync source has our minValid and is not behind us, so if our + // At this point we know that our sync source has our minValid and is ahead of us, so if our // history diverges from our sync source's we should prefer its history and roll back ours. - // Since we checked for rollback and our sync source is not behind us, an empty batch means that + // Since we checked for rollback and our sync source is ahead of us, an empty batch means that // we have a higher timestamp on our last fetched OpTime than our sync source's last applied // OpTime, but a lower term. When this occurs, we must roll back our inconsistent oplog entry. if (documents.empty()) { @@ -450,17 +429,12 @@ StatusWith<BSONObj> OplogFetcher::_onSuccessfulBatch(const Fetcher::QueryRespons auto remoteRBID = oqMetadata ? boost::make_optional(oqMetadata->getRBID()) : boost::none; auto remoteLastApplied = oqMetadata ? boost::make_optional(oqMetadata->getLastOpApplied()) : boost::none; - auto remoteLastOpCommitted = - oqMetadata ? boost::make_optional(oqMetadata->getLastOpCommitted()) : boost::none; - auto status = checkRemoteOplogStart( - documents, - lastFetched, - _dataReplicatorExternalState->getCurrentTermAndLastCommittedOpTime().opTime, - remoteLastApplied, - remoteLastOpCommitted, - _requiredRBID, - remoteRBID, - _requireFresherSyncSource); + auto status = checkRemoteOplogStart(documents, + lastFetched, + remoteLastApplied, + _requiredRBID, + remoteRBID, + _requireFresherSyncSource); if (!status.isOK()) { // Stop oplog fetcher and execute rollback if necessary. return status; diff --git a/src/mongo/db/repl/oplog_fetcher_test.cpp b/src/mongo/db/repl/oplog_fetcher_test.cpp index b4d9a5ccbae..ce93a2b0e4a 100644 --- a/src/mongo/db/repl/oplog_fetcher_test.cpp +++ b/src/mongo/db/repl/oplog_fetcher_test.cpp @@ -361,22 +361,6 @@ TEST_F(OplogFetcherTest, MetadataAndBatchAreNotProcessedWhenSyncSourceIsNotAhead } TEST_F(OplogFetcherTest, - MetadataAndBatchAreProcessedWhenSyncSourceIsNotAheadButHasHigherLastOpCommitted) { - rpc::ReplSetMetadata replMetadata(1, OpTime(), OpTime(), 1, OID::gen(), -1, -1); - rpc::OplogQueryMetadata oqMetadata(remoteNewerOpTime, lastFetched.opTime, rbid, 2, 2); - BSONObjBuilder bob; - ASSERT_OK(replMetadata.writeToMetadata(&bob)); - ASSERT_OK(oqMetadata.writeToMetadata(&bob)); - auto metadataObj = bob.obj(); - - auto entry = makeNoopOplogEntry(lastFetched); - auto shutdownState = - processSingleBatch({makeCursorResponse(0, {entry}), metadataObj, Milliseconds(0)}, false); - ASSERT_OK(shutdownState->getStatus()); - ASSERT(dataReplicatorExternalState->metadataWasProcessed); -} - -TEST_F(OplogFetcherTest, MetadataAndBatchAreNotProcessedWhenSyncSourceIsBehindWithoutRequiringFresherSyncSource) { rpc::ReplSetMetadata replMetadata(1, OpTime(), OpTime(), 1, OID::gen(), -1, -1); rpc::OplogQueryMetadata oqMetadata(staleOpTime, staleOpTime, rbid, 2, 2); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 5e63283a129..01af85ca544 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -154,7 +154,6 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( } ReplSetHeartbeatResponse hbResponse; - OpTime lastOpCommitted; BSONObj resp; if (responseStatus.isOK()) { resp = cbData.response.data; @@ -183,11 +182,9 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( replMetadata = responseStatus; } if (replMetadata.isOK()) { - lastOpCommitted = replMetadata.getValue().getLastOpCommitted(); - // Arbiters are the only nodes allowed to advance their commit point via heartbeats. if (_getMemberState_inlock().arbiter()) { - _advanceCommitPoint_inlock(lastOpCommitted); + _advanceCommitPoint_inlock(replMetadata.getValue().getLastOpCommitted()); } // Asynchronous stepdown could happen, but it will wait for _mutex and execute // after this function, so we cannot and don't need to wait for it to finish. @@ -216,8 +213,8 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( hbStatusResponse = StatusWith<ReplSetHeartbeatResponse>(responseStatus); } - HeartbeatResponseAction action = _topCoord->processHeartbeatResponse( - now, networkTime, target, hbStatusResponse, lastOpCommitted); + HeartbeatResponseAction action = + _topCoord->processHeartbeatResponse(now, networkTime, target, hbStatusResponse); if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() && hbStatusResponse.getValue().hasState() && diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 9fbf9dcfa51..1d1d242ed14 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -387,21 +387,13 @@ HostAndPort TopologyCoordinator::chooseNewSyncSource(Date_t now, continue; } } - // Do not select a candidate that is behind me. If I am up to date with the candidate, - // only select them if they have a higher lastOpCommitted. - if (std::tuple<OpTime, OpTime>(it->getHeartbeatAppliedOpTime(), - it->getHeartbeatLastOpCommitted()) <= - std::tie(lastOpTimeFetched, _lastCommittedOpTime)) { - LOG(1) << "Cannot select this sync source. Sync source cannot be behind me, and if " - "I am up-to-date with the sync source, it must have a higher " - "lastOpCommitted. " - << "Sync candidate: " << itMemberConfig.getHostAndPort() - << ", my last fetched oplog optime: " << lastOpTimeFetched.toBSON() - << ", latest oplog optime of sync candidate: " - << it->getHeartbeatAppliedOpTime().toBSON() - << ", my lastOpCommitted: " << _lastCommittedOpTime - << ", lastOpCommitted of sync candidate: " - << it->getHeartbeatLastOpCommitted(); + // only consider candidates that are ahead of where we are + if (it->getHeartbeatAppliedOpTime() <= lastOpTimeFetched) { + LOG(1) << "Cannot select sync source equal to or behind our last fetched optime. " + << "My last fetched oplog optime: " << lastOpTimeFetched.toBSON() + << ", latest oplog optime of sync candidate " + << itMemberConfig.getHostAndPort() << ": " + << it->getHeartbeatAppliedOpTime().toBSON(); continue; } // Candidate cannot be more latent than anything we've already considered. @@ -1035,8 +1027,7 @@ HeartbeatResponseAction TopologyCoordinator::processHeartbeatResponse( Date_t now, Milliseconds networkRoundTripTime, const HostAndPort& target, - const StatusWith<ReplSetHeartbeatResponse>& hbResponse, - OpTime lastOpCommitted) { + const StatusWith<ReplSetHeartbeatResponse>& hbResponse) { const MemberState originalState = getMemberState(); PingStats& hbStats = _pings[target]; invariant(hbStats.getLastHeartbeatStartDate() != Date_t()); @@ -1157,7 +1148,7 @@ HeartbeatResponseAction TopologyCoordinator::processHeartbeatResponse( ReplSetHeartbeatResponse hbr = std::move(hbResponse.getValue()); LOG(3) << "setUpValues: heartbeat response good for member _id:" << member.getId() << ", msg: " << hbr.getHbMsg(); - advancedOpTime = hbData.setUpValues(now, std::move(hbr), lastOpCommitted); + advancedOpTime = hbData.setUpValues(now, std::move(hbr)); } HeartbeatResponseAction nextAction; @@ -1995,8 +1986,7 @@ void TopologyCoordinator::setCurrentPrimary_forTest(int primaryIndex, hbResponse.setHbMsg(""); _memberData.at(primaryIndex) .setUpValues(_memberData.at(primaryIndex).getLastHeartbeat(), - std::move(hbResponse), - _memberData.at(primaryIndex).getHeartbeatLastOpCommitted()); + std::move(hbResponse)); } _currentPrimaryIndex = primaryIndex; } @@ -2263,10 +2253,6 @@ void TopologyCoordinator::fillMemberData(BSONObjBuilder* result) { entry.append("optime", lastDurableOpTime.getTimestamp()); } entry.append("host", memberData.getHostAndPort().toString()); - - const auto lastOpCommitted = memberData.getHeartbeatLastOpCommitted(); - entry.append("heartbeatLastOpCommitted", lastOpCommitted.toBSON()); - if (_selfIndex >= 0) { const int memberId = memberData.getMemberId(); invariant(memberId >= 0); @@ -3234,25 +3220,18 @@ bool TopologyCoordinator::shouldChangeSyncSource( // If OplogQueryMetadata was provided, use its values, otherwise use the ones in // ReplSetMetadata. OpTime currentSourceOpTime; - OpTime currentSourceLastOpCommitted; int syncSourceIndex = -1; int primaryIndex = -1; if (oqMetadata) { currentSourceOpTime = std::max(oqMetadata->getLastOpApplied(), _memberData.at(currentSourceIndex).getHeartbeatAppliedOpTime()); - currentSourceLastOpCommitted = - std::max(oqMetadata->getLastOpCommitted(), - _memberData.at(currentSourceIndex).getHeartbeatLastOpCommitted()); syncSourceIndex = oqMetadata->getSyncSourceIndex(); primaryIndex = oqMetadata->getPrimaryIndex(); } else { currentSourceOpTime = std::max(replMetadata.getLastOpVisible(), _memberData.at(currentSourceIndex).getHeartbeatAppliedOpTime()); - currentSourceLastOpCommitted = - std::max(replMetadata.getLastOpCommitted(), - _memberData.at(currentSourceIndex).getHeartbeatLastOpCommitted()); syncSourceIndex = replMetadata.getSyncSourceIndex(); primaryIndex = replMetadata.getPrimaryIndex(); } @@ -3267,20 +3246,12 @@ bool TopologyCoordinator::shouldChangeSyncSource( // unless they are primary. const OpTime myLastOpTime = getMyLastAppliedOpTime(); if (_rsConfig.getProtocolVersion() == 1 && syncSourceIndex == -1 && - std::tie(currentSourceOpTime, currentSourceLastOpCommitted) <= - std::tie(myLastOpTime, _lastCommittedOpTime) && - primaryIndex != currentSourceIndex) { + currentSourceOpTime <= myLastOpTime && primaryIndex != currentSourceIndex) { std::stringstream logMessage; - - logMessage << "Choosing new sync source. Our current sync source is not primary and does " - "not have a sync source, so we require that it is not behind us, and that if " - "we are up-to-date with it, it has a higher lastOpCommitted. " - << "Current sync source: " << currentSource.toString() - << ", my last fetched oplog optime: " << myLastOpTime - << ", latest oplog optime of sync source: " << currentSourceOpTime - << ", my lastOpCommitted: " << _lastCommittedOpTime - << ", lastOpCommitted of sync source: " << currentSourceLastOpCommitted; - + logMessage << "Choosing new sync source because our current sync source, " + << currentSource.toString() << ", has an OpTime (" << currentSourceOpTime + << ") which is not ahead of ours (" << myLastOpTime + << "), it does not have a sync source, and it's not the primary"; if (primaryIndex >= 0) { logMessage << " (" << _rsConfig.getMemberAt(primaryIndex).getHostAndPort() << " is)"; } else { diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 55cc38292f6..feb1c351c43 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -392,8 +392,8 @@ public: Date_t now, const std::string& ourSetName, const HostAndPort& target); /** - * Processes a heartbeat response from "target" that arrived around "now" with "lastOpCommitted" - * in the metadata, having spent "networkRoundTripTime" millis on the network. + * Processes a heartbeat response from "target" that arrived around "now", having + * spent "networkRoundTripTime" millis on the network. * * Updates internal topology coordinator state, and returns instructions about what action * to take next. @@ -422,8 +422,7 @@ public: Date_t now, Milliseconds networkRoundTripTime, const HostAndPort& target, - const StatusWith<ReplSetHeartbeatResponse>& hbResponse, - OpTime lastOpCommitted); + const StatusWith<ReplSetHeartbeatResponse>& hbResponse); /** * Returns whether or not at least 'numNodes' have reached the given opTime. diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index e60d324dd5d..a45f54805da 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -172,10 +172,9 @@ protected: // Only set visibleOpTime, primaryIndex and syncSourceIndex ReplSetMetadata makeReplSetMetadata(OpTime visibleOpTime = OpTime(), int primaryIndex = -1, - int syncSourceIndex = -1, - OpTime committedOpTime = OpTime()) { + int syncSourceIndex = -1) { return ReplSetMetadata(_topo->getTerm(), - committedOpTime, + OpTime(), visibleOpTime, _currentConfig.getConfigVersion(), OID(), @@ -187,10 +186,8 @@ protected: // Only set lastAppliedOpTime, primaryIndex and syncSourceIndex OplogQueryMetadata makeOplogQueryMetadata(OpTime lastAppliedOpTime = OpTime(), int primaryIndex = -1, - int syncSourceIndex = -1, - OpTime committedOpTime = OpTime()) { - return OplogQueryMetadata( - committedOpTime, lastAppliedOpTime, -1, primaryIndex, syncSourceIndex); + int syncSourceIndex = -1) { + return OplogQueryMetadata(OpTime(), lastAppliedOpTime, -1, primaryIndex, syncSourceIndex); } HeartbeatResponseAction receiveUpHeartbeat(const HostAndPort& member, @@ -198,15 +195,13 @@ protected: MemberState memberState, const OpTime& electionTime, const OpTime& lastOpTimeSender, - const HostAndPort& syncingTo = HostAndPort(), - const OpTime& lastOpCommittedSender = OpTime()) { + const HostAndPort& syncingTo = HostAndPort()) { return _receiveHeartbeatHelper(Status::OK(), member, setName, memberState, electionTime.getTimestamp(), lastOpTimeSender, - lastOpCommittedSender, Milliseconds(1), syncingTo); } @@ -224,7 +219,6 @@ protected: MemberState::RS_UNKNOWN, Timestamp(), OpTime(), - OpTime(), roundTripTime, HostAndPort()); } @@ -233,15 +227,13 @@ protected: const std::string& setName, MemberState memberState, const OpTime& lastOpTimeSender, - Milliseconds roundTripTime = Milliseconds(1), - const OpTime& lastOpCommittedSender = OpTime()) { + Milliseconds roundTripTime = Milliseconds(1)) { return _receiveHeartbeatHelper(Status::OK(), member, setName, memberState, Timestamp(), lastOpTimeSender, - lastOpCommittedSender, roundTripTime, HostAndPort()); } @@ -253,7 +245,6 @@ private: MemberState memberState, Timestamp electionTime, const OpTime& lastOpTimeSender, - const OpTime& lastOpCommittedSender, Milliseconds roundTripTime, const HostAndPort& syncingTo) { ReplSetHeartbeatResponse hb; @@ -271,8 +262,7 @@ private: getTopoCoord().prepareHeartbeatRequestV1(now(), setName, member); now() += roundTripTime; - return getTopoCoord().processHeartbeatResponse( - now(), roundTripTime, member, hbResponse, lastOpCommittedSender); + return getTopoCoord().processHeartbeatResponse(now(), roundTripTime, member, hbResponse); } private: @@ -587,80 +577,6 @@ TEST_F(TopoCoordTest, NodeWontChooseSyncSourceFromOlderTerm) { ASSERT(getTopoCoord().getSyncSourceAddress().empty()); } -TEST_F(TopoCoordTest, NodeCanChooseSyncSourceWithSameLastAppliedAndHigherLastOpCommitted) { - updateConfig(BSON("_id" - << "rs0" - << "version" - << 1 - << "members" - << BSON_ARRAY(BSON("_id" << 1 << "host" - << "hself") - << BSON("_id" << 10 << "host" - << "h1"))), - 0); - - setSelfMemberState(MemberState::RS_SECONDARY); - OpTime lastApplied = OpTime(Timestamp(100, 3), 3); - OpTime ourLastOpCommitted = OpTime(Timestamp(100, 1), 3); - OpTime lastOpCommittedSyncSource = OpTime(Timestamp(100, 2), 3); - - heartbeatFromMember(HostAndPort("h1"), - "rs0", - MemberState::RS_SECONDARY, - lastApplied, - Milliseconds(100), - lastOpCommittedSyncSource); - - // Record 2nd round of pings to allow choosing a new sync source. - heartbeatFromMember(HostAndPort("h1"), - "rs0", - MemberState::RS_SECONDARY, - lastApplied, - Milliseconds(100), - lastOpCommittedSyncSource); - - ASSERT(getTopoCoord().advanceLastCommittedOpTime(ourLastOpCommitted)); - getTopoCoord().chooseNewSyncSource( - now()++, lastApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); - ASSERT_EQUALS(HostAndPort("h1"), getTopoCoord().getSyncSourceAddress()); -} - -TEST_F(TopoCoordTest, NodeCannotChooseSyncSourceWithSameLastAppliedAndSameLastOpCommitted) { - updateConfig(BSON("_id" - << "rs0" - << "version" - << 1 - << "members" - << BSON_ARRAY(BSON("_id" << 1 << "host" - << "hself") - << BSON("_id" << 10 << "host" - << "h1"))), - 0); - - setSelfMemberState(MemberState::RS_SECONDARY); - OpTime lastApplied = OpTime(Timestamp(100, 3), 3); - OpTime lastOpCommitted = OpTime(Timestamp(100, 1), 3); - - heartbeatFromMember(HostAndPort("h1"), - "rs0", - MemberState::RS_SECONDARY, - lastApplied, - Milliseconds(100), - lastOpCommitted); - - // Record 2nd round of pings to allow choosing a new sync source. - heartbeatFromMember(HostAndPort("h1"), - "rs0", - MemberState::RS_SECONDARY, - lastApplied, - Milliseconds(100), - lastOpCommitted); - - ASSERT(getTopoCoord().advanceLastCommittedOpTime(lastOpCommitted)); - getTopoCoord().chooseNewSyncSource( - now()++, lastApplied, TopologyCoordinator::ChainingPreference::kUseConfiguration); - ASSERT(getTopoCoord().getSyncSourceAddress().empty()); -} TEST_F(TopoCoordTest, ChooseOnlyPrimaryAsSyncSourceWhenChainingIsDisallowed) { updateConfig(BSON("_id" @@ -1700,7 +1616,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { HostAndPort member = HostAndPort("test0:1234"); getTopoCoord().prepareHeartbeatRequestV1(startupTime + Milliseconds(1), setName, member); getTopoCoord().processHeartbeatResponse( - startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood, OpTime()); + startupTime + Milliseconds(2), Milliseconds(1), member, hbResponseGood); getTopoCoord().prepareHeartbeatRequestV1(startupTime + Milliseconds(3), setName, member); Date_t timeoutTime = startupTime + Milliseconds(3) + ReplSetConfig::kDefaultHeartbeatTimeoutPeriod; @@ -1709,12 +1625,12 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { StatusWith<ReplSetHeartbeatResponse>(Status(ErrorCodes::HostUnreachable, "")); getTopoCoord().processHeartbeatResponse( - timeoutTime, Milliseconds(5000), member, hbResponseDown, OpTime()); + timeoutTime, Milliseconds(5000), member, hbResponseDown); member = HostAndPort("test1:1234"); getTopoCoord().prepareHeartbeatRequestV1(startupTime + Milliseconds(2), setName, member); getTopoCoord().processHeartbeatResponse( - heartbeatTime, Milliseconds(4000), member, hbResponseGood, OpTime()); + heartbeatTime, Milliseconds(4000), member, hbResponseGood); makeSelfPrimary(electionTime); getTopoCoord().setMyLastAppliedOpTime(oplogProgress, startupTime, false); getTopoCoord().setMyLastDurableOpTime(oplogDurable, startupTime, false); @@ -1894,7 +1810,7 @@ TEST_F(TopoCoordTest, HeartbeatFrequencyShouldBeHalfElectionTimeoutWhenArbiter) std::pair<ReplSetHeartbeatArgsV1, Milliseconds> uppingRequest = getTopoCoord().prepareHeartbeatRequestV1(requestDate, "myset", target); auto action = getTopoCoord().processHeartbeatResponse( - requestDate, Milliseconds(0), target, makeStatusWith<ReplSetHeartbeatResponse>(), OpTime()); + requestDate, Milliseconds(0), target, makeStatusWith<ReplSetHeartbeatResponse>()); Date_t expected(now() + Milliseconds(2500)); ASSERT_EQUALS(expected, action.getNextHeartbeatStartDate()); } @@ -3554,31 +3470,6 @@ TEST_F(HeartbeatResponseTestV1, now())); ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( HostAndPort("host2"), makeReplSetMetadata(newerThanLastOpTimeApplied), boost::none, now())); - - // If we are as up-to-date as this sync source, but it has a higher lastOpCommitted, we will not - // change sync sources. - OpTime lastOpCommitted = OpTime(Timestamp(100, 0), 0); - OpTime newerLastOpCommitted = OpTime(Timestamp(200, 0), 0); - ASSERT(getTopoCoord().advanceLastCommittedOpTime(lastOpCommitted)); - nextAction = receiveUpHeartbeat(HostAndPort("host2"), - "rs0", - MemberState::RS_SECONDARY, - election, - lastOpTimeApplied, - HostAndPort(), - newerLastOpCommitted); - ASSERT_NO_ACTION(nextAction.getAction()); - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), - makeReplSetMetadata(), - makeOplogQueryMetadata(lastOpTimeApplied, -1, -1, newerLastOpCommitted), - now())); - - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), - makeReplSetMetadata(lastOpTimeApplied, -1, -1, newerLastOpCommitted), - boost::none, - now())); } TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenFresherMemberIsDown) { @@ -3835,7 +3726,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigNodeRemovedBetweenHeartbeatRequestAndRep hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( - now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, OpTime()); + now()++, Milliseconds(0), HostAndPort("host3"), hbResponse); // primary should not be set and we should perform NoAction in response ASSERT_EQUALS(-1, getCurrentPrimaryIndex()); @@ -3883,7 +3774,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigBetweenHeartbeatRequestAndRepsonse) { StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); getTopoCoord().setMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( - now()++, Milliseconds(0), HostAndPort("host3"), hbResponse, OpTime()); + now()++, Milliseconds(0), HostAndPort("host3"), hbResponse); // now primary should be host3, index 1, and we should perform NoAction in response ASSERT_EQUALS(1, getCurrentPrimaryIndex()); @@ -4059,15 +3950,13 @@ TEST_F(TopoCoordTest, FreshestNodeDoesCatchupTakeover) { getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host3:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); hbResp.setAppliedOpTime(behindOptime); hbResp.setState(MemberState::RS_PRIMARY); getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host2:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); getTopoCoord().updateTerm(1, Date_t()); ASSERT_OK(getTopoCoord().becomeCandidateIfElectable( @@ -4114,15 +4003,13 @@ TEST_F(TopoCoordTest, StaleNodeDoesntDoCatchupTakeover) { getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host3:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); hbResp.setAppliedOpTime(behindOptime); hbResp.setState(MemberState::RS_PRIMARY); getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host2:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); getTopoCoord().updateTerm(1, Date_t()); Status result = getTopoCoord().becomeCandidateIfElectable( @@ -4172,14 +4059,12 @@ TEST_F(TopoCoordTest, NodeDoesntDoCatchupTakeoverHeartbeatSaysPrimaryCaughtUp) { getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host3:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); hbResp.setState(MemberState::RS_PRIMARY); getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host2:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); getTopoCoord().updateTerm(1, Date_t()); Status result = getTopoCoord().becomeCandidateIfElectable( @@ -4232,15 +4117,13 @@ TEST_F(TopoCoordTest, NodeDoesntDoCatchupTakeoverIfTermNumbersSayPrimaryCaughtUp getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host3:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); hbResp.setAppliedOpTime(behindOptime); hbResp.setState(MemberState::RS_PRIMARY); getTopoCoord().processHeartbeatResponse(firstRequestDate + Milliseconds(1000), Milliseconds(999), HostAndPort("host2:27017"), - StatusWith<ReplSetHeartbeatResponse>(hbResp), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(hbResp)); getTopoCoord().updateTerm(1, Date_t()); Status result = getTopoCoord().becomeCandidateIfElectable( @@ -5805,8 +5688,7 @@ TEST_F(HeartbeatResponseTestV1, NodeDoesNotRetryHeartbeatIfTheFirstFailureTakesT // no retry allowed. Milliseconds(4990), // Spent 4.99 of the 5 seconds in the network. target, - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long")); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); @@ -5906,12 +5788,8 @@ public: Date_t _upRequestDate = unittest::assertGet(dateFromISOString("2014-08-29T12:55Z")); std::pair<ReplSetHeartbeatArgsV1, Milliseconds> uppingRequest = getTopoCoord().prepareHeartbeatRequestV1(_upRequestDate, "rs0", _target); - HeartbeatResponseAction upAction = - getTopoCoord().processHeartbeatResponse(_upRequestDate, - Milliseconds(0), - _target, - makeStatusWith<ReplSetHeartbeatResponse>(), - OpTime()); + HeartbeatResponseAction upAction = getTopoCoord().processHeartbeatResponse( + _upRequestDate, Milliseconds(0), _target, makeStatusWith<ReplSetHeartbeatResponse>()); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, upAction.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); @@ -5930,8 +5808,8 @@ public: _firstRequestDate + Seconds(4), // 4 seconds elapsed, retry allowed. Milliseconds(3990), // Spent 3.99 of the 4 seconds in the network. _target, - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), - OpTime()); // We've never applied anything. + StatusWith<ReplSetHeartbeatResponse>( + ErrorCodes::ExceededTimeLimit, "Took too long")); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); @@ -5988,8 +5866,8 @@ TEST_F(HeartbeatResponseTestOneRetryV1, // no retry allowed. Milliseconds(1000), // Spent 1 of the 1.01 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, "Took too long"), - OpTime()); // We've never applied anything. + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::ExceededTimeLimit, + "Took too long")); // We've never applied anything. ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); @@ -6009,8 +5887,7 @@ public: // could retry. Milliseconds(400), // Spent 0.4 of the 0.5 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?")); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); // Because the first retry failed without timing out, we expect to retry immediately. @@ -6057,8 +5934,7 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, NodeDoesNotRetryHeartbeatsAfterFailing // could still retry. Milliseconds(100), // Spent 0.1 of the 0.3 seconds in the network. target(), - StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?"), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(ErrorCodes::NodeNotFound, "Bad DNS?")); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); // Because this is the second retry, rather than retry again, we expect to wait for a quarter @@ -6099,8 +5975,7 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, HeartbeatThreeNonconsecutiveFailures) getTopoCoord().processHeartbeatResponse(firstRequestDate() + Milliseconds(4500), Milliseconds(400), target(), - StatusWith<ReplSetHeartbeatResponse>(response), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(response)); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); @@ -6117,8 +5992,7 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, HeartbeatThreeNonconsecutiveFailures) firstRequestDate() + Milliseconds(7100), Milliseconds(400), target(), - StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""}), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(Status{ErrorCodes::HostUnreachable, ""})); ASSERT_EQUALS(HeartbeatResponseAction::NoAction, action.getAction()); ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole()); @@ -6174,8 +6048,7 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, UpdateHeartbeatDataOldConfig) { now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse)); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down")); @@ -6224,8 +6097,7 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, UpdateHeartbeatDataSameConfig) { now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(sameConfigResponse)); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, @@ -6253,8 +6125,7 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host5"), - StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(memberMissingResponse)); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("Could not find host5:27017 in current config")); @@ -6280,8 +6151,7 @@ TEST_F(HeartbeatResponseHighVerbosityTestV1, now()++, // Time is left. Milliseconds(400), // Spent 0.4 of the 0.5 second in the network. HostAndPort("host2"), - StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse), - OpTime()); + StatusWith<ReplSetHeartbeatResponse>(believesWeAreDownResponse)); stopCapturingLogMessages(); ASSERT_NO_ACTION(action.getAction()); ASSERT_EQUALS(1, countLogLinesContaining("host2:27017 thinks that we are down")); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 5848b2f288e..3df3c69cc0e 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -1241,10 +1241,17 @@ var ReplSetTest = function(opts) { let master = rst.getPrimary(); let id = tojson(rst.nodeList()); - // All nodes must be in primary/secondary state prior to this point. Perform a majority - // write to ensure there is a committed operation on the set. The commit point will - // propagate to all members and trigger a stable checkpoint on all persisted storage engines - // nodes. + // Algorithm precondition: All nodes must be in primary/secondary state. + // + // 1) Perform a majority write. This will guarantee the primary updates its commit point + // to the value of this write. + // + // 2) Perform a second write. This will guarantee that all nodes will update their commit + // point to a time that is >= the previous write. That will trigger a stable checkpoint + // on all nodes. + // TODO(SERVER-33248): Remove this block. We should not need to prod the replica set to + // advance the commit point if the commit point being lagged is sufficient to choose a + // sync source. function advanceCommitPoint(master) { // Shadow 'db' so that we can call 'advanceCommitPoint' directly on the primary node. let db = master.getDB('admin'); @@ -1254,10 +1261,6 @@ var ReplSetTest = function(opts) { "data": {"awaitLastStableCheckpointTimestamp": 1}, "writeConcern": {"w": "majority", "wtimeout": ReplSetTest.kDefaultTimeoutMS} })); - - // TODO(SERVER-36758): Remove the second write. We should not need to prod the - // replica set to advance the commit point once all nodes are running a version with - // SERVER-33248. This can be removed once the last stable version includes the fix. assert.commandWorked(db.adminCommand( {"appendOplogNote": 1, "data": {"awaitLastStableCheckpointTimestamp": 2}})); }; |