diff options
author | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2020-03-27 09:39:27 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-03-30 00:00:04 +0000 |
commit | d8acf3c44e41fd4423caf8a337264a6dac86b8c5 (patch) | |
tree | 5f77ae3828dd70b34a00f235164ac7339f1697f1 /src | |
parent | 97db26582d50d4028a6109144546d036d2885a7e (diff) | |
download | mongo-d8acf3c44e41fd4423caf8a337264a6dac86b8c5.tar.gz |
SERVER-47097 Use ReplSetMetadata.isPrimary to choose sync source
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 121 | ||||
-rw-r--r-- | src/mongo/rpc/metadata/repl_set_metadata.h | 5 |
3 files changed, 118 insertions, 24 deletions
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 3116173a018..88be564cd2a 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -2946,20 +2946,24 @@ bool TopologyCoordinator::shouldChangeSyncSource(const HostAndPort& currentSourc fassert(4612000, !currentSourceOpTime.isNull()); int syncSourceIndex = oqMetadata.getSyncSourceIndex(); - int primaryIndex = oqMetadata.getPrimaryIndex(); + // A 4.2 sync source's primaryIndex is unreliable, because we don't know what config version the + // index is valid for. Prefer the new 4.4 field isPrimary. + // TODO(SERVER-47125): Require isPrimary and stop using primaryIndex. + bool sourceIsPrimary = + replMetadata.getIsPrimary().value_or(oqMetadata.getPrimaryIndex() == currentSourceIndex); // Change sync source if they are not ahead of us, and don't have a sync source, // unless they are primary. const OpTime myLastOpTime = getMyLastAppliedOpTime(); - if (syncSourceIndex == -1 && currentSourceOpTime <= myLastOpTime && - primaryIndex != currentSourceIndex) { + if (syncSourceIndex == -1 && currentSourceOpTime <= myLastOpTime && !sourceIsPrimary) { logv2::DynamicAttributes attrs; attrs.add("syncSource", currentSource); attrs.add("lastFetchedOpTime", myLastOpTime); attrs.add("syncSourceLatestOplogOpTime", currentSourceOpTime); - - if (primaryIndex >= 0) { - attrs.add("primary", _rsConfig.getMemberAt(primaryIndex).getHostAndPort()); + if (replMetadata.getIsPrimary().is_initialized()) { + attrs.add("isPrimary", replMetadata.getIsPrimary().value()); + } else { + attrs.add("isPrimary", "unset"); } LOGV2(21832, diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 01d658c6cb8..bffb58ece9a 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -4071,20 +4071,21 @@ TEST_F(HeartbeatResponseTestV1, HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); // Show we like host2 while it is primary. - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), - makeReplSetMetadata(), - makeOplogQueryMetadata(lastOpTimeApplied, 1), - now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( + HostAndPort("host2"), + makeReplSetMetadata(OpTime() /* visibleOpTime */, 1 /* primaryIndex */), + makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */), + now())); // Show that we also like host2 while it has a sync source. nextAction = receiveUpHeartbeat( HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied); ASSERT_NO_ACTION(nextAction.getAction()); - ASSERT_FALSE( - getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"), - makeReplSetMetadata(), - makeOplogQueryMetadata(lastOpTimeApplied, 2, 2), - now())); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( + HostAndPort("host2"), + makeReplSetMetadata(OpTime() /* visibleOpTime */, 2 /* primaryIndex */), + makeOplogQueryMetadata(lastOpTimeApplied, 2, 2), + now())); // Show that we do not like it when it is not PRIMARY and lacks a sync source and lacks progress // beyond our own. @@ -4096,14 +4097,6 @@ TEST_F(HeartbeatResponseTestV1, makeOplogQueryMetadata(lastOpTimeApplied), now())); - // Sometimes the heartbeat is stale and the metadata says it's the primary. Trust the metadata. - ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( - HostAndPort("host2"), - makeReplSetMetadata(), - makeOplogQueryMetadata( - lastOpTimeApplied, 1 /* host2 is primary */, -1 /* no sync source */), - now())); - // But if it is secondary and has some progress beyond our own, we still like it. OpTime newerThanLastOpTimeApplied = OpTime(Timestamp(500, 0), 0); nextAction = receiveUpHeartbeat(HostAndPort("host2"), @@ -4286,6 +4279,100 @@ TEST_F(HeartbeatResponseTestV1, ShouldChangeSyncSourceWhenFresherMemberExists) { ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source")); } +TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceFromStalePrimary) { + // In this test, the TopologyCoordinator should still sync to the primary, "host2", although + // "host3" is fresher. + // TODO(SERVER-47125): merge this test with ShouldNotChangeSyncSourceFromStale46Primary. + OpTime election = OpTime(); + OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0); + OpTime fresherLastOpTimeApplied = OpTime(Timestamp(5, 0), 0); + + topoCoordSetMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false); + HeartbeatResponseAction nextAction = receiveUpHeartbeat( + HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + ASSERT_NO_ACTION(nextAction.getAction()); + + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + ASSERT_NO_ACTION(nextAction.getAction()); + + // set up complete, time for actual check + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( + HostAndPort("host2"), + makeReplSetMetadata(OpTime() /* visibleOpTime */, 1 /* primaryIndex */), + makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */), + now())); +} + +TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceFromStale42Primary) { + // In this test, the TopologyCoordinator should still sync to the primary, "host2", although + // "host3" is fresher. Simulate a 4.2 primary which sends primaryIndex but not isPrimary with + // ReplSetMetadata. + // TODO(SERVER-47125): remove this test post-4.4. + OpTime election = OpTime(); + OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0); + OpTime fresherLastOpTimeApplied = OpTime(Timestamp(5, 0), 0); + + topoCoordSetMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false); + HeartbeatResponseAction nextAction = receiveUpHeartbeat( + HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + ASSERT_NO_ACTION(nextAction.getAction()); + + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + ASSERT_NO_ACTION(nextAction.getAction()); + + // set up complete, time for actual check + ReplSetMetadata replMetadata42(getTopoCoord().getTerm(), + OpTimeAndWallTime(), + OpTime(), + getCurrentConfig().getConfigVersion(), + OID(), + 1 /* primaryIndex */, + -1 /* syncSourceIndex */, + boost::none /* isPrimary */); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( + HostAndPort("host2"), + replMetadata42, + makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */), + now())); +} + +TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceFromStale46Primary) { + // In this test, the TopologyCoordinator should still sync to the primary, "host2", although + // "host3" is fresher. Simulate a 4.6 primary which sends isPrimary but not the old + // primaryIndex field with ReplSetMetadata. + OpTime election = OpTime(); + OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0); + OpTime fresherLastOpTimeApplied = OpTime(Timestamp(5, 0), 0); + + topoCoordSetMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false); + HeartbeatResponseAction nextAction = receiveUpHeartbeat( + HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied); + ASSERT_NO_ACTION(nextAction.getAction()); + + nextAction = receiveUpHeartbeat( + HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied); + ASSERT_NO_ACTION(nextAction.getAction()); + + // If a 4.6 primary's ReplSetMetadata object, which lacks primaryIndex, is passed to + // ReplSetMetadata::readFromMetadata, it will produce a ReplSetMetadata object with primaryIndex + // -1 (the default), and primaryIndex true. Test this future scenario. + ReplSetMetadata replMetadata42(getTopoCoord().getTerm(), + OpTimeAndWallTime(), + OpTime(), + getCurrentConfig().getConfigVersion(), + OID(), + -1 /* primaryIndex */, + -1 /* syncSourceIndex */, + true /* isPrimary */); + ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource( + HostAndPort("host2"), + replMetadata42, + makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */), + now())); +} + TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenMemberHasYetToHeartbeatUs) { // In this test, the TopologyCoordinator should not tell us to change sync sources away from // "host2" since we do not use the member's heartbeatdata in pv1. diff --git a/src/mongo/rpc/metadata/repl_set_metadata.h b/src/mongo/rpc/metadata/repl_set_metadata.h index df2e2ae96b2..02dc867701e 100644 --- a/src/mongo/rpc/metadata/repl_set_metadata.h +++ b/src/mongo/rpc/metadata/repl_set_metadata.h @@ -50,6 +50,7 @@ public: * Default primary index. Also used to indicate in metadata that there is no * primary. */ + // TODO(SERVER-47125): remove post-4.4. static const int kNoPrimary = -1; ReplSetMetadata() = default; @@ -79,7 +80,7 @@ public: Status writeToMetadata(BSONObjBuilder* builder) const; /** - * Returns the OpTime of the most recent operation with which the client intereacted. + * Returns the OpTime of the most recent operation with which the client interacted. */ repl::OpTime getLastOpVisible() const { return _lastOpVisible; @@ -113,6 +114,7 @@ public: return _replicaSetId; } + // TODO(SERVER-47125): remove post-4.4. /** * Returns the index of the current primary from the perspective of the sender. * Returns kNoPrimary if there is no primary. @@ -156,6 +158,7 @@ private: long long _currentTerm = -1; long long _configVersion = -1; OID _replicaSetId; + // TODO(SERVER-47125): remove this member variable post-4.4. int _currentPrimaryIndex = kNoPrimary; int _currentSyncSourceIndex = -1; boost::optional<bool> _isPrimary; |