summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorA. Jesse Jiryu Davis <jesse@mongodb.com>2020-03-27 09:39:27 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-03-30 00:00:04 +0000
commitd8acf3c44e41fd4423caf8a337264a6dac86b8c5 (patch)
tree5f77ae3828dd70b34a00f235164ac7339f1697f1
parent97db26582d50d4028a6109144546d036d2885a7e (diff)
downloadmongo-d8acf3c44e41fd4423caf8a337264a6dac86b8c5.tar.gz
SERVER-47097 Use ReplSetMetadata.isPrimary to choose sync source
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp16
-rw-r--r--src/mongo/db/repl/topology_coordinator_v1_test.cpp121
-rw-r--r--src/mongo/rpc/metadata/repl_set_metadata.h5
3 files changed, 118 insertions, 24 deletions
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 3116173a018..88be564cd2a 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -2946,20 +2946,24 @@ bool TopologyCoordinator::shouldChangeSyncSource(const HostAndPort& currentSourc
fassert(4612000, !currentSourceOpTime.isNull());
int syncSourceIndex = oqMetadata.getSyncSourceIndex();
- int primaryIndex = oqMetadata.getPrimaryIndex();
+ // A 4.2 sync source's primaryIndex is unreliable, because we don't know what config version the
+ // index is valid for. Prefer the new 4.4 field isPrimary.
+ // TODO(SERVER-47125): Require isPrimary and stop using primaryIndex.
+ bool sourceIsPrimary =
+ replMetadata.getIsPrimary().value_or(oqMetadata.getPrimaryIndex() == currentSourceIndex);
// Change sync source if they are not ahead of us, and don't have a sync source,
// unless they are primary.
const OpTime myLastOpTime = getMyLastAppliedOpTime();
- if (syncSourceIndex == -1 && currentSourceOpTime <= myLastOpTime &&
- primaryIndex != currentSourceIndex) {
+ if (syncSourceIndex == -1 && currentSourceOpTime <= myLastOpTime && !sourceIsPrimary) {
logv2::DynamicAttributes attrs;
attrs.add("syncSource", currentSource);
attrs.add("lastFetchedOpTime", myLastOpTime);
attrs.add("syncSourceLatestOplogOpTime", currentSourceOpTime);
-
- if (primaryIndex >= 0) {
- attrs.add("primary", _rsConfig.getMemberAt(primaryIndex).getHostAndPort());
+ if (replMetadata.getIsPrimary().is_initialized()) {
+ attrs.add("isPrimary", replMetadata.getIsPrimary().value());
+ } else {
+ attrs.add("isPrimary", "unset");
}
LOGV2(21832,
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index 01d658c6cb8..bffb58ece9a 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -4071,20 +4071,21 @@ TEST_F(HeartbeatResponseTestV1,
HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied);
ASSERT_NO_ACTION(nextAction.getAction());
// Show we like host2 while it is primary.
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"),
- makeReplSetMetadata(),
- makeOplogQueryMetadata(lastOpTimeApplied, 1),
- now()));
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(
+ HostAndPort("host2"),
+ makeReplSetMetadata(OpTime() /* visibleOpTime */, 1 /* primaryIndex */),
+ makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */),
+ now()));
// Show that we also like host2 while it has a sync source.
nextAction = receiveUpHeartbeat(
HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, election, lastOpTimeApplied);
ASSERT_NO_ACTION(nextAction.getAction());
- ASSERT_FALSE(
- getTopoCoord().shouldChangeSyncSource(HostAndPort("host2"),
- makeReplSetMetadata(),
- makeOplogQueryMetadata(lastOpTimeApplied, 2, 2),
- now()));
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(
+ HostAndPort("host2"),
+ makeReplSetMetadata(OpTime() /* visibleOpTime */, 2 /* primaryIndex */),
+ makeOplogQueryMetadata(lastOpTimeApplied, 2, 2),
+ now()));
// Show that we do not like it when it is not PRIMARY and lacks a sync source and lacks progress
// beyond our own.
@@ -4096,14 +4097,6 @@ TEST_F(HeartbeatResponseTestV1,
makeOplogQueryMetadata(lastOpTimeApplied),
now()));
- // Sometimes the heartbeat is stale and the metadata says it's the primary. Trust the metadata.
- ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(
- HostAndPort("host2"),
- makeReplSetMetadata(),
- makeOplogQueryMetadata(
- lastOpTimeApplied, 1 /* host2 is primary */, -1 /* no sync source */),
- now()));
-
// But if it is secondary and has some progress beyond our own, we still like it.
OpTime newerThanLastOpTimeApplied = OpTime(Timestamp(500, 0), 0);
nextAction = receiveUpHeartbeat(HostAndPort("host2"),
@@ -4286,6 +4279,100 @@ TEST_F(HeartbeatResponseTestV1, ShouldChangeSyncSourceWhenFresherMemberExists) {
ASSERT_EQUALS(1, countLogLinesContaining("Choosing new sync source"));
}
+TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceFromStalePrimary) {
+ // In this test, the TopologyCoordinator should still sync to the primary, "host2", although
+ // "host3" is fresher.
+ // TODO(SERVER-47125): merge this test with ShouldNotChangeSyncSourceFromStale46Primary.
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(5, 0), 0);
+
+ topoCoordSetMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(
+ HostAndPort("host2"),
+ makeReplSetMetadata(OpTime() /* visibleOpTime */, 1 /* primaryIndex */),
+ makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */),
+ now()));
+}
+
+TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceFromStale42Primary) {
+ // In this test, the TopologyCoordinator should still sync to the primary, "host2", although
+ // "host3" is fresher. Simulate a 4.2 primary which sends primaryIndex but not isPrimary with
+ // ReplSetMetadata.
+ // TODO(SERVER-47125): remove this test post-4.4.
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(5, 0), 0);
+
+ topoCoordSetMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // set up complete, time for actual check
+ ReplSetMetadata replMetadata42(getTopoCoord().getTerm(),
+ OpTimeAndWallTime(),
+ OpTime(),
+ getCurrentConfig().getConfigVersion(),
+ OID(),
+ 1 /* primaryIndex */,
+ -1 /* syncSourceIndex */,
+ boost::none /* isPrimary */);
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(
+ HostAndPort("host2"),
+ replMetadata42,
+ makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */),
+ now()));
+}
+
+TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceFromStale46Primary) {
+ // In this test, the TopologyCoordinator should still sync to the primary, "host2", although
+ // "host3" is fresher. Simulate a 4.6 primary which sends isPrimary but not the old
+ // primaryIndex field with ReplSetMetadata.
+ OpTime election = OpTime();
+ OpTime lastOpTimeApplied = OpTime(Timestamp(4, 0), 0);
+ OpTime fresherLastOpTimeApplied = OpTime(Timestamp(5, 0), 0);
+
+ topoCoordSetMyLastAppliedOpTime(lastOpTimeApplied, Date_t(), false);
+ HeartbeatResponseAction nextAction = receiveUpHeartbeat(
+ HostAndPort("host2"), "rs0", MemberState::RS_PRIMARY, election, lastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ nextAction = receiveUpHeartbeat(
+ HostAndPort("host3"), "rs0", MemberState::RS_SECONDARY, election, fresherLastOpTimeApplied);
+ ASSERT_NO_ACTION(nextAction.getAction());
+
+ // If a 4.6 primary's ReplSetMetadata object, which lacks primaryIndex, is passed to
+ // ReplSetMetadata::readFromMetadata, it will produce a ReplSetMetadata object with primaryIndex
+ // -1 (the default), and primaryIndex true. Test this future scenario.
+ ReplSetMetadata replMetadata42(getTopoCoord().getTerm(),
+ OpTimeAndWallTime(),
+ OpTime(),
+ getCurrentConfig().getConfigVersion(),
+ OID(),
+ -1 /* primaryIndex */,
+ -1 /* syncSourceIndex */,
+ true /* isPrimary */);
+ ASSERT_FALSE(getTopoCoord().shouldChangeSyncSource(
+ HostAndPort("host2"),
+ replMetadata42,
+ makeOplogQueryMetadata(lastOpTimeApplied, 1 /* primaryIndex */),
+ now()));
+}
+
TEST_F(HeartbeatResponseTestV1, ShouldNotChangeSyncSourceWhenMemberHasYetToHeartbeatUs) {
// In this test, the TopologyCoordinator should not tell us to change sync sources away from
// "host2" since we do not use the member's heartbeatdata in pv1.
diff --git a/src/mongo/rpc/metadata/repl_set_metadata.h b/src/mongo/rpc/metadata/repl_set_metadata.h
index df2e2ae96b2..02dc867701e 100644
--- a/src/mongo/rpc/metadata/repl_set_metadata.h
+++ b/src/mongo/rpc/metadata/repl_set_metadata.h
@@ -50,6 +50,7 @@ public:
* Default primary index. Also used to indicate in metadata that there is no
* primary.
*/
+ // TODO(SERVER-47125): remove post-4.4.
static const int kNoPrimary = -1;
ReplSetMetadata() = default;
@@ -79,7 +80,7 @@ public:
Status writeToMetadata(BSONObjBuilder* builder) const;
/**
- * Returns the OpTime of the most recent operation with which the client intereacted.
+ * Returns the OpTime of the most recent operation with which the client interacted.
*/
repl::OpTime getLastOpVisible() const {
return _lastOpVisible;
@@ -113,6 +114,7 @@ public:
return _replicaSetId;
}
+ // TODO(SERVER-47125): remove post-4.4.
/**
* Returns the index of the current primary from the perspective of the sender.
* Returns kNoPrimary if there is no primary.
@@ -156,6 +158,7 @@ private:
long long _currentTerm = -1;
long long _configVersion = -1;
OID _replicaSetId;
+ // TODO(SERVER-47125): remove this member variable post-4.4.
int _currentPrimaryIndex = kNoPrimary;
int _currentSyncSourceIndex = -1;
boost::optional<bool> _isPrimary;