diff options
author | Lingzhi Deng <lingzhi.deng@mongodb.com> | 2019-10-17 00:10:12 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-10-17 00:10:12 +0000 |
commit | b37eb88c8ae801751711dd54f2506d3561989db7 (patch) | |
tree | 9a8e7d66ea6650a97113bc541347b7857a23762f /src/mongo | |
parent | 1c616ff20fa768296ef11156bd32c47e9ae7ec34 (diff) | |
download | mongo-b37eb88c8ae801751711dd54f2506d3561989db7.tar.gz |
SERVER-43949: Check for term equality when comparing OpTime for a writeConcern
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.h | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 1 |
3 files changed, 34 insertions, 5 deletions
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 1de492a3eef..41593036ad2 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -830,6 +830,12 @@ bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime, return false; } + // Invariants that we only wait for an OpTime in the term that this node is currently writing + // to. In other words, we do not support waiting for an OpTime written by a previous primary + // because comparing members' lastApplied/lastDurable alone is not sufficient to tell if the + // OpTime has been replicated. + invariant(targetOpTime.getTerm() == getMyLastAppliedOpTime().getTerm()); + for (auto&& memberData : _memberData) { const auto isArbiter = _rsConfig.getMemberAt(memberData.getConfigIndex()).isArbiter(); @@ -841,7 +847,14 @@ bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime, const OpTime& memberOpTime = durablyWritten ? memberData.getLastDurableOpTime() : memberData.getLastAppliedOpTime(); - if (memberOpTime >= targetOpTime) { + // In addition to checking if a member has a greater/equal timestamp field we also need to + // make sure that the memberOpTime is in the same term as the OpTime we wait for. If a + // member's OpTime has a higher term, it indicates that this node will be stepping down. And + // thus we do not know if the target OpTime in our previous term has been replicated to the + // member because the memberOpTime in a higher term could correspond to an operation in a + // divergent branch of history regardless of its timestamp. + if (memberOpTime.getTerm() == targetOpTime.getTerm() && + memberOpTime.getTimestamp() >= targetOpTime.getTimestamp()) { --numNodes; } @@ -856,10 +869,25 @@ bool TopologyCoordinator::haveTaggedNodesReachedOpTime(const OpTime& opTime, const ReplSetTagPattern& tagPattern, bool durablyWritten) { ReplSetTagMatch matcher(tagPattern); + + // Invariants that we only wait for an OpTime in the term that this node is currently writing + // to. In other words, we do not support waiting for an OpTime written by a previous primary + // because comparing members' lastApplied/lastDurable alone is not sufficient to tell if the + // OpTime has been replicated. + invariant(opTime.getTerm() == getMyLastAppliedOpTime().getTerm()); + for (auto&& memberData : _memberData) { const OpTime& memberOpTime = durablyWritten ? memberData.getLastDurableOpTime() : memberData.getLastAppliedOpTime(); - if (memberOpTime >= opTime) { + + // In addition to checking if a member has a greater/equal timestamp field we also need to + // make sure that the memberOpTime is in the same term as the OpTime we wait for. If a + // member's OpTime has a higher term, it indicates that this node will be stepping down. And + // thus we do not know if the target OpTime in our previous term has been replicated to the + // member because the memberOpTime in a higher term could correspond to an operation in a + // divergent branch of history regardless of its timestamp. + if (memberOpTime.getTerm() == opTime.getTerm() && + memberOpTime.getTimestamp() >= opTime.getTimestamp()) { // This node has reached the desired optime, now we need to check if it is a part // of the tagPattern. int memberIndex = memberData.getConfigIndex(); diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index d1e8a8eb2b9..ec30a024d4b 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -407,14 +407,14 @@ public: const StatusWith<ReplSetHeartbeatResponse>& hbResponse); /** - * Returns whether or not at least 'numNodes' have reached the given opTime. + * Returns whether or not at least 'numNodes' have reached the given opTime with the same term. * "durablyWritten" indicates whether the operation has to be durably applied. */ bool haveNumNodesReachedOpTime(const OpTime& opTime, int numNodes, bool durablyWritten); /** - * Returns whether or not at least one node matching the tagPattern has reached - * the given opTime. + * Returns whether or not at least one node matching the tagPattern has reached the given opTime + * with the same term. * "durablyWritten" indicates whether the operation has to be durably applied. */ bool haveTaggedNodesReachedOpTime(const OpTime& opTime, diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 3a272d4d67a..208ce19ef11 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -114,6 +114,7 @@ protected: getTopoCoord().changeMemberState_forTest(MemberState::RS_PRIMARY, electionTimestamp); getTopoCoord().setCurrentPrimary_forTest(_selfIndex, electionTimestamp); OpTime dummyOpTime(Timestamp(1, 1), getTopoCoord().getTerm()); + setMyOpTime(dummyOpTime); ASSERT_OK(getTopoCoord().completeTransitionToPrimary(dummyOpTime)); } |