summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSuganthi Mani <suganthi.mani@mongodb.com>2018-11-19 11:08:52 -0500
committerSuganthi Mani <suganthi.mani@mongodb.com>2019-01-23 11:08:09 -0500
commit1ce959ee43baeaa6679d8b50c2e80d4650e94e3a (patch)
treed53a630562055a5a865897edd81c535bf7f77d18 /src
parent1a24b7d77d1271de19b61705b3890953d485978e (diff)
downloadmongo-1ce959ee43baeaa6679d8b50c2e80d4650e94e3a.tar.gz
SERVER-35608 Added an invariant to make sure that an optime with a lower or higher term than the current lastAppliedOpTime also has a correspondingly lower or higher timestamp, provided both the optime's and the current lastAppliedOpTime's terms are in pv1.
(cherry picked from commit b91aaa5bbc54a176cc61e5051cb6be857747b068)
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp36
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp126
2 files changed, 145 insertions, 17 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index e3a351cfecc..c2b41c82ad3 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1079,14 +1079,26 @@ void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
void ReplicationCoordinatorImpl::setMyLastAppliedOpTimeForward(const OpTime& opTime,
DataConsistency consistency) {
stdx::unique_lock<stdx::mutex> lock(_mutex);
- if (opTime > _getMyLastAppliedOpTime_inlock()) {
+ auto myLastAppliedOpTime = _getMyLastAppliedOpTime_inlock();
+ if (opTime > myLastAppliedOpTime) {
_setMyLastAppliedOpTime_inlock(opTime, false, consistency);
_reportUpstream_inlock(std::move(lock));
- } else if (consistency == DataConsistency::Consistent && _canAcceptNonLocalWrites &&
- _rsConfig.getWriteMajority() == 1) {
- // Single vote primaries may have a lagged stable timestamp due to paring back the stable
- // timestamp to the all committed timestamp.
- _setStableTimestampForStorage_inlock();
+ } else {
+ if (opTime != myLastAppliedOpTime) {
+ // In pv1, oplog entries are ordered by non-decreasing term and strictly increasing
+ // timestamp. So, in pv1, it's not possible for us to get an opTime with a lower term and
+ // a timestamp higher than or equal to our current lastAppliedOpTime.
+ invariant(opTime.getTerm() == OpTime::kUninitializedTerm ||
+ myLastAppliedOpTime.getTerm() == OpTime::kUninitializedTerm ||
+ opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp());
+ }
+
+ if (consistency == DataConsistency::Consistent && _canAcceptNonLocalWrites &&
+ _rsConfig.getWriteMajority() == 1) {
+ // Single vote primaries may have a lagged stable timestamp due to paring back the
+ // stable timestamp to the all committed timestamp.
+ _setStableTimestampForStorage_inlock();
+ }
}
}
@@ -1146,7 +1158,17 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTime_inlock(const OpTime& op
bool isRollbackAllowed,
DataConsistency consistency) {
auto* myMemberData = _topCoord->getMyMemberData();
- invariant(isRollbackAllowed || opTime >= myMemberData->getLastAppliedOpTime());
+ auto myLastAppliedOpTime = myMemberData->getLastAppliedOpTime();
+
+ if (!(isRollbackAllowed || opTime == myLastAppliedOpTime)) {
+ invariant(opTime > myLastAppliedOpTime);
+ // In pv1, oplog entries are ordered by non-decreasing term and strictly increasing
+ // timestamp. So, in pv1, it's not possible for us to get an opTime with a higher term and
+ // a timestamp less than or equal to our current lastAppliedOpTime.
+ invariant(opTime.getTerm() == OpTime::kUninitializedTerm ||
+ myLastAppliedOpTime.getTerm() == OpTime::kUninitializedTerm ||
+ opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp());
+ }
myMemberData->setLastAppliedOpTime(opTime, _replExecutor->now());
// If we are using applied times to calculate the commit level, update it now.
if (!_rsConfig.getWriteConcernMajorityShouldJournal()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 44b59d183eb..5cb06cb11d8 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -68,6 +68,7 @@
#include "mongo/stdx/future.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/barrier.h"
+#include "mongo/unittest/death_test.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/log.h"
@@ -942,8 +943,8 @@ TEST_F(
getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0));
simulateSuccessfulV1Election();
- OpTime time1(Timestamp(100, 1), 1);
- OpTime time2(Timestamp(100, 2), 1);
+ OpTime time1(Timestamp(100, 2), 1);
+ OpTime time2(Timestamp(100, 3), 1);
// Set up valid write concerns for the rest of the test
WriteConcernOptions majorityWriteConcern;
@@ -4217,7 +4218,7 @@ TEST_F(StableOpTimeTest, OpTimeCandidatesAreNotAddedWhenStateIsNotConsistent) {
initReplSetMode();
auto repl = getReplCoord();
- long long term = 0;
+ long long term = getTopoCoord().getTerm();
OpTime consistentOpTime = OpTime({1, 1}, term);
OpTime inconsistentOpTime = OpTime({1, 2}, term);
@@ -4513,8 +4514,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredGreaterOpTime) {
auto opCtx = makeOperationContext();
runSingleNodeElection(opCtx.get());
- getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1));
- getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 1));
OpTime committedOpTime(Timestamp(200, 1), 1);
auto pseudoLogOp = stdx::async(stdx::launch::async, [this, &committedOpTime]() {
// Not guaranteed to be scheduled after waitUntil blocks...
@@ -4540,8 +4541,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) {
HostAndPort("node1", 12345));
auto opCtx = makeOperationContext();
runSingleNodeElection(opCtx.get());
- getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1));
- getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 1));
OpTime opTimeToWait(Timestamp(100, 1), 1);
@@ -5399,9 +5400,10 @@ TEST_F(ReplCoordTest,
HostAndPort("node1", 12345));
- OpTime time1(Timestamp(100, 1), 1);
- OpTime time2(Timestamp(100, 2), 1);
- OpTime time3(Timestamp(100, 3), 1);
+ auto term = getTopoCoord().getTerm();
+ OpTime time1(Timestamp(100, 1), term);
+ OpTime time2(Timestamp(100, 2), term);
+ OpTime time3(Timestamp(100, 3), term);
auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
getReplCoord()->setMyLastAppliedOpTime(time1);
@@ -5413,6 +5415,110 @@ TEST_F(ReplCoordTest,
ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
}
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampLesserThanCurrentLastOpTimeTimestampButWithHigherTerm,
+ "opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(99, 1), 2);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+ // Since, in pv1, oplog entries are ordered by non-decreasing
+ // term and strictly increasing timestamp, this leads to an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampEqualToCurrentLastOpTimeTimestampButWithHigherTerm,
+ "opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 1), 2);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+ // Since, in pv1, oplog entries are ordered by non-decreasing
+ // term and strictly increasing timestamp, this leads to an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampGreaterThanCurrentLastOpTimeTimestampButWithLesserTerm,
+ "opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 2), 0);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+ // Since, in pv1, oplog entries are ordered by non-decreasing
+ // term and strictly increasing timestamp, this leads to an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampEqualToCurrentLastOpTimeTimestampButWithLesserTerm,
+ "opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 1), 0);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+ // Since, in pv1, oplog entries are ordered by non-decreasing
+ // term and strictly increasing timestamp, this leads to an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelievedToBeUp) {
assertStartSuccess(
BSON("_id"