From ee1e46cee281560bf13529c6db75cfb317703780 Mon Sep 17 00:00:00 2001 From: Suganthi Mani Date: Tue, 8 Jan 2019 17:41:21 -0500 Subject: SERVER-35608 Added an invariant to make sure that optime with lower and higher term than the current lastAppliedOpTime will have lower and higher timestamp respectively. And, provided both the optime and the current lastAppliedOpTime terms are in pv1. (cherry picked from commit b91aaa5bbc54a176cc61e5051cb6be857747b068) --- src/mongo/db/repl/replication_coordinator_impl.cpp | 22 +++- .../db/repl/replication_coordinator_impl_test.cpp | 116 +++++++++++++++++++-- 2 files changed, 129 insertions(+), 9 deletions(-) diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index eec1b8989ae..ee60702be5e 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1165,10 +1165,18 @@ void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) { void ReplicationCoordinatorImpl::setMyLastAppliedOpTimeForward(const OpTime& opTime) { stdx::unique_lock lock(_mutex); - if (opTime > _getMyLastAppliedOpTime_inlock()) { + auto myLastAppliedOpTime = _getMyLastAppliedOpTime_inlock(); + if (opTime > myLastAppliedOpTime) { const bool allowRollback = false; _setMyLastAppliedOpTime_inlock(opTime, allowRollback); _reportUpstream_inlock(std::move(lock)); + } else if (opTime != myLastAppliedOpTime) { + // In pv1, oplog entries are ordered by non-decreasing term and strictly increasing + // timestamp. So, in pv1, it is not possible for us to get opTime with lower term and + // timestamp higher than or equal to our current lastAppliedOpTime. 
+ invariant(opTime.getTerm() == OpTime::kUninitializedTerm || + myLastAppliedOpTime.getTerm() == OpTime::kUninitializedTerm || + opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()); } } @@ -1224,7 +1232,17 @@ void ReplicationCoordinatorImpl::_reportUpstream_inlock(stdx::unique_locklastAppliedOpTime <= opTime); + auto myLastAppliedOpTime = mySlaveInfo->lastAppliedOpTime; + + if (!(isRollbackAllowed || opTime == myLastAppliedOpTime)) { + invariant(opTime > myLastAppliedOpTime); + // In pv1, oplog entries are ordered by non-decreasing term and strictly increasing + // timestamp. So, in pv1, it is not possible for us to get opTime with higher term and + // timestamp less than or equal to our current lastAppliedOpTime. + invariant(opTime.getTerm() == OpTime::kUninitializedTerm || + myLastAppliedOpTime.getTerm() == OpTime::kUninitializedTerm || + opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()); + } _updateSlaveInfoAppliedOpTime_inlock(mySlaveInfo, opTime); _opTimeWaiterList.signalAndRemoveIf_inlock( diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index e586f1f0e9e..5426bd237f4 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -65,6 +65,7 @@ #include "mongo/stdx/future.h" #include "mongo/stdx/thread.h" #include "mongo/unittest/barrier.h" +#include "mongo/unittest/death_test.h" #include "mongo/unittest/unittest.h" #include "mongo/util/assert_util.h" #include "mongo/util/log.h" @@ -3786,8 +3787,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredGreaterOpTime) { << 0))), HostAndPort("node1", 12345)); runSingleNodeElection(makeOperationContext(), getReplCoord(), getNet()); - getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1)); - getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 1)); 
getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 1)); OpTime committedOpTime(Timestamp(200, 0), 1); auto pseudoLogOp = stdx::async(stdx::launch::async, [this, &committedOpTime]() { // Not guaranteed to be scheduled after waitUntil blocks... @@ -3815,8 +3816,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) { << 0))), HostAndPort("node1", 12345)); runSingleNodeElection(makeOperationContext(), getReplCoord(), getNet()); - getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1)); - getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 1)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 1)); OpTime opTimeToWait(Timestamp(100, 0), 1); @@ -4742,9 +4743,10 @@ TEST_F(ReplCoordTest, HostAndPort("node1", 12345)); - OpTime time1(Timestamp(100, 1), 1); - OpTime time2(Timestamp(100, 2), 1); - OpTime time3(Timestamp(100, 3), 1); + auto term = getTopoCoord().getTerm(); + OpTime time1(Timestamp(100, 1), term); + OpTime time2(Timestamp(100, 2), term); + OpTime time3(Timestamp(100, 3), term); getReplCoord()->setMyLastAppliedOpTime(time1); ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime()); @@ -4755,6 +4757,106 @@ TEST_F(ReplCoordTest, ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime()); } +DEATH_TEST_F(ReplCoordTest, + SetMyLastOpTimeToTimestampLesserThanCurrentLastOpTimeTimestampButWithHigherTerm, + "opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()") { + assertStartSuccess(BSON("_id" + << "mySet" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" + << 0))), + HostAndPort("node1", 12345)); + + + OpTime time1(Timestamp(100, 1), 1); + OpTime time2(Timestamp(99, 1), 2); + + getReplCoord()->setMyLastAppliedOpTime(time1); + ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime()); + // Since in pv1, oplog entries are ordered by non-decreasing + // term and strictly 
increasing timestamp, it leads to invariant failure. + getReplCoord()->setMyLastAppliedOpTimeForward(time2); +} + +DEATH_TEST_F(ReplCoordTest, + SetMyLastOpTimeToTimestampEqualToCurrentLastOpTimeTimestampButWithHigherTerm, + "opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()") { + assertStartSuccess(BSON("_id" + << "mySet" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" + << 0))), + HostAndPort("node1", 12345)); + + + OpTime time1(Timestamp(100, 1), 1); + OpTime time2(Timestamp(100, 1), 2); + + getReplCoord()->setMyLastAppliedOpTime(time1); + ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime()); + // Since in pv1, oplog entries are ordered by non-decreasing + // term and strictly increasing timestamp, it leads to invariant failure. + getReplCoord()->setMyLastAppliedOpTimeForward(time2); +} + +DEATH_TEST_F(ReplCoordTest, + SetMyLastOpTimeToTimestampGreaterThanCurrentLastOpTimeTimestampButWithLesserTerm, + "opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()") { + assertStartSuccess(BSON("_id" + << "mySet" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" + << 0))), + HostAndPort("node1", 12345)); + + + OpTime time1(Timestamp(100, 1), 1); + OpTime time2(Timestamp(100, 2), 0); + + getReplCoord()->setMyLastAppliedOpTime(time1); + ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime()); + // Since in pv1, oplog entries are ordered by non-decreasing + // term and strictly increasing timestamp, it leads to invariant failure. 
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2); +} + +DEATH_TEST_F(ReplCoordTest, + SetMyLastOpTimeToTimestampEqualToCurrentLastOpTimeTimestampButWithLesserTerm, + "opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()") { + assertStartSuccess(BSON("_id" + << "mySet" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" + << 0))), + HostAndPort("node1", 12345)); + + + OpTime time1(Timestamp(100, 1), 1); + OpTime time2(Timestamp(100, 1), 0); + + getReplCoord()->setMyLastAppliedOpTime(time1); + ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime()); + // Since in pv1, oplog entries are ordered by non-decreasing + // term and strictly increasing timestamp, it leads to invariant failure. + getReplCoord()->setMyLastAppliedOpTimeForward(time2); +} + TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelievedToBeUp) { assertStartSuccess( BSON("_id" -- cgit v1.2.1