author     Suganthi Mani <suganthi.mani@mongodb.com>    2018-11-19 11:08:52 -0500
committer  Suganthi Mani <suganthi.mani@mongodb.com>    2019-01-08 18:56:04 -0500
commit     938f2b25a50a4c907b736ffe81546ae4c42e4f0c
tree       341de8c3a517c678610666001893ea20e71af2d6
parent     e28fadf9059711910433d9fe7561e2ef7b682f8d
download   mongo-938f2b25a50a4c907b736ffe81546ae4c42e4f0c.tar.gz
SERVER-35608 Added an invariant to make sure that an optime with a lower or higher term than the current lastAppliedOpTime also has a lower or higher timestamp, respectively, provided both the optime's term and the current lastAppliedOpTime's term are initialized (pv1).
(cherry picked from commit b91aaa5bbc54a176cc61e5051cb6be857747b068)
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp       |  24
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_test.cpp  | 126
-rw-r--r--  src/mongo/db/repl/topology_coordinator.cpp               |  12
3 files changed, 145 insertions, 17 deletions
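
Before the diff itself, a minimal, self-contained sketch of the pv1 ordering rule that both new invariants encode. The OpTime struct, kUninitializedTerm constant, and helper names below are illustrative stand-ins, not the real mongo::repl types; pv1 OpTimes compare term-major, and the rule is that term order and timestamp order must agree whenever both terms are initialized.

// Sketch (not part of the patch): the pv1 ordering rule behind the new invariants.
// Stand-in types for illustration only.
#include <cassert>
#include <tuple>

namespace sketch {

constexpr long long kUninitializedTerm = -1;

struct OpTime {
    unsigned long long timestamp;  // stand-in for Timestamp(secs, inc)
    long long term;
};

// OpTimes compare term-major in pv1: term first, then timestamp.
bool lessThan(const OpTime& a, const OpTime& b) {
    return std::tie(a.term, a.timestamp) < std::tie(b.term, b.timestamp);
}

// In pv1 the oplog is ordered by non-decreasing term and strictly increasing
// timestamp, so term order and timestamp order must agree whenever both terms
// are initialized. This is the property both new invariants assert.
bool orderingIsConsistent(const OpTime& incoming, const OpTime& lastApplied) {
    if (incoming.term == kUninitializedTerm || lastApplied.term == kUninitializedTerm)
        return true;  // uninitialized terms are exempt
    if (lessThan(incoming, lastApplied))
        return incoming.timestamp < lastApplied.timestamp;
    if (lessThan(lastApplied, incoming))
        return incoming.timestamp > lastApplied.timestamp;
    return true;  // equal optimes are always consistent
}

}  // namespace sketch

int main() {
    using sketch::OpTime;
    const OpTime lastApplied{/*timestamp=*/100, /*term=*/1};
    assert(sketch::orderingIsConsistent({99, 1}, lastApplied));    // older entry, same term
    assert(!sketch::orderingIsConsistent({99, 2}, lastApplied));   // higher term, lower timestamp
    assert(!sketch::orderingIsConsistent({101, 0}, lastApplied));  // lower term, higher timestamp
    return 0;
}
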
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 134adad96ef..b78e8b56f11 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1078,14 +1078,26 @@ void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
void ReplicationCoordinatorImpl::setMyLastAppliedOpTimeForward(const OpTime& opTime,
DataConsistency consistency) {
stdx::unique_lock<stdx::mutex> lock(_mutex);
- if (opTime > _getMyLastAppliedOpTime_inlock()) {
+ auto myLastAppliedOpTime = _getMyLastAppliedOpTime_inlock();
+ if (opTime > myLastAppliedOpTime) {
_setMyLastAppliedOpTime_inlock(opTime, false, consistency);
_reportUpstream_inlock(std::move(lock));
- } else if (consistency == DataConsistency::Consistent && _canAcceptNonLocalWrites &&
- _rsConfig.getWriteMajority() == 1) {
- // Single vote primaries may have a lagged stable timestamp due to paring back the stable
- // timestamp to the all committed timestamp.
- _setStableTimestampForStorage_inlock();
+ } else {
+ if (opTime != myLastAppliedOpTime) {
+ // In pv1, oplog entries are ordered by non-decreasing term and strictly increasing
+            // timestamp. So, in pv1, it's not possible for us to get an opTime with a lower term
+            // and a timestamp higher than or equal to our current lastAppliedOpTime.
+ invariant(opTime.getTerm() == OpTime::kUninitializedTerm ||
+ myLastAppliedOpTime.getTerm() == OpTime::kUninitializedTerm ||
+ opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp());
+ }
+
+ if (consistency == DataConsistency::Consistent && _canAcceptNonLocalWrites &&
+ _rsConfig.getWriteMajority() == 1) {
+ // Single vote primaries may have a lagged stable timestamp due to paring back the
+ // stable timestamp to the all committed timestamp.
+ _setStableTimestampForStorage_inlock();
+ }
}
}
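
The restructured branch above can be summarized by the following control-flow sketch. It is an approximation under stand-in types: the Action enum and the isSingleVoterPrimary flag are hypothetical, with the flag standing in for the `consistency == DataConsistency::Consistent && _canAcceptNonLocalWrites && _rsConfig.getWriteMajority() == 1` test in the real code.

// Sketch (not part of the patch): control flow of the rewritten
// setMyLastAppliedOpTimeForward, with hypothetical stand-in types.
#include <cassert>
#include <tuple>

namespace sketch {

constexpr long long kUninitializedTerm = -1;

struct OpTime {
    unsigned long long timestamp;
    long long term;
};

bool operator==(const OpTime& a, const OpTime& b) {
    return a.timestamp == b.timestamp && a.term == b.term;
}

bool operator>(const OpTime& a, const OpTime& b) {
    return std::tie(a.term, a.timestamp) > std::tie(b.term, b.timestamp);
}

enum class Action { kAdvanceAndReport, kRefreshStableTimestamp, kNoop, kInvariantFailure };

Action setMyLastAppliedOpTimeForward(const OpTime& opTime,
                                     const OpTime& lastApplied,
                                     bool isSingleVoterPrimary) {
    if (opTime > lastApplied)
        return Action::kAdvanceAndReport;  // normal forward progress
    if (!(opTime == lastApplied)) {
        // Stale optime: in pv1 its timestamp must also be strictly behind ours,
        // unless either term is uninitialized.
        const bool exempt = opTime.term == kUninitializedTerm ||
            lastApplied.term == kUninitializedTerm;
        if (!exempt && !(opTime.timestamp < lastApplied.timestamp))
            return Action::kInvariantFailure;
    }
    // Single-vote primaries may still need to nudge the stable timestamp forward.
    return isSingleVoterPrimary ? Action::kRefreshStableTimestamp : Action::kNoop;
}

}  // namespace sketch

int main() {
    using sketch::OpTime;
    const OpTime lastApplied{/*timestamp=*/100, /*term=*/1};
    // An optime ahead of lastApplied advances it and is reported upstream.
    assert(sketch::setMyLastAppliedOpTimeForward({101, 1}, lastApplied, false) ==
           sketch::Action::kAdvanceAndReport);
    // A non-advancing optime whose timestamp is not behind ours trips the new invariant.
    assert(sketch::setMyLastAppliedOpTimeForward({101, 0}, lastApplied, false) ==
           sketch::Action::kInvariantFailure);
    return 0;
}
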
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 5513971dd58..8b583073c2e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -68,6 +68,7 @@
#include "mongo/stdx/future.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/barrier.h"
+#include "mongo/unittest/death_test.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/log.h"
@@ -919,8 +920,8 @@ TEST_F(
getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0));
simulateSuccessfulV1Election();
- OpTime time1(Timestamp(100, 1), 1);
- OpTime time2(Timestamp(100, 2), 1);
+ OpTime time1(Timestamp(100, 2), 1);
+ OpTime time2(Timestamp(100, 3), 1);
// Set up valid write concerns for the rest of the test
WriteConcernOptions majorityWriteConcern;
@@ -4123,7 +4124,7 @@ TEST_F(StableOpTimeTest, OpTimeCandidatesAreNotAddedWhenStateIsNotConsistent) {
initReplSetMode();
auto repl = getReplCoord();
- long long term = 0;
+ long long term = getTopoCoord().getTerm();
OpTime consistentOpTime = OpTime({1, 1}, term);
OpTime inconsistentOpTime = OpTime({1, 2}, term);
@@ -4387,8 +4388,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredGreaterOpTime) {
auto opCtx = makeOperationContext();
runSingleNodeElection(opCtx.get());
- getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1));
- getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 1));
OpTime committedOpTime(Timestamp(200, 1), 1);
auto pseudoLogOp = stdx::async(stdx::launch::async, [this, &committedOpTime]() {
// Not guaranteed to be scheduled after waitUntil blocks...
@@ -4414,8 +4415,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) {
HostAndPort("node1", 12345));
auto opCtx = makeOperationContext();
runSingleNodeElection(opCtx.get());
- getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1));
- getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 1));
OpTime opTimeToWait(Timestamp(100, 1), 1);
@@ -5247,9 +5248,10 @@ TEST_F(ReplCoordTest,
HostAndPort("node1", 12345));
- OpTime time1(Timestamp(100, 1), 1);
- OpTime time2(Timestamp(100, 2), 1);
- OpTime time3(Timestamp(100, 3), 1);
+ auto term = getTopoCoord().getTerm();
+ OpTime time1(Timestamp(100, 1), term);
+ OpTime time2(Timestamp(100, 2), term);
+ OpTime time3(Timestamp(100, 3), term);
auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
getReplCoord()->setMyLastAppliedOpTime(time1);
@@ -5261,6 +5263,110 @@ TEST_F(ReplCoordTest,
ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
}
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampLesserThanCurrentLastOpTimeTimestampButWithHigherTerm,
+ "opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(99, 1), 2);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+    // Since, in pv1, oplog entries are ordered by non-decreasing term and strictly
+    // increasing timestamp, this update triggers an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampEqualToCurrentLastOpTimeTimestampButWithHigherTerm,
+ "opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 1), 2);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+    // Since, in pv1, oplog entries are ordered by non-decreasing term and strictly
+    // increasing timestamp, this update triggers an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampGreaterThanCurrentLastOpTimeTimestampButWithLesserTerm,
+ "opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 2), 0);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+    // Since, in pv1, oplog entries are ordered by non-decreasing term and strictly
+    // increasing timestamp, this update triggers an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
+DEATH_TEST_F(ReplCoordTest,
+ SetMyLastOpTimeToTimestampEqualToCurrentLastOpTimeTimestampButWithLesserTerm,
+ "opTime.getTimestamp() < myLastAppliedOpTime.getTimestamp()") {
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id"
+ << 0))),
+ HostAndPort("node1", 12345));
+
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 1), 0);
+
+ auto consistency = ReplicationCoordinator::DataConsistency::Consistent;
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+    // Since, in pv1, oplog entries are ordered by non-decreasing term and strictly
+    // increasing timestamp, this update triggers an invariant failure.
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2, consistency);
+}
+
TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelievedToBeUp) {
assertStartSuccess(
BSON("_id"
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 472f0c29426..3416709c24e 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1313,7 +1313,17 @@ void TopologyCoordinator::setMyLastAppliedOpTime(OpTime opTime,
Date_t now,
bool isRollbackAllowed) {
auto& myMemberData = _selfMemberData();
- invariant(isRollbackAllowed || opTime >= myMemberData.getLastAppliedOpTime());
+ auto myLastAppliedOpTime = myMemberData.getLastAppliedOpTime();
+
+ if (!(isRollbackAllowed || opTime == myLastAppliedOpTime)) {
+ invariant(opTime > myLastAppliedOpTime);
+ // In pv1, oplog entries are ordered by non-decreasing term and strictly increasing
+        // timestamp. So, in pv1, it's not possible for us to get an opTime with a higher term
+        // and a timestamp less than or equal to our current lastAppliedOpTime.
+ invariant(opTime.getTerm() == OpTime::kUninitializedTerm ||
+ myLastAppliedOpTime.getTerm() == OpTime::kUninitializedTerm ||
+ opTime.getTimestamp() > myLastAppliedOpTime.getTimestamp());
+ }
myMemberData.setLastAppliedOpTime(opTime, now);
}
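
Finally, a compact sketch of the strengthened check in TopologyCoordinator::setMyLastAppliedOpTime, including the exemptions the patch keeps (rollback allowed, equal optime, uninitialized terms). The types and the acceptLastAppliedUpdate helper are hypothetical stand-ins, not the real TopologyCoordinator API.

// Sketch (not part of the patch): the strengthened TopologyCoordinator check.
// Returns true when the update is accepted without tripping an invariant.
#include <cassert>
#include <tuple>

namespace sketch {

constexpr long long kUninitializedTerm = -1;

struct OpTime {
    unsigned long long timestamp;
    long long term;
};

bool acceptLastAppliedUpdate(const OpTime& opTime,
                             const OpTime& lastApplied,
                             bool isRollbackAllowed) {
    const bool equal =
        opTime.timestamp == lastApplied.timestamp && opTime.term == lastApplied.term;
    if (isRollbackAllowed || equal)
        return true;  // rollback and no-op updates are exempt, as before
    // invariant(opTime > myLastAppliedOpTime), term-major comparison.
    if (std::tie(opTime.term, opTime.timestamp) <=
        std::tie(lastApplied.term, lastApplied.timestamp))
        return false;
    // New: a genuinely advancing optime must also advance the timestamp in pv1.
    return opTime.term == kUninitializedTerm ||
        lastApplied.term == kUninitializedTerm ||
        opTime.timestamp > lastApplied.timestamp;
}

}  // namespace sketch

int main() {
    using sketch::OpTime;
    const OpTime lastApplied{/*timestamp=*/100, /*term=*/1};
    // A higher term with an equal or lower timestamp is impossible in pv1 and is
    // rejected; these correspond to the "...ButWithHigherTerm" death tests above.
    assert(!sketch::acceptLastAppliedUpdate({100, 2}, lastApplied, false));
    assert(!sketch::acceptLastAppliedUpdate({99, 2}, lastApplied, false));
    // Rollback-allowed updates remain exempt from both checks, as before.
    assert(sketch::acceptLastAppliedUpdate({99, 1}, lastApplied, true));
    // Normal forward progress is accepted.
    assert(sketch::acceptLastAppliedUpdate({101, 2}, lastApplied, false));
    return 0;
}
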