author     Scott Hernandez <scotthernandez@tart.local>  2016-03-10 08:21:01 -0500
committer  Scott Hernandez <scotthernandez@tart.local>  2016-03-27 11:45:02 -0400
commit     8e3849c3b7c4abe4ed27e4ca0ee3e01ea06e2384 (patch)
tree       6abd05fead4d156bd1703960a05451ec303e85d1
parent     f88e2487baf7183e022f485ade359761fc62c6c2 (diff)
download   mongo-8e3849c3b7c4abe4ed27e4ca0ee3e01ea06e2384.tar.gz
SERVER-23010: change committed snapshot by either durable or applied optime
-rw-r--r--  src/mongo/db/repl/oplog.cpp                              |  1
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp       | 29
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_test.cpp  | 26
3 files changed, 22 insertions(+), 34 deletions(-)
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 62e36d9238a..31e1baab697 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -1180,6 +1180,7 @@ void SnapshotThread::run() {
SnapshotName name(0); // assigned real value in block.
{
// Make sure there are no in-flight capped inserts while we create our snapshot.
+ // This lock cannot be acquired until all writes holding the resource commit/abort.
Lock::ResourceLock cappedInsertLockForOtherDb(
txn->lockState(), resourceCappedInFlightForOtherDb, MODE_X);
Lock::ResourceLock cappedInsertLockForLocalDb(
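The new comment in SnapshotThread::run() documents drain semantics: in-flight capped inserts hold the capped-insert resource in an intent mode, so acquiring it in MODE_X blocks until every such write has committed or aborted. As a rough standalone analogy only (std::shared_mutex standing in for MongoDB's hierarchical lock manager, with invented function names):

    #include <mutex>
    #include <shared_mutex>

    std::shared_mutex cappedInFlight;

    void cappedInsert() {
        // Writers hold the resource "shared" (akin to an intent lock)
        // for the lifetime of an in-flight capped insert.
        std::shared_lock<std::shared_mutex> inFlight(cappedInFlight);
        // ... perform the capped write, then commit or abort ...
    }   // shared lock released here

    void snapshotThread() {
        // Blocks until no capped inserts are in flight, mirroring the
        // MODE_X ResourceLock acquisitions in SnapshotThread::run().
        std::unique_lock<std::shared_mutex> barrier(cappedInFlight);
        // ... safe to create the snapshot: no capped writes in flight ...
    }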
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index df4f912ec58..41ef7d0698b 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1331,14 +1331,18 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
return false;
}
- // Wait for the "current" snapshot to advance to/past the opTime.
- // We cannot have this committed snapshot until we have replicated to a majority,
- // so we can return true here once that requirement is met.
- return (_currentCommittedSnapshot->opTime >= opTime &&
- _currentCommittedSnapshot->name >= minSnapshot);
- } else {
- patternName = ReplicaSetConfig::kMajorityWriteConcernModeName;
+ if (getWriteConcernMajorityShouldJournal_inlock()) {
+ // Wait for the "current" snapshot to advance to/past the opTime.
+
+ // We cannot have this committed snapshot until we have replicated to a majority,
+ // so we can return true here once that requirement is met for durable writes.
+ return (_currentCommittedSnapshot->opTime >= opTime &&
+ _currentCommittedSnapshot->name >= minSnapshot);
+ }
}
+ // Continue and wait for replication to the majority (of voters).
+ // Needed for j:true when writeConcernMajorityShouldJournal is false
+ // (the commit point then tracks the appliedOpTime snapshot).
+ patternName = ReplicaSetConfig::kMajorityWriteConcernModeName;
} else {
patternName = writeConcern.wMode;
}
@@ -2343,6 +2347,11 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(
const ReplicaSetConfig oldConfig = _rsConfig;
const PostMemberStateUpdateAction action =
_setCurrentRSConfig_inlock(cbData, newConfig, myIndex);
+
+ // On a reconfig we drop all snapshots so we don't mistakenly read from the wrong one.
+ // For example, if we change the meaning of the "committed" snapshot from applied -> durable.
+ _dropAllSnapshots_inlock();
+
lk.unlock();
_resetElectionInfoOnProtocolVersionUpgrade(oldConfig, newConfig);
_performPostMemberStateUpdateAction(action);
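Dropping snapshots here is a cache-invalidation step: snapshots marked committed under the old config may not satisfy the new config's definition of committed. A small sketch of the idea, with invented names:

    // Sketch: reconfig invalidates cached snapshots because the new config
    // may redefine "committed" (e.g. flipping the commit point between
    // applied and durable optimes).
    #include <optional>
    #include <vector>

    struct Snapshot { /* name and opTime elided */ };

    class SnapshotState {
    public:
        void onReconfig() {
            // Discard everything; the next commit-point advance re-marks a
            // snapshot as committed under the new config's rule.
            _snapshots.clear();
            _committed.reset();
        }
    private:
        std::vector<Snapshot> _snapshots;
        std::optional<Snapshot> _committed;
    };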
@@ -3189,11 +3198,15 @@ void ReplicationCoordinatorImpl::_updateLastCommittedOpTime_inlock() {
std::vector<OpTime> votingNodesOpTimes;
+ // Whether we use the applied or durable OpTime for the commit point is decided here.
+ const bool useDurableOpTime = getWriteConcernMajorityShouldJournal_inlock();
+
for (const auto& sI : _slaveInfo) {
auto memberConfig = _rsConfig.findMemberByID(sI.memberId);
invariant(memberConfig);
if (memberConfig->isVoter()) {
- votingNodesOpTimes.push_back(sI.lastDurableOpTime);
+ const auto opTime = useDurableOpTime ? sI.lastDurableOpTime : sI.lastAppliedOpTime;
+ votingNodesOpTimes.push_back(opTime);
}
}
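With the voter optimes gathered, the surrounding code (not shown in this hunk) sorts them and takes the entry at the majority position as the new commit point. A standalone sketch of the whole computation, using a simplified OpTime and a selection step inferred from context rather than quoted from the source:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using OpTime = long long;  // the real OpTime is (Timestamp, term)

    OpTime computeCommitPoint(const std::vector<OpTime>& applied,
                              const std::vector<OpTime>& durable,
                              bool majorityShouldJournal) {
        // One entry per voting member: durable optimes when the write
        // concern majority must journal, applied optimes otherwise.
        std::vector<OpTime> votingOpTimes =
            majorityShouldJournal ? durable : applied;
        assert(!votingOpTimes.empty());
        std::sort(votingOpTimes.begin(), votingOpTimes.end());
        // With N voters and majority M, the (N - M)th entry of the
        // ascending list is replicated to at least M voters.
        const size_t majority = votingOpTimes.size() / 2 + 1;
        return votingOpTimes[votingOpTimes.size() - majority];
    }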
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index dd43b7b3f1d..6b10ba21154 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -3999,32 +3999,6 @@ TEST_F(ReplCoordTest,
ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
}
-TEST_F(ReplCoordTest,
- NodeChangesMyLastOpTimeWhenSetMyLastDurableOpTimeReceivesANewerOpTimeWithoutJournaling) {
- assertStartSuccess(BSON("_id"
- << "mySet"
- << "version" << 2 << "members" << BSON_ARRAY(BSON("host"
- << "node1:12345"
- << "_id" << 0))),
- HostAndPort("node1", 12345));
-
-
- setStorageEngineDurable(false);
-
- OpTime time1(Timestamp(100, 1), 1);
- OpTime time2(Timestamp(100, 2), 1);
- OpTime time3(Timestamp(100, 3), 1);
-
- getReplCoord()->setMyLastAppliedOpTime(time1);
- ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
- getReplCoord()->setMyLastAppliedOpTimeForward(time3);
- ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
- ASSERT_EQUALS(time3, getReplCoord()->getMyLastDurableOpTime());
- getReplCoord()->setMyLastAppliedOpTimeForward(time2);
- ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
- ASSERT_EQUALS(time3, getReplCoord()->getMyLastDurableOpTime());
-}
-
TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelievedToBeUp) {
assertStartSuccess(
BSON("_id"