summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authormatt dannenberg <matt.dannenberg@10gen.com>2016-01-05 10:29:01 -0500
committerScott Hernandez <scotthernandez@tart.local>2016-02-24 09:55:46 -0500
commit7bc59dac4f46e8f59786130262fb1dfea68fb605 (patch)
tree0ad5c45f99655a7cc60abc6162993ec1eab0070d /src
parentb5a76e83860d0cff964af4989d798f19ffce4aae (diff)
downloadmongo-7bc59dac4f46e8f59786130262fb1dfea68fb605.tar.gz
SERVER-22276 SERVER-22277 implement "j" flag in write concern apply to secondary as well as primary
(cherry picked from commit 2c2e6a38f559f25559c2b24eff51511c6fbc4a5b)
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/commands/cleanup_orphaned_cmd.cpp2
-rw-r--r--src/mongo/db/commands/write_commands/batch_executor.cpp3
-rw-r--r--src/mongo/db/query/find_and_modify_request_test.cpp8
-rw-r--r--src/mongo/db/range_deleter.cpp2
-rw-r--r--src/mongo/db/repl/SConscript1
-rw-r--r--src/mongo/db/repl/bgsync.cpp12
-rw-r--r--src/mongo/db/repl/data_replicator.cpp8
-rw-r--r--src/mongo/db/repl/data_replicator.h4
-rw-r--r--src/mongo/db/repl/data_replicator_test.cpp2
-rw-r--r--src/mongo/db/repl/initial_sync.cpp5
-rw-r--r--src/mongo/db/repl/member_heartbeat_data.cpp11
-rw-r--r--src/mongo/db/repl/member_heartbeat_data.h4
-rw-r--r--src/mongo/db/repl/minvalid.cpp4
-rw-r--r--src/mongo/db/repl/old_update_position_args.cpp154
-rw-r--r--src/mongo/db/repl/old_update_position_args.h88
-rw-r--r--src/mongo/db/repl/oplog.cpp4
-rw-r--r--src/mongo/db/repl/repl_client_info.cpp2
-rw-r--r--src/mongo/db/repl/repl_set_heartbeat_response.cpp64
-rw-r--r--src/mongo/db/repl/repl_set_heartbeat_response.h27
-rw-r--r--src/mongo/db/repl/repl_set_heartbeat_response_test.cpp155
-rw-r--r--src/mongo/db/repl/repl_set_html_summary.cpp7
-rw-r--r--src/mongo/db/repl/replica_set_config.cpp39
-rw-r--r--src/mongo/db/repl/replica_set_config.h9
-rw-r--r--src/mongo/db/repl/replica_set_config_test.cpp81
-rw-r--r--src/mongo/db/repl/replication_coordinator.h67
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp495
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h116
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect.cpp4
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp14
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp8
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp45
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp39
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp2
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp39
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp976
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp61
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h26
-rw-r--r--src/mongo/db/repl/replication_coordinator_test_fixture.cpp5
-rw-r--r--src/mongo/db/repl/replication_info.cpp2
-rw-r--r--src/mongo/db/repl/replset_commands.cpp41
-rw-r--r--src/mongo/db/repl/reporter.cpp10
-rw-r--r--src/mongo/db/repl/reporter.h4
-rw-r--r--src/mongo/db/repl/reporter_test.cpp14
-rw-r--r--src/mongo/db/repl/rs_initialsync.cpp10
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp6
-rw-r--r--src/mongo/db/repl/rs_rollback_test.cpp4
-rw-r--r--src/mongo/db/repl/rs_sync.cpp2
-rw-r--r--src/mongo/db/repl/sync_source_feedback.cpp47
-rw-r--r--src/mongo/db/repl/sync_source_feedback.h6
-rw-r--r--src/mongo/db/repl/sync_tail.cpp20
-rw-r--r--src/mongo/db/repl/topology_coordinator.h2
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl.cpp52
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl.h2
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl_test.cpp39
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp30
-rw-r--r--src/mongo/db/repl/update_position_args.cpp62
-rw-r--r--src/mongo/db/repl/update_position_args.h11
-rw-r--r--src/mongo/db/s/migration_impl.cpp4
-rw-r--r--src/mongo/db/s/sharding_state_recovery.cpp2
-rw-r--r--src/mongo/db/write_concern.cpp62
-rw-r--r--src/mongo/db/write_concern_options.cpp16
-rw-r--r--src/mongo/db/write_concern_options.h7
-rw-r--r--src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp9
-rw-r--r--src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp4
-rw-r--r--src/mongo/s/catalog/type_settings.cpp2
65 files changed, 2365 insertions, 698 deletions
diff --git a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
index e1980b9a0b1..6eafbe5a695 100644
--- a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
+++ b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
@@ -58,7 +58,7 @@ namespace {
const int kDefaultWTimeoutMs = 60 * 1000;
const WriteConcernOptions DefaultWriteConcern(WriteConcernOptions::kMajority,
- WriteConcernOptions::NONE,
+ WriteConcernOptions::SyncMode::UNSET,
kDefaultWTimeoutMs);
enum CleanupResult { CleanupResult_Done, CleanupResult_Continue, CleanupResult_Error };
diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp
index 34eb38a13f2..146bfd7e87f 100644
--- a/src/mongo/db/commands/write_commands/batch_executor.cpp
+++ b/src/mongo/db/commands/write_commands/batch_executor.cpp
@@ -293,7 +293,8 @@ void WriteBatchExecutor::executeBatch(const BatchedCommandRequest& request,
const WriteConcernOptions& writeConcern = _txn->getWriteConcern();
bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0 &&
- writeConcern.syncMode == WriteConcernOptions::NONE;
+ (writeConcern.syncMode == WriteConcernOptions::SyncMode::NONE ||
+ writeConcern.syncMode == WriteConcernOptions::SyncMode::UNSET);
Timer commandTimer;
diff --git a/src/mongo/db/query/find_and_modify_request_test.cpp b/src/mongo/db/query/find_and_modify_request_test.cpp
index 27490715e02..761b344a96f 100644
--- a/src/mongo/db/query/find_and_modify_request_test.cpp
+++ b/src/mongo/db/query/find_and_modify_request_test.cpp
@@ -154,7 +154,7 @@ TEST(FindAndModifyRequest, UpdateWithSort) {
TEST(FindAndModifyRequest, UpdateWithWriteConcern) {
const BSONObj query(BSON("x" << 1));
const BSONObj update(BSON("y" << 1));
- const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150);
+ const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150);
auto request = FindAndModifyRequest::makeUpdate(NamespaceString("test.user"), query, update);
request.setWriteConcern(writeConcern);
@@ -174,7 +174,7 @@ TEST(FindAndModifyRequest, UpdateWithFullSpec) {
const BSONObj update(BSON("y" << 1));
const BSONObj sort(BSON("z" << -1));
const BSONObj field(BSON("x" << 1 << "y" << 1));
- const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150);
+ const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150);
auto request = FindAndModifyRequest::makeUpdate(NamespaceString("test.user"), query, update);
request.setFieldProjection(field);
@@ -246,7 +246,7 @@ TEST(FindAndModifyRequest, RemoveWithSort) {
TEST(FindAndModifyRequest, RemoveWithWriteConcern) {
const BSONObj query(BSON("x" << 1));
- const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150);
+ const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150);
auto request = FindAndModifyRequest::makeRemove(NamespaceString("test.user"), query);
request.setWriteConcern(writeConcern);
@@ -265,7 +265,7 @@ TEST(FindAndModifyRequest, RemoveWithFullSpec) {
const BSONObj query(BSON("x" << 1));
const BSONObj sort(BSON("z" << -1));
const BSONObj field(BSON("x" << 1 << "y" << 1));
- const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150);
+ const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150);
auto request = FindAndModifyRequest::makeRemove(NamespaceString("test.user"), query);
request.setFieldProjection(field);
diff --git a/src/mongo/db/range_deleter.cpp b/src/mongo/db/range_deleter.cpp
index 7a4aaf878be..768cbd791b8 100644
--- a/src/mongo/db/range_deleter.cpp
+++ b/src/mongo/db/range_deleter.cpp
@@ -259,7 +259,7 @@ const int kWTimeoutMillis = 60 * 60 * 1000;
bool _waitForMajority(OperationContext* txn, std::string* errMsg) {
const WriteConcernOptions writeConcern(
- WriteConcernOptions::kMajority, WriteConcernOptions::NONE, kWTimeoutMillis);
+ WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, kWTimeoutMillis);
repl::ReplicationCoordinator::StatusAndDuration replStatus =
repl::getGlobalReplicationCoordinator()->awaitReplicationOfLastOpForClient(txn,
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 778192b8567..5d2d8f1fe06 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -446,6 +446,7 @@ env.Library('replica_set_messages',
'handshake_args.cpp',
'is_master_response.cpp',
'member_config.cpp',
+ 'old_update_position_args.cpp',
'read_concern_response.cpp',
'repl_set_declare_election_winner_args.cpp',
'repl_set_heartbeat_args.cpp',
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 13859b4deea..0e5e4cbb882 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -238,7 +238,7 @@ void BackgroundSync::_producerThread() {
}
// We need to wait until initial sync has started.
- if (_replCoord->getMyLastOptime().isNull()) {
+ if (_replCoord->getMyLastAppliedOpTime().isNull()) {
sleepsecs(1);
return;
}
@@ -406,11 +406,11 @@ void BackgroundSync::_produce(OperationContext* txn) {
log() << "Starting rollback due to " << fetcherReturnStatus;
// Wait till all buffered oplog entries have drained and been applied.
- auto lastApplied = _replCoord->getMyLastOptime();
- if (lastApplied != _lastOpTimeFetched) {
+ auto lastApplied = _replCoord->getMyLastAppliedOpTime();
+ if (lastApplied != lastOpTimeFetched) {
log() << "Waiting for all operations from " << lastApplied << " until "
- << _lastOpTimeFetched << " to be applied before starting rollback.";
- while (_lastOpTimeFetched > (lastApplied = _replCoord->getMyLastOptime())) {
+ << lastOpTimeFetched << " to be applied before starting rollback.";
+ while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) {
sleepmillis(10);
if (isStopped() || inShutdown()) {
return;
@@ -730,7 +730,7 @@ void BackgroundSync::start(OperationContext* txn) {
_stopped = false;
// reset _last fields with current oplog data
- _lastOpTimeFetched = _replCoord->getMyLastOptime();
+ _lastOpTimeFetched = _replCoord->getMyLastAppliedOpTime();
_lastFetchedHash = lastFetchedHash;
LOG(1) << "bgsync fetch queue set to: " << _lastOpTimeFetched << " " << _lastFetchedHash;
diff --git a/src/mongo/db/repl/data_replicator.cpp b/src/mongo/db/repl/data_replicator.cpp
index ef296dd486a..aed481df509 100644
--- a/src/mongo/db/repl/data_replicator.cpp
+++ b/src/mongo/db/repl/data_replicator.cpp
@@ -525,7 +525,7 @@ DataReplicator::DataReplicator(DataReplicatorOptions opts, ReplicationExecutor*
uassert(ErrorCodes::BadValue, "invalid rollback function", _opts.rollbackFn);
uassert(ErrorCodes::BadValue,
"invalid replSetUpdatePosition command object creation function",
- _opts.prepareReplSetUpdatePositionCommandFn);
+ _opts.prepareOldReplSetUpdatePositionCommandFn);
uassert(ErrorCodes::BadValue, "invalid getMyLastOptime function", _opts.getMyLastOptime);
uassert(ErrorCodes::BadValue, "invalid setMyLastOptime function", _opts.setMyLastOptime);
uassert(ErrorCodes::BadValue, "invalid setFollowerMode function", _opts.setFollowerMode);
@@ -664,9 +664,9 @@ TimestampStatus DataReplicator::flushAndPause() {
return TimestampStatus(_lastTimestampApplied);
}
-void DataReplicator::_resetState_inlock(Timestamp lastAppliedOptime) {
+void DataReplicator::_resetState_inlock(Timestamp lastAppliedOpTime) {
invariant(!_anyActiveHandles_inlock());
- _lastTimestampApplied = _lastTimestampFetched = lastAppliedOptime;
+ _lastTimestampApplied = _lastTimestampFetched = lastAppliedOpTime;
_oplogBuffer.clear();
}
@@ -1020,7 +1020,7 @@ void DataReplicator::_doNextActions_Steady_inlock() {
if (!_reporterPaused && (!_reporter || !_reporter->getStatus().isOK())) {
// TODO get reporter in good shape
_reporter.reset(
- new Reporter(_exec, _opts.prepareReplSetUpdatePositionCommandFn, _syncSource));
+ new Reporter(_exec, _opts.prepareOldReplSetUpdatePositionCommandFn, _syncSource));
}
}
diff --git a/src/mongo/db/repl/data_replicator.h b/src/mongo/db/repl/data_replicator.h
index bd3fd86101e..992aeb71d58 100644
--- a/src/mongo/db/repl/data_replicator.h
+++ b/src/mongo/db/repl/data_replicator.h
@@ -127,7 +127,7 @@ struct DataReplicatorOptions {
Applier::ApplyOperationFn applierFn;
RollbackFn rollbackFn;
- Reporter::PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn;
+ Reporter::PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn;
GetMyLastOptimeFn getMyLastOptime;
SetMyLastOptimeFn setMyLastOptime;
SetFollowerModeFn setFollowerMode;
@@ -205,7 +205,7 @@ public:
// For testing only
- void _resetState_inlock(Timestamp lastAppliedOptime);
+ void _resetState_inlock(Timestamp lastAppliedOpTime);
void _setInitialSyncStorageInterface(CollectionCloner::StorageInterface* si);
private:
diff --git a/src/mongo/db/repl/data_replicator_test.cpp b/src/mongo/db/repl/data_replicator_test.cpp
index adff8d96782..046e73956df 100644
--- a/src/mongo/db/repl/data_replicator_test.cpp
+++ b/src/mongo/db/repl/data_replicator_test.cpp
@@ -179,7 +179,7 @@ protected:
return _rollbackFn(txn, lastOpTimeWritten, syncSource);
};
- options.prepareReplSetUpdatePositionCommandFn =
+ options.prepareOldReplSetUpdatePositionCommandFn =
[]() -> StatusWith<BSONObj> { return BSON("replSetUpdatePosition" << 1); };
options.getMyLastOptime = [this]() { return _myLastOpTime; };
options.setMyLastOptime = [this](const OpTime& opTime) { _setMyLastOptime(opTime); };
diff --git a/src/mongo/db/repl/initial_sync.cpp b/src/mongo/db/repl/initial_sync.cpp
index e0ca82a6ea0..09553e3d93e 100644
--- a/src/mongo/db/repl/initial_sync.cpp
+++ b/src/mongo/db/repl/initial_sync.cpp
@@ -112,7 +112,7 @@ void InitialSync::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTim
const OpTime lastOpTime = multiApply(txn, ops);
- replCoord->setMyLastOptime(lastOpTime);
+ replCoord->setMyLastAppliedOpTime(lastOpTime);
setNewTimestamp(lastOpTime.getTimestamp());
if (inShutdown()) {
@@ -122,8 +122,7 @@ void InitialSync::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTim
// if the last op applied was our end, return
if (lastOpTime == endOpTime) {
LOG(1) << "SyncTail applied " << entriesApplied << " entries (" << bytesApplied
- << " bytes)"
- << " and finished at opTime " << endOpTime;
+ << " bytes) and finished at opTime " << endOpTime;
return;
}
} // end of while (true)
diff --git a/src/mongo/db/repl/member_heartbeat_data.cpp b/src/mongo/db/repl/member_heartbeat_data.cpp
index df6e5b0912d..c267a6ba8ed 100644
--- a/src/mongo/db/repl/member_heartbeat_data.cpp
+++ b/src/mongo/db/repl/member_heartbeat_data.cpp
@@ -42,7 +42,7 @@ namespace repl {
MemberHeartbeatData::MemberHeartbeatData() : _health(-1), _authIssue(false) {
_lastResponse.setState(MemberState::RS_UNKNOWN);
_lastResponse.setElectionTime(Timestamp());
- _lastResponse.setOpTime(OpTime());
+ _lastResponse.setAppliedOpTime(OpTime());
}
void MemberHeartbeatData::setUpValues(Date_t now,
@@ -60,10 +60,9 @@ void MemberHeartbeatData::setUpValues(Date_t now,
if (!hbResponse.hasElectionTime()) {
hbResponse.setElectionTime(_lastResponse.getElectionTime());
}
- if (!hbResponse.hasOpTime()) {
- hbResponse.setOpTime(_lastResponse.getOpTime());
+ if (!hbResponse.hasAppliedOpTime()) {
+ hbResponse.setAppliedOpTime(_lastResponse.getAppliedOpTime());
}
-
// Log if the state changes
if (_lastResponse.getState() != hbResponse.getState()) {
log() << "Member " << host.toString() << " is now in state "
@@ -82,7 +81,7 @@ void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeat
_lastResponse = ReplSetHeartbeatResponse();
_lastResponse.setState(MemberState::RS_DOWN);
_lastResponse.setElectionTime(Timestamp());
- _lastResponse.setOpTime(OpTime());
+ _lastResponse.setAppliedOpTime(OpTime());
_lastResponse.setHbMsg(heartbeatMessage);
_lastResponse.setSyncingTo(HostAndPort());
}
@@ -96,7 +95,7 @@ void MemberHeartbeatData::setAuthIssue(Date_t now) {
_lastResponse = ReplSetHeartbeatResponse();
_lastResponse.setState(MemberState::RS_UNKNOWN);
_lastResponse.setElectionTime(Timestamp());
- _lastResponse.setOpTime(OpTime());
+ _lastResponse.setAppliedOpTime(OpTime());
_lastResponse.setHbMsg("");
_lastResponse.setSyncingTo(HostAndPort());
}
diff --git a/src/mongo/db/repl/member_heartbeat_data.h b/src/mongo/db/repl/member_heartbeat_data.h
index d5b87a3767e..e64dcb4ef67 100644
--- a/src/mongo/db/repl/member_heartbeat_data.h
+++ b/src/mongo/db/repl/member_heartbeat_data.h
@@ -68,8 +68,8 @@ public:
const HostAndPort& getSyncSource() const {
return _lastResponse.getSyncingTo();
}
- OpTime getOpTime() const {
- return _lastResponse.getOpTime();
+ OpTime getAppliedOpTime() const {
+ return _lastResponse.getAppliedOpTime();
}
int getConfigVersion() const {
return _lastResponse.getConfigVersion();
diff --git a/src/mongo/db/repl/minvalid.cpp b/src/mongo/db/repl/minvalid.cpp
index 990d6224e50..90753cff0f4 100644
--- a/src/mongo/db/repl/minvalid.cpp
+++ b/src/mongo/db/repl/minvalid.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/operation_context_impl.h"
#include "mongo/db/repl/oplog.h"
+#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/log.h"
@@ -62,7 +63,10 @@ void clearInitialSyncFlag(OperationContext* txn) {
}
MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "clearInitialSyncFlags", minvalidNS);
+ auto replCoord = repl::ReplicationCoordinator::get(txn);
+ OpTime time = replCoord->getMyLastAppliedOpTime();
txn->recoveryUnit()->waitUntilDurable();
+ replCoord->setMyLastDurableOpTime(time);
LOG(3) << "clearing initial sync flag";
}
diff --git a/src/mongo/db/repl/old_update_position_args.cpp b/src/mongo/db/repl/old_update_position_args.cpp
new file mode 100644
index 00000000000..1a01a1fa3e8
--- /dev/null
+++ b/src/mongo/db/repl/old_update_position_args.cpp
@@ -0,0 +1,154 @@
+/**
+ * Copyright 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/repl/old_update_position_args.h"
+
+#include "mongo/base/status.h"
+#include "mongo/bson/util/bson_check.h"
+#include "mongo/bson/util/bson_extract.h"
+#include "mongo/db/jsobj.h"
+
+namespace mongo {
+namespace repl {
+
+
+OldUpdatePositionArgs::UpdateInfo::UpdateInfo(const OID& anRid,
+ const OpTime& aTs,
+ long long aCfgver,
+ long long aMemberId)
+ : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {}
+
+namespace {
+
+const std::string kCommandFieldName = "replSetUpdatePosition";
+const std::string kUpdateArrayFieldName = "optimes";
+
+const std::string kLegalUpdatePositionFieldNames[] = {
+ kCommandFieldName, kUpdateArrayFieldName,
+};
+
+const std::string kMemberRIDFieldName = "_id";
+const std::string kMemberConfigFieldName = "config";
+const std::string kOpTimeFieldName = "optime";
+const std::string kMemberIdFieldName = "memberId";
+const std::string kConfigVersionFieldName = "cfgver";
+
+const std::string kLegalUpdateInfoFieldNames[] = {
+ kMemberConfigFieldName,
+ kMemberRIDFieldName,
+ kOpTimeFieldName,
+ kMemberIdFieldName,
+ kConfigVersionFieldName,
+};
+
+} // namespace
+
+Status OldUpdatePositionArgs::initialize(const BSONObj& argsObj) {
+ Status status =
+ bsonCheckOnlyHasFields("OldUpdatePositionArgs", argsObj, kLegalUpdatePositionFieldNames);
+
+ if (!status.isOK())
+ return status;
+
+ // grab the array of changes
+ BSONElement updateArray;
+ status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
+ if (!status.isOK())
+ return status;
+
+ // now parse each array entry into an update
+ BSONObjIterator i(updateArray.Obj());
+ while (i.more()) {
+ BSONObj entry = i.next().Obj();
+ status = bsonCheckOnlyHasFields("UpdateInfoArgs", entry, kLegalUpdateInfoFieldNames);
+ if (!status.isOK())
+ return status;
+
+ OpTime opTime;
+ if (entry[kOpTimeFieldName].isABSONObj()) {
+ // In protocol version 1, { ts: <timestamp>, t: term }
+ Status status = bsonExtractOpTimeField(entry, kOpTimeFieldName, &opTime);
+ if (!status.isOK())
+ return status;
+ } else {
+ Timestamp ts;
+ status = bsonExtractTimestampField(entry, kOpTimeFieldName, &ts);
+ if (!status.isOK())
+ return status;
+ opTime = OpTime(ts, OpTime::kUninitializedTerm);
+ }
+ if (!status.isOK())
+ return status;
+
+ // TODO(spencer): The following three fields are optional in 3.0, but should be made
+ // required or ignored in 3.0
+ long long cfgver;
+ status = bsonExtractIntegerFieldWithDefault(entry, kConfigVersionFieldName, -1, &cfgver);
+ if (!status.isOK())
+ return status;
+
+ OID rid;
+ status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid);
+ if (!status.isOK())
+ return status;
+
+ long long memberID;
+ status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID);
+ if (!status.isOK())
+ return status;
+
+ _updates.push_back(UpdateInfo(rid, opTime, cfgver, memberID));
+ }
+
+ return Status::OK();
+}
+
+BSONObj OldUpdatePositionArgs::toBSON() const {
+ BSONObjBuilder builder;
+ // add command name
+ builder.append(kCommandFieldName, 1);
+
+ // build array of updates
+ if (!_updates.empty()) {
+ BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName));
+ for (OldUpdatePositionArgs::UpdateIterator update = updatesBegin(); update != updatesEnd();
+ ++update) {
+ updateArray.append(BSON(kMemberRIDFieldName << update->rid << kOpTimeFieldName
+ << update->ts.getTimestamp()
+ << kConfigVersionFieldName << update->cfgver
+ << kMemberIdFieldName << update->memberId));
+ }
+ updateArray.doneFast();
+ }
+ return builder.obj();
+}
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/old_update_position_args.h b/src/mongo/db/repl/old_update_position_args.h
new file mode 100644
index 00000000000..fa9d1a3ef90
--- /dev/null
+++ b/src/mongo/db/repl/old_update_position_args.h
@@ -0,0 +1,88 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "mongo/db/jsobj.h"
+#include "mongo/db/repl/optime.h"
+
+namespace mongo {
+
+class Status;
+
+namespace repl {
+
+/**
+ * Arguments to the handshake command.
+ */
+class OldUpdatePositionArgs {
+public:
+ struct UpdateInfo {
+ UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId);
+
+ OID rid;
+ OpTime ts;
+ long long cfgver;
+ long long memberId;
+ };
+
+ typedef std::vector<UpdateInfo>::const_iterator UpdateIterator;
+
+ /**
+ * Initializes this OldUpdatePositionArgs from the contents of "argsObj".
+ */
+ Status initialize(const BSONObj& argsObj);
+
+ /**
+ * Gets a begin iterator over the UpdateInfos stored in this OldUpdatePositionArgs.
+ */
+ UpdateIterator updatesBegin() const {
+ return _updates.begin();
+ }
+
+ /**
+ * Gets an end iterator over the UpdateInfos stored in this OldUpdatePositionArgs.
+ */
+ UpdateIterator updatesEnd() const {
+ return _updates.end();
+ }
+
+ /**
+ * Returns a BSONified version of the object.
+ * _updates is only included if it is not empty.
+ */
+ BSONObj toBSON() const;
+
+private:
+ std::vector<UpdateInfo> _updates;
+};
+
+} // namespace repl
+} // namespace mongo
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index d5ff327d03c..f4c2f136965 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -218,7 +218,7 @@ public:
: _newOpTime(newOpTime), _replCoord(replCoord) {}
virtual void commit() {
- _replCoord->setMyLastOptimeForward(_newOpTime);
+ _replCoord->setMyLastAppliedOpTimeForward(_newOpTime);
}
virtual void rollback() {}
@@ -464,7 +464,7 @@ OpTime writeOpsToOplog(OperationContext* txn, const std::vector<BSONObj>& ops) {
OpTime lastOptime;
MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
- lastOptime = replCoord->getMyLastOptime();
+ lastOptime = replCoord->getMyLastAppliedOpTime();
invariant(!ops.empty());
ScopedTransaction transaction(txn, MODE_IX);
Lock::DBLock lk(txn->lockState(), "local", MODE_X);
diff --git a/src/mongo/db/repl/repl_client_info.cpp b/src/mongo/db/repl/repl_client_info.cpp
index b339c10b4f0..678938f2755 100644
--- a/src/mongo/db/repl/repl_client_info.cpp
+++ b/src/mongo/db/repl/repl_client_info.cpp
@@ -46,7 +46,7 @@ const Client::Decoration<ReplClientInfo> ReplClientInfo::forClient =
void ReplClientInfo::setLastOpToSystemLastOpTime(OperationContext* txn) {
ReplicationCoordinator* replCoord = repl::ReplicationCoordinator::get(txn->getServiceContext());
if (replCoord->isReplEnabled() && txn->writesAreReplicated()) {
- setLastOp(replCoord->getMyLastOptime());
+ setLastOp(replCoord->getMyLastAppliedOpTime());
}
}
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.cpp b/src/mongo/db/repl/repl_set_heartbeat_response.cpp
index 8ccb6241950..8c06dc7bd7d 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.cpp
@@ -58,7 +58,8 @@ const std::string kIsReplSetFieldName = "rs";
const std::string kMemberStateFieldName = "state";
const std::string kMismatchFieldName = "mismatch";
const std::string kOkFieldName = "ok";
-const std::string kOpTimeFieldName = "opTime";
+const std::string kDurableOpTimeFieldName = "durableOpTime";
+const std::string kAppliedOpTimeFieldName = "opTime";
const std::string kPrimaryIdFieldName = "primaryId";
const std::string kReplSetFieldName = "set";
const std::string kSyncSourceFieldName = "syncingTo";
@@ -117,12 +118,15 @@ void ReplSetHeartbeatResponse::addToBSON(BSONObjBuilder* builder, bool isProtoco
if (_primaryIdSet) {
builder->append(kPrimaryIdFieldName, _primaryId);
}
- if (_opTimeSet) {
+ if (_durableOpTimeSet) {
+ _durableOpTime.append(builder, kDurableOpTimeFieldName);
+ }
+ if (_appliedOpTimeSet) {
if (isProtocolVersionV1) {
- _opTime.append(builder, kOpTimeFieldName);
+ _appliedOpTime.append(builder, kAppliedOpTimeFieldName);
} else {
- builder->appendDate(kOpTimeFieldName,
- Date_t::fromMillisSinceEpoch(_opTime.getTimestamp().asLL()));
+ builder->appendDate(kAppliedOpTimeFieldName,
+ Date_t::fromMillisSinceEpoch(_appliedOpTime.getTimestamp().asLL()));
}
}
}
@@ -209,30 +213,39 @@ Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc, long long term)
return termStatus;
}
+ Status status = bsonExtractOpTimeField(doc, kDurableOpTimeFieldName, &_durableOpTime);
+ if (!status.isOK()) {
+ if (status != ErrorCodes::NoSuchKey) {
+ return status;
+ }
+ } else {
+ _durableOpTimeSet = true;
+ }
+
// In order to support both the 3.0(V0) and 3.2(V1) heartbeats we must parse the OpTime
// field based on its type. If it is a Date, we parse it as the timestamp and use
// initialize's term argument to complete the OpTime type. If it is an Object, then it's
// V1 and we construct an OpTime out of its nested fields.
- const BSONElement opTimeElement = doc[kOpTimeFieldName];
- if (opTimeElement.eoo()) {
- _opTimeSet = false;
- } else if (opTimeElement.type() == bsonTimestamp) {
- _opTimeSet = true;
- _opTime = OpTime(opTimeElement.timestamp(), term);
- } else if (opTimeElement.type() == Date) {
- _opTimeSet = true;
- _opTime = OpTime(Timestamp(opTimeElement.date()), term);
- } else if (opTimeElement.type() == Object) {
- Status status = bsonExtractOpTimeField(doc, kOpTimeFieldName, &_opTime);
- _opTimeSet = true;
+ const BSONElement appliedOpTimeElement = doc[kAppliedOpTimeFieldName];
+ if (appliedOpTimeElement.eoo()) {
+ _appliedOpTimeSet = false;
+ } else if (appliedOpTimeElement.type() == bsonTimestamp) {
+ _appliedOpTimeSet = true;
+ _appliedOpTime = OpTime(appliedOpTimeElement.timestamp(), term);
+ } else if (appliedOpTimeElement.type() == Date) {
+ _appliedOpTimeSet = true;
+ _appliedOpTime = OpTime(Timestamp(appliedOpTimeElement.date()), term);
+ } else if (appliedOpTimeElement.type() == Object) {
+ Status status = bsonExtractOpTimeField(doc, kAppliedOpTimeFieldName, &_appliedOpTime);
+ _appliedOpTimeSet = true;
// since a v1 OpTime was in the response, the member must be part of a replset
_isReplSet = true;
} else {
return Status(ErrorCodes::TypeMismatch,
- str::stream() << "Expected \"" << kOpTimeFieldName
+ str::stream() << "Expected \"" << kAppliedOpTimeFieldName
<< "\" field in response to replSetHeartbeat "
"command to have type Date or Timestamp, but found type "
- << typeName(opTimeElement.type()));
+ << typeName(appliedOpTimeElement.type()));
}
const BSONElement electableElement = doc[kIsElectableFieldName];
@@ -274,7 +287,7 @@ Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc, long long term)
const BSONElement configVersionElement = doc[kConfigVersionFieldName];
// If we have an optime then we must have a configVersion
- if (_opTimeSet && configVersionElement.eoo()) {
+ if (_appliedOpTimeSet && configVersionElement.eoo()) {
return Status(ErrorCodes::NoSuchKey,
str::stream() << "Response to replSetHeartbeat missing required \""
<< kConfigVersionFieldName
@@ -362,9 +375,14 @@ long long ReplSetHeartbeatResponse::getPrimaryId() const {
return _primaryId;
}
-OpTime ReplSetHeartbeatResponse::getOpTime() const {
- invariant(_opTimeSet);
- return _opTime;
+OpTime ReplSetHeartbeatResponse::getAppliedOpTime() const {
+ invariant(_appliedOpTimeSet);
+ return _appliedOpTime;
+}
+
+OpTime ReplSetHeartbeatResponse::getDurableOpTime() const {
+ invariant(_durableOpTimeSet);
+ return _durableOpTime;
}
} // namespace repl
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.h b/src/mongo/db/repl/repl_set_heartbeat_response.h
index b3fba2a4803..2b968bbb17d 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.h
@@ -125,10 +125,14 @@ public:
long long getTerm() const {
return _term;
}
- bool hasOpTime() const {
- return _opTimeSet;
+ bool hasAppliedOpTime() const {
+ return _appliedOpTimeSet;
}
- OpTime getOpTime() const;
+ OpTime getAppliedOpTime() const;
+ bool hasDurableOpTime() const {
+ return _durableOpTimeSet;
+ }
+ OpTime getDurableOpTime() const;
/**
* Sets _mismatch to true.
@@ -232,9 +236,13 @@ public:
_primaryIdSet = true;
_primaryId = primaryId;
}
- void setOpTime(OpTime time) {
- _opTimeSet = true;
- _opTime = time;
+ void setAppliedOpTime(OpTime time) {
+ _appliedOpTimeSet = true;
+ _appliedOpTime = time;
+ }
+ void setDurableOpTime(OpTime time) {
+ _durableOpTimeSet = true;
+ _durableOpTime = time;
}
void setTerm(long long term) {
_term = term;
@@ -247,8 +255,11 @@ private:
bool _timeSet = false;
Seconds _time = Seconds(0); // Seconds since UNIX epoch.
- bool _opTimeSet = false;
- OpTime _opTime;
+ bool _appliedOpTimeSet = false;
+ OpTime _appliedOpTime;
+
+ bool _durableOpTimeSet = false;
+ OpTime _durableOpTime;
bool _electableSet = false;
bool _electable = false;
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
index 45c8dba3e1f..3c7adf479ee 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
+++ b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp
@@ -51,7 +51,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(false, hbResponse.hasElectionTime());
ASSERT_EQUALS(false, hbResponse.hasIsElectable());
ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -75,7 +76,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(false, hbResponse.hasElectionTime());
ASSERT_EQUALS(false, hbResponse.hasIsElectable());
ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -101,7 +103,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(false, hbResponse.hasElectionTime());
ASSERT_EQUALS(false, hbResponse.hasIsElectable());
ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -128,7 +131,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(false, hbResponse.hasIsElectable());
ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(false, hbResponse.hasOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -150,14 +154,15 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(Status::OK(), initializeResult);
ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString());
- // set opTime
- hbResponse.setOpTime(OpTime(Timestamp(10), 0));
+ // set durableOpTime
+ hbResponse.setDurableOpTime(OpTime(Timestamp(10), 0));
++fieldsSet;
ASSERT_EQUALS(false, hbResponse.hasState());
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(false, hbResponse.hasIsElectable());
ASSERT_EQUALS(false, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -167,7 +172,7 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
hbResponseObj = hbResponse.toBSON(false);
ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
@@ -175,7 +180,41 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
+
+ initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
+ ASSERT_EQUALS(Status::OK(), initializeResult);
+ ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString());
+
+ // set appliedOpTime
+ hbResponse.setAppliedOpTime(OpTime(Timestamp(50), 0));
+ ++fieldsSet;
+ ASSERT_EQUALS(false, hbResponse.hasState());
+ ASSERT_EQUALS(true, hbResponse.hasElectionTime());
+ ASSERT_EQUALS(false, hbResponse.hasIsElectable());
+ ASSERT_EQUALS(false, hbResponse.hasTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
+ ASSERT_EQUALS(false, hbResponse.hasConfig());
+ ASSERT_EQUALS(false, hbResponse.isMismatched());
+ ASSERT_EQUALS(false, hbResponse.isReplSet());
+ ASSERT_EQUALS(false, hbResponse.isStateDisagreement());
+ ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName());
+ ASSERT_EQUALS("", hbResponse.getHbMsg());
+ ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
+ ASSERT_EQUALS(1, hbResponse.getConfigVersion());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
+
+ hbResponseObj = hbResponse.toBSON(false);
+ ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields());
+ ASSERT_EQUALS("rs0", hbResponseObj["set"].String());
+ ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
+ ASSERT_EQUALS(1, hbResponseObj["v"].Number());
+ ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
ASSERT_EQUALS(Status::OK(), initializeResult);
@@ -188,7 +227,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(false, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -198,7 +238,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
hbResponseObj = hbResponse.toBSON(false);
@@ -207,7 +248,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0);
@@ -221,7 +263,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(false, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -231,7 +274,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
@@ -241,7 +285,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
@@ -257,7 +302,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -267,7 +313,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -278,7 +325,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
@@ -294,7 +342,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -306,7 +355,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -317,7 +367,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
@@ -334,7 +385,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(false, hbResponse.isReplSet());
@@ -346,7 +398,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -357,7 +410,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
@@ -376,7 +430,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(true, hbResponse.isReplSet());
@@ -388,7 +443,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -399,7 +455,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
@@ -419,7 +476,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(true, hbResponse.isReplSet());
@@ -431,7 +489,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -442,7 +501,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
@@ -462,7 +522,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(false, hbResponse.isMismatched());
ASSERT_EQUALS(true, hbResponse.isReplSet());
@@ -474,7 +535,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -485,7 +547,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS("lub dub", hbResponseObj["hbmsg"].String());
ASSERT_EQUALS(1, hbResponseObj["v"].Number());
ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp());
- ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp());
+ ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp());
ASSERT_EQUALS(10, hbResponseObj["time"].numberLong());
ASSERT_EQUALS(true, hbResponseObj["e"].trueValue());
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString());
@@ -505,7 +568,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(true, hbResponse.hasElectionTime());
ASSERT_EQUALS(true, hbResponse.hasIsElectable());
ASSERT_EQUALS(true, hbResponse.hasTime());
- ASSERT_EQUALS(true, hbResponse.hasOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasDurableOpTime());
+ ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime());
ASSERT_EQUALS(true, hbResponse.hasConfig());
ASSERT_EQUALS(true, hbResponse.isMismatched());
ASSERT_EQUALS(true, hbResponse.isReplSet());
@@ -517,7 +581,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) {
ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo());
ASSERT_EQUALS(1, hbResponse.getConfigVersion());
ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime());
ASSERT_EQUALS(Seconds(10), hbResponse.getTime());
ASSERT_EQUALS(true, hbResponse.isElectable());
ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString());
@@ -557,7 +622,23 @@ TEST(ReplSetHeartbeatResponse, InitializeWrongTimeType) {
result.reason());
}
-TEST(ReplSetHeartbeatResponse, InitializeWrongOpTimeType) {
+TEST(ReplSetHeartbeatResponse, InitializeWrongDurableOpTimeType) {
+ ReplSetHeartbeatResponse hbResponse;
+ BSONObj initializerObj = BSON("ok" << 1.0 << "durableOpTime"
+ << "hello");
+ Status result = hbResponse.initialize(initializerObj, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result);
+ ASSERT_EQUALS("\"durableOpTime\" had the wrong type. Expected Object, found String",
+ result.reason());
+
+ BSONObj initializerObj2 = BSON("ok" << 1.0 << "durableOpTime" << OpTime().getTimestamp());
+ Status result2 = hbResponse.initialize(initializerObj2, 0);
+ ASSERT_EQUALS(ErrorCodes::TypeMismatch, result2);
+ ASSERT_EQUALS("\"durableOpTime\" had the wrong type. Expected Object, found Timestamp",
+ result2.reason());
+}
+
+TEST(ReplSetHeartbeatResponse, InitializeWrongAppliedOpTimeType) {
ReplSetHeartbeatResponse hbResponse;
BSONObj initializerObj = BSON("ok" << 1.0 << "opTime"
<< "hello");
@@ -719,7 +800,7 @@ TEST(ReplSetHeartbeatResponse, InitializeBothOpTimeTypesSameResult) {
result = hbResponseTimestamp.initialize(initializerTimestamp.obj(), 0);
ASSERT_EQUALS(Status::OK(), result);
- ASSERT_EQUALS(hbResponseTimestamp.getOpTime(), hbResponseTimestamp.getOpTime());
+ ASSERT_EQUALS(hbResponseTimestamp.getAppliedOpTime(), hbResponseTimestamp.getAppliedOpTime());
}
TEST(ReplSetHeartbeatResponse, NoConfigStillInitializing) {
diff --git a/src/mongo/db/repl/repl_set_html_summary.cpp b/src/mongo/db/repl/repl_set_html_summary.cpp
index 821e8258d2e..218dff908fd 100644
--- a/src/mongo/db/repl/repl_set_html_summary.cpp
+++ b/src/mongo/db/repl/repl_set_html_summary.cpp
@@ -185,8 +185,9 @@ const std::string ReplSetHtmlSummary::toHtmlString() const {
}
memberTable << td(grey(memberHB.getLastHeartbeatMsg(), !up));
// TODO(dannenberg): change timestamp to optime in V1
- memberTable << td(
- memberHB.getLastHeartbeat() == Date_t() ? "?" : memberHB.getOpTime().toString());
+ memberTable << td(memberHB.getLastHeartbeat() == Date_t()
+ ? "?"
+ : memberHB.getAppliedOpTime().toString());
}
memberTable << _tr();
}
@@ -200,7 +201,7 @@ const std::string ReplSetHtmlSummary::toHtmlString() const {
const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex);
if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) {
- int lag = _hbData[_primaryIndex].getOpTime().getTimestamp().getSecs() -
+ int lag = _hbData[_primaryIndex].getAppliedOpTime().getTimestamp().getSecs() -
_selfOptime.getTimestamp().getSecs();
s << tr("Lag: ", str::stream() << lag << " secs");
}
diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp
index 2e86cecd9c9..4c578feca3a 100644
--- a/src/mongo/db/repl/replica_set_config.cpp
+++ b/src/mongo/db/repl/replica_set_config.cpp
@@ -61,21 +61,24 @@ const std::string kMembersFieldName = "members";
const std::string kSettingsFieldName = "settings";
const std::string kStepDownCheckWriteConcernModeName = "$stepDownCheck";
const std::string kProtocolVersionFieldName = "protocolVersion";
+const std::string kWriteConcernMajorityJournalDefaultFieldName =
+ "writeConcernMajorityJournalDefault";
const std::string kLegalConfigTopFieldNames[] = {kIdFieldName,
ReplicaSetConfig::kVersionFieldName,
kMembersFieldName,
kSettingsFieldName,
kProtocolVersionFieldName,
- ReplicaSetConfig::kConfigServerFieldName};
+ ReplicaSetConfig::kConfigServerFieldName,
+ kWriteConcernMajorityJournalDefaultFieldName};
-const std::string kElectionTimeoutFieldName = "electionTimeoutMillis";
-const std::string kHeartbeatIntervalFieldName = "heartbeatIntervalMillis";
-const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs";
const std::string kChainingAllowedFieldName = "chainingAllowed";
+const std::string kElectionTimeoutFieldName = "electionTimeoutMillis";
const std::string kGetLastErrorDefaultsFieldName = "getLastErrorDefaults";
const std::string kGetLastErrorModesFieldName = "getLastErrorModes";
const std::string kReplicaSetIdFieldName = "replicaSetId";
+const std::string kHeartbeatIntervalFieldName = "heartbeatIntervalMillis";
+const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs";
} // namespace
@@ -164,6 +167,16 @@ Status ReplicaSetConfig::_initialize(const BSONObj& cfg,
}
//
+ // Parse writeConcernMajorityJournalDefault
+ //
+ status = bsonExtractBooleanFieldWithDefault(cfg,
+ kWriteConcernMajorityJournalDefaultFieldName,
+ _protocolVersion == 1,
+ &_writeConcernMajorityJournalDefault);
+ if (!status.isOK())
+ return status;
+
+ //
// Parse settings
//
BSONElement settingsElement;
@@ -490,6 +503,12 @@ Status ReplicaSetConfig::validate() const {
"Nodes being used for config servers must be started with the "
"--configsvr flag");
}
+ if (!_writeConcernMajorityJournalDefault) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << kWriteConcernMajorityJournalDefaultFieldName
+ << " must be true in replica set configurations being "
+ "used for config servers");
+ }
} else if (serverGlobalParams.configsvr) {
return Status(ErrorCodes::BadValue,
"Nodes started with the --configsvr flag must have configsvr:true in "
@@ -652,8 +671,20 @@ BSONObj ReplicaSetConfig::toBSON() const {
configBuilder.append(kConfigServerFieldName, _configServer);
}
+ // Only include writeConcernMajorityJournalDefault if it is not the default version for this
+ // ProtocolVersion to prevent breaking cross version-3.2.1 compatibilty of ReplicaSetConfigs.
if (_protocolVersion > 0) {
configBuilder.append(kProtocolVersionFieldName, _protocolVersion);
+ // Only include writeConcernMajorityJournalDefault if it is not the default version for this
+ // ProtocolVersion to prevent breaking cross version-3.2.1 compatibilty of
+ // ReplicaSetConfigs.
+ if (!_writeConcernMajorityJournalDefault) {
+ configBuilder.append(kWriteConcernMajorityJournalDefaultFieldName,
+ _writeConcernMajorityJournalDefault);
+ }
+ } else if (_writeConcernMajorityJournalDefault) {
+ configBuilder.append(kWriteConcernMajorityJournalDefaultFieldName,
+ _writeConcernMajorityJournalDefault);
}
BSONArrayBuilder members(configBuilder.subarrayStart(kMembersFieldName));
diff --git a/src/mongo/db/repl/replica_set_config.h b/src/mongo/db/repl/replica_set_config.h
index 5a2541b8ecf..509a84b0b5c 100644
--- a/src/mongo/db/repl/replica_set_config.h
+++ b/src/mongo/db/repl/replica_set_config.h
@@ -231,6 +231,14 @@ public:
}
/**
+ * Returns whether or not majority write concerns should implicitly journal, if j has not been
+ * explicitly set.
+ */
+ bool getWriteConcernMajorityShouldJournal() const {
+ return _writeConcernMajorityJournalDefault;
+ }
+
+ /**
* Returns true if this replica set is for use as a config server replica set.
*/
bool isConfigServer() const {
@@ -349,6 +357,7 @@ private:
Milliseconds _heartbeatInterval = kDefaultHeartbeatInterval;
Seconds _heartbeatTimeoutPeriod = kDefaultHeartbeatTimeoutPeriod;
bool _chainingAllowed = kDefaultChainingAllowed;
+ bool _writeConcernMajorityJournalDefault = false;
int _majorityVoteCount = 0;
int _writeMajority = 0;
int _totalVotingMembers = 0;
diff --git a/src/mongo/db/repl/replica_set_config_test.cpp b/src/mongo/db/repl/replica_set_config_test.cpp
index 19cda44163d..05e534584e7 100644
--- a/src/mongo/db/repl/replica_set_config_test.cpp
+++ b/src/mongo/db/repl/replica_set_config_test.cpp
@@ -76,6 +76,7 @@ TEST(ReplicaSetConfig, ParseMinimalConfigAndCheckDefaults) {
ASSERT_EQUALS(ReplicaSetConfig::kDefaultElectionTimeoutPeriod,
config.getElectionTimeoutPeriod());
ASSERT_TRUE(config.isChainingAllowed());
+ ASSERT_FALSE(config.getWriteConcernMajorityShouldJournal());
ASSERT_FALSE(config.isConfigServer());
ASSERT_EQUALS(0, config.getProtocolVersion());
}
@@ -104,6 +105,7 @@ TEST(ReplicaSetConfig, ParseLargeConfigAndCheckAccessors) {
ASSERT_EQUALS(0, config.getDefaultWriteConcern().wNumNodes);
ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode);
ASSERT_FALSE(config.isChainingAllowed());
+ ASSERT_TRUE(config.getWriteConcernMajorityShouldJournal());
ASSERT_FALSE(config.isConfigServer());
ASSERT_EQUALS(Seconds(5), config.getHeartbeatInterval());
ASSERT_EQUALS(Seconds(120), config.getHeartbeatTimeoutPeriod());
@@ -977,7 +979,7 @@ TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) {
ASSERT_OK(configA.initialize(BSON(
"_id"
<< "asdf"
- << "version" << 9 << "members"
+ << "version" << 9 << "writeConcernMajorityJournalDefault" << true << "members"
<< BSON_ARRAY(BSON("_id" << 0 << "host"
<< "localhost:12345"
<< "arbiterOnly" << true << "votes" << 1)
@@ -995,14 +997,14 @@ TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) {
<< BSON("coast"
<< "west"
<< "hdd"
- << "true"))) << "protocolVersion" << 0
- << "settings" << BSON("heartbeatIntervalMillis"
- << 5000 << "heartbeatTimeoutSecs" << 20 << "electionTimeoutMillis"
- << 4 << "chainingAllowd" << true << "getLastErrorDefaults"
- << BSON("w"
- << "majority") << "getLastErrorModes"
- << BSON("disks" << BSON("ssd" << 1 << "hdd" << 1) << "coasts"
- << BSON("coast" << 2))))));
+ << "true"))) << "protocolVersion" << 0 << "settings"
+
+ << BSON("heartbeatIntervalMillis"
+ << 5000 << "heartbeatTimeoutSecs" << 20 << "electionTimeoutMillis" << 4
+ << "chainingAllowd" << true << "getLastErrorDefaults" << BSON("w"
+ << "majority")
+ << "getLastErrorModes" << BSON("disks" << BSON("ssd" << 1 << "hdd" << 1) << "coasts"
+ << BSON("coast" << 2))))));
BSONObj configObjA = configA.toBSON();
// Ensure a protocolVersion does not show up if it is 0 to maintain cross version compatibility.
ASSERT_FALSE(configObjA.hasField("protocolVersion"));
@@ -1199,6 +1201,23 @@ TEST(ReplicaSetConfig, CheckConfigServerCantHaveSlaveDelay) {
ASSERT_STRING_CONTAINS(status.reason(), "cannot have a non-zero slaveDelay");
}
+TEST(ReplicaSetConfig, CheckConfigServerMustHaveTrueForWriteConcernMajorityJournalDefault) {
+ serverGlobalParams.configsvr = true;
+ ON_BLOCK_EXIT([&] { serverGlobalParams.configsvr = false; });
+ ReplicaSetConfig configA;
+ ASSERT_OK(
+ configA.initialize(BSON("_id"
+ << "rs0"
+ << "protocolVersion" << 1 << "version" << 1 << "configsvr" << true
+ << "members" << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")
+ << BSON("_id" << 1 << "host"
+ << "localhost:54321"))
+ << "writeConcernMajorityJournalDefault" << false)));
+ Status status = configA.validate();
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+ ASSERT_STRING_CONTAINS(status.reason(), " must be true in replica set configurations being ");
+}
TEST(ReplicaSetConfig, GetPriorityTakeoverDelay) {
ReplicaSetConfig configA;
@@ -1341,6 +1360,50 @@ TEST(ReplicaSetConfig, ReplSetId) {
"\"replicaSetId\" had the wrong type. Expected OID, found NumberInt32");
}
+TEST(ReplicaSetConfig, ConfirmDefaultValuesOfAndAbilityToSetWriteConcernMajorityJournalDefault) {
+ // PV0, should default to false.
+ ReplicaSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_OK(config.validate());
+ ASSERT_FALSE(config.getWriteConcernMajorityShouldJournal());
+ ASSERT_FALSE(config.toBSON().hasField("writeConcernMajorityJournalDefault"));
+
+ // Should be able to set it true in PV0.
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "writeConcernMajorityJournalDefault" << true)));
+ ASSERT_OK(config.validate());
+ ASSERT_TRUE(config.getWriteConcernMajorityShouldJournal());
+ ASSERT_TRUE(config.toBSON().hasField("writeConcernMajorityJournalDefault"));
+
+ // PV1, should default to true.
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "protocolVersion" << 1 << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345")))));
+ ASSERT_OK(config.validate());
+ ASSERT_TRUE(config.getWriteConcernMajorityShouldJournal());
+ ASSERT_FALSE(config.toBSON().hasField("writeConcernMajorityJournalDefault"));
+
+ // Should be able to set it false in PV1.
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "protocolVersion" << 1 << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "localhost:12345"))
+ << "writeConcernMajorityJournalDefault" << false)));
+ ASSERT_OK(config.validate());
+ ASSERT_FALSE(config.getWriteConcernMajorityShouldJournal());
+ ASSERT_TRUE(config.toBSON().hasField("writeConcernMajorityJournalDefault"));
+}
} // namespace
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 1d31b059bb7..630b9277b48 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -67,6 +67,7 @@ namespace repl {
class BackgroundSync;
class HandshakeArgs;
class IsMasterResponse;
+class OldUpdatePositionArgs;
class OplogReader;
class OpTime;
class ReadConcernArgs;
@@ -293,11 +294,22 @@ public:
*
* The new value of "opTime" must be no less than any prior value passed to this method, and
* it is the caller's job to properly synchronize this behavior. The exception to this rule
- * is that after calls to resetLastOpTimeFromOplog(), the minimum acceptable value for
+ * is that after calls to resetLastOpTimesFromOplog(), the minimum acceptable value for
* "opTime" is reset based on the contents of the oplog, and may go backwards due to
* rollback.
*/
- virtual void setMyLastOptime(const OpTime& opTime) = 0;
+ virtual void setMyLastAppliedOpTime(const OpTime& opTime) = 0;
+
+ /**
+ * Updates our internal tracking of the last OpTime durable to this node.
+ *
+ * The new value of "opTime" must be no less than any prior value passed to this method, and
+ * it is the caller's job to properly synchronize this behavior. The exception to this rule
+ * is that after calls to resetLastOpTimesFromOplog(), the minimum acceptable value for
+ * "opTime" is reset based on the contents of the oplog, and may go backwards due to
+ * rollback.
+ */
+ virtual void setMyLastDurableOpTime(const OpTime& opTime) = 0;
/**
* Updates our internal tracking of the last OpTime applied to this node, but only
@@ -307,12 +319,22 @@ public:
* This function is used by logOp() on a primary, since the ops in the oplog do not
* necessarily commit in sequential order.
*/
- virtual void setMyLastOptimeForward(const OpTime& opTime) = 0;
+ virtual void setMyLastAppliedOpTimeForward(const OpTime& opTime) = 0;
+
+ /**
+ * Updates our internal tracking of the last OpTime durable to this node, but only
+ * if the supplied optime is later than the current last OpTime known to the replication
+ * coordinator.
+ *
+ * This function is used by logOp() on a primary, since the ops in the oplog do not
+ * necessarily commit in sequential order.
+ */
+ virtual void setMyLastDurableOpTimeForward(const OpTime& opTime) = 0;
/**
* Same as above, but used during places we need to zero our last optime.
*/
- virtual void resetMyLastOptime() = 0;
+ virtual void resetMyLastOpTimes() = 0;
/**
* Updates our the message we include in heartbeat responses.
@@ -320,9 +342,14 @@ public:
virtual void setMyHeartbeatMessage(const std::string& msg) = 0;
/**
- * Returns the last optime recorded by setMyLastOptime.
+ * Returns the last optime recorded by setMyLastAppliedOpTime.
*/
- virtual OpTime getMyLastOptime() const = 0;
+ virtual OpTime getMyLastAppliedOpTime() const = 0;
+
+ /**
+ * Returns the last optime recorded by setMyLastDurableOpTime.
+ */
+ virtual OpTime getMyLastDurableOpTime() const = 0;
/**
* Waits until the optime of the current node is at least the opTime specified in
@@ -408,6 +435,7 @@ public:
*
* The returned bool indicates whether or not the command was created.
*/
+ virtual bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0;
/**
@@ -573,7 +601,12 @@ public:
* were applied.
* "configVersion" will be populated with our config version if and only if we return
* InvalidReplicaSetConfig.
+ *
+ * The OldUpdatePositionArgs version provides support for the pre-3.2.2 format of
+ * UpdatePositionArgs.
*/
+ virtual Status processReplSetUpdatePosition(const OldUpdatePositionArgs& updates,
+ long long* configVersion) = 0;
virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
long long* configVersion) = 0;
@@ -594,8 +627,9 @@ public:
/**
* Returns a vector of members that have applied the operation with OpTime 'op'.
+ * "durablyWritten" indicates whether the operation has to be durably applied.
*/
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) = 0;
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, bool durablyWritten) = 0;
/**
* Returns a vector of the members other than ourself in the replica set, as specified in
@@ -620,10 +654,10 @@ public:
virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) = 0;
/**
- * Loads the optime from the last op in the oplog into the coordinator's lastOpApplied
- * value.
+ * Loads the optime from the last op in the oplog into the coordinator's lastAppliedOpTime and
+ * lastDurableOpTime values.
*/
- virtual void resetLastOpTimeFromOplog(OperationContext* txn) = 0;
+ virtual void resetLastOpTimesFromOplog(OperationContext* txn) = 0;
/**
* Returns the OpTime of the latest replica set-committed op known to this server.
@@ -661,6 +695,12 @@ public:
virtual bool isV1ElectionProtocol() = 0;
/**
+ * Returns whether or not majority write concerns should implicitly journal, if j has not been
+ * explicitly set.
+ */
+ virtual bool getWriteConcernMajorityShouldJournal() = 0;
+
+ /**
* Writes into 'output' all the information needed to generate a summary of the current
* replication state for use by the web interface.
*/
@@ -739,6 +779,13 @@ public:
*/
virtual size_t getNumUncommittedSnapshots() = 0;
+ /**
+ * Returns a new WriteConcernOptions based on "wc" but with UNSET syncMode reset to JOURNAL or
+ * NONE based on our rsConfig.
+ */
+ virtual WriteConcernOptions populateUnsetWriteConcernOptionsSyncMode(
+ WriteConcernOptions wc) = 0;
+
protected:
ReplicationCoordinator();
};
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index b98004f05fc..76be96c4b3a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -47,6 +47,7 @@
#include "mongo/db/repl/handshake_args.h"
#include "mongo/db/repl/is_master_response.h"
#include "mongo/db/repl/last_vote.h"
+#include "mongo/db/repl/old_update_position_args.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/read_concern_response.h"
#include "mongo/db/repl/repl_client_info.h"
@@ -64,6 +65,7 @@
#include "mongo/db/repl/update_position_args.h"
#include "mongo/db/repl/vote_requester.h"
#include "mongo/db/server_options.h"
+#include "mongo/db/write_concern.h"
#include "mongo/db/write_concern_options.h"
#include "mongo/executor/connection_pool_stats.h"
#include "mongo/rpc/metadata/repl_set_metadata.h"
@@ -161,17 +163,17 @@ DataReplicatorOptions createDataReplicatorOptions(ReplicationCoordinator* replCo
options.applierFn = [](OperationContext*, const BSONObj&) -> Status { return Status::OK(); };
options.rollbackFn =
[](OperationContext*, const OpTime&, const HostAndPort&) { return Status::OK(); };
- options.prepareReplSetUpdatePositionCommandFn = [replCoord]() -> StatusWith<BSONObj> {
+ options.prepareOldReplSetUpdatePositionCommandFn = [replCoord]() -> StatusWith<BSONObj> {
BSONObjBuilder bob;
- if (replCoord->prepareReplSetUpdatePositionCommand(&bob)) {
+ if (replCoord->prepareOldReplSetUpdatePositionCommand(&bob)) {
return bob.obj();
}
return Status(ErrorCodes::OperationFailed,
"unable to prepare replSetUpdatePosition command object");
};
- options.getMyLastOptime = [replCoord]() { return replCoord->getMyLastOptime(); };
+ options.getMyLastOptime = [replCoord]() { return replCoord->getMyLastAppliedOpTime(); };
options.setMyLastOptime =
- [replCoord](const OpTime& opTime) { replCoord->setMyLastOptime(opTime); };
+ [replCoord](const OpTime& opTime) { replCoord->setMyLastAppliedOpTime(opTime); };
options.setFollowerMode =
[replCoord](const MemberState& newState) { return replCoord->setFollowerMode(newState); };
options.syncSourceSelector = replCoord;
@@ -186,7 +188,8 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
int64_t prngSeed,
NetworkInterface* network,
StorageInterface* storage,
- ReplicationExecutor* replExec)
+ ReplicationExecutor* replExec,
+ stdx::function<bool()>* isDurableStorageEngineFn)
: _settings(settings),
_replMode(getReplicationModeFromSettings(settings)),
_topCoord(topCoord),
@@ -202,7 +205,10 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
_sleptLastElection(false),
_canAcceptNonLocalWrites(!(settings.usingReplSets() || settings.isSlave())),
_canServeNonLocalReads(0U),
- _dr(createDataReplicatorOptions(this), &_replExecutor) {
+ _dr(createDataReplicatorOptions(this), &_replExecutor),
+ _isDurableStorageEngine(isDurableStorageEngineFn ? *isDurableStorageEngineFn : []() -> bool {
+ return getGlobalServiceContext()->getGlobalStorageEngine()->isDurable();
+ }) {
if (!isReplEnabled()) {
return;
}
@@ -228,16 +234,23 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
TopologyCoordinator* topCoord,
int64_t prngSeed)
: ReplicationCoordinatorImpl(
- settings, externalState, topCoord, prngSeed, network, storage, nullptr) {}
+ settings, externalState, topCoord, prngSeed, network, storage, nullptr, nullptr) {}
ReplicationCoordinatorImpl::ReplicationCoordinatorImpl(
const ReplSettings& settings,
ReplicationCoordinatorExternalState* externalState,
TopologyCoordinator* topCoord,
ReplicationExecutor* replExec,
- int64_t prngSeed)
- : ReplicationCoordinatorImpl(
- settings, externalState, topCoord, prngSeed, nullptr, nullptr, replExec) {}
+ int64_t prngSeed,
+ stdx::function<bool()>* isDurableStorageEngineFn)
+ : ReplicationCoordinatorImpl(settings,
+ externalState,
+ topCoord,
+ prngSeed,
+ nullptr,
+ nullptr,
+ replExec,
+ isDurableStorageEngineFn) {}
ReplicationCoordinatorImpl::~ReplicationCoordinatorImpl() {}
@@ -404,7 +417,8 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
invariant(_rsConfigState == kConfigStartingUp);
const PostMemberStateUpdateAction action =
_setCurrentRSConfig_inlock(cbData, localConfig, myIndex.getValue());
- _setMyLastOptimeAndReport_inlock(&lk, lastOpTime, false);
+ _setMyLastAppliedOpTime_inlock(lastOpTime, false);
+ _setMyLastDurableOpTimeAndReport_inlock(&lk, lastOpTime, false);
_externalState->setGlobalTimestamp(lastOpTime.getTimestamp());
// Step down is impossible, so we don't need to wait for the returned event.
_updateTerm_incallback(term);
@@ -741,9 +755,26 @@ void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo
_wakeReadyWaiters_inlock();
}
-void ReplicationCoordinatorImpl::_updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo,
- const OpTime& opTime) {
- slaveInfo->opTime = opTime;
+void ReplicationCoordinatorImpl::_updateSlaveInfoAppliedOpTime_inlock(SlaveInfo* slaveInfo,
+ const OpTime& opTime) {
+ slaveInfo->lastAppliedOpTime = opTime;
+ slaveInfo->lastUpdate = _replExecutor.now();
+ slaveInfo->down = false;
+
+ // Wake up any threads waiting for replication that now have their replication
+ // check satisfied
+ _wakeReadyWaiters_inlock();
+}
+
+void ReplicationCoordinatorImpl::_updateSlaveInfoDurableOpTime_inlock(SlaveInfo* slaveInfo,
+ const OpTime& opTime) {
+ // lastAppliedOpTime cannot be behind lastDurableOpTime.
+ if (slaveInfo->lastAppliedOpTime < opTime) {
+ log() << "Durable progress is ahead of the applied progress. This is likely due to a "
+ "rollback.";
+ return;
+ }
+ slaveInfo->lastDurableOpTime = opTime;
slaveInfo->lastUpdate = _replExecutor.now();
slaveInfo->down = false;
@@ -824,13 +855,13 @@ Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid, const T
OpTime opTime(ts, OpTime::kUninitializedTerm);
SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid);
if (slaveInfo) {
- if (slaveInfo->opTime < opTime) {
- _updateSlaveInfoOptime_inlock(slaveInfo, opTime);
+ if (slaveInfo->lastAppliedOpTime < opTime) {
+ _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, opTime);
}
} else {
SlaveInfo newSlaveInfo;
newSlaveInfo.rid = rid;
- newSlaveInfo.opTime = opTime;
+ newSlaveInfo.lastAppliedOpTime = opTime;
_addSlaveInfo_inlock(newSlaveInfo);
}
return Status::OK();
@@ -841,28 +872,41 @@ void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) {
&TopologyCoordinator::setMyHeartbeatMessage, _topCoord.get(), _replExecutor.now(), msg));
}
-void ReplicationCoordinatorImpl::setMyLastOptimeForward(const OpTime& opTime) {
+void ReplicationCoordinatorImpl::setMyLastAppliedOpTimeForward(const OpTime& opTime) {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ if (opTime > _getMyLastAppliedOpTime_inlock()) {
+ _setMyLastAppliedOpTimeAndReport_inlock(&lock, opTime, false);
+ }
+}
+
+void ReplicationCoordinatorImpl::setMyLastDurableOpTimeForward(const OpTime& opTime) {
stdx::unique_lock<stdx::mutex> lock(_mutex);
- if (opTime > _getMyLastOptime_inlock()) {
- _setMyLastOptimeAndReport_inlock(&lock, opTime, false);
+ if (opTime > _getMyLastDurableOpTime_inlock()) {
+ _setMyLastDurableOpTimeAndReport_inlock(&lock, opTime, false);
}
}
-void ReplicationCoordinatorImpl::setMyLastOptime(const OpTime& opTime) {
+void ReplicationCoordinatorImpl::setMyLastAppliedOpTime(const OpTime& opTime) {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ _setMyLastAppliedOpTimeAndReport_inlock(&lock, opTime, false);
+}
+
+void ReplicationCoordinatorImpl::setMyLastDurableOpTime(const OpTime& opTime) {
stdx::unique_lock<stdx::mutex> lock(_mutex);
- _setMyLastOptimeAndReport_inlock(&lock, opTime, false);
+ _setMyLastDurableOpTimeAndReport_inlock(&lock, opTime, false);
}
-void ReplicationCoordinatorImpl::resetMyLastOptime() {
+void ReplicationCoordinatorImpl::resetMyLastOpTimes() {
stdx::unique_lock<stdx::mutex> lock(_mutex);
// Reset to uninitialized OpTime
- _setMyLastOptimeAndReport_inlock(&lock, OpTime(), true);
+ _setMyLastAppliedOpTime_inlock(OpTime(), true);
+ _setMyLastDurableOpTimeAndReport_inlock(&lock, OpTime(), true);
}
-void ReplicationCoordinatorImpl::_setMyLastOptimeAndReport_inlock(
+void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndReport_inlock(
stdx::unique_lock<stdx::mutex>* lock, const OpTime& opTime, bool isRollbackAllowed) {
invariant(lock->owns_lock());
- _setMyLastOptime_inlock(opTime, isRollbackAllowed);
+ _setMyLastAppliedOpTime_inlock(opTime, isRollbackAllowed);
if (getReplicationMode() != modeReplSet) {
return;
@@ -877,11 +921,29 @@ void ReplicationCoordinatorImpl::_setMyLastOptimeAndReport_inlock(
_externalState->forwardSlaveProgress(); // Must do this outside _mutex
}
-void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(const OpTime& opTime,
- bool isRollbackAllowed) {
+void ReplicationCoordinatorImpl::_setMyLastDurableOpTimeAndReport_inlock(
+ stdx::unique_lock<stdx::mutex>* lock, const OpTime& opTime, bool isRollbackAllowed) {
+ invariant(lock->owns_lock());
+ _setMyLastDurableOpTime_inlock(opTime, isRollbackAllowed);
+
+ if (getReplicationMode() != modeReplSet) {
+ return;
+ }
+
+ if (_getMemberState_inlock().primary()) {
+ return;
+ }
+
+ lock->unlock();
+
+ _externalState->forwardSlaveProgress(); // Must do this outside _mutex
+}
+
+void ReplicationCoordinatorImpl::_setMyLastAppliedOpTime_inlock(const OpTime& opTime,
+ bool isRollbackAllowed) {
SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()];
- invariant(isRollbackAllowed || mySlaveInfo->opTime <= opTime);
- _updateSlaveInfoOptime_inlock(mySlaveInfo, opTime);
+ invariant(isRollbackAllowed || mySlaveInfo->lastAppliedOpTime <= opTime);
+ _updateSlaveInfoAppliedOpTime_inlock(mySlaveInfo, opTime);
for (auto& opTimeWaiter : _opTimeWaiterList) {
if (*(opTimeWaiter->opTime) <= opTime) {
@@ -890,9 +952,27 @@ void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(const OpTime& opTime,
}
}
-OpTime ReplicationCoordinatorImpl::getMyLastOptime() const {
+void ReplicationCoordinatorImpl::_setMyLastDurableOpTime_inlock(const OpTime& opTime,
+ bool isRollbackAllowed) {
+ SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()];
+ invariant(isRollbackAllowed || mySlaveInfo->lastDurableOpTime <= opTime);
+ // lastAppliedOpTime cannot be behind lastDurableOpTime.
+ if (mySlaveInfo->lastAppliedOpTime < opTime) {
+ log() << "Durable progress is ahead of the applied progress. This is likely due to a "
+ "rollback.";
+ return;
+ }
+ _updateSlaveInfoDurableOpTime_inlock(mySlaveInfo, opTime);
+}
+
+OpTime ReplicationCoordinatorImpl::getMyLastAppliedOpTime() const {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _getMyLastAppliedOpTime_inlock();
+}
+
+OpTime ReplicationCoordinatorImpl::getMyLastDurableOpTime() const {
stdx::lock_guard<stdx::mutex> lock(_mutex);
- return _getMyLastOptime_inlock();
+ return _getMyLastDurableOpTime_inlock();
}
ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext* txn,
@@ -933,7 +1013,7 @@ ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext
auto loopCondition = [this, isMajorityReadConcern, targetOpTime] {
return isMajorityReadConcern
? !_currentCommittedSnapshot || targetOpTime > _currentCommittedSnapshot->opTime
- : targetOpTime > _getMyLastOptime_inlock();
+ : targetOpTime > _getMyLastAppliedOpTime_inlock();
};
while (loopCondition()) {
@@ -950,6 +1030,9 @@ ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext
stdx::condition_variable condVar;
WriteConcernOptions writeConcern;
writeConcern.wMode = WriteConcernOptions::kMajority;
+ writeConcern.syncMode = getWriteConcernMajorityShouldJournal_inlock()
+ ? WriteConcernOptions::SyncMode::JOURNAL
+ : WriteConcernOptions::SyncMode::NONE;
WaiterInfo waitInfo(isMajorityReadConcern ? &_replicationWaiterList : &_opTimeWaiterList,
txn->getOpID(),
@@ -967,25 +1050,111 @@ ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext
return ReadConcernResponse(Status::OK(), Milliseconds(timer.millis()));
}
-OpTime ReplicationCoordinatorImpl::_getMyLastOptime_inlock() const {
- return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].opTime;
+OpTime ReplicationCoordinatorImpl::_getMyLastAppliedOpTime_inlock() const {
+ return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].lastAppliedOpTime;
+}
+
+OpTime ReplicationCoordinatorImpl::_getMyLastDurableOpTime_inlock() const {
+ return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].lastDurableOpTime;
}
-Status ReplicationCoordinatorImpl::setLastOptime_forTest(long long cfgVer,
- long long memberId,
- const OpTime& opTime) {
+Status ReplicationCoordinatorImpl::setLastDurableOptime_forTest(long long cfgVer,
+ long long memberId,
+ const OpTime& opTime) {
stdx::lock_guard<stdx::mutex> lock(_mutex);
invariant(getReplicationMode() == modeReplSet);
- const UpdatePositionArgs::UpdateInfo update(OID(), opTime, cfgVer, memberId);
+ const UpdatePositionArgs::UpdateInfo update(OpTime(), opTime, cfgVer, memberId);
long long configVersion;
return _setLastOptime_inlock(update, &configVersion);
}
+Status ReplicationCoordinatorImpl::setLastAppliedOptime_forTest(long long cfgVer,
+ long long memberId,
+ const OpTime& opTime) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ invariant(getReplicationMode() == modeReplSet);
+
+ const UpdatePositionArgs::UpdateInfo update(opTime, OpTime(), cfgVer, memberId);
+ long long configVersion;
+ return _setLastOptime_inlock(update, &configVersion);
+}
+
+Status ReplicationCoordinatorImpl::_setLastOptime_inlock(
+ const OldUpdatePositionArgs::UpdateInfo& args, long long* configVersion) {
+ if (_selfIndex == -1) {
+ // Ignore updates when we're in state REMOVED
+ return Status(ErrorCodes::NotMasterOrSecondary,
+ "Received replSetUpdatePosition command but we are in state REMOVED");
+ }
+ invariant(getReplicationMode() == modeReplSet);
+
+ if (args.memberId < 0) {
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " which is negative and therefore invalid";
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::NodeNotFound, errmsg);
+ }
+
+ if (args.memberId == _rsConfig.getMemberAt(_selfIndex).getId()) {
+ // Do not let remote nodes tell us what our optime is.
+ return Status::OK();
+ }
+
+ LOG(2) << "received notification that node with memberID " << args.memberId
+ << " in config with version " << args.cfgver
+ << " has durably reached optime: " << args.ts;
+
+ SlaveInfo* slaveInfo = NULL;
+ if (args.cfgver != _rsConfig.getConfigVersion()) {
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " whose config version of " << args.cfgver << " doesn't match our config version of "
+ << _rsConfig.getConfigVersion();
+ LOG(1) << errmsg;
+ *configVersion = _rsConfig.getConfigVersion();
+ return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg);
+ }
+
+ slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId);
+ if (!slaveInfo) {
+ invariant(!_rsConfig.findMemberByID(args.memberId));
+
+ std::string errmsg = str::stream()
+ << "Received replSetUpdatePosition for node with memberId " << args.memberId
+ << " which doesn't exist in our config";
+ LOG(1) << errmsg;
+ return Status(ErrorCodes::NodeNotFound, errmsg);
+ }
+
+ invariant(args.memberId == slaveInfo->memberId);
+
+ LOG(3) << "Node with memberID " << args.memberId << " has durably applied operations through "
+ << slaveInfo->lastDurableOpTime << " and has applied operations through "
+ << slaveInfo->lastAppliedOpTime << "; updating to new durable operation with timestamp "
+ << args.ts;
+
+ // Only update remote optimes if they increase.
+ if (slaveInfo->lastAppliedOpTime < args.ts) {
+ _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, args.ts);
+ }
+ if (slaveInfo->lastDurableOpTime < args.ts) {
+ _updateSlaveInfoDurableOpTime_inlock(slaveInfo, args.ts);
+ }
+
+
+ // Update liveness for this node.
+ slaveInfo->lastUpdate = _replExecutor.now();
+ slaveInfo->down = false;
+ _cancelAndRescheduleLivenessUpdate_inlock(args.memberId);
+ return Status::OK();
+}
+
Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args,
long long* configVersion) {
if (_selfIndex == -1) {
- // Ignore updates when we're in state REMOVED
+ // Ignore updates when we're in state REMOVED.
return Status(ErrorCodes::NotMasterOrSecondary,
"Received replSetUpdatePosition command but we are in state REMOVED");
}
@@ -1005,7 +1174,9 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg
}
LOG(2) << "received notification that node with memberID " << args.memberId
- << " in config with version " << args.cfgver << " has reached optime: " << args.ts;
+ << " in config with version " << args.cfgver
+ << " has reached optime: " << args.appliedOpTime
+ << " and is durable through: " << args.durableOpTime;
SlaveInfo* slaveInfo = NULL;
if (args.cfgver != _rsConfig.getConfigVersion()) {
@@ -1032,11 +1203,17 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg
invariant(args.memberId == slaveInfo->memberId);
LOG(3) << "Node with memberID " << args.memberId << " currently has optime "
- << slaveInfo->opTime << "; updating to " << args.ts;
+ << slaveInfo->lastAppliedOpTime << " durable through " << slaveInfo->lastDurableOpTime
+ << "; updating to optime " << args.appliedOpTime << " and durable through "
+ << args.durableOpTime;
+
// Only update remote optimes if they increase.
- if (slaveInfo->opTime < args.ts) {
- _updateSlaveInfoOptime_inlock(slaveInfo, args.ts);
+ if (slaveInfo->lastAppliedOpTime < args.appliedOpTime) {
+ _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, args.appliedOpTime);
+ }
+ if (slaveInfo->lastDurableOpTime < args.durableOpTime) {
+ _updateSlaveInfoDurableOpTime_inlock(slaveInfo, args.durableOpTime);
}
// Update liveness for this node.
@@ -1092,17 +1269,22 @@ void ReplicationCoordinatorImpl::interruptAll() {
bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
const OpTime& opTime, SnapshotName minSnapshot, const WriteConcernOptions& writeConcern) {
+ invariant(writeConcern.syncMode != WriteConcernOptions::SyncMode::UNSET);
Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
if (!status.isOK()) {
return true;
}
if (writeConcern.wMode.empty())
- return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes);
+ return _haveNumNodesReachedOpTime_inlock(opTime,
+ writeConcern.wNumNodes,
+ writeConcern.syncMode ==
+ WriteConcernOptions::SyncMode::JOURNAL);
StringData patternName;
if (writeConcern.wMode == WriteConcernOptions::kMajority) {
- if (_externalState->snapshotsEnabled()) {
+ if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL &&
+ _externalState->snapshotsEnabled()) {
if (!_currentCommittedSnapshot) {
return false;
}
@@ -1119,20 +1301,26 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock(
if (!tagPattern.isOK()) {
return true;
}
- return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue());
+ return _haveTaggedNodesReachedOpTime_inlock(opTime,
+ tagPattern.getValue(),
+ writeConcern.syncMode ==
+ WriteConcernOptions::SyncMode::JOURNAL);
}
-bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& opTime,
- int numNodes) {
- if (_getMyLastOptime_inlock() < opTime) {
- // Secondaries that are for some reason ahead of us should not allow us to
- // satisfy a write concern if we aren't caught up ourselves.
+bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& targetOpTime,
+ int numNodes,
+ bool durablyWritten) {
+ // Replication progress that is for some reason ahead of us should not allow us to
+ // satisfy a write concern if we aren't caught up ourselves.
+ OpTime myOpTime =
+ durablyWritten ? _getMyLastDurableOpTime_inlock() : _getMyLastAppliedOpTime_inlock();
+ if (myOpTime < targetOpTime) {
return false;
}
for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
- const OpTime& slaveTime = it->opTime;
- if (slaveTime >= opTime) {
+ const OpTime& slaveTime = durablyWritten ? it->lastDurableOpTime : it->lastAppliedOpTime;
+ if (slaveTime >= targetOpTime) {
--numNodes;
}
@@ -1144,10 +1332,10 @@ bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime&
}
bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock(
- const OpTime& opTime, const ReplicaSetTagPattern& tagPattern) {
+ const OpTime& opTime, const ReplicaSetTagPattern& tagPattern, bool durablyWritten) {
ReplicaSetTagMatch matcher(tagPattern);
for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) {
- const OpTime& slaveTime = it->opTime;
+ const OpTime& slaveTime = durablyWritten ? it->lastDurableOpTime : it->lastAppliedOpTime;
if (slaveTime >= opTime) {
// This node has reached the desired optime, now we need to check if it is a part
// of the tagPattern.
@@ -1168,18 +1356,25 @@ bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock(
ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication(
OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern) {
Timer timer;
+ WriteConcernOptions fixedWriteConcern = populateUnsetWriteConcernOptionsSyncMode(writeConcern);
stdx::unique_lock<stdx::mutex> lock(_mutex);
- return _awaitReplication_inlock(&timer, &lock, txn, opTime, SnapshotName::min(), writeConcern);
+ return _awaitReplication_inlock(
+ &timer, &lock, txn, opTime, SnapshotName::min(), fixedWriteConcern);
}
ReplicationCoordinator::StatusAndDuration
ReplicationCoordinatorImpl::awaitReplicationOfLastOpForClient(
OperationContext* txn, const WriteConcernOptions& writeConcern) {
Timer timer;
+ WriteConcernOptions fixedWriteConcern = populateUnsetWriteConcernOptionsSyncMode(writeConcern);
stdx::unique_lock<stdx::mutex> lock(_mutex);
const auto& clientInfo = ReplClientInfo::forClient(txn->getClient());
- return _awaitReplication_inlock(
- &timer, &lock, txn, clientInfo.getLastOp(), clientInfo.getLastSnapshot(), writeConcern);
+ return _awaitReplication_inlock(&timer,
+ &lock,
+ txn,
+ clientInfo.getLastOp(),
+ clientInfo.getLastSnapshot(),
+ fixedWriteConcern);
}
ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitReplication_inlock(
@@ -1214,7 +1409,7 @@ ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitRepl
if (writeConcern.wMode.empty()) {
if (writeConcern.wNumNodes < 1) {
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
- } else if (writeConcern.wNumNodes == 1 && _getMyLastOptime_inlock() >= opTime) {
+ } else if (writeConcern.wNumNodes == 1 && _getMyLastAppliedOpTime_inlock() >= opTime) {
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
}
@@ -1403,7 +1598,7 @@ void ReplicationCoordinatorImpl::_stepDownContinue(
return;
}
bool forceNow = now >= waitUntil ? force : false;
- if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastOptime())) {
+ if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastAppliedOpTime())) {
// Schedule work to (potentially) step back up once the stepdown period has ended.
_replExecutor.scheduleWorkAt(stepDownUntil,
stdx::bind(&ReplicationCoordinatorImpl::_handleTimePassing,
@@ -1612,6 +1807,37 @@ int ReplicationCoordinatorImpl::_getMyId_inlock() const {
bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
stdx::lock_guard<stdx::mutex> lock(_mutex);
invariant(_rsConfig.isInitialized());
+ // Do not send updates if we have been removed from the config.
+ if (_selfIndex == -1) {
+ return false;
+ }
+ cmdBuilder->append("replSetUpdatePosition", 1);
+ // Create an array containing objects each live member connected to us and for ourself.
+ BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
+ for (SlaveInfoVector::iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end(); ++itr) {
+ if (itr->lastAppliedOpTime.isNull()) {
+ // Don't include info on members we haven't heard from yet.
+ continue;
+ }
+ // Don't include members we think are down.
+ if (!itr->self && itr->down) {
+ continue;
+ }
+
+ BSONObjBuilder entry(arrayBuilder.subobjStart());
+ itr->lastDurableOpTime.append(&entry, "durableOpTime");
+ itr->lastAppliedOpTime.append(&entry, "appliedOpTime");
+ entry.append("memberId", itr->memberId);
+ entry.append("cfgver", _rsConfig.getConfigVersion());
+ }
+
+ return true;
+}
+
+bool ReplicationCoordinatorImpl::prepareOldReplSetUpdatePositionCommand(
+ BSONObjBuilder* cmdBuilder) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ invariant(_rsConfig.isInitialized());
// do not send updates if we have been removed from the config
if (_selfIndex == -1) {
return false;
@@ -1620,7 +1846,7 @@ bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuil
// create an array containing objects each member connected to us and for ourself
BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes"));
for (SlaveInfoVector::iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end(); ++itr) {
- if (itr->opTime.isNull()) {
+ if (itr->lastDurableOpTime.isNull()) {
// Don't include info on members we haven't heard from yet.
continue;
}
@@ -1632,9 +1858,9 @@ bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuil
BSONObjBuilder entry(arrayBuilder.subobjStart());
entry.append("_id", itr->rid);
if (isV1ElectionProtocol()) {
- itr->opTime.append(&entry, "optime");
+ itr->lastDurableOpTime.append(&entry, "optime");
} else {
- entry.append("optime", itr->opTime.getTimestamp());
+ entry.append("optime", itr->lastDurableOpTime.getTimestamp());
}
entry.append("memberId", itr->memberId);
entry.append("cfgver", _rsConfig.getConfigVersion());
@@ -1645,21 +1871,15 @@ bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuil
Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) {
Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
- CBHStatus cbh =
- _replExecutor.scheduleWork(stdx::bind(&TopologyCoordinator::prepareStatusResponse,
- _topCoord.get(),
- stdx::placeholders::_1,
- _replExecutor.now(),
- time(0) - serverGlobalParams.started,
- getMyLastOptime(),
- response,
- &result));
- if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
- return Status(ErrorCodes::ShutdownInProgress, "replication shutdown in progress");
- }
- fassert(18640, cbh.getStatus());
- _replExecutor.wait(cbh.getValue());
-
+ _scheduleWorkAndWaitForCompletion(stdx::bind(&TopologyCoordinator::prepareStatusResponse,
+ _topCoord.get(),
+ stdx::placeholders::_1,
+ _replExecutor.now(),
+ time(0) - serverGlobalParams.started,
+ getMyLastAppliedOpTime(),
+ getLastCommittedOpTime(),
+ response,
+ &result));
return result;
}
@@ -1704,11 +1924,11 @@ void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) {
entry.append("rid", itr->rid);
if (isV1ElectionProtocol()) {
BSONObjBuilder opTime(entry.subobjStart("optime"));
- opTime.append("ts", itr->opTime.getTimestamp());
- opTime.append("term", itr->opTime.getTerm());
+ opTime.append("ts", itr->lastDurableOpTime.getTimestamp());
+ opTime.append("term", itr->lastDurableOpTime.getTerm());
opTime.done();
} else {
- entry.append("optime", itr->opTime.getTimestamp());
+ entry.append("optime", itr->lastDurableOpTime.getTimestamp());
}
entry.append("host", itr->hostAndPort.toString());
if (getReplicationMode() == modeReplSet) {
@@ -1850,7 +2070,7 @@ Status ReplicationCoordinatorImpl::processReplSetSyncFrom(const HostAndPort& tar
_topCoord.get(),
stdx::placeholders::_1,
target,
- _getMyLastOptime_inlock(),
+ _getMyLastAppliedOpTime_inlock(),
resultObj,
&result));
if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
@@ -1939,8 +2159,12 @@ void ReplicationCoordinatorImpl::_processHeartbeatFinish(
auto senderHost(args.getSenderHost());
const Date_t now = _replExecutor.now();
- *outStatus = _topCoord->prepareHeartbeatResponse(
- now, args, _settings.ourSetName(), getMyLastOptime(), response);
+ *outStatus = _topCoord->prepareHeartbeatResponse(now,
+ args,
+ _settings.ourSetName(),
+ getMyLastAppliedOpTime(),
+ getMyLastDurableOpTime(),
+ response);
if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
_selfIndex < 0) {
// If this node does not belong to the configuration it knows about, send heartbeats
@@ -2409,7 +2633,8 @@ void ReplicationCoordinatorImpl::_processReplSetFresh_finish(
return;
}
- _topCoord->prepareFreshResponse(args, _replExecutor.now(), getMyLastOptime(), response, result);
+ _topCoord->prepareFreshResponse(
+ args, _replExecutor.now(), getMyLastAppliedOpTime(), response, result);
}
Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& args,
@@ -2440,7 +2665,8 @@ void ReplicationCoordinatorImpl::_processReplSetElect_finish(
return;
}
- _topCoord->prepareElectResponse(args, _replExecutor.now(), getMyLastOptime(), response, result);
+ _topCoord->prepareElectResponse(
+ args, _replExecutor.now(), getMyLastAppliedOpTime(), response, result);
}
ReplicationCoordinatorImpl::PostMemberStateUpdateAction
@@ -2453,7 +2679,7 @@ ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock(
_setConfigState_inlock(kConfigSteady);
// Must get this before changing our config.
- OpTime myOptime = _getMyLastOptime_inlock();
+ OpTime myOptime = _getMyLastAppliedOpTime_inlock();
_topCoord->updateConfig(newConfig, myIndex, _replExecutor.now(), myOptime);
_cachedTerm = _topCoord->getTerm();
const ReplicaSetConfig oldConfig = _rsConfig;
@@ -2512,6 +2738,31 @@ void ReplicationCoordinatorImpl::_wakeReadyWaiters_inlock() {
}
}
+Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(
+ const OldUpdatePositionArgs& updates, long long* configVersion) {
+ stdx::unique_lock<stdx::mutex> lock(_mutex);
+ Status status = Status::OK();
+ bool somethingChanged = false;
+ for (OldUpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
+ update != updates.updatesEnd();
+ ++update) {
+ status = _setLastOptime_inlock(*update, configVersion);
+ if (!status.isOK()) {
+ break;
+ }
+ somethingChanged = true;
+ }
+
+ if (somethingChanged && !_getMemberState_inlock().primary()) {
+ lock.unlock();
+ // Must do this outside _mutex
+ // TODO: enable _dr, remove _externalState when DataReplicator is used excl.
+ //_dr.slavesHaveProgressed();
+ _externalState->forwardSlaveProgress();
+ }
+ return status;
+}
+
Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(const UpdatePositionArgs& updates,
long long* configVersion) {
stdx::unique_lock<stdx::mutex> lock(_mutex);
@@ -2572,19 +2823,25 @@ bool ReplicationCoordinatorImpl::buildsIndexes() {
return self.shouldBuildIndexes();
}
-std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op) {
+std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op,
+ bool durablyWritten) {
std::vector<HostAndPort> hosts;
stdx::lock_guard<stdx::mutex> lk(_mutex);
for (size_t i = 0; i < _slaveInfo.size(); ++i) {
const SlaveInfo& slaveInfo = _slaveInfo[i];
- if (slaveInfo.opTime < op) {
+ if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) {
+ // Master-slave doesn't know the HostAndPort for itself at this point.
continue;
}
- if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) {
- // Master-slave doesn't know the HostAndPort for itself at this point.
+ if (durablyWritten) {
+ if (slaveInfo.lastDurableOpTime < op) {
+ continue;
+ }
+ } else if (slaveInfo.lastAppliedOpTime < op) {
continue;
}
+
hosts.push_back(slaveInfo.hostAndPort);
}
return hosts;
@@ -2739,7 +2996,7 @@ void ReplicationCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Da
_replExecutor.wait(cbh.getValue());
}
-void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) {
+void ReplicationCoordinatorImpl::resetLastOpTimesFromOplog(OperationContext* txn) {
StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn);
OpTime lastOpTime;
if (!lastOpTimeStatus.isOK()) {
@@ -2748,8 +3005,10 @@ void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn)
} else {
lastOpTime = lastOpTimeStatus.getValue();
}
+
stdx::unique_lock<stdx::mutex> lk(_mutex);
- _setMyLastOptimeAndReport_inlock(&lk, lastOpTime, true);
+ _setMyLastAppliedOpTime_inlock(lastOpTime, true);
+ _setMyLastDurableOpTimeAndReport_inlock(&lk, lastOpTime, true);
_externalState->setGlobalTimestamp(lastOpTime.getTimestamp());
}
@@ -2764,7 +3023,7 @@ void ReplicationCoordinatorImpl::_shouldChangeSyncSource(
}
*shouldChange = _topCoord->shouldChangeSyncSource(currentSource,
- getMyLastOptime(),
+ getMyLastAppliedOpTime(),
syncSourceLastOpTime,
syncSourceHasSyncSource,
_replExecutor.now());
@@ -2801,7 +3060,7 @@ void ReplicationCoordinatorImpl::_updateLastCommittedOpTime_inlock() {
auto memberConfig = _rsConfig.findMemberByID(sI.memberId);
invariant(memberConfig);
if (memberConfig->isVoter()) {
- votingNodesOpTimes.push_back(sI.opTime);
+ votingNodesOpTimes.push_back(sI.lastDurableOpTime);
}
}
@@ -2823,8 +3082,9 @@ void ReplicationCoordinatorImpl::_setLastCommittedOpTime(const OpTime& committed
}
void ReplicationCoordinatorImpl::_setLastCommittedOpTime_inlock(const OpTime& committedOpTime) {
- if (committedOpTime <= _lastCommittedOpTime)
+ if (committedOpTime <= _lastCommittedOpTime) {
return; // This may have come from an out-of-order heartbeat. Ignore it.
+ }
// This check is performed to ensure primaries do not commit an OpTime from a previous term.
if (_getMemberState_inlock().primary() && committedOpTime < _firstOpTimeOfMyTerm) {
@@ -2832,7 +3092,8 @@ void ReplicationCoordinatorImpl::_setLastCommittedOpTime_inlock(const OpTime& co
}
if (_getMemberState_inlock().arbiter()) {
- _setMyLastOptime_inlock(committedOpTime, false);
+ _setMyLastAppliedOpTime_inlock(committedOpTime, false);
+ _setMyLastDurableOpTime_inlock(committedOpTime, false);
}
_lastCommittedOpTime = committedOpTime;
@@ -2840,6 +3101,7 @@ void ReplicationCoordinatorImpl::_setLastCommittedOpTime_inlock(const OpTime& co
_externalState->notifyOplogMetadataWaiters();
auto maxSnapshotForOpTime = SnapshotInfo{committedOpTime, SnapshotName::max()};
+
if (!_uncommittedSnapshots.empty() && _uncommittedSnapshots.front() <= maxSnapshotForOpTime) {
// At least one uncommitted snapshot is ready to be blessed as committed.
@@ -2920,7 +3182,7 @@ void ReplicationCoordinatorImpl::_processReplSetRequestVotes_finish(
}
stdx::unique_lock<stdx::mutex> lk(_mutex);
- _topCoord->processReplSetRequestVotes(args, response, _getMyLastOptime_inlock());
+ _topCoord->processReplSetRequestVotes(args, response, _getMyLastAppliedOpTime_inlock());
*result = Status::OK();
}
@@ -2991,13 +3253,21 @@ void ReplicationCoordinatorImpl::_prepareReplResponseMetadata_finish(
rpc::ReplSetMetadata* metadata) {
OpTime lastReadableOpTime = getCurrentCommittedSnapshotOpTime();
OpTime lastVisibleOpTime = std::max(lastOpTimeFromClient, lastReadableOpTime);
- _topCoord->prepareReplResponseMetadata(metadata, lastVisibleOpTime, getLastCommittedOpTime());
+ _topCoord->prepareReplResponseMetadata(metadata, lastVisibleOpTime, _lastCommittedOpTime);
}
bool ReplicationCoordinatorImpl::isV1ElectionProtocol() {
return _protVersion.load() == 1;
}
+bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal() {
+ return getConfig().getWriteConcernMajorityShouldJournal();
+}
+
+bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal_inlock() const {
+ return _rsConfig.getWriteConcernMajorityShouldJournal();
+}
+
Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args,
ReplSetHeartbeatResponse* response) {
{
@@ -3038,8 +3308,12 @@ void ReplicationCoordinatorImpl::_processHeartbeatFinishV1(
auto senderHost(args.getSenderHost());
const Date_t now = _replExecutor.now();
- *outStatus = _topCoord->prepareHeartbeatResponseV1(
- now, args, _settings.ourSetName(), getMyLastOptime(), response);
+ *outStatus = _topCoord->prepareHeartbeatResponseV1(now,
+ args,
+ _settings.ourSetName(),
+ getMyLastAppliedOpTime(),
+ getMyLastDurableOpTime(),
+ response);
if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) &&
_selfIndex < 0) {
@@ -3089,7 +3363,8 @@ void ReplicationCoordinatorImpl::_summarizeAsHtml_finish(const CallbackArgs& cbD
return;
}
- output->setSelfOptime(getMyLastOptime());
+ // TODO(dannenberg) consider putting both optimes into the htmlsummary.
+ output->setSelfOptime(getMyLastAppliedOpTime());
output->setSelfUptime(time(0) - serverGlobalParams.started);
output->setNow(_replExecutor.now());
@@ -3394,5 +3669,19 @@ void ReplicationCoordinatorImpl::_scheduleElectionWinNotification() {
}
}
+WriteConcernOptions ReplicationCoordinatorImpl::populateUnsetWriteConcernOptionsSyncMode(
+ WriteConcernOptions wc) {
+ WriteConcernOptions writeConcern(wc);
+ if (writeConcern.syncMode == WriteConcernOptions::SyncMode::UNSET) {
+ if (writeConcern.wMode == WriteConcernOptions::kMajority && _isDurableStorageEngine() &&
+ getWriteConcernMajorityShouldJournal()) {
+ writeConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL;
+ } else {
+ writeConcern.syncMode = WriteConcernOptions::SyncMode::NONE;
+ }
+ }
+ return writeConcern;
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 8252ac53412..c5cde2742bf 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -37,6 +37,7 @@
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/repl/data_replicator.h"
#include "mongo/db/repl/member_state.h"
+#include "mongo/db/repl/old_update_position_args.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/replica_set_config.h"
#include "mongo/db/repl/replication_coordinator.h"
@@ -104,7 +105,8 @@ public:
ReplicationCoordinatorExternalState* externalState,
TopologyCoordinator* topoCoord,
ReplicationExecutor* replExec,
- int64_t prngSeed);
+ int64_t prngSeed,
+ stdx::function<bool()>* isDurableStorageEngineFn);
virtual ~ReplicationCoordinatorImpl();
// ================== Members of public ReplicationCoordinator API ===================
@@ -170,15 +172,18 @@ public:
virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts);
- virtual void setMyLastOptime(const OpTime& opTime);
+ virtual void setMyLastAppliedOpTime(const OpTime& opTime);
+ virtual void setMyLastDurableOpTime(const OpTime& opTime);
- virtual void setMyLastOptimeForward(const OpTime& opTime);
+ virtual void setMyLastAppliedOpTimeForward(const OpTime& opTime);
+ virtual void setMyLastDurableOpTimeForward(const OpTime& opTime);
- virtual void resetMyLastOptime();
+ virtual void resetMyLastOpTimes();
virtual void setMyHeartbeatMessage(const std::string& msg);
- virtual OpTime getMyLastOptime() const override;
+ virtual OpTime getMyLastAppliedOpTime() const override;
+ virtual OpTime getMyLastDurableOpTime() const override;
virtual ReadConcernResponse waitUntilOpTime(OperationContext* txn,
const ReadConcernArgs& settings) override;
@@ -199,6 +204,7 @@ public:
virtual void signalUpstreamUpdater() override;
+ virtual bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) override;
virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) override;
virtual Status processReplSetGetStatus(BSONObjBuilder* result) override;
@@ -245,6 +251,8 @@ public:
virtual Status processReplSetElect(const ReplSetElectArgs& args,
BSONObjBuilder* response) override;
+ virtual Status processReplSetUpdatePosition(const OldUpdatePositionArgs& updates,
+ long long* configVersion) override;
virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
long long* configVersion) override;
@@ -252,7 +260,8 @@ public:
virtual bool buildsIndexes() override;
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) override;
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op,
+ bool durablyWritten) override;
virtual std::vector<HostAndPort> getOtherNodesInReplSet() const override;
@@ -266,7 +275,7 @@ public:
virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) override;
- virtual void resetLastOpTimeFromOplog(OperationContext* txn) override;
+ virtual void resetLastOpTimesFromOplog(OperationContext* txn) override;
virtual bool shouldChangeSyncSource(const HostAndPort& currentSource,
const OpTime& syncSourceLastOpTime,
@@ -290,6 +299,8 @@ public:
virtual bool isV1ElectionProtocol() override;
+ virtual bool getWriteConcernMajorityShouldJournal() override;
+
virtual void summarizeAsHtml(ReplSetHtmlSummary* s) override;
virtual void dropAllSnapshots() override;
@@ -315,6 +326,9 @@ public:
virtual size_t getNumUncommittedSnapshots() override;
+ virtual WriteConcernOptions populateUnsetWriteConcernOptionsSyncMode(
+ WriteConcernOptions wc) override;
+
// ================== Test support API ===================
/**
@@ -341,9 +355,10 @@ public:
Date_t getPriorityTakeover_forTest() const;
/**
- * Simple wrapper around _setLastOptime_inlock to make it easier to test.
+ * Simple wrappers around _setLastOptime_inlock to make it easier to test.
*/
- Status setLastOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime);
+ Status setLastAppliedOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime);
+ Status setLastDurableOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime);
/**
* Non-blocking version of stepDown.
@@ -436,7 +451,8 @@ private:
int64_t prngSeed,
executor::NetworkInterface* network,
StorageInterface* storage,
- ReplicationExecutor* replExec);
+ ReplicationExecutor* replExec,
+ stdx::function<bool()>* isDurableStorageEngineFn);
/**
* Configuration states for a replica set node.
*
@@ -485,7 +501,10 @@ private:
// Struct that holds information about nodes in this replication group, mainly used for
// tracking replication progress for write concern satisfaction.
struct SlaveInfo {
- OpTime opTime; // Our last known OpTime that this slave has replicated to.
+ // Our last known OpTime that this slave has applied and journaled to.
+ OpTime lastDurableOpTime;
+ // Our last known OpTime that this slave has applied, whether journaled or unjournaled.
+ OpTime lastAppliedOpTime;
HostAndPort hostAndPort; // Client address of the slave.
int memberId =
-1; // Id of the node in the replica set config, or -1 if we're not a replSet.
@@ -519,11 +538,18 @@ private:
void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo);
/**
- * Updates the item in _slaveInfo pointed to by 'slaveInfo' with the given OpTime 'opTime'
- * and wakes up any threads waiting for replication that now have their write concern
- * satisfied.
+ * Updates the durableOpTime field on the item in _slaveInfo pointed to by 'slaveInfo' with the
+ * given OpTime 'opTime' and wakes up any threads waiting for replication that now have their
+ * write concern satisfied.
*/
- void _updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime);
+ void _updateSlaveInfoDurableOpTime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime);
+
+ /**
+ * Updates the appliedOpTime field on the item in _slaveInfo pointed to by 'slaveInfo' with the
+ * given OpTime 'opTime' and wakes up any threads waiting for replication that now have their
+ * write concern satisfied.
+ */
+ void _updateSlaveInfoAppliedOpTime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime);
/**
* Returns the index into _slaveInfo where data corresponding to ourself is stored.
@@ -533,6 +559,11 @@ private:
size_t _getMyIndexInSlaveInfo_inlock() const;
/**
+ * Returns the _writeConcernMajorityJournalDefault of our current _rsConfig.
+ */
+ bool getWriteConcernMajorityShouldJournal_inlock() const;
+
+ /**
* Helper method that removes entries from _slaveInfo if they correspond to a node
* with a member ID that is not in the current replica set config. Will always leave an
* entry for ourself at the beginning of _slaveInfo, even if we aren't present in the
@@ -665,15 +696,18 @@ private:
/**
* Helper for _doneWaitingForReplication_inlock that takes an integer write concern.
+ * "durablyWritten" indicates whether the operation has to be durably applied.
*/
- bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes);
+ bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes, bool durablyWritten);
/**
* Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a
* named write concern mode.
+ * "durablyWritten" indicates whether the operation has to be durably applied.
*/
bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime,
- const ReplicaSetTagPattern& tagPattern);
+ const ReplicaSetTagPattern& tagPattern,
+ bool durablyWritten);
Status _checkIfWriteConcernCanBeSatisfied_inlock(const WriteConcernOptions& writeConcern) const;
@@ -702,7 +736,8 @@ private:
int _getMyId_inlock() const;
- OpTime _getMyLastOptime_inlock() const;
+ OpTime _getMyLastAppliedOpTime_inlock() const;
+ OpTime _getMyLastDurableOpTime_inlock() const;
/**
* Bottom half of setFollowerMode.
@@ -722,24 +757,44 @@ private:
* This is only valid to call on replica sets.
* "configVersion" will be populated with our config version if it and the configVersion
* of "args" differ.
+ *
+ * The OldUpdatePositionArgs version provides support for the pre-3.2.2 format of
+ * UpdatePositionArgs.
*/
+ Status _setLastOptime_inlock(const OldUpdatePositionArgs::UpdateInfo& args,
+ long long* configVersion);
Status _setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args,
long long* configVersion);
/**
- * Helper method for setMyLastOptime that takes in a unique lock on
+ * Helper method for setMyLastAppliedOptime that takes in a unique lock on
+ * _mutex. The passed in lock must already be locked. It is unspecified what state the
+ * lock will be in after this method finishes.
+ *
+ * This function has the same rules for "opTime" as setMyLastAppliedOptime(), unless
+ * "isRollbackAllowed" is true.
+ *
+ * This function will also report our position externally (like upstream) if necessary.
+ */
+ void _setMyLastAppliedOpTimeAndReport_inlock(stdx::unique_lock<stdx::mutex>* lock,
+ const OpTime& opTime,
+ bool isRollbackAllowed);
+ void _setMyLastAppliedOpTime_inlock(const OpTime& opTime, bool isRollbackAllowed);
+
+ /**
+ * Helper method for setMyLastDurableOptime that takes in a unique lock on
* _mutex. The passed in lock must already be locked. It is unspecified what state the
* lock will be in after this method finishes.
*
- * This function has the same rules for "opTime" as setMyLastOptime(), unless
+ * This function has the same rules for "opTime" as setMyLastDurableOptime(), unless
* "isRollbackAllowed" is true.
*
* This function will also report our position externally (like upstream) if necessary.
*/
- void _setMyLastOptimeAndReport_inlock(stdx::unique_lock<stdx::mutex>* lock,
- const OpTime& opTime,
- bool isRollbackAllowed);
- void _setMyLastOptime_inlock(const OpTime& opTime, bool isRollbackAllowed);
+ void _setMyLastDurableOpTimeAndReport_inlock(stdx::unique_lock<stdx::mutex>* lock,
+ const OpTime& opTime,
+ bool isRollbackAllowed);
+ void _setMyLastDurableOpTime_inlock(const OpTime& opTime, bool isRollbackAllowed);
/**
* Schedules a heartbeat to be sent to "target" at "when". "targetIndex" is the index
@@ -766,9 +821,12 @@ private:
/**
* Helper for _handleHeartbeatResponse.
*
- * Updates the optime associated with the member at "memberIndex" in our config.
+ * Updates the lastDurableOpTime and lastAppliedOpTime associated with the member at
+ * "memberIndex" in our config.
*/
- void _updateOpTimeFromHeartbeat_inlock(int memberIndex, const OpTime& optime);
+ void _updateOpTimesFromHeartbeat_inlock(int targetIndex,
+ const OpTime& durableOpTime,
+ const OpTime& appliedOpTime);
/**
* Starts a heartbeat for each member in the current config. Called within the executor
@@ -1235,8 +1293,7 @@ private:
// TODO: ideally this should only change on rollbacks NOT on mongod restarts also.
int _rbid; // (M)
- // list of information about clients waiting on replication. Does *not* own the
- // WaiterInfos.
+ // list of information about clients waiting on replication. Does *not* own the WaiterInfos.
std::vector<WaiterInfo*> _replicationWaiterList; // (M)
// list of information about clients waiting for a particular opTime.
@@ -1391,6 +1448,9 @@ private:
// Cached copy of the current config protocol version.
AtomicInt64 _protVersion; // (S)
+
+ // Lambda indicating durability of storageEngine.
+ stdx::function<bool()> _isDurableStorageEngine; // (R)
};
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
index c2105a1007a..e1109d0cc4d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp
@@ -121,7 +121,7 @@ void ReplicationCoordinatorImpl::_startElectSelf() {
invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
- OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+ OpTime lastOpTimeApplied(_getMyLastAppliedOpTime_inlock());
if (lastOpTimeApplied.isNull()) {
log() << "not trying to elect self, "
@@ -275,7 +275,7 @@ void ReplicationCoordinatorImpl::_recoverFromElectionTie(
return;
}
auto now = _replExecutor.now();
- auto lastOpApplied = getMyLastOptime();
+ auto lastOpApplied = getMyLastAppliedOpTime();
if (_topCoord->checkShouldStandForElection(now, lastOpApplied)) {
fassert(28817, _topCoord->becomeCandidateIfElectable(now, lastOpApplied));
_startElectSelf();
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
index ce50d2e9619..6dac4852d0f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp
@@ -161,7 +161,7 @@ TEST_F(ReplCoordElectTest, ElectionSucceedsWhenNodeIsTheOnlyElectableNode) {
ASSERT(getReplCoord()->getMemberState().secondary())
<< getReplCoord()->getMemberState().toString();
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0));
NetworkInterfaceMock* net = getNet();
net->enterNetwork();
@@ -232,7 +232,7 @@ TEST_F(ReplCoordElectTest, ElectionSucceedsWhenAllNodesVoteYea) {
<< "node3:12345")));
assertStartSuccess(configObj, HostAndPort("node1", 12345));
OperationContextNoop txn;
- getReplCoord()->setMyLastOptime(OpTime{{100, 1}, 0});
+ getReplCoord()->setMyLastAppliedOpTime(OpTime{{100, 1}, 0});
getExternalState()->setLastOpTime(OpTime{{100, 1}, 0});
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
@@ -259,7 +259,7 @@ TEST_F(ReplCoordElectTest, ElectionFailsWhenOneNodeVotesNay) {
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -304,7 +304,7 @@ TEST_F(ReplCoordElectTest, VotesWithStringValuesAreNotCountedAsYeas) {
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -349,7 +349,7 @@ TEST_F(ReplCoordElectTest, ElectionsAbortWhenNodeTransitionsToRollbackState) {
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -384,7 +384,7 @@ TEST_F(ReplCoordElectTest, NodeWillNotStandForElectionDuringHeartbeatReconfig) {
<< "node5:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
// set hbreconfig to hang while in progress
getExternalState()->setStoreLocalConfigDocumentToHang(true);
@@ -478,7 +478,7 @@ TEST_F(ReplCoordElectTest, StepsDownRemoteIfNodeHasHigherPriorityThanCurrentPrim
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
auto net = getNet();
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
index 1ed1f7769b3..97b545a1292 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
@@ -124,9 +124,9 @@ void ReplicationCoordinatorImpl::_startElectSelfV1() {
invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
- OpTime lastOpTimeApplied(_getMyLastOptime_inlock());
+ OpTime lastOpTimeDurable(_getMyLastDurableOpTime_inlock());
- if (lastOpTimeApplied == OpTime()) {
+ if (lastOpTimeDurable == OpTime()) {
log() << "not trying to elect self, "
"do not yet have a complete set of data from any point in time";
return;
@@ -147,7 +147,7 @@ void ReplicationCoordinatorImpl::_startElectSelfV1() {
_selfIndex,
_topCoord->getTerm(),
true, // dry run
- getMyLastOptime(),
+ getMyLastDurableOpTime(),
stdx::bind(&ReplicationCoordinatorImpl::_onDryRunComplete, this, term));
if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
return;
@@ -245,7 +245,7 @@ void ReplicationCoordinatorImpl::_startVoteRequester(long long newTerm) {
_selfIndex,
_topCoord->getTerm(),
false,
- getMyLastOptime(),
+ getMyLastDurableOpTime(),
stdx::bind(&ReplicationCoordinatorImpl::_onVoteRequestComplete, this, newTerm));
if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
return;
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index bd8ddfbc139..e29e772b011 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -104,7 +104,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenNodeIsTheOnlyElectableNode) {
ASSERT(getReplCoord()->getMemberState().secondary())
<< getReplCoord()->getMemberState().toString();
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0));
auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest();
ASSERT_NOT_EQUALS(Date_t(), electionTimeoutWhen);
@@ -160,7 +161,8 @@ TEST_F(ReplCoordElectV1Test, StartElectionDoesNotStartAnElectionWhenNodeIsRecove
ASSERT(getReplCoord()->getMemberState().recovering())
<< getReplCoord()->getMemberState().toString();
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0));
simulateEnoughHeartbeatsForElectability();
auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest();
@@ -177,7 +179,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenNodeIsTheOnlyNode) {
<< "node1:12345")) << "protocolVersion" << 1),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
getReplCoord()->waitForElectionFinish_forTest();
ASSERT(getReplCoord()->getMemberState().primary())
@@ -208,7 +211,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenAllNodesVoteYea) {
<< 1);
assertStartSuccess(configObj, HostAndPort("node1", 12345));
OperationContextNoop txn;
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
startCapturingLogMessages();
simulateSuccessfulV1Election();
@@ -243,7 +247,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenMaxSevenNodesVoteYea) {
<< "protocolVersion" << 1);
assertStartSuccess(configObj, HostAndPort("node1", 12345));
OperationContextNoop txn;
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
startCapturingLogMessages();
simulateSuccessfulV1Election();
@@ -276,7 +281,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenInsufficientVotesAreReceivedDuring
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -331,7 +337,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenDryRunResponseContainsANewerTerm)
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -390,7 +397,8 @@ TEST_F(ReplCoordElectV1Test, NodeWillNotStandForElectionDuringHeartbeatReconfig)
<< "protocolVersion" << 1),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
// set hbreconfig to hang while in progress
getExternalState()->setStoreLocalConfigDocumentToHang(true);
@@ -498,7 +506,8 @@ TEST_F(ReplCoordElectV1Test, NodeWillNotStandForElectionDuringHeartbeatReconfig)
//
// OperationContextNoop txn;
// OpTime time1(Timestamp(100, 1), 0);
-// getReplCoord()->setMyLastOptime(time1);
+// getReplCoord()->setMyLastAppliedOpTime(time1);
+// getReplCoord()->setMyLastDurableOpTime(time1);
// ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
//
// simulateEnoughHeartbeatsForElectability();
@@ -556,7 +565,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenInsufficientVotesAreReceivedDuring
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -603,7 +613,8 @@ TEST_F(ReplCoordElectV1Test, ElectionsAbortWhenNodeTransitionsToRollbackState) {
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -637,7 +648,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenVoteRequestResponseContainsANewerT
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -687,7 +699,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringDryRun) {
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -722,7 +735,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringActualElection) {
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
simulateEnoughHeartbeatsForElectability();
@@ -775,7 +789,8 @@ TEST_F(ReplCoordElectV1Test, SchedulesPriorityTakeoverIfNodeHasHigherPriorityTha
OperationContextNoop txn;
OpTime time1(Timestamp(100, 1), 0);
- replCoord->setMyLastOptime(time1);
+ replCoord->setMyLastAppliedOpTime(time1);
+ replCoord->setMyLastDurableOpTime(time1);
ASSERT(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
ASSERT_EQUALS(Date_t(), replCoord->getPriorityTakeover_forTest());
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 74fb8fc8fe9..211abb412a2 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -156,7 +156,7 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
}
}
const Date_t now = _replExecutor.now();
- const OpTime lastApplied = getMyLastOptime(); // Locks and unlocks _mutex.
+ const OpTime lastApplied = getMyLastAppliedOpTime(); // Locks and unlocks _mutex.
Milliseconds networkTime(0);
StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse);
@@ -183,15 +183,20 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
now, networkTime, target, hbStatusResponse, lastApplied);
if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() &&
- hbStatusResponse.getValue().hasOpTime() && targetIndex >= 0 &&
- hbStatusResponse.getValue().hasState() &&
+ targetIndex >= 0 && hbStatusResponse.getValue().hasState() &&
hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- if (hbStatusResponse.getValue().getConfigVersion() == _rsConfig.getConfigVersion()) {
- _updateOpTimeFromHeartbeat_inlock(targetIndex, hbStatusResponse.getValue().getOpTime());
- // TODO: Enable with Data Replicator
- // lk.unlock();
- //_dr.slavesHaveProgressed();
+ ReplSetHeartbeatResponse hbResp = hbStatusResponse.getValue();
+ if (hbResp.hasAppliedOpTime()) {
+ stdx::unique_lock<stdx::mutex> lk(_mutex);
+ if (hbResp.getConfigVersion() == _rsConfig.getConfigVersion()) {
+ _updateOpTimesFromHeartbeat_inlock(
+ targetIndex,
+ hbResp.hasDurableOpTime() ? hbResp.getDurableOpTime() : OpTime(),
+ hbResp.getAppliedOpTime());
+ // TODO: Enable with Data Replicator
+ // lk.unlock();
+ //_dr.slavesHaveProgressed();
+ }
}
}
@@ -204,14 +209,18 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
_handleHeartbeatResponseAction(action, hbStatusResponse);
}
-void ReplicationCoordinatorImpl::_updateOpTimeFromHeartbeat_inlock(int targetIndex,
- const OpTime& optime) {
+void ReplicationCoordinatorImpl::_updateOpTimesFromHeartbeat_inlock(int targetIndex,
+ const OpTime& durableOpTime,
+ const OpTime& appliedOpTime) {
invariant(_selfIndex >= 0);
invariant(targetIndex >= 0);
SlaveInfo& slaveInfo = _slaveInfo[targetIndex];
- if (optime > slaveInfo.opTime) {
- _updateSlaveInfoOptime_inlock(&slaveInfo, optime);
+ if (appliedOpTime > slaveInfo.lastAppliedOpTime) {
+ _updateSlaveInfoAppliedOpTime_inlock(&slaveInfo, appliedOpTime);
+ }
+ if (durableOpTime > slaveInfo.lastDurableOpTime) {
+ _updateSlaveInfoDurableOpTime_inlock(&slaveInfo, durableOpTime);
}
}
@@ -608,7 +617,7 @@ void ReplicationCoordinatorImpl::_handleLivenessTimeout(
// Secondaries might not see other secondaries in the cluster if they are not
// downstream.
HeartbeatResponseAction action =
- _topCoord->setMemberAsDown(now, memberIndex, _getMyLastOptime_inlock());
+ _topCoord->setMemberAsDown(now, memberIndex, _getMyLastDurableOpTime_inlock());
// Don't mind potential asynchronous stepdown as this is the last step of
// liveness check.
_handleHeartbeatResponseAction(action, makeStatusWith<ReplSetHeartbeatResponse>());
@@ -747,7 +756,7 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(bool isPriorityTake
_cancelAndRescheduleElectionTimeout_inlock();
}
- if (!_topCoord->becomeCandidateIfElectable(_replExecutor.now(), getMyLastOptime())) {
+ if (!_topCoord->becomeCandidateIfElectable(_replExecutor.now(), getMyLastDurableOpTime())) {
if (isPriorityTakeOver) {
log() << "Not starting an election for a priority takeover, since we are not "
"electable";
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index c3dc217d91e..2233e21cc21 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -353,7 +353,7 @@ TEST_F(ReplCoordHBV1Test, ArbiterRecordsCommittedOpTimeFromHeartbeatMetadata) {
ASSERT_OK(metadata.getStatus());
getReplCoord()->processReplSetMetadata(metadata.getValue());
- ASSERT_EQ(getReplCoord()->getMyLastOptime().getTimestamp(), expected.getTimestamp());
+ ASSERT_EQ(getReplCoord()->getMyLastAppliedOpTime().getTimestamp(), expected.getTimestamp());
};
OpTime committedOpTime{Timestamp{10, 10}, 10};
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 609a8249a4f..5c97e3bc976 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -80,7 +80,8 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenReconfigReceivedWhileSecondary) {
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
BSONObjBuilder result;
ReplSetReconfigArgs args;
@@ -102,7 +103,8 @@ TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWith
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
BSONObjBuilder result;
@@ -135,7 +137,8 @@ TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWith
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
BSONObjBuilder result;
@@ -167,7 +170,8 @@ TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWith
<< BSON("replicaSetId" << OID::gen())),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
BSONObjBuilder result;
@@ -200,7 +204,8 @@ TEST_F(ReplCoordTest,
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
BSONObjBuilder result;
@@ -265,7 +270,8 @@ TEST_F(ReplCoordTest,
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
Status status(ErrorCodes::InternalError, "Not Set");
@@ -303,7 +309,8 @@ TEST_F(ReplCoordTest, NodeReturnsOutOfDiskSpaceWhenSavingANewConfigFailsDuringRe
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
Status status(ErrorCodes::InternalError, "Not Set");
@@ -329,7 +336,8 @@ TEST_F(ReplCoordTest,
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
Status status(ErrorCodes::InternalError, "Not Set");
@@ -365,7 +373,8 @@ TEST_F(ReplCoordTest, NodeReturnsConfigurationInProgressWhenReceivingAReconfigWh
init();
start(HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
// initiate
Status status(ErrorCodes::InternalError, "Not Set");
@@ -407,7 +416,8 @@ TEST_F(ReplCoordTest, PrimaryNodeAcceptsNewConfigWhenReceivingAReconfigWithAComp
<< BSON("replicaSetId" << OID::gen())),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
Status status(ErrorCodes::InternalError, "Not Set");
@@ -448,7 +458,8 @@ TEST_F(
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
@@ -503,7 +514,8 @@ TEST_F(ReplCoordTest, NodeDoesNotAcceptHeartbeatReconfigWhileInTheMidstOfReconfi
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
@@ -565,7 +577,8 @@ TEST_F(ReplCoordTest, NodeAcceptsConfigFromAReconfigWithForceTrueWhileNotPrimary
<< "node2:12345"))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
// fail before forced
BSONObjBuilder result;
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index db7fc578af8..d7f37369a35 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/operation_context_noop.h"
#include "mongo/db/repl/handshake_args.h"
#include "mongo/db/repl/is_master_response.h"
+#include "mongo/db/repl/old_update_position_args.h"
#include "mongo/db/repl/operation_context_repl_mock.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/read_concern_args.h"
@@ -94,7 +95,8 @@ struct OpTimeWithTermZero {
};
void runSingleNodeElection(ReplicationCoordinatorImpl* replCoord) {
- replCoord->setMyLastOptime(OpTime(Timestamp(1, 0), 0));
+ replCoord->setMyLastAppliedOpTime(OpTime(Timestamp(1, 0), 0));
+ replCoord->setMyLastDurableOpTime(OpTime(Timestamp(1, 0), 0));
ASSERT(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
replCoord->waitForElectionFinish_forTest();
@@ -749,7 +751,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWith
// Become primary.
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
ASSERT(getReplCoord()->getMemberState().primary());
@@ -760,6 +763,80 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWith
ASSERT_TRUE(getExternalState()->isApplierSignaledToCancelFetcher());
}
+TEST_F(ReplCoordTest,
+ NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodesHaveTheWriteDurable) {
+ OperationContextNoop txn;
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2) << BSON("host"
+ << "node4:12345"
+ << "_id" << 3))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulV1Election();
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+ writeConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL;
+
+ // 1 node waiting for time 1
+ ReplicationCoordinator::StatusAndDuration statusAndDur =
+ getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 2 nodes waiting for time1
+ writeConcern.wNumNodes = 2;
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ // Applied is not durable and will not satisfy WriteConcern with SyncMode JOURNAL.
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time1));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 2 nodes waiting for time2
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time2));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+
+ // 3 nodes waiting for time2
+ writeConcern.wNumNodes = 3;
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time2));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, time2));
+ statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
+ ASSERT_OK(statusAndDur.status);
+}
+
TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodesHaveTheWrite) {
OperationContextNoop txn;
assertStartSuccess(
@@ -778,7 +855,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes
<< "_id" << 3))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
OpTimeWithTermZero time1(100, 1);
@@ -792,7 +870,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes
ReplicationCoordinator::StatusAndDuration statusAndDur =
getReplCoord()->awaitReplication(&txn, time1, writeConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
ASSERT_OK(statusAndDur.status);
@@ -800,17 +879,19 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes
writeConcern.wNumNodes = 2;
statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern);
ASSERT_OK(statusAndDur.status);
// 2 nodes waiting for time2
statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- getReplCoord()->setMyLastOptime(time2);
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time2));
statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
ASSERT_OK(statusAndDur.status);
@@ -818,7 +899,7 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes
writeConcern.wNumNodes = 3;
statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time2));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time2));
statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern);
ASSERT_OK(statusAndDur.status);
}
@@ -842,7 +923,8 @@ TEST_F(ReplCoordTest,
<< "node4"))),
HostAndPort("node0"));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
OpTime time1(Timestamp(100, 1), 1);
@@ -858,7 +940,9 @@ TEST_F(ReplCoordTest,
ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern, statusAndDur.status);
}
-TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHaveTheWrite) {
+TEST_F(
+ ReplCoordTest,
+ NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHaveTheWriteAndTheWriteIsInACommittedSnapshot) {
auto service = stdx::make_unique<ServiceContextNoop>();
auto client = service->makeClient("test");
OperationContextNoop txn(client.get(), 100);
@@ -901,7 +985,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav
<< BSON("dc" << 2 << "rack" << 3)))),
HostAndPort("node0"));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
simulateSuccessfulV1Election();
OpTime time1(Timestamp(100, 1), 1);
@@ -912,6 +997,7 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
+ majorityWriteConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL;
WriteConcernOptions multiDCWriteConcern;
multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -923,7 +1009,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav
// Nothing satisfied
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
ReplicationCoordinator::StatusAndDuration statusAndDur =
getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
@@ -933,8 +1020,10 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
// Majority satisfied but not either custom mode
- getReplCoord()->setLastOptime_forTest(2, 1, time1);
- getReplCoord()->setLastOptime_forTest(2, 2, time1);
+ getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1);
+ getReplCoord()->setLastDurableOptime_forTest(2, 1, time1);
+ getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1);
+ getReplCoord()->setLastDurableOptime_forTest(2, 2, time1);
getReplCoord()->onSnapshotCreate(time1, SnapshotName(1));
statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
@@ -945,7 +1034,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
// All modes satisfied
- getReplCoord()->setLastOptime_forTest(2, 3, time1);
+ getReplCoord()->setLastAppliedOptime_forTest(2, 3, time1);
+ getReplCoord()->setLastDurableOptime_forTest(2, 3, time1);
statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern);
ASSERT_OK(statusAndDur.status);
@@ -979,8 +1069,10 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav
ASSERT_OK(statusAndDur.status);
// multiDC satisfied but not majority or multiRack
- getReplCoord()->setMyLastOptime(time2);
- getReplCoord()->setLastOptime_forTest(2, 3, time2);
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ getReplCoord()->setLastAppliedOptime_forTest(2, 3, time2);
+ getReplCoord()->setLastDurableOptime_forTest(2, 3, time2);
statusAndDur = getReplCoord()->awaitReplication(&txn, time2, majorityWriteConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
@@ -1009,7 +1101,7 @@ public:
_optime = ot;
}
- void setWriteConcern(const WriteConcernOptions& wc) {
+ void setWriteConcern(WriteConcernOptions wc) {
_writeConcern = wc;
}
@@ -1061,7 +1153,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), &txn);
@@ -1077,8 +1170,9 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie
awaiter.setOpTime(time1);
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
ASSERT_OK(statusAndDur.status);
awaiter.reset();
@@ -1086,8 +1180,9 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie
// 2 nodes waiting for time2
awaiter.setOpTime(time2);
awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time2));
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time2));
statusAndDur = awaiter.getResult();
ASSERT_OK(statusAndDur.status);
awaiter.reset();
@@ -1096,7 +1191,7 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie
writeConcern.wNumNodes = 3;
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2));
statusAndDur = awaiter.getResult();
ASSERT_OK(statusAndDur.status);
awaiter.reset();
@@ -1117,7 +1212,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedWhenAWriteConcernTimesOutBefo
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), &txn);
@@ -1133,8 +1229,9 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedWhenAWriteConcernTimesOutBefo
awaiter.setOpTime(time2);
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
awaiter.reset();
@@ -1156,7 +1253,8 @@ TEST_F(ReplCoordTest,
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), &txn);
@@ -1172,8 +1270,8 @@ TEST_F(ReplCoordTest,
awaiter.setOpTime(time2);
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1));
shutdown();
ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, statusAndDur.status);
@@ -1197,7 +1295,8 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenSteppingDownBeforeSatisfyingAWrite
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), &txn);
@@ -1213,8 +1312,8 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenSteppingDownBeforeSatisfyingAWrite
awaiter.setOpTime(time2);
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1));
getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000));
ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status);
@@ -1236,7 +1335,8 @@ TEST_F(ReplCoordTest,
<< "node3"))),
HostAndPort("node1"));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), &txn);
@@ -1253,8 +1353,8 @@ TEST_F(ReplCoordTest,
awaiter.setOpTime(time2);
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1));
txn.setCheckForInterruptStatus(kInterruptedStatus);
getReplCoord()->interrupt(opID);
@@ -1310,7 +1410,8 @@ TEST_F(ReplCoordTest, NodeChangesTermAndStepsDownWhenAndOnlyWhenUpdateTermSuppli
<< "test3:1234"))
<< "protocolVersion" << 1),
HostAndPort("test1", 1234));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
@@ -1356,7 +1457,8 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha
<< "test3:1234"))
<< "protocolVersion" << 1),
HostAndPort("test1", 1234));
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
@@ -1418,9 +1520,10 @@ TEST_F(StepDownTest, NodeReturnsNotMasterWhenAskedToStepDownAsANonPrimaryNode) {
OperationContextReplMock txn;
OpTimeWithTermZero optime1(100, 1);
// All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+ getReplCoord()->setMyLastAppliedOpTime(optime1);
+ getReplCoord()->setMyLastDurableOpTime(optime1);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1));
Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(0));
ASSERT_EQUALS(ErrorCodes::NotMaster, status);
@@ -1432,9 +1535,10 @@ TEST_F(StepDownTest,
OperationContextReplMock txn;
OpTimeWithTermZero optime1(100, 1);
// All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+ getReplCoord()->setMyLastAppliedOpTime(optime1);
+ getReplCoord()->setMyLastDurableOpTime(optime1);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1));
simulateSuccessfulV1Election();
@@ -1451,9 +1555,10 @@ TEST_F(StepDownTest,
OperationContextReplMock txn;
OpTimeWithTermZero optime1(100, 1);
// All nodes are caught up
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+ getReplCoord()->setMyLastAppliedOpTime(optime1);
+ getReplCoord()->setMyLastDurableOpTime(optime1);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1));
simulateSuccessfulV1Election();
@@ -1469,7 +1574,8 @@ TEST_F(StepDownTest,
hbResp.setSetName(hbArgs.getSetName());
hbResp.setState(MemberState::RS_SECONDARY);
hbResp.setConfigVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime1);
+ hbResp.setDurableOpTime(optime1);
+ hbResp.setAppliedOpTime(optime1);
BSONObjBuilder respObj;
respObj << "ok" << 1;
hbResp.addToBSON(&respObj, false);
@@ -1525,9 +1631,10 @@ TEST_F(StepDownTest,
OpTimeWithTermZero optime2(100, 2);
// No secondary is caught up
auto repl = getReplCoord();
- repl->setMyLastOptime(optime2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1));
+ repl->setMyLastAppliedOpTime(optime2);
+ repl->setMyLastDurableOpTime(optime2);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1));
simulateSuccessfulV1Election();
@@ -1559,9 +1666,10 @@ TEST_F(StepDownTest,
OpTimeWithTermZero optime2(100, 2);
// No secondary is caught up
auto repl = getReplCoord();
- repl->setMyLastOptime(optime2);
- ASSERT_OK(repl->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(repl->setLastOptime_forTest(1, 2, optime1));
+ repl->setMyLastAppliedOpTime(optime2);
+ repl->setMyLastDurableOpTime(optime2);
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, optime1));
simulateSuccessfulV1Election();
@@ -1589,7 +1697,8 @@ TEST_F(StepDownTest,
hbResp.setSetName(hbArgs.getSetName());
hbResp.setState(MemberState::RS_SECONDARY);
hbResp.setConfigVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime2);
+ hbResp.setAppliedOpTime(optime2);
+ hbResp.setDurableOpTime(optime2);
BSONObjBuilder respObj;
respObj << "ok" << 1;
hbResp.addToBSON(&respObj, false);
@@ -1615,9 +1724,10 @@ TEST_F(StepDownTest,
OpTimeWithTermZero optime2(100, 2);
// No secondary is caught up
auto repl = getReplCoord();
- repl->setMyLastOptime(optime2);
- ASSERT_OK(repl->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(repl->setLastOptime_forTest(1, 2, optime1));
+ repl->setMyLastAppliedOpTime(optime2);
+ repl->setMyLastDurableOpTime(optime2);
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, optime1));
simulateSuccessfulV1Election();
@@ -1673,7 +1783,8 @@ TEST_F(StepDownTest,
hbResp.setSetName(hbArgs.getSetName());
hbResp.setState(MemberState::RS_SECONDARY);
hbResp.setConfigVersion(hbArgs.getConfigVersion());
- hbResp.setOpTime(optime2);
+ hbResp.setAppliedOpTime(optime2);
+ hbResp.setDurableOpTime(optime2);
BSONObjBuilder respObj;
respObj << "ok" << 1;
hbResp.addToBSON(&respObj, false);
@@ -1697,9 +1808,10 @@ TEST_F(StepDownTest, NodeReturnsInterruptedWhenInterruptedDuringStepDown) {
OpTimeWithTermZero optime2(100, 2);
// No secondary is caught up
auto repl = getReplCoord();
- repl->setMyLastOptime(optime2);
- ASSERT_OK(repl->setLastOptime_forTest(1, 1, optime1));
- ASSERT_OK(repl->setLastOptime_forTest(1, 2, optime1));
+ repl->setMyLastAppliedOpTime(optime2);
+ repl->setMyLastDurableOpTime(optime2);
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, optime1));
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, optime1));
simulateSuccessfulV1Election();
ASSERT_TRUE(repl->getMemberState().primary());
@@ -1770,18 +1882,92 @@ TEST_F(ReplCoordTest, NodeIncludesOtherMembersProgressInUpdatePositionCommand) {
<< "test1:1234")
<< BSON("_id" << 1 << "host"
<< "test2:1234") << BSON("_id" << 2 << "host"
+ << "test3:1234")
+ << BSON("_id" << 3 << "host"
+ << "test4:1234"))),
+ HostAndPort("test1", 1234));
+ OpTime optime1({2, 1}, 1);
+ OpTime optime2({100, 1}, 1);
+ OpTime optime3({100, 2}, 1);
+ getReplCoord()->setMyLastAppliedOpTime(optime1);
+ getReplCoord()->setMyLastDurableOpTime(optime1);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime2));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime3));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 2, optime3));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optime3));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 3, optime1));
+
+ // Check that the proper BSON is generated for the replSetUpdatePositionCommand
+ BSONObjBuilder cmdBuilder;
+ getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder);
+ BSONObj cmd = cmdBuilder.done();
+
+ ASSERT_EQUALS(2, cmd.nFields());
+ ASSERT_EQUALS("replSetUpdatePosition", cmd.firstElement().fieldNameStringData());
+
+ std::set<long long> memberIds;
+ BSONForEach(entryElement, cmd["optimes"].Obj()) {
+ OpTime durableOpTime;
+ OpTime appliedOpTime;
+ BSONObj entry = entryElement.Obj();
+ long long memberId = entry["memberId"].Number();
+ memberIds.insert(memberId);
+ if (memberId == 0) {
+ log() << 0;
+ ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime));
+ ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime));
+ ASSERT_EQUALS(optime1, appliedOpTime);
+ ASSERT_EQUALS(optime1, durableOpTime);
+ } else if (memberId == 1) {
+ log() << 1;
+ ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime));
+ ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime));
+ ASSERT_EQUALS(optime2, appliedOpTime);
+ ASSERT_EQUALS(OpTime(), durableOpTime);
+ } else if (memberId == 2) {
+ log() << 2;
+ ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime));
+ ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime));
+ ASSERT_EQUALS(optime3, appliedOpTime);
+ ASSERT_EQUALS(optime3, durableOpTime);
+ } else {
+ log() << 3;
+ ASSERT_EQUALS(3, memberId);
+ ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime));
+ ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime));
+ ASSERT_EQUALS(optime3, appliedOpTime);
+ ASSERT_EQUALS(optime1, durableOpTime);
+ }
+ }
+ ASSERT_EQUALS(4U, memberIds.size()); // Make sure we saw all 4 nodes
+}
+
+TEST_F(ReplCoordTest, NodeIncludesOtherMembersProgressInOldUpdatePositionCommand) {
+ OperationContextNoop txn;
+ init("mySet/test1:1234,test2:1234,test3:1234");
+ assertStartSuccess(
+ BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234")
+ << BSON("_id" << 1 << "host"
+ << "test2:1234") << BSON("_id" << 2 << "host"
<< "test3:1234"))),
HostAndPort("test1", 1234));
OpTimeWithTermZero optime1(100, 1);
OpTimeWithTermZero optime2(100, 2);
OpTimeWithTermZero optime3(2, 1);
- getReplCoord()->setMyLastOptime(optime1);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime2));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime3));
+ getReplCoord()->setMyLastAppliedOpTime(optime1);
+ getReplCoord()->setMyLastDurableOpTime(optime1);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime2));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 1, optime2));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime3));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 2, optime3));
// Check that the proper BSON is generated for the replSetUpdatePositionCommand
BSONObjBuilder cmdBuilder;
- getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder);
+ getReplCoord()->prepareOldReplSetUpdatePositionCommand(&cmdBuilder);
BSONObj cmd = cmdBuilder.done();
ASSERT_EQUALS(2, cmd.nFields());
@@ -1821,7 +2007,8 @@ TEST_F(ReplCoordTest,
HostAndPort("test2", 1234));
OperationContextNoop txn;
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
// Can't unset maintenance mode if it was never set to begin with.
Status status = getReplCoord()->setMaintenanceMode(false);
@@ -1844,7 +2031,8 @@ TEST_F(ReplCoordTest,
HostAndPort("test2", 1234));
OperationContextNoop txn;
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
// valid set
ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
@@ -1872,7 +2060,8 @@ TEST_F(ReplCoordTest, AllowAsManyUnsetMaintenanceModesAsThereHaveBeenSetMaintena
HostAndPort("test2", 1234));
OperationContextNoop txn;
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
// Can set multiple times
ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
@@ -1902,7 +2091,8 @@ TEST_F(ReplCoordTest, SettingAndUnsettingMaintenanceModeShouldNotAffectRollbackS
HostAndPort("test2", 1234));
OperationContextNoop txn;
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
// From rollback, entering and exiting maintenance mode doesn't change perceived
// state.
@@ -1940,7 +2130,8 @@ TEST_F(ReplCoordTest, DoNotAllowMaintenanceModeWhilePrimary) {
HostAndPort("test2", 1234));
OperationContextNoop txn;
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
// Can't modify maintenance mode when PRIMARY
simulateSuccessfulV1Election();
@@ -1972,7 +2163,8 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection)
HostAndPort("test2", 1234));
OperationContextNoop txn;
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
// TODO this election shouldn't have to happen.
simulateSuccessfulV1Election();
@@ -2018,6 +2210,50 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection)
}
TEST_F(ReplCoordTest,
+ NodeReturnsACompleteListOfNodesWeKnowHaveTheWriteDurablyInResponseToGetHostsWrittenTo) {
+ HostAndPort myHost("node1:12345");
+ HostAndPort client1Host("node2:12345");
+ HostAndPort client2Host("node3:12345");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host" << myHost.toString())
+ << BSON("_id" << 1 << "host" << client1Host.toString())
+ << BSON("_id" << 2 << "host" << client2Host.toString()))),
+ HostAndPort("node1", 12345));
+ OperationContextNoop txn;
+
+ OpTimeWithTermZero time1(100, 1);
+ OpTimeWithTermZero time2(100, 2);
+
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time1));
+
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, true);
+ ASSERT_EQUALS(1U, caughtUpHosts.size());
+ ASSERT_EQUALS(myHost, caughtUpHosts[0]);
+
+ // Ensure updating applied does not affect the results for getHostsWritten durably.
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, true);
+ ASSERT_EQUALS(1U, caughtUpHosts.size());
+ ASSERT_EQUALS(myHost, caughtUpHosts[0]);
+
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time2));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, true);
+ ASSERT_EQUALS(2U, caughtUpHosts.size());
+ if (myHost == caughtUpHosts[0]) {
+ ASSERT_EQUALS(client2Host, caughtUpHosts[1]);
+ } else {
+ ASSERT_EQUALS(client2Host, caughtUpHosts[0]);
+ ASSERT_EQUALS(myHost, caughtUpHosts[1]);
+ }
+}
+
+TEST_F(ReplCoordTest,
NodeReturnsACompleteListOfNodesWeKnowHaveTheWriteInResponseToGetHostsWrittenTo) {
HostAndPort myHost("node1:12345");
HostAndPort client1Host("node2:12345");
@@ -2034,15 +2270,16 @@ TEST_F(ReplCoordTest,
OpTimeWithTermZero time1(100, 1);
OpTimeWithTermZero time2(100, 2);
- getReplCoord()->setMyLastOptime(time2);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1));
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1));
- std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false);
ASSERT_EQUALS(1U, caughtUpHosts.size());
ASSERT_EQUALS(myHost, caughtUpHosts[0]);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2));
- caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2));
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false);
ASSERT_EQUALS(2U, caughtUpHosts.size());
if (myHost == caughtUpHosts[0]) {
ASSERT_EQUALS(client2Host, caughtUpHosts[1]);
@@ -2068,14 +2305,15 @@ TEST_F(ReplCoordTest, NodeDoesNotIncludeItselfWhenRunningGetHostsWrittenToInMast
ASSERT_OK(handshake.initialize(BSON("handshake" << client)));
ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake));
- getReplCoord()->setMyLastOptime(time2);
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time1.timestamp));
- std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false);
ASSERT_EQUALS(0U, caughtUpHosts.size()); // self doesn't get included in master-slave
ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time2.timestamp));
- caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2);
+ caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false);
ASSERT_EQUALS(1U, caughtUpHosts.size());
ASSERT_EQUALS(clientHost, caughtUpHosts[0]);
}
@@ -2208,13 +2446,61 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenUpdatePositionContainsInfoAboutSelf) {
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulV1Election();
+
+ OpTime time1({100, 1}, 2);
+ OpTime time2({100, 2}, 2);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+
+ // receive updatePosition containing ourself, should not process the update for self
+ UpdatePositionArgs args;
+ ASSERT_OK(args.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 0 << "durableOpTime"
+ << BSON("ts" << time2.getTimestamp() << "t" << 2)
+ << "appliedOpTime"
+ << BSON("ts" << time2.getTimestamp() << "t" << 2))))));
+
+ ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args, 0));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+}
+
+TEST_F(ReplCoordTest, DoNotProcessSelfWhenOldUpdatePositionContainsInfoAboutSelf) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
OpTimeWithTermZero time1(100, 1);
OpTimeWithTermZero time2(100, 2);
OpTimeWithTermZero staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -2224,7 +2510,7 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenUpdatePositionContainsInfoAboutSelf) {
getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
// receive updatePosition containing ourself, should not process the update for self
- UpdatePositionArgs args;
+ OldUpdatePositionArgs args;
ASSERT_OK(args.initialize(BSON("replSetUpdatePosition"
<< 1 << "optimes"
<< BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 0 << "optime"
@@ -2250,20 +2536,67 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionWhenItsConfigVersionIsIncorrect)
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulV1Election();
+
+ OpTime time1({100, 1}, 3);
+ OpTime time2({100, 2}, 3);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ // receive updatePosition with incorrect config version
+ UpdatePositionArgs args;
+ ASSERT_OK(args.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 3 << "memberId" << 1 << "durableOpTime"
+ << BSON("ts" << time2.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << time2.getTimestamp() << "t" << 3))))));
+
+ long long cfgver;
+ ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig,
+ getReplCoord()->processReplSetUpdatePosition(args, &cfgver));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+}
+
+TEST_F(ReplCoordTest, DoNotProcessOldUpdatePositionWhenItsConfigVersionIsIncorrect) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
OpTimeWithTermZero time1(100, 1);
OpTimeWithTermZero time2(100, 2);
OpTimeWithTermZero staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
writeConcern.wNumNodes = 1;
// receive updatePosition with incorrect config version
- UpdatePositionArgs args;
+ OldUpdatePositionArgs args;
ASSERT_OK(args.initialize(BSON("replSetUpdatePosition"
<< 1 << "optimes"
<< BSON_ARRAY(BSON("cfgver" << 3 << "memberId" << 1 << "optime"
@@ -2291,20 +2624,65 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionOfMembersWhoseIdsAreNotInTheConf
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ simulateSuccessfulV1Election();
+
+ OpTime time1({100, 1}, 2);
+ OpTime time2({100, 2}, 2);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
+
+ WriteConcernOptions writeConcern;
+ writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
+ writeConcern.wNumNodes = 1;
+
+ // receive updatePosition with nonexistent member id
+ UpdatePositionArgs args;
+ ASSERT_OK(args.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 9 << "durableOpTime"
+ << BSON("ts" << time2.getTimestamp() << "t" << 2)
+ << "appliedOpTime"
+ << BSON("ts" << time2.getTimestamp() << "t" << 2))))));
+
+ ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args, 0));
+ ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
+ getReplCoord()->awaitReplication(&txn, time2, writeConcern).status);
+}
+
+TEST_F(ReplCoordTest, DoNotProcessOldUpdatePositionOfMembersWhoseIdsAreNotInTheConfig) {
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("host"
+ << "node1:12345"
+ << "_id" << 0)
+ << BSON("host"
+ << "node2:12345"
+ << "_id" << 1) << BSON("host"
+ << "node3:12345"
+ << "_id" << 2))),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
OpTimeWithTermZero time1(100, 1);
OpTimeWithTermZero time2(100, 2);
OpTimeWithTermZero staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
writeConcern.wNumNodes = 1;
// receive updatePosition with nonexistent member id
- UpdatePositionArgs args;
+ OldUpdatePositionArgs args;
ASSERT_OK(args.initialize(BSON("replSetUpdatePosition"
<< 1 << "optimes"
<< BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 9 << "optime"
@@ -2331,21 +2709,24 @@ TEST_F(ReplCoordTest,
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
simulateSuccessfulV1Election();
OpTimeWithTermZero time1(100, 1);
OpTimeWithTermZero time2(100, 2);
OpTimeWithTermZero staleTime(10, 0);
- getReplCoord()->setMyLastOptime(time1);
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ getReplCoord()->setMyLastDurableOpTime(time1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
writeConcern.wNumNodes = 1;
// receive a good update position
- getReplCoord()->setMyLastOptime(time2);
- UpdatePositionArgs args;
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ OldUpdatePositionArgs args;
ASSERT_OK(args.initialize(
BSON("replSetUpdatePosition"
<< 1 << "optimes"
@@ -2393,7 +2774,8 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) {
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 2));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 2));
simulateSuccessfulV1Election();
OpTimeWithTermZero time(100, 2);
@@ -2408,6 +2790,12 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) {
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
+ ReplicationAwaiter awaiterJournaled(getReplCoord(), &txn);
+ writeConcern.wMode = WriteConcernOptions::kMajority;
+ awaiterJournaled.setOpTime(time);
+ awaiterJournaled.setWriteConcern(writeConcern);
+ awaiterJournaled.start(&txn);
+
// reconfig
Status status(ErrorCodes::InternalError, "Not Set");
stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status));
@@ -2417,12 +2805,22 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) {
ASSERT_OK(status);
// satisfy write concern
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 0, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 1, time));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 2, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 0, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 1, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 2, time));
ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
ASSERT_OK(statusAndDur.status);
awaiter.reset();
+
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 0, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(3, 0, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 1, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(3, 1, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 2, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(3, 2, time));
+ ReplicationCoordinator::StatusAndDuration statusAndDurJournaled = awaiterJournaled.getResult();
+ ASSERT_OK(statusAndDurJournaled.status);
+ awaiterJournaled.reset();
}
void doReplSetReconfigToFewer(ReplicationCoordinatorImpl* replCoord, Status* status) {
@@ -2457,7 +2855,8 @@ TEST_F(
<< "_id" << 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 2));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 2));
simulateSuccessfulV1Election();
OpTimeWithTermZero time(100, 2);
@@ -2472,6 +2871,12 @@ TEST_F(
awaiter.setWriteConcern(writeConcern);
awaiter.start(&txn);
+ ReplicationAwaiter awaiterJournaled(getReplCoord(), &txn);
+ writeConcern.wMode = WriteConcernOptions::kMajority;
+ awaiterJournaled.setOpTime(time);
+ awaiterJournaled.setWriteConcern(writeConcern);
+ awaiterJournaled.start(&txn);
+
// reconfig to fewer nodes
Status status(ErrorCodes::InternalError, "Not Set");
stdx::thread reconfigThread(stdx::bind(doReplSetReconfigToFewer, getReplCoord(), &status));
@@ -2485,6 +2890,9 @@ TEST_F(
ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult();
ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDur.status);
awaiter.reset();
+ ReplicationCoordinator::StatusAndDuration statusAndDurJournaled = awaiterJournaled.getResult();
+ ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDurJournaled.status);
+ awaiterJournaled.reset();
}
TEST_F(ReplCoordTest,
@@ -2508,14 +2916,16 @@ TEST_F(ReplCoordTest,
<< "_id" << 4))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 1));
simulateSuccessfulV1Election();
OpTime time(Timestamp(100, 2), 1);
- getReplCoord()->setMyLastOptime(time);
+ getReplCoord()->setMyLastAppliedOpTime(time);
+ getReplCoord()->setMyLastDurableOpTime(time);
getReplCoord()->onSnapshotCreate(time, SnapshotName(1));
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time));
// majority nodes waiting for time
@@ -2550,7 +2960,7 @@ TEST_F(ReplCoordTest,
}
TEST_F(ReplCoordTest,
- NodeReturnsFromMajorityWriteConcernOnlyOnceAMajorityOfVotingNodesHaveReceivedTheWrite) {
+ NodeReturnsFromMajorityWriteConcernOnlyOnceTheWriteAppearsInACommittedSnapShot) {
// Test that we can satisfy majority write concern can only be
// satisfied by voting data-bearing members.
OperationContextNoop txn;
@@ -2574,26 +2984,31 @@ TEST_F(ReplCoordTest,
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
OpTime time(Timestamp(100, 0), 1);
- getReplCoord()->setMyLastOptime(time);
+ getReplCoord()->setMyLastAppliedOpTime(time);
+ getReplCoord()->setMyLastDurableOpTime(time);
simulateSuccessfulV1Election();
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
+ majorityWriteConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL;
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time));
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
// this member does not vote and as a result should not count towards write concern
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, time));
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time));
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
@@ -2626,30 +3041,38 @@ TEST_F(ReplCoordTest,
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
OpTime zero(Timestamp(0, 0), 0);
OpTime time(Timestamp(100, 0), 1);
- getReplCoord()->setMyLastOptime(time);
+ getReplCoord()->setMyLastAppliedOpTime(time);
+ getReplCoord()->setMyLastDurableOpTime(time);
simulateSuccessfulV1Election();
ASSERT_EQUALS(zero, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time));
ASSERT_EQUALS(zero, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, time));
ASSERT_EQUALS(zero, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time));
ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime());
// Set a new, later OpTime.
OpTime newTime(Timestamp(100, 1), 1);
- getReplCoord()->setMyLastOptime(newTime);
+ getReplCoord()->setMyLastAppliedOpTime(newTime);
+ getReplCoord()->setMyLastDurableOpTime(newTime);
ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, newTime));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, newTime));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, newTime));
ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, newTime));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, newTime));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, newTime));
// Reached majority of voting nodes with newTime.
ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime());
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, newTime));
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, newTime));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, newTime));
ASSERT_EQUALS(newTime, getReplCoord()->getLastCommittedOpTime());
}
@@ -2662,7 +3085,8 @@ TEST_F(ReplCoordTest, NodeReturnsShutdownInProgressWhenWaitingUntilAnOpTimeDurin
<< "_id" << 0))),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(10, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(10, 0));
shutdown();
@@ -2682,7 +3106,8 @@ TEST_F(ReplCoordTest, NodeReturnsInterruptedWhenWaitingUntilAnOpTimeIsInterrupte
<< "_id" << 0))),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(10, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(10, 0));
txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "test"));
@@ -2717,7 +3142,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTi
<< "_id" << 0))),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
auto result = getReplCoord()->waitUntilOpTime(
&txn, ReadConcernArgs(OpTimeWithTermZero(50, 0), ReadConcernLevel::kLocalReadConcern));
@@ -2736,7 +3162,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTi
OpTimeWithTermZero time(100, 0);
- getReplCoord()->setMyLastOptime(time);
+ getReplCoord()->setMyLastAppliedOpTime(time);
+ getReplCoord()->setMyLastDurableOpTime(time);
auto result = getReplCoord()->waitUntilOpTime(
&txn, ReadConcernArgs(time, ReadConcernLevel::kLocalReadConcern));
@@ -2779,7 +3206,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedWhileShutdown) {
HostAndPort("node1", 12345));
runSingleNodeElection(getReplCoord());
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0));
shutdown();
@@ -2800,7 +3228,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedInterrupted) {
HostAndPort("node1", 12345));
runSingleNodeElection(getReplCoord());
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0));
txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "test"));
@@ -2821,7 +3250,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedGreaterOpTime) {
HostAndPort("node1", 12345));
runSingleNodeElection(getReplCoord());
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 1));
getReplCoord()->onSnapshotCreate(OpTime(Timestamp(100, 0), 1), SnapshotName(1));
auto result = getReplCoord()->waitUntilOpTime(
&txn, ReadConcernArgs(OpTime(Timestamp(50, 0), 1), ReadConcernLevel::kMajorityReadConcern));
@@ -2840,7 +3270,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedEqualOpTime) {
HostAndPort("node1", 12345));
runSingleNodeElection(getReplCoord());
OpTime time(Timestamp(100, 0), 1);
- getReplCoord()->setMyLastOptime(time);
+ getReplCoord()->setMyLastAppliedOpTime(time);
+ getReplCoord()->setMyLastDurableOpTime(time);
getReplCoord()->onSnapshotCreate(time, SnapshotName(1));
auto result = getReplCoord()->waitUntilOpTime(
&txn, ReadConcernArgs(time, ReadConcernLevel::kMajorityReadConcern));
@@ -2858,13 +3289,15 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredGreaterOpTime) {
<< "_id" << 0))),
HostAndPort("node1", 12345));
runSingleNodeElection(getReplCoord());
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1));
OpTime committedOpTime(Timestamp(200, 0), 1);
auto pseudoLogOp =
stdx::async(stdx::launch::async,
[this, &committedOpTime]() {
// Not guaranteed to be scheduled after waitUntil blocks...
- getReplCoord()->setMyLastOptime(committedOpTime);
+ getReplCoord()->setMyLastAppliedOpTime(committedOpTime);
+ getReplCoord()->setMyLastDurableOpTime(committedOpTime);
getReplCoord()->onSnapshotCreate(committedOpTime, SnapshotName(1));
});
@@ -2886,7 +3319,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) {
<< "_id" << 0))),
HostAndPort("node1", 12345));
runSingleNodeElection(getReplCoord());
- getReplCoord()->setMyLastOptime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1));
OpTime opTimeToWait(Timestamp(100, 0), 1);
@@ -2894,7 +3328,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) {
stdx::async(stdx::launch::async,
[this, &opTimeToWait]() {
// Not guaranteed to be scheduled after waitUntil blocks...
- getReplCoord()->setMyLastOptime(opTimeToWait);
+ getReplCoord()->setMyLastAppliedOpTime(opTimeToWait);
+ getReplCoord()->setMyLastDurableOpTime(opTimeToWait);
getReplCoord()->onSnapshotCreate(opTimeToWait, SnapshotName(1));
});
@@ -3373,9 +3808,11 @@ TEST_F(ReplCoordTest, AdvanceCommittedSnapshotToMostRecentSnapshotPriorToOpTimeW
getReplCoord()->onSnapshotCreate(time5, SnapshotName(3));
// ensure current snapshot follows price is right rules (closest but not greater than)
- getReplCoord()->setMyLastOptime(time3);
+ getReplCoord()->setMyLastAppliedOpTime(time3);
+ getReplCoord()->setMyLastDurableOpTime(time3);
ASSERT_EQUALS(time2, getReplCoord()->getCurrentCommittedSnapshotOpTime());
- getReplCoord()->setMyLastOptime(time4);
+ getReplCoord()->setMyLastAppliedOpTime(time4);
+ getReplCoord()->setMyLastDurableOpTime(time4);
ASSERT_EQUALS(time2, getReplCoord()->getCurrentCommittedSnapshotOpTime());
}
@@ -3403,7 +3840,8 @@ TEST_F(ReplCoordTest, DoNotAdvanceCommittedSnapshotWhenAnOpTimeIsNewerThanOurLat
getReplCoord()->onSnapshotCreate(time5, SnapshotName(3));
// ensure current snapshot will not advance beyond existing snapshots
- getReplCoord()->setMyLastOptime(time6);
+ getReplCoord()->setMyLastAppliedOpTime(time6);
+ getReplCoord()->setMyLastDurableOpTime(time6);
ASSERT_EQUALS(time5, getReplCoord()->getCurrentCommittedSnapshotOpTime());
}
@@ -3431,7 +3869,8 @@ TEST_F(ReplCoordTest,
getReplCoord()->onSnapshotCreate(time2, SnapshotName(2));
getReplCoord()->onSnapshotCreate(time5, SnapshotName(3));
- getReplCoord()->setMyLastOptime(time6);
+ getReplCoord()->setMyLastAppliedOpTime(time6);
+ getReplCoord()->setMyLastDurableOpTime(time6);
ASSERT_EQUALS(time5, getReplCoord()->getCurrentCommittedSnapshotOpTime());
// ensure current snapshot updates on new snapshot if we are that far
@@ -3467,7 +3906,34 @@ TEST_F(ReplCoordTest, ZeroCommittedSnapshotWhenAllSnapshotsAreDropped) {
ASSERT_EQUALS(OpTime(), getReplCoord()->getCurrentCommittedSnapshotOpTime());
}
-TEST_F(ReplCoordTest, NodeChangesMyLastOpTimeWhenAndOnlyWhenSetMyLastOpTimeReceivesANewerOpTime) {
+TEST_F(ReplCoordTest, DoNotAdvanceCommittedSnapshotWhenAppliedOpTimeChanges) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))),
+ HostAndPort("test1", 1234));
+ OperationContextReplMock txn;
+ runSingleNodeElection(getReplCoord());
+
+ OpTime time1(Timestamp(100, 1), 1);
+ OpTime time2(Timestamp(100, 2), 1);
+
+ getReplCoord()->onSnapshotCreate(time1, SnapshotName(1));
+
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(OpTime(), getReplCoord()->getCurrentCommittedSnapshotOpTime());
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ ASSERT_EQUALS(OpTime(), getReplCoord()->getCurrentCommittedSnapshotOpTime());
+ getReplCoord()->setMyLastAppliedOpTime(time2);
+ getReplCoord()->setMyLastDurableOpTime(time2);
+ ASSERT_EQUALS(time1, getReplCoord()->getCurrentCommittedSnapshotOpTime());
+}
+
+TEST_F(ReplCoordTest,
+ NodeChangesMyLastOpTimeWhenAndOnlyWhensetMyLastDurableOpTimeReceivesANewerOpTime) {
assertStartSuccess(BSON("_id"
<< "mySet"
<< "version" << 2 << "members" << BSON_ARRAY(BSON("host"
@@ -3480,12 +3946,13 @@ TEST_F(ReplCoordTest, NodeChangesMyLastOpTimeWhenAndOnlyWhenSetMyLastOpTimeRecei
OpTime time2(Timestamp(100, 2), 1);
OpTime time3(Timestamp(100, 3), 1);
- getReplCoord()->setMyLastOptime(time1);
- ASSERT_EQUALS(time1, getReplCoord()->getMyLastOptime());
- getReplCoord()->setMyLastOptimeForward(time3);
- ASSERT_EQUALS(time3, getReplCoord()->getMyLastOptime());
- getReplCoord()->setMyLastOptimeForward(time2);
- ASSERT_EQUALS(time3, getReplCoord()->getMyLastOptime());
+ getReplCoord()->setMyLastAppliedOpTime(time1);
+ ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime());
+ getReplCoord()->setMyLastAppliedOpTimeForward(time3);
+ ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
+ getReplCoord()->setMyLastAppliedOpTimeForward(time2);
+ getReplCoord()->setMyLastDurableOpTimeForward(time2);
+ ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime());
}
TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelievedToBeUp) {
@@ -3502,8 +3969,10 @@ TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelieve
<< BSON("electionTimeoutMillis" << 2000 << "heartbeatIntervalMillis" << 40000)),
HostAndPort("test1", 1234));
OpTime optime(Timestamp(100, 2), 0);
- getReplCoord()->setMyLastOptime(optime);
- ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime));
+ getReplCoord()->setMyLastAppliedOpTime(optime);
+ getReplCoord()->setMyLastDurableOpTime(optime);
+ ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime));
+ ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 1, optime));
// Check that we have two entries in our UpdatePosition (us and node 1).
BSONObjBuilder cmdBuilder;
@@ -3514,11 +3983,29 @@ TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelieve
BSONObj entry = entryElement.Obj();
long long memberId = entry["memberId"].Number();
memberIds.insert(memberId);
+ OpTime appliedOpTime;
+ OpTime durableOpTime;
+ bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime);
+ ASSERT_EQUALS(optime, appliedOpTime);
+ bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime);
+ ASSERT_EQUALS(optime, durableOpTime);
+ }
+ ASSERT_EQUALS(2U, memberIds.size());
+
+ // Check that this is true for old style (pre-3.2.2) UpdatePosition as well.
+ BSONObjBuilder cmdBuilder2;
+ getReplCoord()->prepareOldReplSetUpdatePositionCommand(&cmdBuilder2);
+ BSONObj cmd2 = cmdBuilder2.done();
+ std::set<long long> memberIds2;
+ BSONForEach(entryElement, cmd2["optimes"].Obj()) {
+ BSONObj entry = entryElement.Obj();
+ long long memberId = entry["memberId"].Number();
+ memberIds2.insert(memberId);
OpTime entryOpTime;
bsonExtractOpTimeField(entry, "optime", &entryOpTime);
ASSERT_EQUALS(optime, entryOpTime);
}
- ASSERT_EQUALS(2U, memberIds.size());
+ ASSERT_EQUALS(2U, memberIds2.size());
// Advance the clock far enough to cause the other node to be marked as DOWN.
const Date_t startDate = getNet()->now();
@@ -3534,19 +4021,37 @@ TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelieve
// Check there is one entry in our UpdatePosition, since we shouldn't forward for a
// DOWN node.
- BSONObjBuilder cmdBuilder2;
- getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder2);
- BSONObj cmd2 = cmdBuilder2.done();
- std::set<long long> memberIds2;
- BSONForEach(entryElement, cmd2["optimes"].Obj()) {
+ BSONObjBuilder cmdBuilder3;
+ getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder3);
+ BSONObj cmd3 = cmdBuilder3.done();
+ std::set<long long> memberIds3;
+ BSONForEach(entryElement, cmd3["optimes"].Obj()) {
BSONObj entry = entryElement.Obj();
long long memberId = entry["memberId"].Number();
- memberIds2.insert(memberId);
+ memberIds3.insert(memberId);
+ OpTime appliedOpTime;
+ OpTime durableOpTime;
+ bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime);
+ ASSERT_EQUALS(optime, appliedOpTime);
+ bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime);
+ ASSERT_EQUALS(optime, durableOpTime);
+ }
+ ASSERT_EQUALS(1U, memberIds3.size());
+
+ // Check that this is true for old style (pre-3.2.2) UpdatePosition as well.
+ BSONObjBuilder cmdBuilder4;
+ getReplCoord()->prepareOldReplSetUpdatePositionCommand(&cmdBuilder4);
+ BSONObj cmd4 = cmdBuilder4.done();
+ std::set<long long> memberIds4;
+ BSONForEach(entryElement, cmd4["optimes"].Obj()) {
+ BSONObj entry = entryElement.Obj();
+ long long memberId = entry["memberId"].Number();
+ memberIds4.insert(memberId);
OpTime entryOpTime;
bsonExtractOpTimeField(entry, "optime", &entryOpTime);
ASSERT_EQUALS(optime, entryOpTime);
}
- ASSERT_EQUALS(1U, memberIds2.size());
+ ASSERT_EQUALS(1U, memberIds4.size());
}
TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNodesDown) {
@@ -3571,10 +4076,11 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
OpTime startingOpTime = OpTime(Timestamp(100, 1), 0);
- getReplCoord()->setMyLastOptime(startingOpTime);
+ getReplCoord()->setMyLastAppliedOpTime(startingOpTime);
+ getReplCoord()->setMyLastDurableOpTime(startingOpTime);
// Receive notification that every node is up.
- UpdatePositionArgs args;
+ OldUpdatePositionArgs args;
ASSERT_OK(args.initialize(
BSON("replSetUpdatePosition"
<< 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime"
@@ -3591,7 +4097,7 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod
simulateSuccessfulV1Election();
// Keep two nodes alive.
- UpdatePositionArgs args1;
+ OldUpdatePositionArgs args1;
ASSERT_OK(args1.initialize(
BSON("replSetUpdatePosition"
<< 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime"
@@ -3608,7 +4114,7 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod
ASSERT_EQUALS(MemberState::RS_PRIMARY, getReplCoord()->getMemberState().s);
// Keep one node alive via two methods (UpdatePosition and requestHeartbeat).
- UpdatePositionArgs args2;
+ OldUpdatePositionArgs args2;
ASSERT_OK(args2.initialize(
BSON("replSetUpdatePosition"
<< 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime"
@@ -3654,7 +4160,8 @@ TEST_F(ReplCoordTest, WaitForMemberState) {
HostAndPort("test1", 1234));
auto replCoord = getReplCoord();
auto initialTerm = replCoord->getTerm();
- replCoord->setMyLastOptime(OpTime(Timestamp(1, 0), 0));
+ replCoord->setMyLastAppliedOpTime(OpTime(Timestamp(1, 0), 0));
+ replCoord->setMyLastDurableOpTime(OpTime(Timestamp(1, 0), 0));
ASSERT_TRUE(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
// Successful dry run election increases term.
@@ -3688,7 +4195,8 @@ TEST_F(ReplCoordTest, WaitForDrainFinish) {
HostAndPort("test1", 1234));
auto replCoord = getReplCoord();
auto initialTerm = replCoord->getTerm();
- replCoord->setMyLastOptime(OpTime(Timestamp(1, 0), 0));
+ replCoord->setMyLastAppliedOpTime(OpTime(Timestamp(1, 0), 0));
+ replCoord->setMyLastDurableOpTime(OpTime(Timestamp(1, 0), 0));
ASSERT_TRUE(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
// Successful dry run election increases term.
@@ -3713,8 +4221,168 @@ TEST_F(ReplCoordTest, WaitForDrainFinish) {
ASSERT_OK(replCoord->waitForDrainFinish(Milliseconds(0)));
}
-// TODO(schwerin): Unit test election id updating
+TEST_F(ReplCoordTest, UpdatePositionArgsReturnsNoSuchKeyWhenParsingOldUpdatePositionArgs) {
+ OldUpdatePositionArgs args;
+ UpdatePositionArgs args2;
+ OpTime opTime = OpTime(Timestamp(100, 1), 0);
+ ASSERT_EQUALS(
+ ErrorCodes::NoSuchKey,
+ args2.initialize(BSON(
+ "replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(
+ BSON("cfgver" << 2 << "memberId" << 1 << "optime" << opTime.getTimestamp())
+ << BSON("cfgver" << 2 << "memberId" << 2 << "optime" << opTime.getTimestamp())
+ << BSON("cfgver" << 2 << "memberId" << 3 << "optime" << opTime.getTimestamp())
+ << BSON("cfgver" << 2 << "memberId" << 4 << "optime"
+ << opTime.getTimestamp())))));
+
+ ASSERT_OK(args.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(
+ BSON("cfgver" << 2 << "memberId" << 1 << "optime" << opTime.getTimestamp())
+ << BSON("cfgver" << 2 << "memberId" << 2 << "optime" << opTime.getTimestamp())
+ << BSON("cfgver" << 2 << "memberId" << 3 << "optime" << opTime.getTimestamp())
+ << BSON("cfgver" << 2 << "memberId" << 4 << "optime"
+ << opTime.getTimestamp())))));
+}
+
+
+TEST_F(ReplCoordTest, OldUpdatePositionArgsReturnsBadValueWhenParsingUpdatePositionArgs) {
+ OldUpdatePositionArgs args;
+ UpdatePositionArgs args2;
+ OpTime opTime = OpTime(Timestamp(100, 1), 0);
+ ASSERT_EQUALS(
+ ErrorCodes::BadValue,
+ args.initialize(BSON(
+ "replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))
+ << BSON("cfgver" << 2 << "memberId" << 2 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))
+ << BSON("cfgver" << 2 << "memberId" << 3 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))
+ << BSON("cfgver" << 2 << "memberId" << 4 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))))));
+ ASSERT_OK(args2.initialize(
+ BSON("replSetUpdatePosition"
+ << 1 << "optimes"
+ << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))
+ << BSON("cfgver" << 2 << "memberId" << 2 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))
+ << BSON("cfgver" << 2 << "memberId" << 3 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))
+ << BSON("cfgver" << 2 << "memberId" << 4 << "durableOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3)
+ << "appliedOpTime"
+ << BSON("ts" << opTime.getTimestamp() << "t" << 3))))));
+}
+
+TEST_F(
+ ReplCoordTest,
+ PopulateUnsetWriteConcernOptionsSyncModeReturnsInputWithSyncModeNoneIfUnsetAndWriteConcernMajorityJournalDefaultIsFalse) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))
+ << "writeConcernMajorityJournalDefault" << false),
+ HostAndPort("test1", 1234));
+
+ WriteConcernOptions wc;
+ wc.wMode = WriteConcernOptions::kMajority;
+ wc.syncMode = WriteConcernOptions::SyncMode::UNSET;
+ ASSERT(WriteConcernOptions::SyncMode::NONE ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+}
+
+TEST_F(
+ ReplCoordTest,
+ PopulateUnsetWriteConcernOptionsSyncModeReturnsInputWithSyncModeJournalIfUnsetAndWriteConcernMajorityJournalDefaultIsTrue) {
+ init("mySet");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))
+ << "writeConcernMajorityJournalDefault" << true),
+ HostAndPort("test1", 1234));
+
+ WriteConcernOptions wc;
+ wc.wMode = WriteConcernOptions::kMajority;
+ wc.syncMode = WriteConcernOptions::SyncMode::UNSET;
+ ASSERT(WriteConcernOptions::SyncMode::JOURNAL ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+}
+
+TEST_F(ReplCoordTest, PopulateUnsetWriteConcernOptionsSyncModeReturnsInputIfSyncModeIsNotUnset) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))
+ << "writeConcernMajorityJournalDefault" << false),
+ HostAndPort("test1", 1234));
+
+ WriteConcernOptions wc;
+ wc.wMode = WriteConcernOptions::kMajority;
+ ASSERT(WriteConcernOptions::SyncMode::NONE ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+
+ wc.syncMode = WriteConcernOptions::SyncMode::JOURNAL;
+ ASSERT(WriteConcernOptions::SyncMode::JOURNAL ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+
+ wc.syncMode = WriteConcernOptions::SyncMode::FSYNC;
+ ASSERT(WriteConcernOptions::SyncMode::FSYNC ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+}
+
+TEST_F(ReplCoordTest, PopulateUnsetWriteConcernOptionsSyncModeReturnsInputIfWModeIsNotMajority) {
+ init("mySet");
+
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "test1:1234"))
+ << "writeConcernMajorityJournalDefault" << false),
+ HostAndPort("test1", 1234));
+
+ WriteConcernOptions wc;
+ wc.syncMode = WriteConcernOptions::SyncMode::UNSET;
+ wc.wMode = "not the value of kMajority";
+ ASSERT(WriteConcernOptions::SyncMode::NONE ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+
+ wc.wMode = "like literally anythingelse";
+ ASSERT(WriteConcernOptions::SyncMode::NONE ==
+ getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode);
+}
+
+// TODO(schwerin): Unit test election id updating
} // namespace
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index acacb6c9584..2ceb947cb8e 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -148,22 +148,36 @@ void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
// TODO
}
-void ReplicationCoordinatorMock::setMyLastOptime(const OpTime& opTime) {
- _myLastOpTime = opTime;
+void ReplicationCoordinatorMock::setMyLastAppliedOpTime(const OpTime& opTime) {
+ _myLastAppliedOpTime = opTime;
}
-void ReplicationCoordinatorMock::setMyLastOptimeForward(const OpTime& opTime) {
- if (opTime > _myLastOpTime) {
- _myLastOpTime = opTime;
+void ReplicationCoordinatorMock::setMyLastDurableOpTime(const OpTime& opTime) {
+ _myLastDurableOpTime = opTime;
+}
+
+void ReplicationCoordinatorMock::setMyLastAppliedOpTimeForward(const OpTime& opTime) {
+ if (opTime > _myLastAppliedOpTime) {
+ _myLastAppliedOpTime = opTime;
+ }
+}
+
+void ReplicationCoordinatorMock::setMyLastDurableOpTimeForward(const OpTime& opTime) {
+ if (opTime > _myLastDurableOpTime) {
+ _myLastDurableOpTime = opTime;
}
}
-void ReplicationCoordinatorMock::resetMyLastOptime() {
- _myLastOpTime = OpTime();
+void ReplicationCoordinatorMock::resetMyLastOpTimes() {
+ _myLastDurableOpTime = OpTime();
}
-OpTime ReplicationCoordinatorMock::getMyLastOptime() const {
- return _myLastOpTime;
+OpTime ReplicationCoordinatorMock::getMyLastAppliedOpTime() const {
+ return _myLastAppliedOpTime;
+}
+
+OpTime ReplicationCoordinatorMock::getMyLastDurableOpTime() const {
+ return _myLastDurableOpTime;
}
ReadConcernResponse ReplicationCoordinatorMock::waitUntilOpTime(OperationContext* txn,
@@ -203,6 +217,12 @@ Status ReplicationCoordinatorMock::waitForDrainFinish(Milliseconds timeout) {
void ReplicationCoordinatorMock::signalUpstreamUpdater() {}
+bool ReplicationCoordinatorMock::prepareOldReplSetUpdatePositionCommand(
+ BSONObjBuilder* cmdBuilder) {
+ cmdBuilder->append("replSetUpdatePosition", 1);  // minimal stub: command name only, no position payload
+ return true;  // mock never fails to build the command
+}
+
bool ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
cmdBuilder->append("replSetUpdatePosition", 1);
return true;
@@ -284,6 +304,12 @@ Status ReplicationCoordinatorMock::processReplSetElect(const ReplSetElectArgs& a
return Status::OK();
}
+Status ReplicationCoordinatorMock::processReplSetUpdatePosition(
+ const OldUpdatePositionArgs& updates, long long* configVersion) {
+ // TODO: record the reported positions; the mock currently ignores both arguments.
+ return Status::OK();
+}
+
Status ReplicationCoordinatorMock::processReplSetUpdatePosition(const UpdatePositionArgs& updates,
long long* configVersion) {
// TODO
@@ -300,7 +326,8 @@ bool ReplicationCoordinatorMock::buildsIndexes() {
return true;
}
-std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op) {
+std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op,
+ bool durablyWritten) {
return std::vector<HostAndPort>();
}
@@ -328,7 +355,7 @@ HostAndPort ReplicationCoordinatorMock::chooseNewSyncSource(const Timestamp& las
void ReplicationCoordinatorMock::blacklistSyncSource(const HostAndPort& host, Date_t until) {}
-void ReplicationCoordinatorMock::resetLastOpTimeFromOplog(OperationContext* txn) {
+void ReplicationCoordinatorMock::resetLastOpTimesFromOplog(OperationContext* txn) {
invariant(false);
}
@@ -367,6 +394,10 @@ bool ReplicationCoordinatorMock::isV1ElectionProtocol() {
return true;
}
+bool ReplicationCoordinatorMock::getWriteConcernMajorityShouldJournal() {
+ return true;  // fixed answer in the mock: majority write concern implies journaling
+}
+
void ReplicationCoordinatorMock::summarizeAsHtml(ReplSetHtmlSummary* output) {}
long long ReplicationCoordinatorMock::getTerm() {
@@ -398,5 +429,13 @@ size_t ReplicationCoordinatorMock::getNumUncommittedSnapshots() {
return 0;
}
+WriteConcernOptions ReplicationCoordinatorMock::populateUnsetWriteConcernOptionsSyncMode(
+ WriteConcernOptions wc) {
+ if (wc.syncMode == WriteConcernOptions::SyncMode::UNSET) {
+ wc.syncMode = WriteConcernOptions::SyncMode::JOURNAL;  // mock policy: any UNSET resolves to JOURNAL, regardless of wMode
+ }
+ return wc;  // taken and returned by value; caller's options are not mutated
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 60f1b1b23c4..4a21d9ad705 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -103,15 +103,18 @@ public:
virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts);
- virtual void setMyLastOptime(const OpTime& opTime);
+ virtual void setMyLastAppliedOpTime(const OpTime& opTime);
+ virtual void setMyLastDurableOpTime(const OpTime& opTime);
- virtual void setMyLastOptimeForward(const OpTime& opTime);
+ virtual void setMyLastAppliedOpTimeForward(const OpTime& opTime);
+ virtual void setMyLastDurableOpTimeForward(const OpTime& opTime);
- virtual void resetMyLastOptime();
+ virtual void resetMyLastOpTimes();
virtual void setMyHeartbeatMessage(const std::string& msg);
- virtual OpTime getMyLastOptime() const;
+ virtual OpTime getMyLastAppliedOpTime() const;
+ virtual OpTime getMyLastDurableOpTime() const;
virtual ReadConcernResponse waitUntilOpTime(OperationContext* txn,
const ReadConcernArgs& settings) override;
@@ -132,6 +135,7 @@ public:
virtual void signalUpstreamUpdater();
+ virtual bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder);
virtual Status processReplSetGetStatus(BSONObjBuilder* result);
@@ -177,6 +181,8 @@ public:
virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* resultObj);
+ virtual Status processReplSetUpdatePosition(const OldUpdatePositionArgs& updates,
+ long long* configVersion);
virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates,
long long* configVersion);
@@ -184,7 +190,7 @@ public:
virtual bool buildsIndexes();
- virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op);
+ virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, bool durablyWritten);
virtual std::vector<HostAndPort> getOtherNodesInReplSet() const;
@@ -196,7 +202,7 @@ public:
virtual void blacklistSyncSource(const HostAndPort& host, Date_t until);
- virtual void resetLastOpTimeFromOplog(OperationContext* txn);
+ virtual void resetLastOpTimesFromOplog(OperationContext* txn);
virtual bool shouldChangeSyncSource(const HostAndPort& currentSource,
const OpTime& syncSourceLastOpTime,
@@ -220,6 +226,8 @@ public:
virtual bool isV1ElectionProtocol();
+ virtual bool getWriteConcernMajorityShouldJournal();
+
virtual void summarizeAsHtml(ReplSetHtmlSummary* output);
virtual long long getTerm();
@@ -241,11 +249,15 @@ public:
virtual size_t getNumUncommittedSnapshots() override;
+ virtual WriteConcernOptions populateUnsetWriteConcernOptionsSyncMode(
+ WriteConcernOptions wc) override;
+
private:
AtomicUInt64 _snapshotNameGenerator;
const ReplSettings _settings;
MemberState _memberState;
- OpTime _myLastOpTime;
+ OpTime _myLastDurableOpTime;
+ OpTime _myLastAppliedOpTime;
};
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
index ed30ceacde8..cb45baf086f 100644
--- a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
+++ b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp
@@ -113,12 +113,13 @@ void ReplCoordTest::init() {
 TopologyCoordinatorImpl::Options settings;
 _topo = new TopologyCoordinatorImpl(settings);
+ stdx::function<bool()> _durabilityLambda = []() -> bool { return true; };  // NOTE(review): local's address is passed to the ctor below — confirm the coordinator copies rather than retains the pointer
 _net = new NetworkInterfaceMock;
 _storage = new StorageInterfaceMock;
 _replExec.reset(new ReplicationExecutor(_net, _storage, seed));
 _externalState = new ReplicationCoordinatorExternalStateMock;
- _repl.reset(
- new ReplicationCoordinatorImpl(_settings, _externalState, _topo, _replExec.get(), seed));
+ _repl.reset(new ReplicationCoordinatorImpl(
+ _settings, _externalState, _topo, _replExec.get(), seed, &_durabilityLambda));
void ReplCoordTest::init(const ReplSettings& settings) {
diff --git a/src/mongo/db/repl/replication_info.cpp b/src/mongo/db/repl/replication_info.cpp
index 273deaa3bd9..50e84803c6f 100644
--- a/src/mongo/db/repl/replication_info.cpp
+++ b/src/mongo/db/repl/replication_info.cpp
@@ -178,7 +178,7 @@ public:
BSONObjBuilder result;
// TODO(siyuan) Output term of OpTime
- result.append("latestOptime", replCoord->getMyLastOptime().getTimestamp());
+ result.append("latestOptime", replCoord->getMyLastAppliedOpTime().getTimestamp());
const std::string& oplogNS =
replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 07f247fdc98..ee0aa50a6f0 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/lasterror.h"
#include "mongo/db/op_observer.h"
#include "mongo/db/repl/initial_sync.h"
+#include "mongo/db/repl/old_update_position_args.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/repl_set_heartbeat_args_v1.h"
#include "mongo/db/repl/repl_set_heartbeat_args.h"
@@ -669,25 +670,43 @@ public:
// accept and ignore handshakes sent from old (3.0-series) nodes without erroring to
// enable mixed-version operation, since we no longer use the handshakes
- if (cmdObj.hasField("handshake")) {
+ if (cmdObj.hasField("handshake"))
return true;
- }
+
+ // In the case of an update from a member with an invalid replica set config,
+ // we return our current config version.
+ long long configVersion = -1;
UpdatePositionArgs args;
+
status = args.initialize(cmdObj);
- if (!status.isOK())
+ if (status.isOK()) {
+ // v3.2.2+ style replSetUpdatePosition command.
+ status = getGlobalReplicationCoordinator()->processReplSetUpdatePosition(
+ args, &configVersion);
+
+ if (status == ErrorCodes::InvalidReplicaSetConfig) {
+ result.append("configVersion", configVersion);
+ }
return appendCommandStatus(result, status);
+ } else if (status == ErrorCodes::NoSuchKey) {
+ // Pre-3.2.2 style replSetUpdatePosition command.
+ OldUpdatePositionArgs oldArgs;
+ status = oldArgs.initialize(cmdObj);
+ if (!status.isOK())
+ return appendCommandStatus(result, status);
- // in the case of an update from a member with an invalid replica set config,
- // we return our current config version
- long long configVersion = -1;
- status =
- getGlobalReplicationCoordinator()->processReplSetUpdatePosition(args, &configVersion);
+ status = getGlobalReplicationCoordinator()->processReplSetUpdatePosition(
+ oldArgs, &configVersion);
- if (status == ErrorCodes::InvalidReplicaSetConfig) {
- result.append("configVersion", configVersion);
+ if (status == ErrorCodes::InvalidReplicaSetConfig) {
+ result.append("configVersion", configVersion);
+ }
+ return appendCommandStatus(result, status);
+ } else {
+ // Parsing error from UpdatePositionArgs.
+ return appendCommandStatus(result, status);
}
- return appendCommandStatus(result, status);
}
} cmdReplSetUpdatePosition;
diff --git a/src/mongo/db/repl/reporter.cpp b/src/mongo/db/repl/reporter.cpp
index fbbc4d64d33..3a6cfb81e82 100644
--- a/src/mongo/db/repl/reporter.cpp
+++ b/src/mongo/db/repl/reporter.cpp
@@ -41,10 +41,10 @@ namespace repl {
using executor::RemoteCommandRequest;
Reporter::Reporter(ReplicationExecutor* executor,
- PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn,
+ PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn,
const HostAndPort& target)
: _executor(executor),
- _prepareReplSetUpdatePositionCommandFn(prepareReplSetUpdatePositionCommandFn),
+ _prepareOldReplSetUpdatePositionCommandFn(prepareOldReplSetUpdatePositionCommandFn),
_target(target),
_status(Status::OK()),
_willRunAgain(false),
@@ -52,7 +52,7 @@ Reporter::Reporter(ReplicationExecutor* executor,
uassert(ErrorCodes::BadValue, "null replication executor", executor);
uassert(ErrorCodes::BadValue,
"null function to create replSetUpdatePosition command object",
- prepareReplSetUpdatePositionCommandFn);
+ prepareOldReplSetUpdatePositionCommandFn);
uassert(ErrorCodes::BadValue, "target name cannot be empty", !target.empty());
}
@@ -105,11 +105,11 @@ Status Reporter::_schedule_inlock() {
LOG(2) << "Reporter scheduling report to : " << _target;
- auto prepareResult = _prepareReplSetUpdatePositionCommandFn();
+ auto prepareResult = _prepareOldReplSetUpdatePositionCommandFn();
if (!prepareResult.isOK()) {
// Returning NodeNotFound because currently this is the only way
- // prepareReplSetUpdatePositionCommand() can fail in production.
+ // prepareOldReplSetUpdatePositionCommand() can fail in production.
return Status(ErrorCodes::NodeNotFound,
"Reporter failed to create replSetUpdatePositionCommand command.");
}
diff --git a/src/mongo/db/repl/reporter.h b/src/mongo/db/repl/reporter.h
index 585539454c1..5fba25e5ba0 100644
--- a/src/mongo/db/repl/reporter.h
+++ b/src/mongo/db/repl/reporter.h
@@ -50,7 +50,7 @@ public:
using PrepareReplSetUpdatePositionCommandFn = stdx::function<StatusWith<BSONObj>()>;
Reporter(ReplicationExecutor* executor,
- PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn,
+ PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn,
const HostAndPort& target);
virtual ~Reporter();
@@ -105,7 +105,7 @@ private:
ReplicationExecutor* _executor;
// Prepares update command object.
- PrepareReplSetUpdatePositionCommandFn _prepareReplSetUpdatePositionCommandFn;
+ PrepareReplSetUpdatePositionCommandFn _prepareOldReplSetUpdatePositionCommandFn;
// Host to whom the Reporter sends updates.
HostAndPort _target;
diff --git a/src/mongo/db/repl/reporter_test.cpp b/src/mongo/db/repl/reporter_test.cpp
index c5533d7adb2..01904e81c92 100644
--- a/src/mongo/db/repl/reporter_test.cpp
+++ b/src/mongo/db/repl/reporter_test.cpp
@@ -52,7 +52,7 @@ public:
_result = newResult;
}
- bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
+ bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) {
if (!_result) {
return _result;
}
@@ -85,7 +85,7 @@ protected:
std::unique_ptr<Reporter> reporter;
std::unique_ptr<MockProgressManager> posUpdater;
- Reporter::PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn;
+ Reporter::PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn;
};
ReporterTest::ReporterTest() {}
@@ -93,16 +93,16 @@ ReporterTest::ReporterTest() {}
void ReporterTest::setUp() {
ReplicationExecutorTest::setUp();
posUpdater.reset(new MockProgressManager());
- prepareReplSetUpdatePositionCommandFn = [this]() -> StatusWith<BSONObj> {
+ prepareOldReplSetUpdatePositionCommandFn = [this]() -> StatusWith<BSONObj> {
BSONObjBuilder bob;
- if (posUpdater->prepareReplSetUpdatePositionCommand(&bob)) {
+ if (posUpdater->prepareOldReplSetUpdatePositionCommand(&bob)) {
return bob.obj();
}
return Status(ErrorCodes::OperationFailed,
"unable to prepare replSetUpdatePosition command object");
};
reporter.reset(new Reporter(&getReplExecutor(),
- [this]() { return prepareReplSetUpdatePositionCommandFn(); },
+ [this]() { return prepareOldReplSetUpdatePositionCommandFn(); },
HostAndPort("h1")));
launchExecutorThread();
}
@@ -138,12 +138,12 @@ TEST_F(ReporterTest, InvalidConstruction) {
UserException);
// null ReplicationExecutor
- ASSERT_THROWS(Reporter(nullptr, prepareReplSetUpdatePositionCommandFn, HostAndPort("h1")),
+ ASSERT_THROWS(Reporter(nullptr, prepareOldReplSetUpdatePositionCommandFn, HostAndPort("h1")),
UserException);
// empty HostAndPort
ASSERT_THROWS(
- Reporter(&getReplExecutor(), prepareReplSetUpdatePositionCommandFn, HostAndPort()),
+ Reporter(&getReplExecutor(), prepareOldReplSetUpdatePositionCommandFn, HostAndPort()),
UserException);
}
diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp
index 569480452d1..3d62914b081 100644
--- a/src/mongo/db/repl/rs_initialsync.cpp
+++ b/src/mongo/db/repl/rs_initialsync.cpp
@@ -88,11 +88,11 @@ void truncateAndResetOplog(OperationContext* txn,
// Note: the following order is important.
// The bgsync thread uses an empty optime as a sentinel to know to wait
// for initial sync; thus, we must
- // ensure the lastAppliedOptime is empty before restarting the bgsync thread
+ // ensure the lastAppliedOpTime is empty before restarting the bgsync thread
// via stop().
// We must clear the sync source blacklist after calling stop()
// because the bgsync thread, while running, may update the blacklist.
- replCoord->resetMyLastOptime();
+ replCoord->resetMyLastOpTimes();
bgsync->stop();
bgsync->clearBuffer();
@@ -214,7 +214,7 @@ bool _initialSyncClone(OperationContext* txn,
* @return if applying the oplog succeeded.
*/
bool _initialSyncApplyOplog(OperationContext* ctx, repl::InitialSync* syncer, OplogReader* r) {
- const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastOptime();
+ const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastAppliedOpTime();
BSONObj lastOp;
// If the fail point is set, exit failing.
@@ -370,7 +370,7 @@ Status _initialSync() {
// prime oplog, but don't need to actually apply the op as the cloned data already reflects it.
OpTime lastOptime = writeOpsToOplog(&txn, {lastOp});
ReplClientInfo::forClient(txn.getClient()).setLastOp(lastOptime);
- replCoord->setMyLastOptime(lastOptime);
+ replCoord->setMyLastAppliedOpTime(lastOptime);
setNewTimestamp(lastOptime.getTimestamp());
std::string msg = "oplog sync 1 of 3";
@@ -425,7 +425,7 @@ Status _initialSync() {
{
ScopedTransaction scopedXact(&txn, MODE_IX);
AutoGetDb autodb(&txn, "local", MODE_X);
- OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastOptime());
+ OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastAppliedOpTime());
log() << "set minValid=" << lastOpTimeWritten;
// Initial sync is now complete. Flag this by setting minValid to the last thing
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index a9073f49524..076507b4106 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -791,9 +791,9 @@ void syncFixUp(OperationContext* txn,
warn = true;
}
- // Reload the lastOpTimeApplied value in the replcoord and the lastAppliedHash value in
- // bgsync to reflect our new last op.
- replCoord->resetLastOpTimeFromOplog(txn);
+ // Reload the lastAppliedOpTime and lastDurableOpTime value in the replcoord and the
+ // lastAppliedHash value in bgsync to reflect our new last op.
+ replCoord->resetLastOpTimesFromOplog(txn);
// done
if (warn)
diff --git a/src/mongo/db/repl/rs_rollback_test.cpp b/src/mongo/db/repl/rs_rollback_test.cpp
index 7320cf98365..835389c3494 100644
--- a/src/mongo/db/repl/rs_rollback_test.cpp
+++ b/src/mongo/db/repl/rs_rollback_test.cpp
@@ -76,13 +76,13 @@ ReplSettings createReplSettings() {
class ReplicationCoordinatorRollbackMock : public ReplicationCoordinatorMock {
public:
ReplicationCoordinatorRollbackMock();
- void resetLastOpTimeFromOplog(OperationContext* txn) override;
+ void resetLastOpTimesFromOplog(OperationContext* txn) override;
};
ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollbackMock()
: ReplicationCoordinatorMock(createReplSettings()) {}
-void ReplicationCoordinatorRollbackMock::resetLastOpTimeFromOplog(OperationContext* txn) {}
+void ReplicationCoordinatorRollbackMock::resetLastOpTimesFromOplog(OperationContext* txn) {}
class RollbackSourceMock : public RollbackSource {
public:
diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp
index 83dd0131567..9e5c0e7e344 100644
--- a/src/mongo/db/repl/rs_sync.cpp
+++ b/src/mongo/db/repl/rs_sync.cpp
@@ -107,7 +107,7 @@ void runSyncThread() {
// 1. If the oplog is empty, do an initial sync
// 2. If minValid has _initialSyncFlag set, do an initial sync
// 3. If initialSyncRequested is true
- if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
+ if (getGlobalReplicationCoordinator()->getMyLastAppliedOpTime().isNull() ||
getInitialSyncFlag() || initialSyncRequested) {
syncDoInitialSync();
continue; // start from top again in case sync failed.
diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp
index 7c08a04be27..43b84f45e38 100644
--- a/src/mongo/db/repl/sync_source_feedback.cpp
+++ b/src/mongo/db/repl/sync_source_feedback.cpp
@@ -60,6 +60,7 @@ namespace repl {
void SyncSourceFeedback::_resetConnection() {
LOG(1) << "resetting connection in sync source feedback";
_connection.reset();
+ _fallBackToOldUpdatePosition = false;
}
bool SyncSourceFeedback::replAuthenticate() {
@@ -105,18 +106,24 @@ void SyncSourceFeedback::forwardSlaveProgress() {
_cond.notify_all();
}
-Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
+Status SyncSourceFeedback::updateUpstream(OperationContext* txn, bool oldStyle) {
auto replCoord = repl::ReplicationCoordinator::get(txn);
if (replCoord->getMemberState().primary()) {
- // primary has no one to update to
+ // Primary has no one to send updates to.
return Status::OK();
}
BSONObjBuilder cmd;
{
stdx::unique_lock<stdx::mutex> lock(_mtx);
- // the command could not be created, likely because the node was removed from the set
- if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
- return Status::OK();
+ // The command could not be created, likely because this node was removed from the set.
+ if (!oldStyle) {
+ if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
+ return Status::OK();
+ }
+ } else {
+ if (!replCoord->prepareOldReplSetUpdatePositionCommand(&cmd)) {
+ return Status::OK();
+ }
}
}
BSONObj res;
@@ -125,8 +132,9 @@ Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
try {
_connection->runCommand("admin", cmd.obj(), res);
} catch (const DBException& e) {
- log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
- // blacklist sync target for .5 seconds and find a new one
+ log() << "SyncSourceFeedback error sending " << (oldStyle ? "old style " : "")
+ << "update: " << e.what();
+ // Blacklist sync target for .5 seconds and find a new one.
replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
BackgroundSync::get()->clearSyncTarget();
_resetConnection();
@@ -135,11 +143,15 @@ Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
Status status = Command::getStatusFromCommandResult(res);
if (!status.isOK()) {
- log() << "SyncSourceFeedback error sending update, response: " << res.toString() << endl;
- // blacklist sync target for .5 seconds and find a new one, unless we were rejected due
- // to the syncsource having a newer config
- if (status != ErrorCodes::InvalidReplicaSetConfig || res["configVersion"].eoo() ||
- res["configVersion"].numberLong() < replCoord->getConfig().getConfigVersion()) {
+ log() << "SyncSourceFeedback error sending " << (oldStyle ? "old style " : "")
+ << "update, response: " << res.toString();
+ if (status == ErrorCodes::BadValue && !oldStyle) {
+ log() << "SyncSourceFeedback falling back to old style UpdatePosition command";
+ _fallBackToOldUpdatePosition = true;
+ } else if (status != ErrorCodes::InvalidReplicaSetConfig || res["configVersion"].eoo() ||
+ res["configVersion"].numberLong() < replCoord->getConfig().getConfigVersion()) {
+ // Blacklist sync target for .5 seconds and find a new one, unless we were rejected due
+ // to the syncsource having a newer config.
replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
BackgroundSync::get()->clearSyncTarget();
_resetConnection();
@@ -195,9 +207,16 @@ void SyncSourceFeedback::run() {
continue;
}
}
- Status status = updateUpstream(txn.get());
+ bool oldFallBackValue = _fallBackToOldUpdatePosition;
+ Status status = updateUpstream(txn.get(), _fallBackToOldUpdatePosition);
if (!status.isOK()) {
- log() << "updateUpstream failed: " << status << ", will retry";
+ if (_fallBackToOldUpdatePosition != oldFallBackValue) {
+ stdx::unique_lock<stdx::mutex> lock(_mtx);
+ _positionChanged = true;
+ } else {
+ log() << (_fallBackToOldUpdatePosition ? "old style " : "") << "updateUpstream"
+ << " failed: " << status << ", will retry";
+ }
}
}
}
diff --git a/src/mongo/db/repl/sync_source_feedback.h b/src/mongo/db/repl/sync_source_feedback.h
index d1dc13444e1..ed45b59a752 100644
--- a/src/mongo/db/repl/sync_source_feedback.h
+++ b/src/mongo/db/repl/sync_source_feedback.h
@@ -68,8 +68,10 @@ private:
/* Inform the sync target of our current position in the oplog, as well as the positions
* of all secondaries chained through us.
+ * "oldStyle" indicates whether or not the upstream node is pre-3.2.2 and needs the older style
+ * ReplSetUpdatePosition commands as a result.
*/
- Status updateUpstream(OperationContext* txn);
+ Status updateUpstream(OperationContext* txn, bool oldStyle);
bool hasConnection() {
return _connection.get();
@@ -92,6 +94,8 @@ private:
bool _positionChanged = false;
// Once this is set to true the _run method will terminate
bool _shutdownSignaled = false;
+ // Indicates whether our syncSource can't accept the new version of the UpdatePosition command.
+ bool _fallBackToOldUpdatePosition = false;
};
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 2e5d0182bcc..b81ee3bfa8e 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -216,13 +216,9 @@ ApplyBatchFinalizer::~ApplyBatchFinalizer() {
}
void ApplyBatchFinalizer::record(OpTime newOp) {
- const bool mustWaitUntilDurable = _replCoord->isV1ElectionProtocol();
- if (!mustWaitUntilDurable) {
- // We have to use setMyLastOptimeForward since this thread races with
- // logTransitionToPrimaryToOplog.
- _replCoord->setMyLastOptimeForward(newOp);
- return;
- }
+ // We have to use setMyLastAppliedOpTimeForward since this thread races with
+ // logTransitionToPrimaryToOplog.
+ _replCoord->setMyLastAppliedOpTimeForward(newOp);
stdx::unique_lock<stdx::mutex> lock(_mutex);
_latestOpTime = newOp;
@@ -252,9 +248,9 @@ void ApplyBatchFinalizer::_run() {
auto txn = cc().makeOperationContext();
txn->recoveryUnit()->goingToWaitUntilDurable();
txn->recoveryUnit()->waitUntilDurable();
- // We have to use setMyLastOptimeForward since this thread races with
+ // We have to use setMyLastDurableOpTimeForward since this thread races with
// logTransitionToPrimaryToOplog.
- _replCoord->setMyLastOptimeForward(latestOpTime);
+ _replCoord->setMyLastDurableOpTimeForward(latestOpTime);
}
}
} // anonymous namespace containing ApplyBatchFinalizer definitions.
@@ -716,7 +712,7 @@ void SyncTail::oplogApplication() {
auto minValidBoundaries = getMinValid(&txn);
OpTime originalEndOpTime(minValidBoundaries.end);
- OpTime lastWriteOpTime{replCoord->getMyLastOptime()};
+ OpTime lastWriteOpTime{replCoord->getMyLastAppliedOpTime()};
while (!inShutdown()) {
OpQueue ops;
@@ -747,6 +743,10 @@ void SyncTail::oplogApplication() {
if (replCoord->isWaitingForApplierToDrain()) {
replCoord->signalDrainComplete(&txn);
}
+
+ // Reset when triggered in case it was from a rollback, safe to do at any time.
+ lastWriteOpTime = replCoord->getMyLastAppliedOpTime();
+
continue; // This wasn't a real op. Don't try to apply it.
}
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index fa4edcc9bd9..11b61cb4490 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -233,6 +233,7 @@ public:
const ReplSetHeartbeatArgs& args,
const std::string& ourSetName,
const OpTime& lastOpApplied,
+ const OpTime& lastOpDurable,
ReplSetHeartbeatResponse* response) = 0;
// produce a reply to a V1 heartbeat
@@ -240,6 +241,7 @@ public:
const ReplSetHeartbeatArgsV1& args,
const std::string& ourSetName,
const OpTime& lastOpApplied,
+ const OpTime& lastOpDurable,
ReplSetHeartbeatResponse* response) = 0;
// produce a reply to a status request
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index 0c4169fe4b9..32d857be5c8 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -204,7 +204,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now,
// Find primary's oplog time. Reject sync candidates that are more than
// _options.maxSyncSourceLagSecs seconds behind.
if (_currentPrimaryIndex != -1) {
- OpTime primaryOpTime = _hbdata[_currentPrimaryIndex].getOpTime();
+ OpTime primaryOpTime = _hbdata[_currentPrimaryIndex].getAppliedOpTime();
// Check if primaryOpTime is still close to 0 because we haven't received
// our first heartbeat from a new primary yet.
@@ -257,7 +257,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now,
continue;
}
// Candidates cannot be excessively behind.
- if (it->getOpTime() < oldestSyncOpTime) {
+ if (it->getAppliedOpTime() < oldestSyncOpTime) {
continue;
}
// Candidate must not have a configured delay larger than ours.
@@ -272,7 +272,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now,
}
}
// only consider candidates that are ahead of where we are
- if (it->getOpTime().getTimestamp() <= lastTimestampApplied) {
+ if (it->getAppliedOpTime().getTimestamp() <= lastTimestampApplied) {
continue;
}
// Candidate cannot be more latent than anything we've already considered.
@@ -421,10 +421,10 @@ void TopologyCoordinatorImpl::prepareSyncFromResponse(const ReplicationExecutor:
str::stream() << "I cannot reach the requested member: " << target.toString());
return;
}
- if (hbdata.getOpTime().getSecs() + 10 < lastOpApplied.getSecs()) {
+ if (hbdata.getAppliedOpTime().getSecs() + 10 < lastOpApplied.getSecs()) {
warning() << "attempting to sync from " << target << ", but its latest opTime is "
- << hbdata.getOpTime().getSecs() << " and ours is " << lastOpApplied.getSecs()
- << " so this may not work";
+ << hbdata.getAppliedOpTime().getSecs() << " and ours is "
+ << lastOpApplied.getSecs() << " so this may not work";
response->append("warning",
str::stream() << "requested member \"" << target.toString()
<< "\" is more than 10 seconds behind us");
@@ -518,7 +518,7 @@ bool TopologyCoordinatorImpl::_shouldVetoMember(
return true;
}
- if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getOpTime()) {
+ if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getAppliedOpTime()) {
// hbinfo is not updated for ourself, so if we are primary we have to check the
// primary's last optime separately
*errmsg = str::stream() << "I am already primary, "
@@ -528,7 +528,8 @@ bool TopologyCoordinatorImpl::_shouldVetoMember(
}
if (_currentPrimaryIndex != -1 && (hopefulIndex != _currentPrimaryIndex) &&
- (_hbdata[_currentPrimaryIndex].getOpTime() >= _hbdata[hopefulIndex].getOpTime())) {
+ (_hbdata[_currentPrimaryIndex].getAppliedOpTime() >=
+ _hbdata[hopefulIndex].getAppliedOpTime())) {
// other members might be aware of more up-to-date nodes
*errmsg =
str::stream() << _rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString()
@@ -646,6 +647,7 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now,
const ReplSetHeartbeatArgs& args,
const std::string& ourSetName,
const OpTime& lastOpApplied,
+ const OpTime& lastOpDurable,
ReplSetHeartbeatResponse* response) {
if (args.getProtocolVersion() != 1) {
return Status(ErrorCodes::BadValue,
@@ -694,7 +696,8 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now,
// Heartbeat status message
response->setHbMsg(_getHbmsg(now));
response->setTime(duration_cast<Seconds>(now - Date_t{}));
- response->setOpTime(lastOpApplied);
+ response->setAppliedOpTime(lastOpApplied);
+ response->setDurableOpTime(lastOpDurable);
if (!_syncSource.empty()) {
response->setSyncingTo(_syncSource);
@@ -737,6 +740,7 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now,
const ReplSetHeartbeatArgsV1& args,
const std::string& ourSetName,
const OpTime& lastOpApplied,
+ const OpTime& lastOpDurable,
ReplSetHeartbeatResponse* response) {
// Verify that replica set names match
const std::string rshb = args.getSetName();
@@ -770,7 +774,8 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now,
response->setElectionTime(_electionTime);
}
- response->setOpTime(lastOpApplied);
+ response->setAppliedOpTime(lastOpApplied);
+ response->setDurableOpTime(lastOpDurable);
if (_currentPrimaryIndex != -1) {
response->setPrimaryId(_rsConfig.getMemberAt(_currentPrimaryIndex).getId());
@@ -1148,7 +1153,7 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData(
const MemberConfig& highestPriorityMember = _rsConfig.getMemberAt(highestPriorityIndex);
const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex
? lastOpApplied
- : _hbdata[highestPriorityIndex].getOpTime();
+ : _hbdata[highestPriorityIndex].getAppliedOpTime();
if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) &&
_isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime, lastOpApplied)) {
@@ -1378,7 +1383,7 @@ OpTime TopologyCoordinatorImpl::_latestKnownOpTime(const OpTime& ourLastOpApplie
continue;
}
- OpTime optime = it->getOpTime();
+ OpTime optime = it->getAppliedOpTime();
if (optime > latest) {
latest = optime;
@@ -1467,7 +1472,7 @@ void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) {
ReplSetHeartbeatResponse hbResponse;
hbResponse.setState(MemberState::RS_PRIMARY);
hbResponse.setElectionTime(Timestamp());
- hbResponse.setOpTime(_hbdata[primaryIndex].getOpTime());
+ hbResponse.setAppliedOpTime(_hbdata[primaryIndex].getAppliedOpTime());
hbResponse.setSyncingTo(HostAndPort());
hbResponse.setHbMsg("");
_hbdata[primaryIndex].setUpValues(_hbdata[primaryIndex].getLastHeartbeat(),
@@ -1598,15 +1603,16 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplicationExecutor::C
if (!itConfig.isArbiter()) {
if (_rsConfig.getProtocolVersion() == 1) {
BSONObjBuilder opTime(bb.subobjStart("optime"));
- opTime.append("ts", it->getOpTime().getTimestamp());
- opTime.append("t", it->getOpTime().getTerm());
+ opTime.append("ts", it->getAppliedOpTime().getTimestamp());
+ opTime.append("t", it->getAppliedOpTime().getTerm());
opTime.done();
} else {
- bb.append("optime", it->getOpTime().getTimestamp());
+ bb.append("optime", it->getAppliedOpTime().getTimestamp());
}
- bb.appendDate("optimeDate",
- Date_t::fromDurationSinceEpoch(Seconds(it->getOpTime().getSecs())));
+ bb.appendDate(
+ "optimeDate",
+ Date_t::fromDurationSinceEpoch(Seconds(it->getAppliedOpTime().getSecs())));
}
bb.appendDate("lastHeartbeat", it->getLastHeartbeat());
bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv());
@@ -1914,7 +1920,7 @@ TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnel
result |= NotSecondary;
}
if (_rsConfig.getProtocolVersion() == 0 &&
- !_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) {
+ !_isOpTimeCloseEnoughToLatestToElect(hbData.getAppliedOpTime(), lastOpApplied)) {
result |= NotCloseEnoughToLatestOptime;
}
if (hbData.up() && hbData.isUnelectable()) {
@@ -2175,7 +2181,7 @@ bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, const OpTime& l
continue;
}
UnelectableReasonMask reason = _getUnelectableReason(i, lastOpApplied);
- if (!reason && _hbdata[i].getOpTime() >= lastOpApplied) {
+ if (!reason && _hbdata[i].getAppliedOpTime() >= lastOpApplied) {
canStepDown = true;
}
}
@@ -2309,7 +2315,7 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentS
invariant(currentSourceIndex != _selfIndex);
OpTime currentSourceOpTime =
- std::max(syncSourceLastOpTime, _hbdata[currentSourceIndex].getOpTime());
+ std::max(syncSourceLastOpTime, _hbdata[currentSourceIndex].getAppliedOpTime());
if (currentSourceOpTime.isNull()) {
// Haven't received a heartbeat from the sync source yet, so can't tell if we should
@@ -2333,12 +2339,12 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentS
if (it->up() && (candidateConfig.isVoter() || !_selfConfig().isVoter()) &&
(candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) &&
it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) &&
- goalSecs < it->getOpTime().getSecs()) {
+ goalSecs < it->getAppliedOpTime().getSecs()) {
log() << "re-evaluating sync source because our current sync source's most recent "
<< "OpTime is " << currentSourceOpTime.toString() << " which is more than "
<< _options.maxSyncSourceLagSecs << " behind member "
<< candidateConfig.getHostAndPort().toString() << " whose most recent OpTime is "
- << it->getOpTime().toString();
+ << it->getAppliedOpTime().toString();
invariant(itIndex != _selfIndex);
return true;
}
diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h
index 019f6fbaf1d..9c8fad87bfe 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.h
+++ b/src/mongo/db/repl/topology_coordinator_impl.h
@@ -183,11 +183,13 @@ public:
const ReplSetHeartbeatArgs& args,
const std::string& ourSetName,
const OpTime& lastOpApplied,
+ const OpTime& lastOpDurable,
ReplSetHeartbeatResponse* response);
virtual Status prepareHeartbeatResponseV1(Date_t now,
const ReplSetHeartbeatArgsV1& args,
const std::string& ourSetName,
const OpTime& lastOpApplied,
+ const OpTime& lastOpDurable,
ReplSetHeartbeatResponse* response);
virtual void prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data,
Date_t now,
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index 294c591bbfe..5a73906d164 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -207,7 +207,8 @@ private:
ReplSetHeartbeatResponse hb;
hb.setConfigVersion(1);
hb.setState(memberState);
- hb.setOpTime(lastOpTimeSender);
+ hb.setDurableOpTime(lastOpTimeSender);
+ hb.setAppliedOpTime(lastOpTimeSender);
hb.setElectionTime(electionTime);
StatusWith<ReplSetHeartbeatResponse> hbResponse = responseStatus.isOK()
@@ -1327,7 +1328,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
hb.setState(MemberState::RS_SECONDARY);
hb.setElectionTime(electionTime);
hb.setHbMsg("READY");
- hb.setOpTime(oplogProgress);
+ hb.setAppliedOpTime(oplogProgress);
+ hb.setDurableOpTime(oplogProgress);
StatusWith<ReplSetHeartbeatResponse> hbResponseGood = StatusWith<ReplSetHeartbeatResponse>(hb);
updateConfig(
@@ -2960,7 +2962,8 @@ TEST_F(
hbArgs.setSenderId(1);
hbArgs.setSenderHost(HostAndPort("host3", 27017));
ReplSetHeartbeatResponse hbResp;
- ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", election, &hbResp));
+ ASSERT_OK(
+ getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", election, election, &hbResp));
ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toString();
}
@@ -4245,8 +4248,8 @@ public:
OpTime lastOpApplied,
ReplSetHeartbeatResponse* response,
Status* result) {
- *result =
- getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", lastOpApplied, response);
+ *result = getTopoCoord().prepareHeartbeatResponse(
+ now()++, args, "rs0", lastOpApplied, lastOpApplied, response);
}
};
@@ -4319,7 +4322,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_FALSE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4343,7 +4346,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_FALSE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4368,7 +4371,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_FALSE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4393,7 +4396,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_FALSE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4417,7 +4420,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_FALSE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4444,7 +4447,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_TRUE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4460,13 +4463,13 @@ TEST_F(TopoCoordTest, SetConfigVersionToNegativeTwoInHeartbeatResponseWhenNoConf
args.setSenderId(20);
ReplSetHeartbeatResponse response;
// prepare response and check the results
- Status result =
- getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", OpTime(), &response);
+ Status result = getTopoCoord().prepareHeartbeatResponse(
+ now()++, args, "rs0", OpTime(), OpTime(), &response);
ASSERT_OK(result);
ASSERT_FALSE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
@@ -4493,7 +4496,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_TRUE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getDurableOpTime());
ASSERT_EQUALS(Timestamp(10, 0), response.getElectionTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
ASSERT_EQUALS("", response.getHbMsg());
@@ -4527,7 +4530,7 @@ TEST_F(PrepareHeartbeatResponseTest,
ASSERT_TRUE(response.isElectable());
ASSERT_TRUE(response.isReplSet());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getDurableOpTime());
ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime()));
// changed to a syncing message because our sync source changed recently
ASSERT_EQUALS("syncing from: h2:27017", response.getHbMsg());
@@ -4854,7 +4857,7 @@ TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndRepsonse) {
ReplSetHeartbeatResponse hb;
hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0);
- hb.setOpTime(lastOpTimeApplied);
+ hb.setDurableOpTime(lastOpTimeApplied);
hb.setElectionTime(election.getTimestamp());
StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
@@ -4903,7 +4906,7 @@ TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndRepso
ReplSetHeartbeatResponse hb;
hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0);
- hb.setOpTime(lastOpTimeApplied);
+ hb.setDurableOpTime(lastOpTimeApplied);
hb.setElectionTime(election.getTimestamp());
StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
index db549689d69..c8e533b1f86 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
@@ -209,7 +209,8 @@ private:
ReplSetHeartbeatResponse hb;
hb.setConfigVersion(1);
hb.setState(memberState);
- hb.setOpTime(lastOpTimeSender);
+ hb.setDurableOpTime(lastOpTimeSender);
+ hb.setAppliedOpTime(lastOpTimeSender);
hb.setElectionTime(electionTime);
hb.setTerm(getTopoCoord().getTerm());
@@ -1320,7 +1321,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
hb.setState(MemberState::RS_SECONDARY);
hb.setElectionTime(electionTime);
hb.setHbMsg("READY");
- hb.setOpTime(oplogProgress);
+ hb.setDurableOpTime(oplogProgress);
+ hb.setAppliedOpTime(oplogProgress);
StatusWith<ReplSetHeartbeatResponse> hbResponseGood = StatusWith<ReplSetHeartbeatResponse>(hb);
updateConfig(
@@ -1553,7 +1555,7 @@ public:
ReplSetHeartbeatResponse* response,
Status* result) {
*result = getTopoCoord().prepareHeartbeatResponseV1(
- now()++, args, "rs0", lastOpApplied, response);
+ now()++, args, "rs0", lastOpApplied, lastOpApplied, response);
}
};
@@ -1625,13 +1627,13 @@ TEST_F(TopoCoordTest, SetConfigVersionToNegativeTwoInHeartbeatResponseWhenNoConf
args.setSenderId(20);
ReplSetHeartbeatResponse response;
// prepare response and check the results
- Status result =
- getTopoCoord().prepareHeartbeatResponseV1(now()++, args, "rs0", OpTime(), &response);
+ Status result = getTopoCoord().prepareHeartbeatResponseV1(
+ now()++, args, "rs0", OpTime(), OpTime(), &response);
ASSERT_OK(result);
// this change to true because we can now see a majority, unlike in the previous cases
ASSERT_EQUALS("rs0", response.getReplicaSetName());
ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
// default term of topology coordinator is -1
ASSERT_EQUALS(-1, response.getTerm());
ASSERT_EQUALS(-2, response.getConfigVersion());
@@ -1651,7 +1653,7 @@ TEST_F(PrepareHeartbeatResponseV1Test,
ASSERT_OK(result);
ASSERT_EQUALS("rs0", response.getReplicaSetName());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, response.getTerm());
ASSERT_EQUALS(1, response.getConfigVersion());
}
@@ -1671,7 +1673,7 @@ TEST_F(PrepareHeartbeatResponseV1Test,
ASSERT_OK(result);
ASSERT_EQUALS("rs0", response.getReplicaSetName());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, response.getTerm());
ASSERT_EQUALS(1, response.getConfigVersion());
}
@@ -1692,7 +1694,7 @@ TEST_F(PrepareHeartbeatResponseV1Test,
ASSERT_TRUE(response.hasConfig());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, response.getTerm());
ASSERT_EQUALS(1, response.getConfigVersion());
}
@@ -1713,7 +1715,7 @@ TEST_F(PrepareHeartbeatResponseV1Test,
ASSERT_FALSE(response.hasConfig());
ASSERT_EQUALS("rs0", response.getReplicaSetName());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
- ASSERT_EQUALS(OpTime(), response.getOpTime());
+ ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
ASSERT_EQUALS(0, response.getTerm());
ASSERT_EQUALS(1, response.getConfigVersion());
}
@@ -1736,7 +1738,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, SetStatePrimaryInHeartbeatResponseWhenPri
ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s);
ASSERT_TRUE(response.hasElectionTime());
ASSERT_EQUALS(getTopoCoord().getElectionTime(), response.getElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getDurableOpTime());
ASSERT_EQUALS(0, response.getTerm());
ASSERT_EQUALS(1, response.getConfigVersion());
}
@@ -1767,7 +1769,7 @@ TEST_F(PrepareHeartbeatResponseV1Test,
ASSERT_EQUALS("rs0", response.getReplicaSetName());
ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s);
ASSERT_FALSE(response.hasElectionTime());
- ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime());
+ ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getDurableOpTime());
ASSERT_EQUALS(0, response.getTerm());
ASSERT_EQUALS(1, response.getConfigVersion());
ASSERT_EQUALS(HostAndPort("h2"), response.getSyncingTo());
@@ -3133,7 +3135,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigNodeRemovedBetweenHeartbeatRequestAndRep
ReplSetHeartbeatResponse hb;
hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0);
- hb.setOpTime(lastOpTimeApplied);
+ hb.setDurableOpTime(lastOpTimeApplied);
hb.setElectionTime(election.getTimestamp());
StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
@@ -3182,7 +3184,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigBetweenHeartbeatRequestAndRepsonse) {
ReplSetHeartbeatResponse hb;
hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0);
- hb.setOpTime(lastOpTimeApplied);
+ hb.setDurableOpTime(lastOpTimeApplied);
hb.setElectionTime(election.getTimestamp());
StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb);
HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse(
diff --git a/src/mongo/db/repl/update_position_args.cpp b/src/mongo/db/repl/update_position_args.cpp
index 6ccddfa96aa..c4e2688fdd1 100644
--- a/src/mongo/db/repl/update_position_args.cpp
+++ b/src/mongo/db/repl/update_position_args.cpp
@@ -1,5 +1,5 @@
/**
- * Copyright 2014 MongoDB Inc.
+ * Copyright 2016 MongoDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
@@ -39,11 +39,11 @@ namespace mongo {
namespace repl {
-UpdatePositionArgs::UpdateInfo::UpdateInfo(const OID& anRid,
- const OpTime& aTs,
+UpdatePositionArgs::UpdateInfo::UpdateInfo(const OpTime& applied,
+ const OpTime& durable,
long long aCfgver,
long long aMemberId)
- : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {}
+ : appliedOpTime(applied), durableOpTime(durable), cfgver(aCfgver), memberId(aMemberId) {}
namespace {
@@ -54,32 +54,21 @@ const std::string kLegalUpdatePositionFieldNames[] = {
kCommandFieldName, kUpdateArrayFieldName,
};
-const std::string kMemberRIDFieldName = "_id";
-const std::string kMemberConfigFieldName = "config";
-const std::string kOpTimeFieldName = "optime";
+const std::string kAppliedOpTimeFieldName = "appliedOpTime";
+const std::string kDurableOpTimeFieldName = "durableOpTime";
const std::string kMemberIdFieldName = "memberId";
const std::string kConfigVersionFieldName = "cfgver";
const std::string kLegalUpdateInfoFieldNames[] = {
- kMemberConfigFieldName,
- kMemberRIDFieldName,
- kOpTimeFieldName,
- kMemberIdFieldName,
- kConfigVersionFieldName,
+ kAppliedOpTimeFieldName, kDurableOpTimeFieldName, kMemberIdFieldName, kConfigVersionFieldName,
};
} // namespace
Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
- Status status =
- bsonCheckOnlyHasFields("UpdatePositionArgs", argsObj, kLegalUpdatePositionFieldNames);
-
- if (!status.isOK())
- return status;
-
// grab the array of changes
BSONElement updateArray;
- status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
+ Status status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray);
if (!status.isOK())
return status;
@@ -87,23 +76,14 @@ Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
BSONObjIterator i(updateArray.Obj());
while (i.more()) {
BSONObj entry = i.next().Obj();
- status = bsonCheckOnlyHasFields("UpdateInfoArgs", entry, kLegalUpdateInfoFieldNames);
+
+ OpTime appliedOpTime;
+ status = bsonExtractOpTimeField(entry, kAppliedOpTimeFieldName, &appliedOpTime);
if (!status.isOK())
return status;
- OpTime opTime;
- if (entry[kOpTimeFieldName].isABSONObj()) {
- // In protocol version 1, { ts: <timestamp>, t: term }
- Status status = bsonExtractOpTimeField(entry, kOpTimeFieldName, &opTime);
- if (!status.isOK())
- return status;
- } else {
- Timestamp ts;
- status = bsonExtractTimestampField(entry, kOpTimeFieldName, &ts);
- if (!status.isOK())
- return status;
- opTime = OpTime(ts, OpTime::kUninitializedTerm);
- }
+ OpTime durableOpTime;
+ status = bsonExtractOpTimeField(entry, kDurableOpTimeFieldName, &durableOpTime);
if (!status.isOK())
return status;
@@ -114,17 +94,12 @@ Status UpdatePositionArgs::initialize(const BSONObj& argsObj) {
if (!status.isOK())
return status;
- OID rid;
- status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid);
- if (!status.isOK())
- return status;
-
long long memberID;
status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID);
if (!status.isOK())
return status;
- _updates.push_back(UpdateInfo(rid, opTime, cfgver, memberID));
+ _updates.push_back(UpdateInfo(appliedOpTime, durableOpTime, cfgver, memberID));
}
return Status::OK();
@@ -140,10 +115,11 @@ BSONObj UpdatePositionArgs::toBSON() const {
BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName));
for (UpdatePositionArgs::UpdateIterator update = updatesBegin(); update != updatesEnd();
++update) {
- updateArray.append(BSON(kMemberRIDFieldName << update->rid << kOpTimeFieldName
- << update->ts.getTimestamp()
- << kConfigVersionFieldName << update->cfgver
- << kMemberIdFieldName << update->memberId));
+ BSONObjBuilder updateEntry(updateArray.subobjStart());
+ updateEntry.append(kConfigVersionFieldName, update->cfgver);
+ updateEntry.append(kMemberIdFieldName, update->memberId);
+ update->durableOpTime.append(&updateEntry, kDurableOpTimeFieldName);
+ update->appliedOpTime.append(&updateEntry, kAppliedOpTimeFieldName);
}
updateArray.doneFast();
}
diff --git a/src/mongo/db/repl/update_position_args.h b/src/mongo/db/repl/update_position_args.h
index ecaf9ec5d4e..823a775588e 100644
--- a/src/mongo/db/repl/update_position_args.h
+++ b/src/mongo/db/repl/update_position_args.h
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2014 MongoDB Inc.
+ * Copyright (C) 2016 MongoDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
@@ -45,10 +45,13 @@ namespace repl {
class UpdatePositionArgs {
public:
struct UpdateInfo {
- UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId);
+ UpdateInfo(const OpTime& applied,
+ const OpTime& durable,
+ long long aCfgver,
+ long long aMemberId);
- OID rid;
- OpTime ts;
+ OpTime appliedOpTime;
+ OpTime durableOpTime;
long long cfgver;
long long memberId;
};
diff --git a/src/mongo/db/s/migration_impl.cpp b/src/mongo/db/s/migration_impl.cpp
index d33fc302d9b..e761f27a78d 100644
--- a/src/mongo/db/s/migration_impl.cpp
+++ b/src/mongo/db/s/migration_impl.cpp
@@ -64,7 +64,7 @@ Tee* const migrateLog = RamLog::get("migrate");
const int kDefaultWriteTimeoutForMigrationMs = 60 * 1000;
const WriteConcernOptions DefaultWriteConcernForMigration(2,
- WriteConcernOptions::NONE,
+ WriteConcernOptions::SyncMode::NONE,
kDefaultWriteTimeoutForMigrationMs);
WriteConcernOptions getDefaultWriteConcernForMigration() {
@@ -77,7 +77,7 @@ WriteConcernOptions getDefaultWriteConcernForMigration() {
}
}
- return WriteConcernOptions(1, WriteConcernOptions::NONE, 0);
+ return WriteConcernOptions(1, WriteConcernOptions::SyncMode::NONE, 0);
}
BSONObj createRecvChunkCommitRequest(const MigrationSessionId& sessionId) {
diff --git a/src/mongo/db/s/sharding_state_recovery.cpp b/src/mongo/db/s/sharding_state_recovery.cpp
index ec95bb5f956..804f4b21ca2 100644
--- a/src/mongo/db/s/sharding_state_recovery.cpp
+++ b/src/mongo/db/s/sharding_state_recovery.cpp
@@ -64,7 +64,7 @@ const char kMinOpTimeUpdaters[] = "minOpTimeUpdaters";
const Seconds kWriteTimeout(15);
const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
- WriteConcernOptions::NONE,
+ WriteConcernOptions::SyncMode::UNSET,
kWriteTimeout);
MONGO_EXPORT_STARTUP_SERVER_PARAMETER(recoverShardingState, bool, true);
diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp
index 6f447d8b666..3c9086ca39a 100644
--- a/src/mongo/db/write_concern.cpp
+++ b/src/mongo/db/write_concern.cpp
@@ -26,6 +26,8 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kWrite
+
#include "mongo/platform/basic.h"
#include "mongo/db/write_concern.h"
@@ -43,6 +45,7 @@
#include "mongo/db/storage/storage_engine.h"
#include "mongo/db/write_concern_options.h"
#include "mongo/rpc/protocol.h"
+#include "mongo/util/log.h"
namespace mongo {
@@ -59,23 +62,13 @@ static ServerStatusMetricField<Counter64> gleWtimeoutsDisplay("getLastError.wtim
void setupSynchronousCommit(OperationContext* txn) {
const WriteConcernOptions& writeConcern = txn->getWriteConcern();
- if (writeConcern.syncMode == WriteConcernOptions::JOURNAL ||
- writeConcern.syncMode == WriteConcernOptions::FSYNC) {
+ if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL ||
+ writeConcern.syncMode == WriteConcernOptions::SyncMode::FSYNC) {
txn->recoveryUnit()->goingToWaitUntilDurable();
}
}
namespace {
-// The consensus protocol requires that w: majority implies j: true on all nodes.
-void addJournalSyncForWMajority(WriteConcernOptions* writeConcern) {
- if (repl::getGlobalReplicationCoordinator()->isV1ElectionProtocol() &&
- writeConcern->wMode == WriteConcernOptions::kMajority &&
- writeConcern->syncMode == WriteConcernOptions::NONE &&
- getGlobalServiceContext()->getGlobalStorageEngine()->isDurable()) {
- writeConcern->syncMode = WriteConcernOptions::JOURNAL;
- }
-}
-
const std::string kLocalDB = "local";
} // namespace
@@ -89,8 +82,6 @@ StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn,
if (writeConcern.wNumNodes == 0 && writeConcern.wMode.empty()) {
writeConcern.wNumNodes = 1;
}
- // Upgrade default write concern if necessary.
- addJournalSyncForWMajority(&writeConcern);
BSONElement writeConcernElement;
Status wcStatus = bsonExtractTypedField(cmdObj, "writeConcern", Object, &writeConcernElement);
@@ -118,17 +109,15 @@ StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn,
return wcStatus;
}
- // Upgrade parsed write concern if necessary.
- addJournalSyncForWMajority(&writeConcern);
-
return writeConcern;
}
+
Status validateWriteConcern(OperationContext* txn,
const WriteConcernOptions& writeConcern,
const std::string& dbName) {
const bool isJournalEnabled = getGlobalServiceContext()->getGlobalStorageEngine()->isDurable();
- if (writeConcern.syncMode == WriteConcernOptions::JOURNAL && !isJournalEnabled) {
+ if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL && !isJournalEnabled) {
return Status(ErrorCodes::BadValue,
"cannot use 'j' option when a host does not have journaling enabled");
}
@@ -220,7 +209,7 @@ void WriteConcernResult::appendTo(const WriteConcernOptions& writeConcern,
// GLE, but with journaling we don't actually need to run the fsync (fsync command is
// preferred in 2.6). So we add a "waited" field if one doesn't exist.
- if (writeConcern.syncMode == WriteConcernOptions::FSYNC) {
+ if (writeConcern.syncMode == WriteConcernOptions::SyncMode::FSYNC) {
if (fsyncFiles < 0 && (wTime < 0 || !wTimedOut)) {
dassert(result->asTempObj()["waited"].eoo());
result->appendNumber("waited", syncMillis);
@@ -248,11 +237,18 @@ Status waitForWriteConcern(OperationContext* txn,
// Next handle blocking on disk
Timer syncTimer;
+ auto replCoord = repl::getGlobalReplicationCoordinator();
+ WriteConcernOptions writeConcernWithPopulatedSyncMode =
+ replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern);
- switch (writeConcern.syncMode) {
- case WriteConcernOptions::NONE:
+
+ switch (writeConcernWithPopulatedSyncMode.syncMode) {
+ case WriteConcernOptions::SyncMode::UNSET:
+ severe() << "Attempting to wait on a WriteConcern with an unset sync option";
+ fassertFailed(34410);
+ case WriteConcernOptions::SyncMode::NONE:
break;
- case WriteConcernOptions::FSYNC: {
+ case WriteConcernOptions::SyncMode::FSYNC: {
StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
if (!storageEngine->isDurable()) {
result->fsyncFiles = storageEngine->flushAllFiles(true);
@@ -262,8 +258,16 @@ Status waitForWriteConcern(OperationContext* txn,
}
break;
}
- case WriteConcernOptions::JOURNAL:
- txn->recoveryUnit()->waitUntilDurable();
+ case WriteConcernOptions::SyncMode::JOURNAL:
+ if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::Mode::modeNone) {
+ // Wait for ops to become durable then update replication system's
+ // knowledge of this.
+ OpTime appliedOpTime = replCoord->getMyLastAppliedOpTime();
+ txn->recoveryUnit()->waitUntilDurable();
+ replCoord->setMyLastDurableOpTimeForward(appliedOpTime);
+ } else {
+ txn->recoveryUnit()->waitUntilDurable();
+ }
break;
}
@@ -277,7 +281,8 @@ Status waitForWriteConcern(OperationContext* txn,
}
// needed to avoid incrementing gleWtimeStats SERVER-9005
- if (writeConcern.wNumNodes <= 1 && writeConcern.wMode.empty()) {
+ if (writeConcernWithPopulatedSyncMode.wNumNodes <= 1 &&
+ writeConcernWithPopulatedSyncMode.wMode.empty()) {
// no desired replication check
return Status::OK();
}
@@ -285,14 +290,17 @@ Status waitForWriteConcern(OperationContext* txn,
// Now we wait for replication
// Note that replica set stepdowns and gle mode changes are thrown as errors
repl::ReplicationCoordinator::StatusAndDuration replStatus =
- repl::getGlobalReplicationCoordinator()->awaitReplication(txn, replOpTime, writeConcern);
+ repl::getGlobalReplicationCoordinator()->awaitReplication(
+ txn, replOpTime, writeConcernWithPopulatedSyncMode);
if (replStatus.status == ErrorCodes::WriteConcernFailed) {
gleWtimeouts.increment();
result->err = "timeout";
result->wTimedOut = true;
}
// Add stats
- result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(replOpTime);
+ result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(
+ replOpTime,
+ writeConcernWithPopulatedSyncMode.syncMode == WriteConcernOptions::SyncMode::JOURNAL);
gleWtimeStats.recordMillis(durationCount<Milliseconds>(replStatus.duration));
result->wTime = durationCount<Milliseconds>(replStatus.duration);
diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp
index b099d868a96..58af7b36a9d 100644
--- a/src/mongo/db/write_concern_options.cpp
+++ b/src/mongo/db/write_concern_options.cpp
@@ -72,6 +72,7 @@ WriteConcernOptions::WriteConcernOptions(const std::string& mode,
: syncMode(sync), wNumNodes(0), wMode(mode), wTimeout(durationCount<Milliseconds>(timeout)) {}
Status WriteConcernOptions::parse(const BSONObj& obj) {
+ reset();
if (obj.isEmpty()) {
return Status(ErrorCodes::FailedToParse, "write concern object cannot be empty");
}
@@ -94,10 +95,11 @@ Status WriteConcernOptions::parse(const BSONObj& obj) {
return Status(ErrorCodes::FailedToParse, "fsync and j options cannot be used together");
if (j) {
- syncMode = JOURNAL;
- }
- if (fsync) {
- syncMode = FSYNC;
+ syncMode = SyncMode::JOURNAL;
+ } else if (fsync) {
+ syncMode = SyncMode::FSYNC;
+ } else if (!jEl.eoo()) {
+ syncMode = SyncMode::NONE;
}
BSONElement e = obj["w"];
@@ -172,10 +174,12 @@ BSONObj WriteConcernOptions::toBSON() const {
builder.append("w", wMode);
}
- if (syncMode == FSYNC) {
+ if (syncMode == SyncMode::FSYNC) {
builder.append("fsync", true);
- } else if (syncMode == JOURNAL) {
+ } else if (syncMode == SyncMode::JOURNAL) {
builder.append("j", true);
+ } else if (syncMode == SyncMode::NONE) {
+ builder.append("j", false);
}
builder.append("wtimeout", wTimeout);
diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h
index 1bac963f16f..5acc54e5294 100644
--- a/src/mongo/db/write_concern_options.h
+++ b/src/mongo/db/write_concern_options.h
@@ -37,7 +37,7 @@ class Status;
struct WriteConcernOptions {
public:
- enum SyncMode { NONE, FSYNC, JOURNAL };
+ enum class SyncMode { UNSET, NONE, FSYNC, JOURNAL };
static const int kNoTimeout = 0;
static const int kNoWaiting = -1;
@@ -51,6 +51,9 @@ public:
WriteConcernOptions() {
reset();
+ // We set syncMode to NONE to avoid having an UNSET syncMode in default WriteConcernOptions
+ // since that can cause invariants to trigger.
+ syncMode = SyncMode::NONE;
}
WriteConcernOptions(int numNodes, SyncMode sync, int timeout);
@@ -94,7 +97,7 @@ public:
bool validForConfigServers() const;
void reset() {
- syncMode = NONE;
+ syncMode = SyncMode::UNSET;
wNumNodes = 0;
wMode = "";
wTimeout = 0;
diff --git a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
index 3313438448f..6355d87ad62 100644
--- a/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
+++ b/src/mongo/s/catalog/replset/catalog_manager_replica_set.cpp
@@ -95,10 +95,12 @@ const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{
const ReadPreferenceSetting kConfigPrimaryPreferredSelector(ReadPreference::PrimaryPreferred,
TagSet{});
const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
- // Note: Even though we're setting NONE here,
+ // Note: Even though we're setting UNSET here,
// kMajority implies JOURNAL if journaling is
- // supported by mongod.
- WriteConcernOptions::NONE,
+ // supported by mongod and
+ // writeConcernMajorityJournalDefault is set to true
+ // in the ReplicaSetConfig.
+ WriteConcernOptions::SyncMode::UNSET,
Seconds(15));
const int kMaxConfigVersionInitRetry = 3;
@@ -791,6 +793,7 @@ bool CatalogManagerReplicaSet::runUserManagementWriteCommand(OperationContext* t
// Make sure that if the command has a write concern that it is w:1 or w:majority, and
// convert w:1 or no write concern to w:majority before sending.
WriteConcernOptions writeConcern;
+ writeConcern.reset();
const char* writeConcernFieldName = "writeConcern";
BSONElement writeConcernElement = cmdObj[writeConcernFieldName];
bool initialCmdHadWriteConcern = !writeConcernElement.eoo();
diff --git a/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp b/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp
index 2ce6c844619..3664fdfcbc5 100644
--- a/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp
+++ b/src/mongo/s/catalog/replset/dist_lock_catalog_impl.cpp
@@ -65,10 +65,10 @@ const char kFindAndModifyResponseResultDocField[] = "value";
const char kLocalTimeField[] = "localTime";
const ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly, TagSet());
const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
- // Note: Even though we're setting NONE here,
+ // Note: Even though we're setting UNSET here,
// kMajority implies JOURNAL if journaling is
// supported by this mongod.
- WriteConcernOptions::NONE,
+ WriteConcernOptions::SyncMode::UNSET,
Seconds(15));
/**
diff --git a/src/mongo/s/catalog/type_settings.cpp b/src/mongo/s/catalog/type_settings.cpp
index c19809efa30..d26972b5990 100644
--- a/src/mongo/s/catalog/type_settings.cpp
+++ b/src/mongo/s/catalog/type_settings.cpp
@@ -192,7 +192,7 @@ std::unique_ptr<WriteConcernOptions> SettingsType::getWriteConcern() const {
dassert(_key == BalancerDocKey);
if (isSecondaryThrottleSet() && !getSecondaryThrottle()) {
- return stdx::make_unique<WriteConcernOptions>(1, WriteConcernOptions::NONE, 0);
+ return stdx::make_unique<WriteConcernOptions>(1, WriteConcernOptions::SyncMode::NONE, 0);
} else if (!isMigrationWriteConcernSet()) {
// Default setting.
return nullptr;