diff options
author | matt dannenberg <matt.dannenberg@10gen.com> | 2016-01-05 10:29:01 -0500 |
---|---|---|
committer | matt dannenberg <matt.dannenberg@10gen.com> | 2016-02-04 11:22:18 -0500 |
commit | 2c2e6a38f559f25559c2b24eff51511c6fbc4a5b (patch) | |
tree | 56ea15031ee21b8cb269d2ce6c7bb5b5235c97ed /src/mongo/db | |
parent | a818421d4f60b61ef81830af396deb1a3bb998de (diff) | |
download | mongo-2c2e6a38f559f25559c2b24eff51511c6fbc4a5b.tar.gz |
SERVER-22276 SERVER-22277 implement "j" flag in write concern apply to secondary as well as primary
Diffstat (limited to 'src/mongo/db')
62 files changed, 2341 insertions, 676 deletions
diff --git a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp index 87c83db0740..e3d36168a1f 100644 --- a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp +++ b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp @@ -58,7 +58,7 @@ namespace { const int kDefaultWTimeoutMs = 60 * 1000; const WriteConcernOptions DefaultWriteConcern(WriteConcernOptions::kMajority, - WriteConcernOptions::NONE, + WriteConcernOptions::SyncMode::UNSET, kDefaultWTimeoutMs); enum CleanupResult { CleanupResult_Done, CleanupResult_Continue, CleanupResult_Error }; diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp index c8d2a3e83b2..bc754e833c8 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.cpp +++ b/src/mongo/db/commands/write_commands/batch_executor.cpp @@ -293,7 +293,8 @@ void WriteBatchExecutor::executeBatch(const BatchedCommandRequest& request, const WriteConcernOptions& writeConcern = _txn->getWriteConcern(); bool silentWC = writeConcern.wMode.empty() && writeConcern.wNumNodes == 0 && - writeConcern.syncMode == WriteConcernOptions::NONE; + (writeConcern.syncMode == WriteConcernOptions::SyncMode::NONE || + writeConcern.syncMode == WriteConcernOptions::SyncMode::UNSET); Timer commandTimer; diff --git a/src/mongo/db/query/find_and_modify_request_test.cpp b/src/mongo/db/query/find_and_modify_request_test.cpp index 27490715e02..761b344a96f 100644 --- a/src/mongo/db/query/find_and_modify_request_test.cpp +++ b/src/mongo/db/query/find_and_modify_request_test.cpp @@ -154,7 +154,7 @@ TEST(FindAndModifyRequest, UpdateWithSort) { TEST(FindAndModifyRequest, UpdateWithWriteConcern) { const BSONObj query(BSON("x" << 1)); const BSONObj update(BSON("y" << 1)); - const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150); + const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150); auto request = 
FindAndModifyRequest::makeUpdate(NamespaceString("test.user"), query, update); request.setWriteConcern(writeConcern); @@ -174,7 +174,7 @@ TEST(FindAndModifyRequest, UpdateWithFullSpec) { const BSONObj update(BSON("y" << 1)); const BSONObj sort(BSON("z" << -1)); const BSONObj field(BSON("x" << 1 << "y" << 1)); - const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150); + const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150); auto request = FindAndModifyRequest::makeUpdate(NamespaceString("test.user"), query, update); request.setFieldProjection(field); @@ -246,7 +246,7 @@ TEST(FindAndModifyRequest, RemoveWithSort) { TEST(FindAndModifyRequest, RemoveWithWriteConcern) { const BSONObj query(BSON("x" << 1)); - const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150); + const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150); auto request = FindAndModifyRequest::makeRemove(NamespaceString("test.user"), query); request.setWriteConcern(writeConcern); @@ -265,7 +265,7 @@ TEST(FindAndModifyRequest, RemoveWithFullSpec) { const BSONObj query(BSON("x" << 1)); const BSONObj sort(BSON("z" << -1)); const BSONObj field(BSON("x" << 1 << "y" << 1)); - const WriteConcernOptions writeConcern(2, WriteConcernOptions::FSYNC, 150); + const WriteConcernOptions writeConcern(2, WriteConcernOptions::SyncMode::FSYNC, 150); auto request = FindAndModifyRequest::makeRemove(NamespaceString("test.user"), query); request.setFieldProjection(field); diff --git a/src/mongo/db/range_deleter.cpp b/src/mongo/db/range_deleter.cpp index 7bb98bbc01f..69528e16b73 100644 --- a/src/mongo/db/range_deleter.cpp +++ b/src/mongo/db/range_deleter.cpp @@ -259,7 +259,7 @@ const int kWTimeoutMillis = 60 * 60 * 1000; bool _waitForMajority(OperationContext* txn, std::string* errMsg) { const WriteConcernOptions writeConcern( - WriteConcernOptions::kMajority, WriteConcernOptions::NONE, kWTimeoutMillis); + 
WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, kWTimeoutMillis); repl::ReplicationCoordinator::StatusAndDuration replStatus = repl::getGlobalReplicationCoordinator()->awaitReplicationOfLastOpForClient(txn, diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 778192b8567..5d2d8f1fe06 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -446,6 +446,7 @@ env.Library('replica_set_messages', 'handshake_args.cpp', 'is_master_response.cpp', 'member_config.cpp', + 'old_update_position_args.cpp', 'read_concern_response.cpp', 'repl_set_declare_election_winner_args.cpp', 'repl_set_heartbeat_args.cpp', diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 671b36ca35f..439ae5a0ba8 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -242,7 +242,7 @@ void BackgroundSync::_producerThread() { } // We need to wait until initial sync has started. - if (_replCoord->getMyLastOptime().isNull()) { + if (_replCoord->getMyLastAppliedOpTime().isNull()) { sleepsecs(1); return; } @@ -410,11 +410,11 @@ void BackgroundSync::_produce(OperationContext* txn) { log() << "Starting rollback due to " << fetcherReturnStatus; // Wait till all buffered oplog entries have drained and been applied. 
- auto lastApplied = _replCoord->getMyLastOptime(); + auto lastApplied = _replCoord->getMyLastAppliedOpTime(); if (lastApplied != lastOpTimeFetched) { log() << "Waiting for all operations from " << lastApplied << " until " << lastOpTimeFetched << " to be applied before starting rollback."; - while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastOptime())) { + while (lastOpTimeFetched > (lastApplied = _replCoord->getMyLastAppliedOpTime())) { sleepmillis(10); if (isStopped() || inShutdown()) { return; @@ -735,7 +735,7 @@ void BackgroundSync::start(OperationContext* txn) { _stopped = false; // reset _last fields with current oplog data - _lastOpTimeFetched = _replCoord->getMyLastOptime(); + _lastOpTimeFetched = _replCoord->getMyLastAppliedOpTime(); _lastFetchedHash = lastFetchedHash; LOG(1) << "bgsync fetch queue set to: " << _lastOpTimeFetched << " " << _lastFetchedHash; diff --git a/src/mongo/db/repl/data_replicator.cpp b/src/mongo/db/repl/data_replicator.cpp index ef296dd486a..aed481df509 100644 --- a/src/mongo/db/repl/data_replicator.cpp +++ b/src/mongo/db/repl/data_replicator.cpp @@ -525,7 +525,7 @@ DataReplicator::DataReplicator(DataReplicatorOptions opts, ReplicationExecutor* uassert(ErrorCodes::BadValue, "invalid rollback function", _opts.rollbackFn); uassert(ErrorCodes::BadValue, "invalid replSetUpdatePosition command object creation function", - _opts.prepareReplSetUpdatePositionCommandFn); + _opts.prepareOldReplSetUpdatePositionCommandFn); uassert(ErrorCodes::BadValue, "invalid getMyLastOptime function", _opts.getMyLastOptime); uassert(ErrorCodes::BadValue, "invalid setMyLastOptime function", _opts.setMyLastOptime); uassert(ErrorCodes::BadValue, "invalid setFollowerMode function", _opts.setFollowerMode); @@ -664,9 +664,9 @@ TimestampStatus DataReplicator::flushAndPause() { return TimestampStatus(_lastTimestampApplied); } -void DataReplicator::_resetState_inlock(Timestamp lastAppliedOptime) { +void DataReplicator::_resetState_inlock(Timestamp 
lastAppliedOpTime) { invariant(!_anyActiveHandles_inlock()); - _lastTimestampApplied = _lastTimestampFetched = lastAppliedOptime; + _lastTimestampApplied = _lastTimestampFetched = lastAppliedOpTime; _oplogBuffer.clear(); } @@ -1020,7 +1020,7 @@ void DataReplicator::_doNextActions_Steady_inlock() { if (!_reporterPaused && (!_reporter || !_reporter->getStatus().isOK())) { // TODO get reporter in good shape _reporter.reset( - new Reporter(_exec, _opts.prepareReplSetUpdatePositionCommandFn, _syncSource)); + new Reporter(_exec, _opts.prepareOldReplSetUpdatePositionCommandFn, _syncSource)); } } diff --git a/src/mongo/db/repl/data_replicator.h b/src/mongo/db/repl/data_replicator.h index bd3fd86101e..992aeb71d58 100644 --- a/src/mongo/db/repl/data_replicator.h +++ b/src/mongo/db/repl/data_replicator.h @@ -127,7 +127,7 @@ struct DataReplicatorOptions { Applier::ApplyOperationFn applierFn; RollbackFn rollbackFn; - Reporter::PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn; + Reporter::PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn; GetMyLastOptimeFn getMyLastOptime; SetMyLastOptimeFn setMyLastOptime; SetFollowerModeFn setFollowerMode; @@ -205,7 +205,7 @@ public: // For testing only - void _resetState_inlock(Timestamp lastAppliedOptime); + void _resetState_inlock(Timestamp lastAppliedOpTime); void _setInitialSyncStorageInterface(CollectionCloner::StorageInterface* si); private: diff --git a/src/mongo/db/repl/data_replicator_test.cpp b/src/mongo/db/repl/data_replicator_test.cpp index adff8d96782..046e73956df 100644 --- a/src/mongo/db/repl/data_replicator_test.cpp +++ b/src/mongo/db/repl/data_replicator_test.cpp @@ -179,7 +179,7 @@ protected: return _rollbackFn(txn, lastOpTimeWritten, syncSource); }; - options.prepareReplSetUpdatePositionCommandFn = + options.prepareOldReplSetUpdatePositionCommandFn = []() -> StatusWith<BSONObj> { return BSON("replSetUpdatePosition" << 1); }; options.getMyLastOptime = [this]() { return 
_myLastOpTime; }; options.setMyLastOptime = [this](const OpTime& opTime) { _setMyLastOptime(opTime); }; diff --git a/src/mongo/db/repl/initial_sync.cpp b/src/mongo/db/repl/initial_sync.cpp index e0ca82a6ea0..09553e3d93e 100644 --- a/src/mongo/db/repl/initial_sync.cpp +++ b/src/mongo/db/repl/initial_sync.cpp @@ -112,7 +112,7 @@ void InitialSync::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTim const OpTime lastOpTime = multiApply(txn, ops); - replCoord->setMyLastOptime(lastOpTime); + replCoord->setMyLastAppliedOpTime(lastOpTime); setNewTimestamp(lastOpTime.getTimestamp()); if (inShutdown()) { @@ -122,8 +122,7 @@ void InitialSync::_applyOplogUntil(OperationContext* txn, const OpTime& endOpTim // if the last op applied was our end, return if (lastOpTime == endOpTime) { LOG(1) << "SyncTail applied " << entriesApplied << " entries (" << bytesApplied - << " bytes)" - << " and finished at opTime " << endOpTime; + << " bytes) and finished at opTime " << endOpTime; return; } } // end of while (true) diff --git a/src/mongo/db/repl/member_heartbeat_data.cpp b/src/mongo/db/repl/member_heartbeat_data.cpp index df6e5b0912d..c267a6ba8ed 100644 --- a/src/mongo/db/repl/member_heartbeat_data.cpp +++ b/src/mongo/db/repl/member_heartbeat_data.cpp @@ -42,7 +42,7 @@ namespace repl { MemberHeartbeatData::MemberHeartbeatData() : _health(-1), _authIssue(false) { _lastResponse.setState(MemberState::RS_UNKNOWN); _lastResponse.setElectionTime(Timestamp()); - _lastResponse.setOpTime(OpTime()); + _lastResponse.setAppliedOpTime(OpTime()); } void MemberHeartbeatData::setUpValues(Date_t now, @@ -60,10 +60,9 @@ void MemberHeartbeatData::setUpValues(Date_t now, if (!hbResponse.hasElectionTime()) { hbResponse.setElectionTime(_lastResponse.getElectionTime()); } - if (!hbResponse.hasOpTime()) { - hbResponse.setOpTime(_lastResponse.getOpTime()); + if (!hbResponse.hasAppliedOpTime()) { + hbResponse.setAppliedOpTime(_lastResponse.getAppliedOpTime()); } - // Log if the state changes if 
(_lastResponse.getState() != hbResponse.getState()) { log() << "Member " << host.toString() << " is now in state " @@ -82,7 +81,7 @@ void MemberHeartbeatData::setDownValues(Date_t now, const std::string& heartbeat _lastResponse = ReplSetHeartbeatResponse(); _lastResponse.setState(MemberState::RS_DOWN); _lastResponse.setElectionTime(Timestamp()); - _lastResponse.setOpTime(OpTime()); + _lastResponse.setAppliedOpTime(OpTime()); _lastResponse.setHbMsg(heartbeatMessage); _lastResponse.setSyncingTo(HostAndPort()); } @@ -96,7 +95,7 @@ void MemberHeartbeatData::setAuthIssue(Date_t now) { _lastResponse = ReplSetHeartbeatResponse(); _lastResponse.setState(MemberState::RS_UNKNOWN); _lastResponse.setElectionTime(Timestamp()); - _lastResponse.setOpTime(OpTime()); + _lastResponse.setAppliedOpTime(OpTime()); _lastResponse.setHbMsg(""); _lastResponse.setSyncingTo(HostAndPort()); } diff --git a/src/mongo/db/repl/member_heartbeat_data.h b/src/mongo/db/repl/member_heartbeat_data.h index d5b87a3767e..e64dcb4ef67 100644 --- a/src/mongo/db/repl/member_heartbeat_data.h +++ b/src/mongo/db/repl/member_heartbeat_data.h @@ -68,8 +68,8 @@ public: const HostAndPort& getSyncSource() const { return _lastResponse.getSyncingTo(); } - OpTime getOpTime() const { - return _lastResponse.getOpTime(); + OpTime getAppliedOpTime() const { + return _lastResponse.getAppliedOpTime(); } int getConfigVersion() const { return _lastResponse.getConfigVersion(); diff --git a/src/mongo/db/repl/minvalid.cpp b/src/mongo/db/repl/minvalid.cpp index 990d6224e50..90753cff0f4 100644 --- a/src/mongo/db/repl/minvalid.cpp +++ b/src/mongo/db/repl/minvalid.cpp @@ -39,6 +39,7 @@ #include "mongo/db/operation_context.h" #include "mongo/db/operation_context_impl.h" #include "mongo/db/repl/oplog.h" +#include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/util/assert_util.h" #include "mongo/util/log.h" @@ -62,7 +63,10 @@ void clearInitialSyncFlag(OperationContext* txn) { } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, 
"clearInitialSyncFlags", minvalidNS); + auto replCoord = repl::ReplicationCoordinator::get(txn); + OpTime time = replCoord->getMyLastAppliedOpTime(); txn->recoveryUnit()->waitUntilDurable(); + replCoord->setMyLastDurableOpTime(time); LOG(3) << "clearing initial sync flag"; } diff --git a/src/mongo/db/repl/old_update_position_args.cpp b/src/mongo/db/repl/old_update_position_args.cpp new file mode 100644 index 00000000000..1a01a1fa3e8 --- /dev/null +++ b/src/mongo/db/repl/old_update_position_args.cpp @@ -0,0 +1,154 @@ +/** + * Copyright 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/repl/old_update_position_args.h" + +#include "mongo/base/status.h" +#include "mongo/bson/util/bson_check.h" +#include "mongo/bson/util/bson_extract.h" +#include "mongo/db/jsobj.h" + +namespace mongo { +namespace repl { + + +OldUpdatePositionArgs::UpdateInfo::UpdateInfo(const OID& anRid, + const OpTime& aTs, + long long aCfgver, + long long aMemberId) + : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {} + +namespace { + +const std::string kCommandFieldName = "replSetUpdatePosition"; +const std::string kUpdateArrayFieldName = "optimes"; + +const std::string kLegalUpdatePositionFieldNames[] = { + kCommandFieldName, kUpdateArrayFieldName, +}; + +const std::string kMemberRIDFieldName = "_id"; +const std::string kMemberConfigFieldName = "config"; +const std::string kOpTimeFieldName = "optime"; +const std::string kMemberIdFieldName = "memberId"; +const std::string kConfigVersionFieldName = "cfgver"; + +const std::string kLegalUpdateInfoFieldNames[] = { + kMemberConfigFieldName, + kMemberRIDFieldName, + kOpTimeFieldName, + kMemberIdFieldName, + kConfigVersionFieldName, +}; + +} // namespace + +Status OldUpdatePositionArgs::initialize(const BSONObj& argsObj) { + Status status = + bsonCheckOnlyHasFields("OldUpdatePositionArgs", argsObj, kLegalUpdatePositionFieldNames); + + if (!status.isOK()) + return status; + + // grab the array of changes + BSONElement updateArray; + status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray); + if (!status.isOK()) + return status; + + // now parse each array entry into an update + BSONObjIterator i(updateArray.Obj()); + while (i.more()) { + BSONObj entry = i.next().Obj(); + status = bsonCheckOnlyHasFields("UpdateInfoArgs", entry, kLegalUpdateInfoFieldNames); + if (!status.isOK()) + return status; + + OpTime opTime; + if (entry[kOpTimeFieldName].isABSONObj()) { + // In protocol version 1, { ts: <timestamp>, t: term } + Status status = 
bsonExtractOpTimeField(entry, kOpTimeFieldName, &opTime); + if (!status.isOK()) + return status; + } else { + Timestamp ts; + status = bsonExtractTimestampField(entry, kOpTimeFieldName, &ts); + if (!status.isOK()) + return status; + opTime = OpTime(ts, OpTime::kUninitializedTerm); + } + if (!status.isOK()) + return status; + + // TODO(spencer): The following three fields are optional in 3.0, but should be made + // required or ignored in 3.0 + long long cfgver; + status = bsonExtractIntegerFieldWithDefault(entry, kConfigVersionFieldName, -1, &cfgver); + if (!status.isOK()) + return status; + + OID rid; + status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid); + if (!status.isOK()) + return status; + + long long memberID; + status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID); + if (!status.isOK()) + return status; + + _updates.push_back(UpdateInfo(rid, opTime, cfgver, memberID)); + } + + return Status::OK(); +} + +BSONObj OldUpdatePositionArgs::toBSON() const { + BSONObjBuilder builder; + // add command name + builder.append(kCommandFieldName, 1); + + // build array of updates + if (!_updates.empty()) { + BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName)); + for (OldUpdatePositionArgs::UpdateIterator update = updatesBegin(); update != updatesEnd(); + ++update) { + updateArray.append(BSON(kMemberRIDFieldName << update->rid << kOpTimeFieldName + << update->ts.getTimestamp() + << kConfigVersionFieldName << update->cfgver + << kMemberIdFieldName << update->memberId)); + } + updateArray.doneFast(); + } + return builder.obj(); +} + +} // namespace repl +} // namespace mongo diff --git a/src/mongo/db/repl/old_update_position_args.h b/src/mongo/db/repl/old_update_position_args.h new file mode 100644 index 00000000000..fa9d1a3ef90 --- /dev/null +++ b/src/mongo/db/repl/old_update_position_args.h @@ -0,0 +1,88 @@ +/** + * Copyright (C) 2014 MongoDB Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <vector> + +#include "mongo/db/jsobj.h" +#include "mongo/db/repl/optime.h" + +namespace mongo { + +class Status; + +namespace repl { + +/** + * Arguments to the replSetUpdatePosition command. 
+ */ +class OldUpdatePositionArgs { +public: + struct UpdateInfo { + UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId); + + OID rid; + OpTime ts; + long long cfgver; + long long memberId; + }; + + typedef std::vector<UpdateInfo>::const_iterator UpdateIterator; + + /** + * Initializes this OldUpdatePositionArgs from the contents of "argsObj". + */ + Status initialize(const BSONObj& argsObj); + + /** + * Gets a begin iterator over the UpdateInfos stored in this OldUpdatePositionArgs. + */ + UpdateIterator updatesBegin() const { + return _updates.begin(); + } + + /** + * Gets an end iterator over the UpdateInfos stored in this OldUpdatePositionArgs. + */ + UpdateIterator updatesEnd() const { + return _updates.end(); + } + + /** + * Returns a BSONified version of the object. + * _updates is only included if it is not empty. + */ + BSONObj toBSON() const; + +private: + std::vector<UpdateInfo> _updates; +}; + +} // namespace repl +} // namespace mongo diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 4c6df13e9ea..3d2e0fae885 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -219,7 +219,7 @@ public: : _newOpTime(newOpTime), _replCoord(replCoord) {} virtual void commit() { - _replCoord->setMyLastOptimeForward(_newOpTime); + _replCoord->setMyLastAppliedOpTimeForward(_newOpTime); } virtual void rollback() {} @@ -465,7 +465,7 @@ OpTime writeOpsToOplog(OperationContext* txn, const std::vector<BSONObj>& ops) { OpTime lastOptime; MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { - lastOptime = replCoord->getMyLastOptime(); + lastOptime = replCoord->getMyLastAppliedOpTime(); invariant(!ops.empty()); ScopedTransaction transaction(txn, MODE_IX); Lock::DBLock lk(txn->lockState(), "local", MODE_X); diff --git a/src/mongo/db/repl/repl_client_info.cpp b/src/mongo/db/repl/repl_client_info.cpp index b339c10b4f0..678938f2755 100644 --- a/src/mongo/db/repl/repl_client_info.cpp +++ 
b/src/mongo/db/repl/repl_client_info.cpp @@ -46,7 +46,7 @@ const Client::Decoration<ReplClientInfo> ReplClientInfo::forClient = void ReplClientInfo::setLastOpToSystemLastOpTime(OperationContext* txn) { ReplicationCoordinator* replCoord = repl::ReplicationCoordinator::get(txn->getServiceContext()); if (replCoord->isReplEnabled() && txn->writesAreReplicated()) { - setLastOp(replCoord->getMyLastOptime()); + setLastOp(replCoord->getMyLastAppliedOpTime()); } } diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.cpp b/src/mongo/db/repl/repl_set_heartbeat_response.cpp index 8ccb6241950..8c06dc7bd7d 100644 --- a/src/mongo/db/repl/repl_set_heartbeat_response.cpp +++ b/src/mongo/db/repl/repl_set_heartbeat_response.cpp @@ -58,7 +58,8 @@ const std::string kIsReplSetFieldName = "rs"; const std::string kMemberStateFieldName = "state"; const std::string kMismatchFieldName = "mismatch"; const std::string kOkFieldName = "ok"; -const std::string kOpTimeFieldName = "opTime"; +const std::string kDurableOpTimeFieldName = "durableOpTime"; +const std::string kAppliedOpTimeFieldName = "opTime"; const std::string kPrimaryIdFieldName = "primaryId"; const std::string kReplSetFieldName = "set"; const std::string kSyncSourceFieldName = "syncingTo"; @@ -117,12 +118,15 @@ void ReplSetHeartbeatResponse::addToBSON(BSONObjBuilder* builder, bool isProtoco if (_primaryIdSet) { builder->append(kPrimaryIdFieldName, _primaryId); } - if (_opTimeSet) { + if (_durableOpTimeSet) { + _durableOpTime.append(builder, kDurableOpTimeFieldName); + } + if (_appliedOpTimeSet) { if (isProtocolVersionV1) { - _opTime.append(builder, kOpTimeFieldName); + _appliedOpTime.append(builder, kAppliedOpTimeFieldName); } else { - builder->appendDate(kOpTimeFieldName, - Date_t::fromMillisSinceEpoch(_opTime.getTimestamp().asLL())); + builder->appendDate(kAppliedOpTimeFieldName, + Date_t::fromMillisSinceEpoch(_appliedOpTime.getTimestamp().asLL())); } } } @@ -209,30 +213,39 @@ Status ReplSetHeartbeatResponse::initialize(const 
BSONObj& doc, long long term) return termStatus; } + Status status = bsonExtractOpTimeField(doc, kDurableOpTimeFieldName, &_durableOpTime); + if (!status.isOK()) { + if (status != ErrorCodes::NoSuchKey) { + return status; + } + } else { + _durableOpTimeSet = true; + } + // In order to support both the 3.0(V0) and 3.2(V1) heartbeats we must parse the OpTime // field based on its type. If it is a Date, we parse it as the timestamp and use // initialize's term argument to complete the OpTime type. If it is an Object, then it's // V1 and we construct an OpTime out of its nested fields. - const BSONElement opTimeElement = doc[kOpTimeFieldName]; - if (opTimeElement.eoo()) { - _opTimeSet = false; - } else if (opTimeElement.type() == bsonTimestamp) { - _opTimeSet = true; - _opTime = OpTime(opTimeElement.timestamp(), term); - } else if (opTimeElement.type() == Date) { - _opTimeSet = true; - _opTime = OpTime(Timestamp(opTimeElement.date()), term); - } else if (opTimeElement.type() == Object) { - Status status = bsonExtractOpTimeField(doc, kOpTimeFieldName, &_opTime); - _opTimeSet = true; + const BSONElement appliedOpTimeElement = doc[kAppliedOpTimeFieldName]; + if (appliedOpTimeElement.eoo()) { + _appliedOpTimeSet = false; + } else if (appliedOpTimeElement.type() == bsonTimestamp) { + _appliedOpTimeSet = true; + _appliedOpTime = OpTime(appliedOpTimeElement.timestamp(), term); + } else if (appliedOpTimeElement.type() == Date) { + _appliedOpTimeSet = true; + _appliedOpTime = OpTime(Timestamp(appliedOpTimeElement.date()), term); + } else if (appliedOpTimeElement.type() == Object) { + Status status = bsonExtractOpTimeField(doc, kAppliedOpTimeFieldName, &_appliedOpTime); + _appliedOpTimeSet = true; // since a v1 OpTime was in the response, the member must be part of a replset _isReplSet = true; } else { return Status(ErrorCodes::TypeMismatch, - str::stream() << "Expected \"" << kOpTimeFieldName + str::stream() << "Expected \"" << kAppliedOpTimeFieldName << "\" field in response 
to replSetHeartbeat " "command to have type Date or Timestamp, but found type " - << typeName(opTimeElement.type())); + << typeName(appliedOpTimeElement.type())); } const BSONElement electableElement = doc[kIsElectableFieldName]; @@ -274,7 +287,7 @@ Status ReplSetHeartbeatResponse::initialize(const BSONObj& doc, long long term) const BSONElement configVersionElement = doc[kConfigVersionFieldName]; // If we have an optime then we must have a configVersion - if (_opTimeSet && configVersionElement.eoo()) { + if (_appliedOpTimeSet && configVersionElement.eoo()) { return Status(ErrorCodes::NoSuchKey, str::stream() << "Response to replSetHeartbeat missing required \"" << kConfigVersionFieldName @@ -362,9 +375,14 @@ long long ReplSetHeartbeatResponse::getPrimaryId() const { return _primaryId; } -OpTime ReplSetHeartbeatResponse::getOpTime() const { - invariant(_opTimeSet); - return _opTime; +OpTime ReplSetHeartbeatResponse::getAppliedOpTime() const { + invariant(_appliedOpTimeSet); + return _appliedOpTime; +} + +OpTime ReplSetHeartbeatResponse::getDurableOpTime() const { + invariant(_durableOpTimeSet); + return _durableOpTime; } } // namespace repl diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.h b/src/mongo/db/repl/repl_set_heartbeat_response.h index b3fba2a4803..2b968bbb17d 100644 --- a/src/mongo/db/repl/repl_set_heartbeat_response.h +++ b/src/mongo/db/repl/repl_set_heartbeat_response.h @@ -125,10 +125,14 @@ public: long long getTerm() const { return _term; } - bool hasOpTime() const { - return _opTimeSet; + bool hasAppliedOpTime() const { + return _appliedOpTimeSet; } - OpTime getOpTime() const; + OpTime getAppliedOpTime() const; + bool hasDurableOpTime() const { + return _durableOpTimeSet; + } + OpTime getDurableOpTime() const; /** * Sets _mismatch to true. 
@@ -232,9 +236,13 @@ public: _primaryIdSet = true; _primaryId = primaryId; } - void setOpTime(OpTime time) { - _opTimeSet = true; - _opTime = time; + void setAppliedOpTime(OpTime time) { + _appliedOpTimeSet = true; + _appliedOpTime = time; + } + void setDurableOpTime(OpTime time) { + _durableOpTimeSet = true; + _durableOpTime = time; } void setTerm(long long term) { _term = term; @@ -247,8 +255,11 @@ private: bool _timeSet = false; Seconds _time = Seconds(0); // Seconds since UNIX epoch. - bool _opTimeSet = false; - OpTime _opTime; + bool _appliedOpTimeSet = false; + OpTime _appliedOpTime; + + bool _durableOpTimeSet = false; + OpTime _durableOpTime; bool _electableSet = false; bool _electable = false; diff --git a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp index 45c8dba3e1f..3c7adf479ee 100644 --- a/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp +++ b/src/mongo/db/repl/repl_set_heartbeat_response_test.cpp @@ -51,7 +51,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(false, hbResponse.hasElectionTime()); ASSERT_EQUALS(false, hbResponse.hasIsElectable()); ASSERT_EQUALS(false, hbResponse.hasTime()); - ASSERT_EQUALS(false, hbResponse.hasOpTime()); + ASSERT_EQUALS(false, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -75,7 +76,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(false, hbResponse.hasElectionTime()); ASSERT_EQUALS(false, hbResponse.hasIsElectable()); ASSERT_EQUALS(false, hbResponse.hasTime()); - ASSERT_EQUALS(false, hbResponse.hasOpTime()); + ASSERT_EQUALS(false, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, 
hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -101,7 +103,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(false, hbResponse.hasElectionTime()); ASSERT_EQUALS(false, hbResponse.hasIsElectable()); ASSERT_EQUALS(false, hbResponse.hasTime()); - ASSERT_EQUALS(false, hbResponse.hasOpTime()); + ASSERT_EQUALS(false, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -128,7 +131,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(false, hbResponse.hasIsElectable()); ASSERT_EQUALS(false, hbResponse.hasTime()); - ASSERT_EQUALS(false, hbResponse.hasOpTime()); + ASSERT_EQUALS(false, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -150,14 +154,15 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(Status::OK(), initializeResult); ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toString()); - // set opTime - hbResponse.setOpTime(OpTime(Timestamp(10), 0)); + // set durableOpTime + hbResponse.setDurableOpTime(OpTime(Timestamp(10), 0)); ++fieldsSet; ASSERT_EQUALS(false, hbResponse.hasState()); ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(false, hbResponse.hasIsElectable()); ASSERT_EQUALS(false, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(false, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); 
ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -167,7 +172,7 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); hbResponseObj = hbResponse.toBSON(false); ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields()); @@ -175,7 +180,41 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); + + initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0); + ASSERT_EQUALS(Status::OK(), initializeResult); + ASSERT_EQUALS(hbResponseObj.toString(), hbResponseObjRoundTripChecker.toBSON(false).toString()); + + // set appliedOpTime + hbResponse.setAppliedOpTime(OpTime(Timestamp(50), 0)); + ++fieldsSet; + ASSERT_EQUALS(false, hbResponse.hasState()); + ASSERT_EQUALS(true, hbResponse.hasElectionTime()); + ASSERT_EQUALS(false, hbResponse.hasIsElectable()); + ASSERT_EQUALS(false, hbResponse.hasTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); + ASSERT_EQUALS(false, hbResponse.hasConfig()); + ASSERT_EQUALS(false, hbResponse.isMismatched()); + ASSERT_EQUALS(false, hbResponse.isReplSet()); + ASSERT_EQUALS(false, hbResponse.isStateDisagreement()); + ASSERT_EQUALS("rs0", hbResponse.getReplicaSetName()); + ASSERT_EQUALS("", hbResponse.getHbMsg()); + ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); + ASSERT_EQUALS(1, 
hbResponse.getConfigVersion()); + ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); + + hbResponseObj = hbResponse.toBSON(false); + ASSERT_EQUALS(fieldsSet, hbResponseObj.nFields()); + ASSERT_EQUALS("rs0", hbResponseObj["set"].String()); + ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); + ASSERT_EQUALS(1, hbResponseObj["v"].Number()); + ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0); ASSERT_EQUALS(Status::OK(), initializeResult); @@ -188,7 +227,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(false, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -198,7 +238,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); hbResponseObj = hbResponse.toBSON(false); @@ -207,7 
+248,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); initializeResult = hbResponseObjRoundTripChecker.initialize(hbResponseObj, 0); @@ -221,7 +263,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(false, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -231,7 +274,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); @@ -241,7 +285,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), 
hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); @@ -257,7 +302,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -267,7 +313,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -278,7 +325,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), 
hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString()); @@ -294,7 +342,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -306,7 +355,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -317,7 +367,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), 
hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString()); @@ -334,7 +385,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(false, hbResponse.isReplSet()); @@ -346,7 +398,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -357,7 +410,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, 
hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString()); @@ -376,7 +430,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(true, hbResponse.isReplSet()); @@ -388,7 +443,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort(), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -399,7 +455,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); 
ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString()); @@ -419,7 +476,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(true, hbResponse.isReplSet()); @@ -431,7 +489,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -442,7 +501,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString()); @@ 
-462,7 +522,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(false, hbResponse.isMismatched()); ASSERT_EQUALS(true, hbResponse.isReplSet()); @@ -474,7 +535,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -485,7 +547,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS("lub dub", hbResponseObj["hbmsg"].String()); ASSERT_EQUALS(1, hbResponseObj["v"].Number()); ASSERT_EQUALS(Timestamp(10, 0), hbResponseObj["electionTime"].timestamp()); - ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 50), hbResponseObj["opTime"].timestamp()); + ASSERT_EQUALS(Timestamp(0, 10), hbResponseObj["durableOpTime"]["ts"].timestamp()); ASSERT_EQUALS(10, hbResponseObj["time"].numberLong()); ASSERT_EQUALS(true, hbResponseObj["e"].trueValue()); ASSERT_EQUALS(config.toBSON().toString(), hbResponseObj["config"].Obj().toString()); @@ -505,7 +568,8 @@ TEST(ReplSetHeartbeatResponse, 
DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(true, hbResponse.hasElectionTime()); ASSERT_EQUALS(true, hbResponse.hasIsElectable()); ASSERT_EQUALS(true, hbResponse.hasTime()); - ASSERT_EQUALS(true, hbResponse.hasOpTime()); + ASSERT_EQUALS(true, hbResponse.hasDurableOpTime()); + ASSERT_EQUALS(true, hbResponse.hasAppliedOpTime()); ASSERT_EQUALS(true, hbResponse.hasConfig()); ASSERT_EQUALS(true, hbResponse.isMismatched()); ASSERT_EQUALS(true, hbResponse.isReplSet()); @@ -517,7 +581,8 @@ TEST(ReplSetHeartbeatResponse, DefaultConstructThenSlowlyBuildToFullObj) { ASSERT_EQUALS(HostAndPort("syncTarget"), hbResponse.getSyncingTo()); ASSERT_EQUALS(1, hbResponse.getConfigVersion()); ASSERT_EQUALS(Timestamp(10, 0), hbResponse.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 10), 0), hbResponse.getDurableOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(0, 50), 0), hbResponse.getAppliedOpTime()); ASSERT_EQUALS(Seconds(10), hbResponse.getTime()); ASSERT_EQUALS(true, hbResponse.isElectable()); ASSERT_EQUALS(config.toBSON().toString(), hbResponse.getConfig().toBSON().toString()); @@ -557,7 +622,23 @@ TEST(ReplSetHeartbeatResponse, InitializeWrongTimeType) { result.reason()); } -TEST(ReplSetHeartbeatResponse, InitializeWrongOpTimeType) { +TEST(ReplSetHeartbeatResponse, InitializeWrongDurableOpTimeType) { + ReplSetHeartbeatResponse hbResponse; + BSONObj initializerObj = BSON("ok" << 1.0 << "durableOpTime" + << "hello"); + Status result = hbResponse.initialize(initializerObj, 0); + ASSERT_EQUALS(ErrorCodes::TypeMismatch, result); + ASSERT_EQUALS("\"durableOpTime\" had the wrong type. Expected Object, found String", + result.reason()); + + BSONObj initializerObj2 = BSON("ok" << 1.0 << "durableOpTime" << OpTime().getTimestamp()); + Status result2 = hbResponse.initialize(initializerObj2, 0); + ASSERT_EQUALS(ErrorCodes::TypeMismatch, result2); + ASSERT_EQUALS("\"durableOpTime\" had the wrong type. 
Expected Object, found Timestamp", + result2.reason()); +} + +TEST(ReplSetHeartbeatResponse, InitializeWrongAppliedOpTimeType) { ReplSetHeartbeatResponse hbResponse; BSONObj initializerObj = BSON("ok" << 1.0 << "opTime" << "hello"); @@ -719,7 +800,7 @@ TEST(ReplSetHeartbeatResponse, InitializeBothOpTimeTypesSameResult) { result = hbResponseTimestamp.initialize(initializerTimestamp.obj(), 0); ASSERT_EQUALS(Status::OK(), result); - ASSERT_EQUALS(hbResponseTimestamp.getOpTime(), hbResponseTimestamp.getOpTime()); + ASSERT_EQUALS(hbResponseTimestamp.getAppliedOpTime(), hbResponseTimestamp.getAppliedOpTime()); } TEST(ReplSetHeartbeatResponse, NoConfigStillInitializing) { diff --git a/src/mongo/db/repl/repl_set_html_summary.cpp b/src/mongo/db/repl/repl_set_html_summary.cpp index 821e8258d2e..218dff908fd 100644 --- a/src/mongo/db/repl/repl_set_html_summary.cpp +++ b/src/mongo/db/repl/repl_set_html_summary.cpp @@ -185,8 +185,9 @@ const std::string ReplSetHtmlSummary::toHtmlString() const { } memberTable << td(grey(memberHB.getLastHeartbeatMsg(), !up)); // TODO(dannenberg): change timestamp to optime in V1 - memberTable << td( - memberHB.getLastHeartbeat() == Date_t() ? "?" : memberHB.getOpTime().toString()); + memberTable << td(memberHB.getLastHeartbeat() == Date_t() + ? "?" 
+ : memberHB.getAppliedOpTime().toString()); } memberTable << _tr(); } @@ -200,7 +201,7 @@ const std::string ReplSetHtmlSummary::toHtmlString() const { const MemberConfig& selfConfig = _config.getMemberAt(_selfIndex); if (_primaryIndex >= 0 && _primaryIndex != _selfIndex && !selfConfig.isArbiter()) { - int lag = _hbData[_primaryIndex].getOpTime().getTimestamp().getSecs() - + int lag = _hbData[_primaryIndex].getAppliedOpTime().getTimestamp().getSecs() - _selfOptime.getTimestamp().getSecs(); s << tr("Lag: ", str::stream() << lag << " secs"); } diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp index 2e83b80a000..5f9220918fd 100644 --- a/src/mongo/db/repl/replica_set_config.cpp +++ b/src/mongo/db/repl/replica_set_config.cpp @@ -61,20 +61,23 @@ const std::string kMembersFieldName = "members"; const std::string kSettingsFieldName = "settings"; const std::string kStepDownCheckWriteConcernModeName = "$stepDownCheck"; const std::string kProtocolVersionFieldName = "protocolVersion"; +const std::string kWriteConcernMajorityJournalDefaultFieldName = + "writeConcernMajorityJournalDefault"; const std::string kLegalConfigTopFieldNames[] = {kIdFieldName, ReplicaSetConfig::kVersionFieldName, kMembersFieldName, kSettingsFieldName, kProtocolVersionFieldName, - ReplicaSetConfig::kConfigServerFieldName}; + ReplicaSetConfig::kConfigServerFieldName, + kWriteConcernMajorityJournalDefaultFieldName}; -const std::string kElectionTimeoutFieldName = "electionTimeoutMillis"; -const std::string kHeartbeatIntervalFieldName = "heartbeatIntervalMillis"; -const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs"; const std::string kChainingAllowedFieldName = "chainingAllowed"; +const std::string kElectionTimeoutFieldName = "electionTimeoutMillis"; const std::string kGetLastErrorDefaultsFieldName = "getLastErrorDefaults"; const std::string kGetLastErrorModesFieldName = "getLastErrorModes"; +const std::string kHeartbeatIntervalFieldName = 
"heartbeatIntervalMillis"; +const std::string kHeartbeatTimeoutFieldName = "heartbeatTimeoutSecs"; } // namespace @@ -158,6 +161,16 @@ Status ReplicaSetConfig::_initialize(const BSONObj& cfg, bool forInitiate, bool } // + // Parse writeConcernMajorityJournalDefault + // + status = bsonExtractBooleanFieldWithDefault(cfg, + kWriteConcernMajorityJournalDefaultFieldName, + _protocolVersion == 1, + &_writeConcernMajorityJournalDefault); + if (!status.isOK()) + return status; + + // // Parse settings // BSONElement settingsElement; @@ -454,6 +467,12 @@ Status ReplicaSetConfig::validate() const { "Nodes being used for config servers must be started with the " "--configsvr flag"); } + if (!_writeConcernMajorityJournalDefault) { + return Status(ErrorCodes::BadValue, + str::stream() << kWriteConcernMajorityJournalDefaultFieldName + << " must be true in replica set configurations being " + "used for config servers"); + } } else if (serverGlobalParams.configsvr) { return Status(ErrorCodes::BadValue, "Nodes started with the --configsvr flag must have configsvr:true in " @@ -616,8 +635,20 @@ BSONObj ReplicaSetConfig::toBSON() const { configBuilder.append(kConfigServerFieldName, _configServer); } + // Only include writeConcernMajorityJournalDefault if it is not the default version for this + // ProtocolVersion to prevent breaking cross version-3.2.1 compatibilty of ReplicaSetConfigs. if (_protocolVersion > 0) { configBuilder.append(kProtocolVersionFieldName, _protocolVersion); + // Only include writeConcernMajorityJournalDefault if it is not the default version for this + // ProtocolVersion to prevent breaking cross version-3.2.1 compatibilty of + // ReplicaSetConfigs. 
+ if (!_writeConcernMajorityJournalDefault) { + configBuilder.append(kWriteConcernMajorityJournalDefaultFieldName, + _writeConcernMajorityJournalDefault); + } + } else if (_writeConcernMajorityJournalDefault) { + configBuilder.append(kWriteConcernMajorityJournalDefaultFieldName, + _writeConcernMajorityJournalDefault); } BSONArrayBuilder members(configBuilder.subarrayStart(kMembersFieldName)); diff --git a/src/mongo/db/repl/replica_set_config.h b/src/mongo/db/repl/replica_set_config.h index b02a3feba68..651c4e904da 100644 --- a/src/mongo/db/repl/replica_set_config.h +++ b/src/mongo/db/repl/replica_set_config.h @@ -228,6 +228,14 @@ public: } /** + * Returns whether or not majority write concerns should implicitly journal, if j has not been + * explicitly set. + */ + bool getWriteConcernMajorityShouldJournal() const { + return _writeConcernMajorityJournalDefault; + } + + /** * Returns true if this replica set is for use as a config server replica set. */ bool isConfigServer() const { @@ -323,6 +331,7 @@ private: Milliseconds _heartbeatInterval = kDefaultHeartbeatInterval; Seconds _heartbeatTimeoutPeriod = kDefaultHeartbeatTimeoutPeriod; bool _chainingAllowed = kDefaultChainingAllowed; + bool _writeConcernMajorityJournalDefault = false; int _majorityVoteCount = 0; int _writeMajority = 0; int _totalVotingMembers = 0; diff --git a/src/mongo/db/repl/replica_set_config_test.cpp b/src/mongo/db/repl/replica_set_config_test.cpp index 4e264f34046..4749a5d4168 100644 --- a/src/mongo/db/repl/replica_set_config_test.cpp +++ b/src/mongo/db/repl/replica_set_config_test.cpp @@ -76,6 +76,7 @@ TEST(ReplicaSetConfig, ParseMinimalConfigAndCheckDefaults) { ASSERT_EQUALS(ReplicaSetConfig::kDefaultElectionTimeoutPeriod, config.getElectionTimeoutPeriod()); ASSERT_TRUE(config.isChainingAllowed()); + ASSERT_FALSE(config.getWriteConcernMajorityShouldJournal()); ASSERT_FALSE(config.isConfigServer()); ASSERT_EQUALS(0, config.getProtocolVersion()); } @@ -104,6 +105,7 @@ TEST(ReplicaSetConfig, 
ParseLargeConfigAndCheckAccessors) { ASSERT_EQUALS(0, config.getDefaultWriteConcern().wNumNodes); ASSERT_EQUALS("majority", config.getDefaultWriteConcern().wMode); ASSERT_FALSE(config.isChainingAllowed()); + ASSERT_TRUE(config.getWriteConcernMajorityShouldJournal()); ASSERT_FALSE(config.isConfigServer()); ASSERT_EQUALS(Seconds(5), config.getHeartbeatInterval()); ASSERT_EQUALS(Seconds(120), config.getHeartbeatTimeoutPeriod()); @@ -975,7 +977,7 @@ TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) { ASSERT_OK(configA.initialize(BSON( "_id" << "asdf" - << "version" << 9 << "members" + << "version" << 9 << "writeConcernMajorityJournalDefault" << true << "members" << BSON_ARRAY(BSON("_id" << 0 << "host" << "localhost:12345" << "arbiterOnly" << true << "votes" << 1) @@ -993,14 +995,14 @@ TEST(ReplicaSetConfig, toBSONRoundTripAbilityLarge) { << BSON("coast" << "west" << "hdd" - << "true"))) << "protocolVersion" << 0 - << "settings" << BSON("heartbeatIntervalMillis" - << 5000 << "heartbeatTimeoutSecs" << 20 << "electionTimeoutMillis" - << 4 << "chainingAllowd" << true << "getLastErrorDefaults" - << BSON("w" - << "majority") << "getLastErrorModes" - << BSON("disks" << BSON("ssd" << 1 << "hdd" << 1) << "coasts" - << BSON("coast" << 2)))))); + << "true"))) << "protocolVersion" << 0 << "settings" + + << BSON("heartbeatIntervalMillis" + << 5000 << "heartbeatTimeoutSecs" << 20 << "electionTimeoutMillis" << 4 + << "chainingAllowd" << true << "getLastErrorDefaults" << BSON("w" + << "majority") + << "getLastErrorModes" << BSON("disks" << BSON("ssd" << 1 << "hdd" << 1) << "coasts" + << BSON("coast" << 2)))))); BSONObj configObjA = configA.toBSON(); // Ensure a protocolVersion does not show up if it is 0 to maintain cross version compatibility. 
ASSERT_FALSE(configObjA.hasField("protocolVersion")); @@ -1197,6 +1199,23 @@ TEST(ReplicaSetConfig, CheckConfigServerCantHaveSlaveDelay) { ASSERT_STRING_CONTAINS(status.reason(), "cannot have a non-zero slaveDelay"); } +TEST(ReplicaSetConfig, CheckConfigServerMustHaveTrueForWriteConcernMajorityJournalDefault) { + serverGlobalParams.configsvr = true; + ON_BLOCK_EXIT([&] { serverGlobalParams.configsvr = false; }); + ReplicaSetConfig configA; + ASSERT_OK( + configA.initialize(BSON("_id" + << "rs0" + << "protocolVersion" << 1 << "version" << 1 << "configsvr" << true + << "members" << BSON_ARRAY(BSON("_id" << 0 << "host" + << "localhost:12345") + << BSON("_id" << 1 << "host" + << "localhost:54321")) + << "writeConcernMajorityJournalDefault" << false))); + Status status = configA.validate(); + ASSERT_EQUALS(ErrorCodes::BadValue, status); + ASSERT_STRING_CONTAINS(status.reason(), " must be true in replica set configurations being "); +} TEST(ReplicaSetConfig, GetPriorityTakeoverDelay) { ReplicaSetConfig configA; @@ -1254,6 +1273,50 @@ TEST(ReplicaSetConfig, GetPriorityTakeoverDelay) { ASSERT_EQUALS(Milliseconds(1000), configB.getPriorityTakeoverDelay(4)); } +TEST(ReplicaSetConfig, ConfirmDefaultValuesOfAndAbilityToSetWriteConcernMajorityJournalDefault) { + // PV0, should default to false. + ReplicaSetConfig config; + ASSERT_OK(config.initialize(BSON("_id" + << "rs0" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "localhost:12345"))))); + ASSERT_OK(config.validate()); + ASSERT_FALSE(config.getWriteConcernMajorityShouldJournal()); + ASSERT_FALSE(config.toBSON().hasField("writeConcernMajorityJournalDefault")); + + // Should be able to set it true in PV0. 
+ ASSERT_OK(config.initialize(BSON("_id" + << "rs0" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "localhost:12345")) + << "writeConcernMajorityJournalDefault" << true))); + ASSERT_OK(config.validate()); + ASSERT_TRUE(config.getWriteConcernMajorityShouldJournal()); + ASSERT_TRUE(config.toBSON().hasField("writeConcernMajorityJournalDefault")); + + // PV1, should default to true. + ASSERT_OK(config.initialize(BSON("_id" + << "rs0" + << "protocolVersion" << 1 << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "localhost:12345"))))); + ASSERT_OK(config.validate()); + ASSERT_TRUE(config.getWriteConcernMajorityShouldJournal()); + ASSERT_FALSE(config.toBSON().hasField("writeConcernMajorityJournalDefault")); + + // Should be able to set it false in PV1. + ASSERT_OK(config.initialize(BSON("_id" + << "rs0" + << "protocolVersion" << 1 << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "localhost:12345")) + << "writeConcernMajorityJournalDefault" << false))); + ASSERT_OK(config.validate()); + ASSERT_FALSE(config.getWriteConcernMajorityShouldJournal()); + ASSERT_TRUE(config.toBSON().hasField("writeConcernMajorityJournalDefault")); +} } // namespace } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 1d31b059bb7..630b9277b48 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -67,6 +67,7 @@ namespace repl { class BackgroundSync; class HandshakeArgs; class IsMasterResponse; +class OldUpdatePositionArgs; class OplogReader; class OpTime; class ReadConcernArgs; @@ -293,11 +294,22 @@ public: * * The new value of "opTime" must be no less than any prior value passed to this method, and * it is the caller's job to properly synchronize this behavior. 
The exception to this rule - * is that after calls to resetLastOpTimeFromOplog(), the minimum acceptable value for + * is that after calls to resetLastOpTimesFromOplog(), the minimum acceptable value for * "opTime" is reset based on the contents of the oplog, and may go backwards due to * rollback. */ - virtual void setMyLastOptime(const OpTime& opTime) = 0; + virtual void setMyLastAppliedOpTime(const OpTime& opTime) = 0; + + /** + * Updates our internal tracking of the last OpTime durable to this node. + * + * The new value of "opTime" must be no less than any prior value passed to this method, and + * it is the caller's job to properly synchronize this behavior. The exception to this rule + * is that after calls to resetLastOpTimesFromOplog(), the minimum acceptable value for + * "opTime" is reset based on the contents of the oplog, and may go backwards due to + * rollback. + */ + virtual void setMyLastDurableOpTime(const OpTime& opTime) = 0; /** * Updates our internal tracking of the last OpTime applied to this node, but only @@ -307,12 +319,22 @@ public: * This function is used by logOp() on a primary, since the ops in the oplog do not * necessarily commit in sequential order. */ - virtual void setMyLastOptimeForward(const OpTime& opTime) = 0; + virtual void setMyLastAppliedOpTimeForward(const OpTime& opTime) = 0; + + /** + * Updates our internal tracking of the last OpTime durable to this node, but only + * if the supplied optime is later than the current last OpTime known to the replication + * coordinator. + * + * This function is used by logOp() on a primary, since the ops in the oplog do not + * necessarily commit in sequential order. + */ + virtual void setMyLastDurableOpTimeForward(const OpTime& opTime) = 0; /** * Same as above, but used during places we need to zero our last optime. */ - virtual void resetMyLastOptime() = 0; + virtual void resetMyLastOpTimes() = 0; /** * Updates our the message we include in heartbeat responses. 
@@ -320,9 +342,14 @@ public: virtual void setMyHeartbeatMessage(const std::string& msg) = 0; /** - * Returns the last optime recorded by setMyLastOptime. + * Returns the last optime recorded by setMyLastAppliedOpTime. */ - virtual OpTime getMyLastOptime() const = 0; + virtual OpTime getMyLastAppliedOpTime() const = 0; + + /** + * Returns the last optime recorded by setMyLastDurableOpTime. + */ + virtual OpTime getMyLastDurableOpTime() const = 0; /** * Waits until the optime of the current node is at least the opTime specified in @@ -408,6 +435,7 @@ public: * * The returned bool indicates whether or not the command was created. */ + virtual bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0; virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) = 0; /** @@ -573,7 +601,12 @@ public: * were applied. * "configVersion" will be populated with our config version if and only if we return * InvalidReplicaSetConfig. + * + * The OldUpdatePositionArgs version provides support for the pre-3.2.2 format of + * UpdatePositionArgs. */ + virtual Status processReplSetUpdatePosition(const OldUpdatePositionArgs& updates, + long long* configVersion) = 0; virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates, long long* configVersion) = 0; @@ -594,8 +627,9 @@ public: /** * Returns a vector of members that have applied the operation with OpTime 'op'. + * "durablyWritten" indicates whether the operation has to be durably applied. */ - virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) = 0; + virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, bool durablyWritten) = 0; /** * Returns a vector of the members other than ourself in the replica set, as specified in @@ -620,10 +654,10 @@ public: virtual Status checkReplEnabledForCommand(BSONObjBuilder* result) = 0; /** - * Loads the optime from the last op in the oplog into the coordinator's lastOpApplied - * value. 
+ * Loads the optime from the last op in the oplog into the coordinator's lastAppliedOpTime and + * lastDurableOpTime values. */ - virtual void resetLastOpTimeFromOplog(OperationContext* txn) = 0; + virtual void resetLastOpTimesFromOplog(OperationContext* txn) = 0; /** * Returns the OpTime of the latest replica set-committed op known to this server. @@ -661,6 +695,12 @@ public: virtual bool isV1ElectionProtocol() = 0; /** + * Returns whether or not majority write concerns should implicitly journal, if j has not been + * explicitly set. + */ + virtual bool getWriteConcernMajorityShouldJournal() = 0; + + /** * Writes into 'output' all the information needed to generate a summary of the current * replication state for use by the web interface. */ @@ -739,6 +779,13 @@ public: */ virtual size_t getNumUncommittedSnapshots() = 0; + /** + * Returns a new WriteConcernOptions based on "wc" but with UNSET syncMode reset to JOURNAL or + * NONE based on our rsConfig. + */ + virtual WriteConcernOptions populateUnsetWriteConcernOptionsSyncMode( + WriteConcernOptions wc) = 0; + protected: ReplicationCoordinator(); }; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 50ae67c8dae..ab0f3c8336a 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -47,6 +47,7 @@ #include "mongo/db/repl/handshake_args.h" #include "mongo/db/repl/is_master_response.h" #include "mongo/db/repl/last_vote.h" +#include "mongo/db/repl/old_update_position_args.h" #include "mongo/db/repl/read_concern_args.h" #include "mongo/db/repl/read_concern_response.h" #include "mongo/db/repl/repl_client_info.h" @@ -64,6 +65,7 @@ #include "mongo/db/repl/update_position_args.h" #include "mongo/db/repl/vote_requester.h" #include "mongo/db/server_options.h" +#include "mongo/db/write_concern.h" #include "mongo/db/write_concern_options.h" #include "mongo/executor/connection_pool_stats.h" 
#include "mongo/rpc/metadata/repl_set_metadata.h" @@ -161,17 +163,17 @@ DataReplicatorOptions createDataReplicatorOptions(ReplicationCoordinator* replCo options.applierFn = [](OperationContext*, const BSONObj&) -> Status { return Status::OK(); }; options.rollbackFn = [](OperationContext*, const OpTime&, const HostAndPort&) { return Status::OK(); }; - options.prepareReplSetUpdatePositionCommandFn = [replCoord]() -> StatusWith<BSONObj> { + options.prepareOldReplSetUpdatePositionCommandFn = [replCoord]() -> StatusWith<BSONObj> { BSONObjBuilder bob; - if (replCoord->prepareReplSetUpdatePositionCommand(&bob)) { + if (replCoord->prepareOldReplSetUpdatePositionCommand(&bob)) { return bob.obj(); } return Status(ErrorCodes::OperationFailed, "unable to prepare replSetUpdatePosition command object"); }; - options.getMyLastOptime = [replCoord]() { return replCoord->getMyLastOptime(); }; + options.getMyLastOptime = [replCoord]() { return replCoord->getMyLastAppliedOpTime(); }; options.setMyLastOptime = - [replCoord](const OpTime& opTime) { replCoord->setMyLastOptime(opTime); }; + [replCoord](const OpTime& opTime) { replCoord->setMyLastAppliedOpTime(opTime); }; options.setFollowerMode = [replCoord](const MemberState& newState) { return replCoord->setFollowerMode(newState); }; options.syncSourceSelector = replCoord; @@ -186,7 +188,8 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl( int64_t prngSeed, NetworkInterface* network, StorageInterface* storage, - ReplicationExecutor* replExec) + ReplicationExecutor* replExec, + stdx::function<bool()>* isDurableStorageEngineFn) : _settings(settings), _replMode(getReplicationModeFromSettings(settings)), _topCoord(topCoord), @@ -202,7 +205,10 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl( _sleptLastElection(false), _canAcceptNonLocalWrites(!(settings.usingReplSets() || settings.isSlave())), _canServeNonLocalReads(0U), - _dr(createDataReplicatorOptions(this), &_replExecutor) { + _dr(createDataReplicatorOptions(this), 
&_replExecutor), + _isDurableStorageEngine(isDurableStorageEngineFn ? *isDurableStorageEngineFn : []() -> bool { + return getGlobalServiceContext()->getGlobalStorageEngine()->isDurable(); + }) { if (!isReplEnabled()) { return; } @@ -228,16 +234,23 @@ ReplicationCoordinatorImpl::ReplicationCoordinatorImpl( TopologyCoordinator* topCoord, int64_t prngSeed) : ReplicationCoordinatorImpl( - settings, externalState, topCoord, prngSeed, network, storage, nullptr) {} + settings, externalState, topCoord, prngSeed, network, storage, nullptr, nullptr) {} ReplicationCoordinatorImpl::ReplicationCoordinatorImpl( const ReplSettings& settings, ReplicationCoordinatorExternalState* externalState, TopologyCoordinator* topCoord, ReplicationExecutor* replExec, - int64_t prngSeed) - : ReplicationCoordinatorImpl( - settings, externalState, topCoord, prngSeed, nullptr, nullptr, replExec) {} + int64_t prngSeed, + stdx::function<bool()>* isDurableStorageEngineFn) + : ReplicationCoordinatorImpl(settings, + externalState, + topCoord, + prngSeed, + nullptr, + nullptr, + replExec, + isDurableStorageEngineFn) {} ReplicationCoordinatorImpl::~ReplicationCoordinatorImpl() {} @@ -404,7 +417,8 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig( invariant(_rsConfigState == kConfigStartingUp); const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(cbData, localConfig, myIndex.getValue()); - _setMyLastOptimeAndReport_inlock(&lk, lastOpTime, false); + _setMyLastAppliedOpTime_inlock(lastOpTime, false); + _setMyLastDurableOpTimeAndReport_inlock(&lk, lastOpTime, false); _externalState->setGlobalTimestamp(lastOpTime.getTimestamp()); // Step down is impossible, so we don't need to wait for the returned event. 
_updateTerm_incallback(term); @@ -741,9 +755,26 @@ void ReplicationCoordinatorImpl::_addSlaveInfo_inlock(const SlaveInfo& slaveInfo _wakeReadyWaiters_inlock(); } -void ReplicationCoordinatorImpl::_updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, - const OpTime& opTime) { - slaveInfo->opTime = opTime; +void ReplicationCoordinatorImpl::_updateSlaveInfoAppliedOpTime_inlock(SlaveInfo* slaveInfo, + const OpTime& opTime) { + slaveInfo->lastAppliedOpTime = opTime; + slaveInfo->lastUpdate = _replExecutor.now(); + slaveInfo->down = false; + + // Wake up any threads waiting for replication that now have their replication + // check satisfied + _wakeReadyWaiters_inlock(); +} + +void ReplicationCoordinatorImpl::_updateSlaveInfoDurableOpTime_inlock(SlaveInfo* slaveInfo, + const OpTime& opTime) { + // lastAppliedOpTime cannot be behind lastDurableOpTime. + if (slaveInfo->lastAppliedOpTime < opTime) { + log() << "Durable progress is ahead of the applied progress. This is likely due to a " + "rollback."; + return; + } + slaveInfo->lastDurableOpTime = opTime; slaveInfo->lastUpdate = _replExecutor.now(); slaveInfo->down = false; @@ -824,13 +855,13 @@ Status ReplicationCoordinatorImpl::setLastOptimeForSlave(const OID& rid, const T OpTime opTime(ts, OpTime::kUninitializedTerm); SlaveInfo* slaveInfo = _findSlaveInfoByRID_inlock(rid); if (slaveInfo) { - if (slaveInfo->opTime < opTime) { - _updateSlaveInfoOptime_inlock(slaveInfo, opTime); + if (slaveInfo->lastAppliedOpTime < opTime) { + _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, opTime); } } else { SlaveInfo newSlaveInfo; newSlaveInfo.rid = rid; - newSlaveInfo.opTime = opTime; + newSlaveInfo.lastAppliedOpTime = opTime; _addSlaveInfo_inlock(newSlaveInfo); } return Status::OK(); @@ -841,28 +872,59 @@ void ReplicationCoordinatorImpl::setMyHeartbeatMessage(const std::string& msg) { &TopologyCoordinator::setMyHeartbeatMessage, _topCoord.get(), _replExecutor.now(), msg)); } -void ReplicationCoordinatorImpl::setMyLastOptimeForward(const 
OpTime& opTime) { +void ReplicationCoordinatorImpl::setMyLastAppliedOpTimeForward(const OpTime& opTime) { + stdx::unique_lock<stdx::mutex> lock(_mutex); + if (opTime > _getMyLastAppliedOpTime_inlock()) { + _setMyLastAppliedOpTimeAndReport_inlock(&lock, opTime, false); + } +} + +void ReplicationCoordinatorImpl::setMyLastDurableOpTimeForward(const OpTime& opTime) { stdx::unique_lock<stdx::mutex> lock(_mutex); - if (opTime > _getMyLastOptime_inlock()) { - _setMyLastOptimeAndReport_inlock(&lock, opTime, false); + if (opTime > _getMyLastDurableOpTime_inlock()) { + _setMyLastDurableOpTimeAndReport_inlock(&lock, opTime, false); } } -void ReplicationCoordinatorImpl::setMyLastOptime(const OpTime& opTime) { +void ReplicationCoordinatorImpl::setMyLastAppliedOpTime(const OpTime& opTime) { stdx::unique_lock<stdx::mutex> lock(_mutex); - _setMyLastOptimeAndReport_inlock(&lock, opTime, false); + _setMyLastAppliedOpTimeAndReport_inlock(&lock, opTime, false); } -void ReplicationCoordinatorImpl::resetMyLastOptime() { +void ReplicationCoordinatorImpl::setMyLastDurableOpTime(const OpTime& opTime) { + stdx::unique_lock<stdx::mutex> lock(_mutex); + _setMyLastDurableOpTimeAndReport_inlock(&lock, opTime, false); +} + +void ReplicationCoordinatorImpl::resetMyLastOpTimes() { stdx::unique_lock<stdx::mutex> lock(_mutex); // Reset to uninitialized OpTime - _setMyLastOptimeAndReport_inlock(&lock, OpTime(), true); + _setMyLastAppliedOpTime_inlock(OpTime(), true); + _setMyLastDurableOpTimeAndReport_inlock(&lock, OpTime(), true); +} + +void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndReport_inlock( + stdx::unique_lock<stdx::mutex>* lock, const OpTime& opTime, bool isRollbackAllowed) { + invariant(lock->owns_lock()); + _setMyLastAppliedOpTime_inlock(opTime, isRollbackAllowed); + + if (getReplicationMode() != modeReplSet) { + return; + } + + if (_getMemberState_inlock().primary()) { + return; + } + + lock->unlock(); + + _externalState->forwardSlaveProgress(); // Must do this outside _mutex 
} -void ReplicationCoordinatorImpl::_setMyLastOptimeAndReport_inlock( +void ReplicationCoordinatorImpl::_setMyLastDurableOpTimeAndReport_inlock( stdx::unique_lock<stdx::mutex>* lock, const OpTime& opTime, bool isRollbackAllowed) { invariant(lock->owns_lock()); - _setMyLastOptime_inlock(opTime, isRollbackAllowed); + _setMyLastDurableOpTime_inlock(opTime, isRollbackAllowed); if (getReplicationMode() != modeReplSet) { return; @@ -877,11 +939,11 @@ void ReplicationCoordinatorImpl::_setMyLastOptimeAndReport_inlock( _externalState->forwardSlaveProgress(); // Must do this outside _mutex } -void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(const OpTime& opTime, - bool isRollbackAllowed) { +void ReplicationCoordinatorImpl::_setMyLastAppliedOpTime_inlock(const OpTime& opTime, + bool isRollbackAllowed) { SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()]; - invariant(isRollbackAllowed || mySlaveInfo->opTime <= opTime); - _updateSlaveInfoOptime_inlock(mySlaveInfo, opTime); + invariant(isRollbackAllowed || mySlaveInfo->lastAppliedOpTime <= opTime); + _updateSlaveInfoAppliedOpTime_inlock(mySlaveInfo, opTime); for (auto& opTimeWaiter : _opTimeWaiterList) { if (*(opTimeWaiter->opTime) <= opTime) { @@ -890,9 +952,27 @@ void ReplicationCoordinatorImpl::_setMyLastOptime_inlock(const OpTime& opTime, } } -OpTime ReplicationCoordinatorImpl::getMyLastOptime() const { +void ReplicationCoordinatorImpl::_setMyLastDurableOpTime_inlock(const OpTime& opTime, + bool isRollbackAllowed) { + SlaveInfo* mySlaveInfo = &_slaveInfo[_getMyIndexInSlaveInfo_inlock()]; + invariant(isRollbackAllowed || mySlaveInfo->lastDurableOpTime <= opTime); + // lastAppliedOpTime cannot be behind lastDurableOpTime. + if (mySlaveInfo->lastAppliedOpTime < opTime) { + log() << "Durable progress is ahead of the applied progress. 
This is likely due to a " + "rollback."; + return; + } + _updateSlaveInfoDurableOpTime_inlock(mySlaveInfo, opTime); +} + +OpTime ReplicationCoordinatorImpl::getMyLastAppliedOpTime() const { stdx::lock_guard<stdx::mutex> lock(_mutex); - return _getMyLastOptime_inlock(); + return _getMyLastAppliedOpTime_inlock(); +} + +OpTime ReplicationCoordinatorImpl::getMyLastDurableOpTime() const { + stdx::lock_guard<stdx::mutex> lock(_mutex); + return _getMyLastDurableOpTime_inlock(); } ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext* txn, @@ -933,7 +1013,7 @@ ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext auto loopCondition = [this, isMajorityReadConcern, targetOpTime] { return isMajorityReadConcern ? !_currentCommittedSnapshot || targetOpTime > _currentCommittedSnapshot->opTime - : targetOpTime > _getMyLastOptime_inlock(); + : targetOpTime > _getMyLastAppliedOpTime_inlock(); }; while (loopCondition()) { @@ -950,6 +1030,9 @@ ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext stdx::condition_variable condVar; WriteConcernOptions writeConcern; writeConcern.wMode = WriteConcernOptions::kMajority; + writeConcern.syncMode = getWriteConcernMajorityShouldJournal_inlock() + ? WriteConcernOptions::SyncMode::JOURNAL + : WriteConcernOptions::SyncMode::NONE; WaiterInfo waitInfo(isMajorityReadConcern ? 
&_replicationWaiterList : &_opTimeWaiterList, txn->getOpID(), @@ -967,25 +1050,111 @@ ReadConcernResponse ReplicationCoordinatorImpl::waitUntilOpTime(OperationContext return ReadConcernResponse(Status::OK(), Milliseconds(timer.millis())); } -OpTime ReplicationCoordinatorImpl::_getMyLastOptime_inlock() const { - return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].opTime; +OpTime ReplicationCoordinatorImpl::_getMyLastAppliedOpTime_inlock() const { + return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].lastAppliedOpTime; +} + +OpTime ReplicationCoordinatorImpl::_getMyLastDurableOpTime_inlock() const { + return _slaveInfo[_getMyIndexInSlaveInfo_inlock()].lastDurableOpTime; +} + +Status ReplicationCoordinatorImpl::setLastDurableOptime_forTest(long long cfgVer, + long long memberId, + const OpTime& opTime) { + stdx::lock_guard<stdx::mutex> lock(_mutex); + invariant(getReplicationMode() == modeReplSet); + + const UpdatePositionArgs::UpdateInfo update(OpTime(), opTime, cfgVer, memberId); + long long configVersion; + return _setLastOptime_inlock(update, &configVersion); } -Status ReplicationCoordinatorImpl::setLastOptime_forTest(long long cfgVer, - long long memberId, - const OpTime& opTime) { +Status ReplicationCoordinatorImpl::setLastAppliedOptime_forTest(long long cfgVer, + long long memberId, + const OpTime& opTime) { stdx::lock_guard<stdx::mutex> lock(_mutex); invariant(getReplicationMode() == modeReplSet); - const UpdatePositionArgs::UpdateInfo update(OID(), opTime, cfgVer, memberId); + const UpdatePositionArgs::UpdateInfo update(opTime, OpTime(), cfgVer, memberId); long long configVersion; return _setLastOptime_inlock(update, &configVersion); } +Status ReplicationCoordinatorImpl::_setLastOptime_inlock( + const OldUpdatePositionArgs::UpdateInfo& args, long long* configVersion) { + if (_selfIndex == -1) { + // Ignore updates when we're in state REMOVED + return Status(ErrorCodes::NotMasterOrSecondary, + "Received replSetUpdatePosition command but we are in state 
REMOVED"); + } + invariant(getReplicationMode() == modeReplSet); + + if (args.memberId < 0) { + std::string errmsg = str::stream() + << "Received replSetUpdatePosition for node with memberId " << args.memberId + << " which is negative and therefore invalid"; + LOG(1) << errmsg; + return Status(ErrorCodes::NodeNotFound, errmsg); + } + + if (args.memberId == _rsConfig.getMemberAt(_selfIndex).getId()) { + // Do not let remote nodes tell us what our optime is. + return Status::OK(); + } + + LOG(2) << "received notification that node with memberID " << args.memberId + << " in config with version " << args.cfgver + << " has durably reached optime: " << args.ts; + + SlaveInfo* slaveInfo = NULL; + if (args.cfgver != _rsConfig.getConfigVersion()) { + std::string errmsg = str::stream() + << "Received replSetUpdatePosition for node with memberId " << args.memberId + << " whose config version of " << args.cfgver << " doesn't match our config version of " + << _rsConfig.getConfigVersion(); + LOG(1) << errmsg; + *configVersion = _rsConfig.getConfigVersion(); + return Status(ErrorCodes::InvalidReplicaSetConfig, errmsg); + } + + slaveInfo = _findSlaveInfoByMemberID_inlock(args.memberId); + if (!slaveInfo) { + invariant(!_rsConfig.findMemberByID(args.memberId)); + + std::string errmsg = str::stream() + << "Received replSetUpdatePosition for node with memberId " << args.memberId + << " which doesn't exist in our config"; + LOG(1) << errmsg; + return Status(ErrorCodes::NodeNotFound, errmsg); + } + + invariant(args.memberId == slaveInfo->memberId); + + LOG(3) << "Node with memberID " << args.memberId << " has durably applied operationss through " + << slaveInfo->lastDurableOpTime << " and has applied operations through " + << slaveInfo->lastAppliedOpTime << "; updating to new durable operation with timestamp " + << args.ts; + + // Only update remote optimes if they increase. 
+ if (slaveInfo->lastAppliedOpTime < args.ts) { + _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, args.ts); + } + if (slaveInfo->lastDurableOpTime < args.ts) { + _updateSlaveInfoDurableOpTime_inlock(slaveInfo, args.ts); + } + + + // Update liveness for this node. + slaveInfo->lastUpdate = _replExecutor.now(); + slaveInfo->down = false; + _cancelAndRescheduleLivenessUpdate_inlock(args.memberId); + return Status::OK(); +} + Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args, long long* configVersion) { if (_selfIndex == -1) { - // Ignore updates when we're in state REMOVED + // Ignore updates when we're in state REMOVED. return Status(ErrorCodes::NotMasterOrSecondary, "Received replSetUpdatePosition command but we are in state REMOVED"); } @@ -1005,7 +1174,9 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg } LOG(2) << "received notification that node with memberID " << args.memberId - << " in config with version " << args.cfgver << " has reached optime: " << args.ts; + << " in config with version " << args.cfgver + << " has reached optime: " << args.appliedOpTime + << " and is durable through: " << args.durableOpTime; SlaveInfo* slaveInfo = NULL; if (args.cfgver != _rsConfig.getConfigVersion()) { @@ -1032,11 +1203,17 @@ Status ReplicationCoordinatorImpl::_setLastOptime_inlock(const UpdatePositionArg invariant(args.memberId == slaveInfo->memberId); LOG(3) << "Node with memberID " << args.memberId << " currently has optime " - << slaveInfo->opTime << "; updating to " << args.ts; + << slaveInfo->lastAppliedOpTime << " durable through " << slaveInfo->lastDurableOpTime + << "; updating to optime " << args.appliedOpTime << " and durable through " + << args.durableOpTime; + // Only update remote optimes if they increase. 
- if (slaveInfo->opTime < args.ts) { - _updateSlaveInfoOptime_inlock(slaveInfo, args.ts); + if (slaveInfo->lastAppliedOpTime < args.appliedOpTime) { + _updateSlaveInfoAppliedOpTime_inlock(slaveInfo, args.appliedOpTime); + } + if (slaveInfo->lastDurableOpTime < args.durableOpTime) { + _updateSlaveInfoDurableOpTime_inlock(slaveInfo, args.durableOpTime); } // Update liveness for this node. @@ -1092,17 +1269,22 @@ void ReplicationCoordinatorImpl::interruptAll() { bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock( const OpTime& opTime, SnapshotName minSnapshot, const WriteConcernOptions& writeConcern) { + invariant(writeConcern.syncMode != WriteConcernOptions::SyncMode::UNSET); Status status = _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern); if (!status.isOK()) { return true; } if (writeConcern.wMode.empty()) - return _haveNumNodesReachedOpTime_inlock(opTime, writeConcern.wNumNodes); + return _haveNumNodesReachedOpTime_inlock(opTime, + writeConcern.wNumNodes, + writeConcern.syncMode == + WriteConcernOptions::SyncMode::JOURNAL); StringData patternName; if (writeConcern.wMode == WriteConcernOptions::kMajority) { - if (_externalState->snapshotsEnabled()) { + if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL && + _externalState->snapshotsEnabled()) { if (!_currentCommittedSnapshot) { return false; } @@ -1119,20 +1301,26 @@ bool ReplicationCoordinatorImpl::_doneWaitingForReplication_inlock( if (!tagPattern.isOK()) { return true; } - return _haveTaggedNodesReachedOpTime_inlock(opTime, tagPattern.getValue()); + return _haveTaggedNodesReachedOpTime_inlock(opTime, + tagPattern.getValue(), + writeConcern.syncMode == + WriteConcernOptions::SyncMode::JOURNAL); } -bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& opTime, - int numNodes) { - if (_getMyLastOptime_inlock() < opTime) { - // Secondaries that are for some reason ahead of us should not allow us to - // satisfy a write concern if we aren't caught up 
ourselves. +bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& targetOpTime, + int numNodes, + bool durablyWritten) { + // Replication progress that is for some reason ahead of us should not allow us to + // satisfy a write concern if we aren't caught up ourselves. + OpTime myOpTime = + durablyWritten ? _getMyLastDurableOpTime_inlock() : _getMyLastAppliedOpTime_inlock(); + if (myOpTime < targetOpTime) { return false; } for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) { - const OpTime& slaveTime = it->opTime; - if (slaveTime >= opTime) { + const OpTime& slaveTime = durablyWritten ? it->lastDurableOpTime : it->lastAppliedOpTime; + if (slaveTime >= targetOpTime) { --numNodes; } @@ -1144,10 +1332,10 @@ bool ReplicationCoordinatorImpl::_haveNumNodesReachedOpTime_inlock(const OpTime& } bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock( - const OpTime& opTime, const ReplicaSetTagPattern& tagPattern) { + const OpTime& opTime, const ReplicaSetTagPattern& tagPattern, bool durablyWritten) { ReplicaSetTagMatch matcher(tagPattern); for (SlaveInfoVector::iterator it = _slaveInfo.begin(); it != _slaveInfo.end(); ++it) { - const OpTime& slaveTime = it->opTime; + const OpTime& slaveTime = durablyWritten ? it->lastDurableOpTime : it->lastAppliedOpTime; if (slaveTime >= opTime) { // This node has reached the desired optime, now we need to check if it is a part // of the tagPattern. 
@@ -1168,18 +1356,25 @@ bool ReplicationCoordinatorImpl::_haveTaggedNodesReachedOpTime_inlock( ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplication( OperationContext* txn, const OpTime& opTime, const WriteConcernOptions& writeConcern) { Timer timer; + WriteConcernOptions fixedWriteConcern = populateUnsetWriteConcernOptionsSyncMode(writeConcern); stdx::unique_lock<stdx::mutex> lock(_mutex); - return _awaitReplication_inlock(&timer, &lock, txn, opTime, SnapshotName::min(), writeConcern); + return _awaitReplication_inlock( + &timer, &lock, txn, opTime, SnapshotName::min(), fixedWriteConcern); } ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::awaitReplicationOfLastOpForClient( OperationContext* txn, const WriteConcernOptions& writeConcern) { Timer timer; + WriteConcernOptions fixedWriteConcern = populateUnsetWriteConcernOptionsSyncMode(writeConcern); stdx::unique_lock<stdx::mutex> lock(_mutex); const auto& clientInfo = ReplClientInfo::forClient(txn->getClient()); - return _awaitReplication_inlock( - &timer, &lock, txn, clientInfo.getLastOp(), clientInfo.getLastSnapshot(), writeConcern); + return _awaitReplication_inlock(&timer, + &lock, + txn, + clientInfo.getLastOp(), + clientInfo.getLastSnapshot(), + fixedWriteConcern); } ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitReplication_inlock( @@ -1214,7 +1409,7 @@ ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitRepl if (writeConcern.wMode.empty()) { if (writeConcern.wNumNodes < 1) { return StatusAndDuration(Status::OK(), Milliseconds(timer->millis())); - } else if (writeConcern.wNumNodes == 1 && _getMyLastOptime_inlock() >= opTime) { + } else if (writeConcern.wNumNodes == 1 && _getMyLastAppliedOpTime_inlock() >= opTime) { return StatusAndDuration(Status::OK(), Milliseconds(timer->millis())); } } @@ -1403,7 +1598,7 @@ void ReplicationCoordinatorImpl::_stepDownContinue( return; } bool forceNow = now >= waitUntil 
? force : false; - if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastOptime())) { + if (_topCoord->stepDown(stepDownUntil, forceNow, getMyLastAppliedOpTime())) { // Schedule work to (potentially) step back up once the stepdown period has ended. _replExecutor.scheduleWorkAt(stepDownUntil, stdx::bind(&ReplicationCoordinatorImpl::_handleTimePassing, @@ -1612,6 +1807,37 @@ int ReplicationCoordinatorImpl::_getMyId_inlock() const { bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) { stdx::lock_guard<stdx::mutex> lock(_mutex); invariant(_rsConfig.isInitialized()); + // Do not send updates if we have been removed from the config. + if (_selfIndex == -1) { + return false; + } + cmdBuilder->append("replSetUpdatePosition", 1); + // Create an array containing objects each live member connected to us and for ourself. + BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes")); + for (SlaveInfoVector::iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end(); ++itr) { + if (itr->lastAppliedOpTime.isNull()) { + // Don't include info on members we haven't heard from yet. + continue; + } + // Don't include members we think are down. 
+ if (!itr->self && itr->down) { + continue; + } + + BSONObjBuilder entry(arrayBuilder.subobjStart()); + itr->lastDurableOpTime.append(&entry, "durableOpTime"); + itr->lastAppliedOpTime.append(&entry, "appliedOpTime"); + entry.append("memberId", itr->memberId); + entry.append("cfgver", _rsConfig.getConfigVersion()); + } + + return true; +} + +bool ReplicationCoordinatorImpl::prepareOldReplSetUpdatePositionCommand( + BSONObjBuilder* cmdBuilder) { + stdx::lock_guard<stdx::mutex> lock(_mutex); + invariant(_rsConfig.isInitialized()); // do not send updates if we have been removed from the config if (_selfIndex == -1) { return false; @@ -1620,7 +1846,7 @@ bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuil // create an array containing objects each member connected to us and for ourself BSONArrayBuilder arrayBuilder(cmdBuilder->subarrayStart("optimes")); for (SlaveInfoVector::iterator itr = _slaveInfo.begin(); itr != _slaveInfo.end(); ++itr) { - if (itr->opTime.isNull()) { + if (itr->lastDurableOpTime.isNull()) { // Don't include info on members we haven't heard from yet. 
continue; } @@ -1632,9 +1858,9 @@ bool ReplicationCoordinatorImpl::prepareReplSetUpdatePositionCommand(BSONObjBuil BSONObjBuilder entry(arrayBuilder.subobjStart()); entry.append("_id", itr->rid); if (isV1ElectionProtocol()) { - itr->opTime.append(&entry, "optime"); + itr->lastDurableOpTime.append(&entry, "optime"); } else { - entry.append("optime", itr->opTime.getTimestamp()); + entry.append("optime", itr->lastDurableOpTime.getTimestamp()); } entry.append("memberId", itr->memberId); entry.append("cfgver", _rsConfig.getConfigVersion()); @@ -1650,7 +1876,7 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* respo stdx::placeholders::_1, _replExecutor.now(), time(0) - serverGlobalParams.started, - getMyLastOptime(), + getMyLastAppliedOpTime(), getLastCommittedOpTime(), response, &result)); @@ -1698,11 +1924,11 @@ void ReplicationCoordinatorImpl::appendSlaveInfoData(BSONObjBuilder* result) { entry.append("rid", itr->rid); if (isV1ElectionProtocol()) { BSONObjBuilder opTime(entry.subobjStart("optime")); - opTime.append("ts", itr->opTime.getTimestamp()); - opTime.append("term", itr->opTime.getTerm()); + opTime.append("ts", itr->lastDurableOpTime.getTimestamp()); + opTime.append("term", itr->lastDurableOpTime.getTerm()); opTime.done(); } else { - entry.append("optime", itr->opTime.getTimestamp()); + entry.append("optime", itr->lastDurableOpTime.getTimestamp()); } entry.append("host", itr->hostAndPort.toString()); if (getReplicationMode() == modeReplSet) { @@ -1844,7 +2070,7 @@ Status ReplicationCoordinatorImpl::processReplSetSyncFrom(const HostAndPort& tar _topCoord.get(), stdx::placeholders::_1, target, - _getMyLastOptime_inlock(), + _getMyLastAppliedOpTime_inlock(), resultObj, &result)); if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) { @@ -1933,8 +2159,12 @@ void ReplicationCoordinatorImpl::_processHeartbeatFinish( auto senderHost(args.getSenderHost()); const Date_t now = _replExecutor.now(); - *outStatus = 
_topCoord->prepareHeartbeatResponse( - now, args, _settings.ourSetName(), getMyLastOptime(), response); + *outStatus = _topCoord->prepareHeartbeatResponse(now, + args, + _settings.ourSetName(), + getMyLastAppliedOpTime(), + getMyLastDurableOpTime(), + response); if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) && _selfIndex < 0) { // If this node does not belong to the configuration it knows about, send heartbeats @@ -2402,7 +2632,8 @@ void ReplicationCoordinatorImpl::_processReplSetFresh_finish( return; } - _topCoord->prepareFreshResponse(args, _replExecutor.now(), getMyLastOptime(), response, result); + _topCoord->prepareFreshResponse( + args, _replExecutor.now(), getMyLastAppliedOpTime(), response, result); } Status ReplicationCoordinatorImpl::processReplSetElect(const ReplSetElectArgs& args, @@ -2433,7 +2664,8 @@ void ReplicationCoordinatorImpl::_processReplSetElect_finish( return; } - _topCoord->prepareElectResponse(args, _replExecutor.now(), getMyLastOptime(), response, result); + _topCoord->prepareElectResponse( + args, _replExecutor.now(), getMyLastAppliedOpTime(), response, result); } ReplicationCoordinatorImpl::PostMemberStateUpdateAction @@ -2446,7 +2678,7 @@ ReplicationCoordinatorImpl::_setCurrentRSConfig_inlock( _setConfigState_inlock(kConfigSteady); // Must get this before changing our config. 
- OpTime myOptime = _getMyLastOptime_inlock(); + OpTime myOptime = _getMyLastAppliedOpTime_inlock(); _topCoord->updateConfig(newConfig, myIndex, _replExecutor.now(), myOptime); _cachedTerm = _topCoord->getTerm(); const ReplicaSetConfig oldConfig = _rsConfig; @@ -2505,6 +2737,31 @@ void ReplicationCoordinatorImpl::_wakeReadyWaiters_inlock() { } } +Status ReplicationCoordinatorImpl::processReplSetUpdatePosition( + const OldUpdatePositionArgs& updates, long long* configVersion) { + stdx::unique_lock<stdx::mutex> lock(_mutex); + Status status = Status::OK(); + bool somethingChanged = false; + for (OldUpdatePositionArgs::UpdateIterator update = updates.updatesBegin(); + update != updates.updatesEnd(); + ++update) { + status = _setLastOptime_inlock(*update, configVersion); + if (!status.isOK()) { + break; + } + somethingChanged = true; + } + + if (somethingChanged && !_getMemberState_inlock().primary()) { + lock.unlock(); + // Must do this outside _mutex + // TODO: enable _dr, remove _externalState when DataReplicator is used excl. + //_dr.slavesHaveProgressed(); + _externalState->forwardSlaveProgress(); + } + return status; +} + Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(const UpdatePositionArgs& updates, long long* configVersion) { stdx::unique_lock<stdx::mutex> lock(_mutex); @@ -2565,19 +2822,25 @@ bool ReplicationCoordinatorImpl::buildsIndexes() { return self.shouldBuildIndexes(); } -std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op) { +std::vector<HostAndPort> ReplicationCoordinatorImpl::getHostsWrittenTo(const OpTime& op, + bool durablyWritten) { std::vector<HostAndPort> hosts; stdx::lock_guard<stdx::mutex> lk(_mutex); for (size_t i = 0; i < _slaveInfo.size(); ++i) { const SlaveInfo& slaveInfo = _slaveInfo[i]; - if (slaveInfo.opTime < op) { + if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) { + // Master-slave doesn't know the HostAndPort for itself at this point. 
continue; } - if (getReplicationMode() == modeMasterSlave && slaveInfo.rid == _getMyRID_inlock()) { - // Master-slave doesn't know the HostAndPort for itself at this point. + if (durablyWritten) { + if (slaveInfo.lastDurableOpTime < op) { + continue; + } + } else if (slaveInfo.lastAppliedOpTime < op) { continue; } + hosts.push_back(slaveInfo.hostAndPort); } return hosts; @@ -2732,7 +2995,7 @@ void ReplicationCoordinatorImpl::blacklistSyncSource(const HostAndPort& host, Da _replExecutor.wait(cbh.getValue()); } -void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) { +void ReplicationCoordinatorImpl::resetLastOpTimesFromOplog(OperationContext* txn) { StatusWith<OpTime> lastOpTimeStatus = _externalState->loadLastOpTime(txn); OpTime lastOpTime; if (!lastOpTimeStatus.isOK()) { @@ -2741,8 +3004,10 @@ void ReplicationCoordinatorImpl::resetLastOpTimeFromOplog(OperationContext* txn) } else { lastOpTime = lastOpTimeStatus.getValue(); } + stdx::unique_lock<stdx::mutex> lk(_mutex); - _setMyLastOptimeAndReport_inlock(&lk, lastOpTime, true); + _setMyLastAppliedOpTime_inlock(lastOpTime, true); + _setMyLastDurableOpTimeAndReport_inlock(&lk, lastOpTime, true); _externalState->setGlobalTimestamp(lastOpTime.getTimestamp()); } @@ -2757,7 +3022,7 @@ void ReplicationCoordinatorImpl::_shouldChangeSyncSource( } *shouldChange = _topCoord->shouldChangeSyncSource(currentSource, - getMyLastOptime(), + getMyLastAppliedOpTime(), syncSourceLastOpTime, syncSourceHasSyncSource, _replExecutor.now()); @@ -2794,7 +3059,7 @@ void ReplicationCoordinatorImpl::_updateLastCommittedOpTime_inlock() { auto memberConfig = _rsConfig.findMemberByID(sI.memberId); invariant(memberConfig); if (memberConfig->isVoter()) { - votingNodesOpTimes.push_back(sI.opTime); + votingNodesOpTimes.push_back(sI.lastDurableOpTime); } } @@ -2816,8 +3081,9 @@ void ReplicationCoordinatorImpl::_setLastCommittedOpTime(const OpTime& committed } void 
ReplicationCoordinatorImpl::_setLastCommittedOpTime_inlock(const OpTime& committedOpTime) { - if (committedOpTime <= _lastCommittedOpTime) + if (committedOpTime <= _lastCommittedOpTime) { return; // This may have come from an out-of-order heartbeat. Ignore it. + } // This check is performed to ensure primaries do not commit an OpTime from a previous term. if (_getMemberState_inlock().primary() && committedOpTime < _firstOpTimeOfMyTerm) { @@ -2825,7 +3091,8 @@ void ReplicationCoordinatorImpl::_setLastCommittedOpTime_inlock(const OpTime& co } if (_getMemberState_inlock().arbiter()) { - _setMyLastOptime_inlock(committedOpTime, false); + _setMyLastAppliedOpTime_inlock(committedOpTime, false); + _setMyLastDurableOpTime_inlock(committedOpTime, false); } _lastCommittedOpTime = committedOpTime; @@ -2833,6 +3100,7 @@ void ReplicationCoordinatorImpl::_setLastCommittedOpTime_inlock(const OpTime& co _externalState->notifyOplogMetadataWaiters(); auto maxSnapshotForOpTime = SnapshotInfo{committedOpTime, SnapshotName::max()}; + if (!_uncommittedSnapshots.empty() && _uncommittedSnapshots.front() <= maxSnapshotForOpTime) { // At least one uncommitted snapshot is ready to be blessed as committed. 
@@ -2913,7 +3181,7 @@ void ReplicationCoordinatorImpl::_processReplSetRequestVotes_finish( } stdx::unique_lock<stdx::mutex> lk(_mutex); - _topCoord->processReplSetRequestVotes(args, response, _getMyLastOptime_inlock()); + _topCoord->processReplSetRequestVotes(args, response, _getMyLastAppliedOpTime_inlock()); *result = Status::OK(); } @@ -2984,13 +3252,21 @@ void ReplicationCoordinatorImpl::_prepareReplResponseMetadata_finish( rpc::ReplSetMetadata* metadata) { OpTime lastReadableOpTime = getCurrentCommittedSnapshotOpTime(); OpTime lastVisibleOpTime = std::max(lastOpTimeFromClient, lastReadableOpTime); - _topCoord->prepareReplResponseMetadata(metadata, lastVisibleOpTime, getLastCommittedOpTime()); + _topCoord->prepareReplResponseMetadata(metadata, lastVisibleOpTime, _lastCommittedOpTime); } bool ReplicationCoordinatorImpl::isV1ElectionProtocol() { return _protVersion.load() == 1; } +bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal() { + return getConfig().getWriteConcernMajorityShouldJournal(); +} + +bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal_inlock() const { + return _rsConfig.getWriteConcernMajorityShouldJournal(); +} + Status ReplicationCoordinatorImpl::processHeartbeatV1(const ReplSetHeartbeatArgsV1& args, ReplSetHeartbeatResponse* response) { { @@ -3031,8 +3307,12 @@ void ReplicationCoordinatorImpl::_processHeartbeatFinishV1( auto senderHost(args.getSenderHost()); const Date_t now = _replExecutor.now(); - *outStatus = _topCoord->prepareHeartbeatResponseV1( - now, args, _settings.ourSetName(), getMyLastOptime(), response); + *outStatus = _topCoord->prepareHeartbeatResponseV1(now, + args, + _settings.ourSetName(), + getMyLastAppliedOpTime(), + getMyLastDurableOpTime(), + response); if ((outStatus->isOK() || *outStatus == ErrorCodes::InvalidReplicaSetConfig) && _selfIndex < 0) { @@ -3082,7 +3362,8 @@ void ReplicationCoordinatorImpl::_summarizeAsHtml_finish(const CallbackArgs& cbD return; } - 
output->setSelfOptime(getMyLastOptime()); + // TODO(dannenberg) consider putting both optimes into the htmlsummary. + output->setSelfOptime(getMyLastAppliedOpTime()); output->setSelfUptime(time(0) - serverGlobalParams.started); output->setNow(_replExecutor.now()); @@ -3387,5 +3668,19 @@ void ReplicationCoordinatorImpl::_scheduleElectionWinNotification() { } } +WriteConcernOptions ReplicationCoordinatorImpl::populateUnsetWriteConcernOptionsSyncMode( + WriteConcernOptions wc) { + WriteConcernOptions writeConcern(wc); + if (writeConcern.syncMode == WriteConcernOptions::SyncMode::UNSET) { + if (writeConcern.wMode == WriteConcernOptions::kMajority && _isDurableStorageEngine() && + getWriteConcernMajorityShouldJournal()) { + writeConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL; + } else { + writeConcern.syncMode = WriteConcernOptions::SyncMode::NONE; + } + } + return writeConcern; +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 8252ac53412..c5cde2742bf 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -37,6 +37,7 @@ #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/repl/data_replicator.h" #include "mongo/db/repl/member_state.h" +#include "mongo/db/repl/old_update_position_args.h" #include "mongo/db/repl/optime.h" #include "mongo/db/repl/replica_set_config.h" #include "mongo/db/repl/replication_coordinator.h" @@ -104,7 +105,8 @@ public: ReplicationCoordinatorExternalState* externalState, TopologyCoordinator* topoCoord, ReplicationExecutor* replExec, - int64_t prngSeed); + int64_t prngSeed, + stdx::function<bool()>* isDurableStorageEngineFn); virtual ~ReplicationCoordinatorImpl(); // ================== Members of public ReplicationCoordinator API =================== @@ -170,15 +172,18 @@ public: virtual Status setLastOptimeForSlave(const OID& rid, const 
Timestamp& ts); - virtual void setMyLastOptime(const OpTime& opTime); + virtual void setMyLastAppliedOpTime(const OpTime& opTime); + virtual void setMyLastDurableOpTime(const OpTime& opTime); - virtual void setMyLastOptimeForward(const OpTime& opTime); + virtual void setMyLastAppliedOpTimeForward(const OpTime& opTime); + virtual void setMyLastDurableOpTimeForward(const OpTime& opTime); - virtual void resetMyLastOptime(); + virtual void resetMyLastOpTimes(); virtual void setMyHeartbeatMessage(const std::string& msg); - virtual OpTime getMyLastOptime() const override; + virtual OpTime getMyLastAppliedOpTime() const override; + virtual OpTime getMyLastDurableOpTime() const override; virtual ReadConcernResponse waitUntilOpTime(OperationContext* txn, const ReadConcernArgs& settings) override; @@ -199,6 +204,7 @@ public: virtual void signalUpstreamUpdater() override; + virtual bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) override; virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) override; virtual Status processReplSetGetStatus(BSONObjBuilder* result) override; @@ -245,6 +251,8 @@ public: virtual Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* response) override; + virtual Status processReplSetUpdatePosition(const OldUpdatePositionArgs& updates, + long long* configVersion) override; virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates, long long* configVersion) override; @@ -252,7 +260,8 @@ public: virtual bool buildsIndexes() override; - virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op) override; + virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, + bool durablyWritten) override; virtual std::vector<HostAndPort> getOtherNodesInReplSet() const override; @@ -266,7 +275,7 @@ public: virtual void blacklistSyncSource(const HostAndPort& host, Date_t until) override; - virtual void resetLastOpTimeFromOplog(OperationContext* txn) 
override; + virtual void resetLastOpTimesFromOplog(OperationContext* txn) override; virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, const OpTime& syncSourceLastOpTime, @@ -290,6 +299,8 @@ public: virtual bool isV1ElectionProtocol() override; + virtual bool getWriteConcernMajorityShouldJournal() override; + virtual void summarizeAsHtml(ReplSetHtmlSummary* s) override; virtual void dropAllSnapshots() override; @@ -315,6 +326,9 @@ public: virtual size_t getNumUncommittedSnapshots() override; + virtual WriteConcernOptions populateUnsetWriteConcernOptionsSyncMode( + WriteConcernOptions wc) override; + // ================== Test support API =================== /** @@ -341,9 +355,10 @@ public: Date_t getPriorityTakeover_forTest() const; /** - * Simple wrapper around _setLastOptime_inlock to make it easier to test. + * Simple wrappers around _setLastOptime_inlock to make it easier to test. */ - Status setLastOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime); + Status setLastAppliedOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime); + Status setLastDurableOptime_forTest(long long cfgVer, long long memberId, const OpTime& opTime); /** * Non-blocking version of stepDown. @@ -436,7 +451,8 @@ private: int64_t prngSeed, executor::NetworkInterface* network, StorageInterface* storage, - ReplicationExecutor* replExec); + ReplicationExecutor* replExec, + stdx::function<bool()>* isDurableStorageEngineFn); /** * Configuration states for a replica set node. * @@ -485,7 +501,10 @@ private: // Struct that holds information about nodes in this replication group, mainly used for // tracking replication progress for write concern satisfaction. struct SlaveInfo { - OpTime opTime; // Our last known OpTime that this slave has replicated to. + // Our last known OpTime that this slave has applied and journaled to. 
+ OpTime lastDurableOpTime; + // Our last known OpTime that this slave has applied, whether journaled or unjournaled. + OpTime lastAppliedOpTime; HostAndPort hostAndPort; // Client address of the slave. int memberId = -1; // Id of the node in the replica set config, or -1 if we're not a replSet. @@ -519,11 +538,18 @@ private: void _addSlaveInfo_inlock(const SlaveInfo& slaveInfo); /** - * Updates the item in _slaveInfo pointed to by 'slaveInfo' with the given OpTime 'opTime' - * and wakes up any threads waiting for replication that now have their write concern - * satisfied. + * Updates the durableOpTime field on the item in _slaveInfo pointed to by 'slaveInfo' with the + * given OpTime 'opTime' and wakes up any threads waiting for replication that now have their + * write concern satisfied. */ - void _updateSlaveInfoOptime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime); + void _updateSlaveInfoDurableOpTime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime); + + /** + * Updates the appliedOpTime field on the item in _slaveInfo pointed to by 'slaveInfo' with the + * given OpTime 'opTime' and wakes up any threads waiting for replication that now have their + * write concern satisfied. + */ + void _updateSlaveInfoAppliedOpTime_inlock(SlaveInfo* slaveInfo, const OpTime& opTime); /** * Returns the index into _slaveInfo where data corresponding to ourself is stored. @@ -533,6 +559,11 @@ private: size_t _getMyIndexInSlaveInfo_inlock() const; /** + * Returns the _writeConcernMajorityJournalDefault of our current _rsConfig. + */ + bool getWriteConcernMajorityShouldJournal_inlock() const; + + /** * Helper method that removes entries from _slaveInfo if they correspond to a node * with a member ID that is not in the current replica set config. Will always leave an * entry for ourself at the beginning of _slaveInfo, even if we aren't present in the @@ -665,15 +696,18 @@ private: /** * Helper for _doneWaitingForReplication_inlock that takes an integer write concern. 
+ * "durablyWritten" indicates whether the operation has to be durably applied. */ - bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes); + bool _haveNumNodesReachedOpTime_inlock(const OpTime& opTime, int numNodes, bool durablyWritten); /** * Helper for _doneWaitingForReplication_inlock that takes a tag pattern representing a * named write concern mode. + * "durablyWritten" indicates whether the operation has to be durably applied. */ bool _haveTaggedNodesReachedOpTime_inlock(const OpTime& opTime, - const ReplicaSetTagPattern& tagPattern); + const ReplicaSetTagPattern& tagPattern, + bool durablyWritten); Status _checkIfWriteConcernCanBeSatisfied_inlock(const WriteConcernOptions& writeConcern) const; @@ -702,7 +736,8 @@ private: int _getMyId_inlock() const; - OpTime _getMyLastOptime_inlock() const; + OpTime _getMyLastAppliedOpTime_inlock() const; + OpTime _getMyLastDurableOpTime_inlock() const; /** * Bottom half of setFollowerMode. @@ -722,24 +757,44 @@ private: * This is only valid to call on replica sets. * "configVersion" will be populated with our config version if it and the configVersion * of "args" differ. + * + * The OldUpdatePositionArgs version provides support for the pre-3.2.2 format of + * UpdatePositionArgs. */ + Status _setLastOptime_inlock(const OldUpdatePositionArgs::UpdateInfo& args, + long long* configVersion); Status _setLastOptime_inlock(const UpdatePositionArgs::UpdateInfo& args, long long* configVersion); /** - * Helper method for setMyLastOptime that takes in a unique lock on + * Helper method for setMyLastAppliedOptime that takes in a unique lock on + * _mutex. The passed in lock must already be locked. It is unspecified what state the + * lock will be in after this method finishes. + * + * This function has the same rules for "opTime" as setMyLastAppliedOptime(), unless + * "isRollbackAllowed" is true. + * + * This function will also report our position externally (like upstream) if necessary. 
+ */ + void _setMyLastAppliedOpTimeAndReport_inlock(stdx::unique_lock<stdx::mutex>* lock, + const OpTime& opTime, + bool isRollbackAllowed); + void _setMyLastAppliedOpTime_inlock(const OpTime& opTime, bool isRollbackAllowed); + + /** + * Helper method for setMyLastDurableOptime that takes in a unique lock on * _mutex. The passed in lock must already be locked. It is unspecified what state the * lock will be in after this method finishes. * - * This function has the same rules for "opTime" as setMyLastOptime(), unless + * This function has the same rules for "opTime" as setMyLastDurableOptime(), unless * "isRollbackAllowed" is true. * * This function will also report our position externally (like upstream) if necessary. */ - void _setMyLastOptimeAndReport_inlock(stdx::unique_lock<stdx::mutex>* lock, - const OpTime& opTime, - bool isRollbackAllowed); - void _setMyLastOptime_inlock(const OpTime& opTime, bool isRollbackAllowed); + void _setMyLastDurableOpTimeAndReport_inlock(stdx::unique_lock<stdx::mutex>* lock, + const OpTime& opTime, + bool isRollbackAllowed); + void _setMyLastDurableOpTime_inlock(const OpTime& opTime, bool isRollbackAllowed); /** * Schedules a heartbeat to be sent to "target" at "when". "targetIndex" is the index @@ -766,9 +821,12 @@ private: /** * Helper for _handleHeartbeatResponse. * - * Updates the optime associated with the member at "memberIndex" in our config. + * Updates the lastDurableOpTime and lastAppliedOpTime associated with the member at + * "memberIndex" in our config. */ - void _updateOpTimeFromHeartbeat_inlock(int memberIndex, const OpTime& optime); + void _updateOpTimesFromHeartbeat_inlock(int targetIndex, + const OpTime& durableOpTime, + const OpTime& appliedOpTime); /** * Starts a heartbeat for each member in the current config. Called within the executor @@ -1235,8 +1293,7 @@ private: // TODO: ideally this should only change on rollbacks NOT on mongod restarts also. 
int _rbid; // (M) - // list of information about clients waiting on replication. Does *not* own the - // WaiterInfos. + // list of information about clients waiting on replication. Does *not* own the WaiterInfos. std::vector<WaiterInfo*> _replicationWaiterList; // (M) // list of information about clients waiting for a particular opTime. @@ -1391,6 +1448,9 @@ private: // Cached copy of the current config protocol version. AtomicInt64 _protVersion; // (S) + + // Lambda indicating durability of storageEngine. + stdx::function<bool()> _isDurableStorageEngine; // (R) }; } // namespace repl diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp index c2105a1007a..e1109d0cc4d 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect.cpp @@ -121,7 +121,7 @@ void ReplicationCoordinatorImpl::_startElectSelf() { invariant(_rsConfig.getMemberAt(_selfIndex).isElectable()); - OpTime lastOpTimeApplied(_getMyLastOptime_inlock()); + OpTime lastOpTimeApplied(_getMyLastAppliedOpTime_inlock()); if (lastOpTimeApplied.isNull()) { log() << "not trying to elect self, " @@ -275,7 +275,7 @@ void ReplicationCoordinatorImpl::_recoverFromElectionTie( return; } auto now = _replExecutor.now(); - auto lastOpApplied = getMyLastOptime(); + auto lastOpApplied = getMyLastAppliedOpTime(); if (_topCoord->checkShouldStandForElection(now, lastOpApplied)) { fassert(28817, _topCoord->becomeCandidateIfElectable(now, lastOpApplied)); _startElectSelf(); diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp index ce50d2e9619..6dac4852d0f 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_test.cpp @@ -161,7 +161,7 @@ TEST_F(ReplCoordElectTest, ElectionSucceedsWhenNodeIsTheOnlyElectableNode) { 
ASSERT(getReplCoord()->getMemberState().secondary()) << getReplCoord()->getMemberState().toString(); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0)); NetworkInterfaceMock* net = getNet(); net->enterNetwork(); @@ -232,7 +232,7 @@ TEST_F(ReplCoordElectTest, ElectionSucceedsWhenAllNodesVoteYea) { << "node3:12345"))); assertStartSuccess(configObj, HostAndPort("node1", 12345)); OperationContextNoop txn; - getReplCoord()->setMyLastOptime(OpTime{{100, 1}, 0}); + getReplCoord()->setMyLastAppliedOpTime(OpTime{{100, 1}, 0}); getExternalState()->setLastOpTime(OpTime{{100, 1}, 0}); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); @@ -259,7 +259,7 @@ TEST_F(ReplCoordElectTest, ElectionFailsWhenOneNodeVotesNay) { OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -304,7 +304,7 @@ TEST_F(ReplCoordElectTest, VotesWithStringValuesAreNotCountedAsYeas) { OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -349,7 +349,7 @@ TEST_F(ReplCoordElectTest, ElectionsAbortWhenNodeTransitionsToRollbackState) { OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -384,7 +384,7 @@ TEST_F(ReplCoordElectTest, NodeWillNotStandForElectionDuringHeartbeatReconfig) { << "node5:12345"))), HostAndPort("node1", 12345)); 
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); // set hbreconfig to hang while in progress getExternalState()->setStoreLocalConfigDocumentToHang(true); @@ -478,7 +478,7 @@ TEST_F(ReplCoordElectTest, StepsDownRemoteIfNodeHasHigherPriorityThanCurrentPrim OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); auto net = getNet(); diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp index 1ed1f7769b3..97b545a1292 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp @@ -124,9 +124,9 @@ void ReplicationCoordinatorImpl::_startElectSelfV1() { invariant(_rsConfig.getMemberAt(_selfIndex).isElectable()); - OpTime lastOpTimeApplied(_getMyLastOptime_inlock()); + OpTime lastOpTimeDurable(_getMyLastDurableOpTime_inlock()); - if (lastOpTimeApplied == OpTime()) { + if (lastOpTimeDurable == OpTime()) { log() << "not trying to elect self, " "do not yet have a complete set of data from any point in time"; return; @@ -147,7 +147,7 @@ void ReplicationCoordinatorImpl::_startElectSelfV1() { _selfIndex, _topCoord->getTerm(), true, // dry run - getMyLastOptime(), + getMyLastDurableOpTime(), stdx::bind(&ReplicationCoordinatorImpl::_onDryRunComplete, this, term)); if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) { return; @@ -245,7 +245,7 @@ void ReplicationCoordinatorImpl::_startVoteRequester(long long newTerm) { _selfIndex, _topCoord->getTerm(), false, - getMyLastOptime(), + getMyLastDurableOpTime(), stdx::bind(&ReplicationCoordinatorImpl::_onVoteRequestComplete, this, newTerm)); if 
(nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) { return; diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp index bd8ddfbc139..e29e772b011 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp @@ -104,7 +104,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenNodeIsTheOnlyElectableNode) { ASSERT(getReplCoord()->getMemberState().secondary()) << getReplCoord()->getMemberState().toString(); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0)); auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest(); ASSERT_NOT_EQUALS(Date_t(), electionTimeoutWhen); @@ -160,7 +161,8 @@ TEST_F(ReplCoordElectV1Test, StartElectionDoesNotStartAnElectionWhenNodeIsRecove ASSERT(getReplCoord()->getMemberState().recovering()) << getReplCoord()->getMemberState().toString(); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0)); simulateEnoughHeartbeatsForElectability(); auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest(); @@ -177,7 +179,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenNodeIsTheOnlyNode) { << "node1:12345")) << "protocolVersion" << 1), HostAndPort("node1", 12345)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0)); getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); getReplCoord()->waitForElectionFinish_forTest(); ASSERT(getReplCoord()->getMemberState().primary()) @@ -208,7 +211,8 
@@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenAllNodesVoteYea) { << 1); assertStartSuccess(configObj, HostAndPort("node1", 12345)); OperationContextNoop txn; - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); startCapturingLogMessages(); simulateSuccessfulV1Election(); @@ -243,7 +247,8 @@ TEST_F(ReplCoordElectV1Test, ElectionSucceedsWhenMaxSevenNodesVoteYea) { << "protocolVersion" << 1); assertStartSuccess(configObj, HostAndPort("node1", 12345)); OperationContextNoop txn; - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); startCapturingLogMessages(); simulateSuccessfulV1Election(); @@ -276,7 +281,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenInsufficientVotesAreReceivedDuring OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -331,7 +337,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenDryRunResponseContainsANewerTerm) OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -390,7 +397,8 @@ TEST_F(ReplCoordElectV1Test, NodeWillNotStandForElectionDuringHeartbeatReconfig) << "protocolVersion" << 
1), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); // set hbreconfig to hang while in progress getExternalState()->setStoreLocalConfigDocumentToHang(true); @@ -498,7 +506,8 @@ TEST_F(ReplCoordElectV1Test, NodeWillNotStandForElectionDuringHeartbeatReconfig) // // OperationContextNoop txn; // OpTime time1(Timestamp(100, 1), 0); -// getReplCoord()->setMyLastOptime(time1); +// getReplCoord()->setMyLastAppliedOpTime(time1); +// getReplCoord()->setMyLastDurableOpTime(time1); // ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); // // simulateEnoughHeartbeatsForElectability(); @@ -556,7 +565,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenInsufficientVotesAreReceivedDuring OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -603,7 +613,8 @@ TEST_F(ReplCoordElectV1Test, ElectionsAbortWhenNodeTransitionsToRollbackState) { OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -637,7 +648,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenVoteRequestResponseContainsANewerT OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); 
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -687,7 +699,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringDryRun) { OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -722,7 +735,8 @@ TEST_F(ReplCoordElectV1Test, ElectionFailsWhenTermChangesDuringActualElection) { OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); simulateEnoughHeartbeatsForElectability(); @@ -775,7 +789,8 @@ TEST_F(ReplCoordElectV1Test, SchedulesPriorityTakeoverIfNodeHasHigherPriorityTha OperationContextNoop txn; OpTime time1(Timestamp(100, 1), 0); - replCoord->setMyLastOptime(time1); + replCoord->setMyLastAppliedOpTime(time1); + replCoord->setMyLastDurableOpTime(time1); ASSERT(replCoord->setFollowerMode(MemberState::RS_SECONDARY)); ASSERT_EQUALS(Date_t(), replCoord->getPriorityTakeover_forTest()); diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 734a054238b..600c16e8b0e 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -138,7 +138,7 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( } } const Date_t now = _replExecutor.now(); - const OpTime lastApplied = getMyLastOptime(); // Locks and unlocks _mutex. + const OpTime lastApplied = getMyLastAppliedOpTime(); // Locks and unlocks _mutex. 
Milliseconds networkTime(0); StatusWith<ReplSetHeartbeatResponse> hbStatusResponse(hbResponse); @@ -165,15 +165,20 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( now, networkTime, target, hbStatusResponse, lastApplied); if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() && - hbStatusResponse.getValue().hasOpTime() && targetIndex >= 0 && - hbStatusResponse.getValue().hasState() && + targetIndex >= 0 && hbStatusResponse.getValue().hasState() && hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) { - stdx::unique_lock<stdx::mutex> lk(_mutex); - if (hbStatusResponse.getValue().getConfigVersion() == _rsConfig.getConfigVersion()) { - _updateOpTimeFromHeartbeat_inlock(targetIndex, hbStatusResponse.getValue().getOpTime()); - // TODO: Enable with Data Replicator - // lk.unlock(); - //_dr.slavesHaveProgressed(); + ReplSetHeartbeatResponse hbResp = hbStatusResponse.getValue(); + if (hbResp.hasAppliedOpTime()) { + stdx::unique_lock<stdx::mutex> lk(_mutex); + if (hbResp.getConfigVersion() == _rsConfig.getConfigVersion()) { + _updateOpTimesFromHeartbeat_inlock( + targetIndex, + hbResp.hasDurableOpTime() ? 
hbResp.getDurableOpTime() : OpTime(), + hbResp.getAppliedOpTime()); + // TODO: Enable with Data Replicator + // lk.unlock(); + //_dr.slavesHaveProgressed(); + } } } @@ -186,14 +191,18 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse( _handleHeartbeatResponseAction(action, hbStatusResponse); } -void ReplicationCoordinatorImpl::_updateOpTimeFromHeartbeat_inlock(int targetIndex, - const OpTime& optime) { +void ReplicationCoordinatorImpl::_updateOpTimesFromHeartbeat_inlock(int targetIndex, + const OpTime& durableOpTime, + const OpTime& appliedOpTime) { invariant(_selfIndex >= 0); invariant(targetIndex >= 0); SlaveInfo& slaveInfo = _slaveInfo[targetIndex]; - if (optime > slaveInfo.opTime) { - _updateSlaveInfoOptime_inlock(&slaveInfo, optime); + if (appliedOpTime > slaveInfo.lastAppliedOpTime) { + _updateSlaveInfoAppliedOpTime_inlock(&slaveInfo, appliedOpTime); + } + if (durableOpTime > slaveInfo.lastDurableOpTime) { + _updateSlaveInfoDurableOpTime_inlock(&slaveInfo, durableOpTime); } } @@ -592,7 +601,7 @@ void ReplicationCoordinatorImpl::_handleLivenessTimeout( // Secondaries might not see other secondaries in the cluster if they are not // downstream. HeartbeatResponseAction action = - _topCoord->setMemberAsDown(now, memberIndex, _getMyLastOptime_inlock()); + _topCoord->setMemberAsDown(now, memberIndex, _getMyLastDurableOpTime_inlock()); // Don't mind potential asynchronous stepdown as this is the last step of // liveness check. 
_handleHeartbeatResponseAction(action, makeStatusWith<ReplSetHeartbeatResponse>()); @@ -731,7 +740,7 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(bool isPriorityTake _cancelAndRescheduleElectionTimeout_inlock(); } - if (!_topCoord->becomeCandidateIfElectable(_replExecutor.now(), getMyLastOptime())) { + if (!_topCoord->becomeCandidateIfElectable(_replExecutor.now(), getMyLastDurableOpTime())) { if (isPriorityTakeOver) { log() << "Not starting an election for a priority takeover, since we are not " "electable"; diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp index 047e310cb4e..8b064ada513 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp @@ -353,7 +353,7 @@ TEST_F(ReplCoordHBV1Test, ArbiterRecordsCommittedOpTimeFromHeartbeatMetadata) { ASSERT_OK(metadata.getStatus()); getReplCoord()->processReplSetMetadata(metadata.getValue()); - ASSERT_EQ(getReplCoord()->getMyLastOptime().getTimestamp(), expected.getTimestamp()); + ASSERT_EQ(getReplCoord()->getMyLastAppliedOpTime().getTimestamp(), expected.getTimestamp()); }; OpTime committedOpTime{Timestamp{10, 10}, 10}; diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp index a3cd03e3798..444bf83d5ac 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp @@ -80,7 +80,8 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenReconfigReceivedWhileSecondary) { HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + 
getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); BSONObjBuilder result; ReplSetReconfigArgs args; @@ -102,7 +103,8 @@ TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWith << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); BSONObjBuilder result; @@ -135,7 +137,8 @@ TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWith << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); BSONObjBuilder result; @@ -167,7 +170,8 @@ TEST_F(ReplCoordTest, << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); BSONObjBuilder result; @@ -231,7 +235,8 @@ TEST_F(ReplCoordTest, << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); Status status(ErrorCodes::InternalError, "Not Set"); @@ -269,7 +274,8 @@ 
TEST_F(ReplCoordTest, NodeReturnsOutOfDiskSpaceWhenSavingANewConfigFailsDuringRe << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); Status status(ErrorCodes::InternalError, "Not Set"); @@ -295,7 +301,8 @@ TEST_F(ReplCoordTest, << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); Status status(ErrorCodes::InternalError, "Not Set"); @@ -331,7 +338,8 @@ TEST_F(ReplCoordTest, NodeReturnsConfigurationInProgressWhenReceivingAReconfigWh init(); start(HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); // initiate Status status(ErrorCodes::InternalError, "Not Set"); @@ -372,7 +380,8 @@ TEST_F(ReplCoordTest, PrimaryNodeAcceptsNewConfigWhenReceivingAReconfigWithAComp << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); Status status(ErrorCodes::InternalError, "Not Set"); @@ -413,7 +422,8 @@ TEST_F( << "node2:12345"))), 
HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); ASSERT_TRUE(getReplCoord()->getMemberState().primary()); @@ -468,7 +478,8 @@ TEST_F(ReplCoordTest, NodeDoesNotAcceptHeartbeatReconfigWhileInTheMidstOfReconfi << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); ASSERT_TRUE(getReplCoord()->getMemberState().primary()); @@ -530,7 +541,8 @@ TEST_F(ReplCoordTest, NodeAcceptsConfigFromAReconfigWithForceTrueWhileNotPrimary << "node2:12345"))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); // fail before forced BSONObjBuilder result; diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index db7fc578af8..d7f37369a35 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -39,6 +39,7 @@ #include "mongo/db/operation_context_noop.h" #include "mongo/db/repl/handshake_args.h" #include "mongo/db/repl/is_master_response.h" +#include "mongo/db/repl/old_update_position_args.h" #include "mongo/db/repl/operation_context_repl_mock.h" #include "mongo/db/repl/optime.h" #include 
"mongo/db/repl/read_concern_args.h" @@ -94,7 +95,8 @@ struct OpTimeWithTermZero { }; void runSingleNodeElection(ReplicationCoordinatorImpl* replCoord) { - replCoord->setMyLastOptime(OpTime(Timestamp(1, 0), 0)); + replCoord->setMyLastAppliedOpTime(OpTime(Timestamp(1, 0), 0)); + replCoord->setMyLastDurableOpTime(OpTime(Timestamp(1, 0), 0)); ASSERT(replCoord->setFollowerMode(MemberState::RS_SECONDARY)); replCoord->waitForElectionFinish_forTest(); @@ -749,7 +751,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWith // Become primary. ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); ASSERT(getReplCoord()->getMemberState().primary()); @@ -760,6 +763,80 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWith ASSERT_TRUE(getExternalState()->isApplierSignaledToCancelFetcher()); } +TEST_F(ReplCoordTest, + NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodesHaveTheWriteDurable) { + OperationContextNoop txn; + assertStartSuccess( + BSON("_id" + << "mySet" + << "version" << 2 << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" << 0) + << BSON("host" + << "node2:12345" + << "_id" << 1) << BSON("host" + << "node3:12345" + << "_id" << 2) << BSON("host" + << "node4:12345" + << "_id" << 3))), + HostAndPort("node1", 12345)); + ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); + simulateSuccessfulV1Election(); + + OpTimeWithTermZero time1(100, 1); + OpTimeWithTermZero time2(100, 2); + + WriteConcernOptions writeConcern; + writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; + 
writeConcern.wNumNodes = 1; + writeConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL; + + // 1 node waiting for time 1 + ReplicationCoordinator::StatusAndDuration statusAndDur = + getReplCoord()->awaitReplication(&txn, time1, writeConcern); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); + statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); + ASSERT_OK(statusAndDur.status); + + // 2 nodes waiting for time1 + writeConcern.wNumNodes = 2; + statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); + // Applied is not durable and will not satisfy WriteConcern with SyncMode JOURNAL. + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); + statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time1)); + statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); + ASSERT_OK(statusAndDur.status); + + // 2 nodes waiting for time2 + statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time2)); + statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); + ASSERT_OK(statusAndDur.status); + + // 3 nodes waiting for time2 + writeConcern.wNumNodes = 3; + statusAndDur = 
getReplCoord()->awaitReplication(&txn, time2, writeConcern); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time2)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, time2)); + statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); + ASSERT_OK(statusAndDur.status); +} + TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodesHaveTheWrite) { OperationContextNoop txn; assertStartSuccess( @@ -778,7 +855,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes << "_id" << 3))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); OpTimeWithTermZero time1(100, 1); @@ -792,7 +870,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); ASSERT_OK(statusAndDur.status); @@ -800,17 +879,19 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes writeConcern.wNumNodes = 2; statusAndDur = getReplCoord()->awaitReplication(&txn, time1, writeConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); statusAndDur = 
getReplCoord()->awaitReplication(&txn, time1, writeConcern); ASSERT_OK(statusAndDur.status); // 2 nodes waiting for time2 statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); - getReplCoord()->setMyLastOptime(time2); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time2)); statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); ASSERT_OK(statusAndDur.status); @@ -818,7 +899,7 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes writeConcern.wNumNodes = 3; statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time2)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time2)); statusAndDur = getReplCoord()->awaitReplication(&txn, time2, writeConcern); ASSERT_OK(statusAndDur.status); } @@ -842,7 +923,8 @@ TEST_F(ReplCoordTest, << "node4"))), HostAndPort("node0")); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); OpTime time1(Timestamp(100, 1), 1); @@ -858,7 +940,9 @@ TEST_F(ReplCoordTest, ASSERT_EQUALS(ErrorCodes::UnknownReplWriteConcern, statusAndDur.status); } -TEST_F(ReplCoordTest, 
NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHaveTheWrite) { +TEST_F( + ReplCoordTest, + NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHaveTheWriteAndTheWriteIsInACommittedSnapshot) { auto service = stdx::make_unique<ServiceContextNoop>(); auto client = service->makeClient("test"); OperationContextNoop txn(client.get(), 100); @@ -901,7 +985,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav << BSON("dc" << 2 << "rack" << 3)))), HostAndPort("node0")); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0)); simulateSuccessfulV1Election(); OpTime time1(Timestamp(100, 1), 1); @@ -912,6 +997,7 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav WriteConcernOptions majorityWriteConcern; majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting; majorityWriteConcern.wMode = WriteConcernOptions::kMajority; + majorityWriteConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL; WriteConcernOptions multiDCWriteConcern; multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting; @@ -923,7 +1009,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav // Nothing satisfied - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); @@ -933,8 +1020,10 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); // Majority satisfied but not either custom mode - 
getReplCoord()->setLastOptime_forTest(2, 1, time1); - getReplCoord()->setLastOptime_forTest(2, 2, time1); + getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1); + getReplCoord()->setLastDurableOptime_forTest(2, 1, time1); + getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1); + getReplCoord()->setLastDurableOptime_forTest(2, 2, time1); getReplCoord()->onSnapshotCreate(time1, SnapshotName(1)); statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern); @@ -945,7 +1034,8 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); // All modes satisfied - getReplCoord()->setLastOptime_forTest(2, 3, time1); + getReplCoord()->setLastAppliedOptime_forTest(2, 3, time1); + getReplCoord()->setLastDurableOptime_forTest(2, 3, time1); statusAndDur = getReplCoord()->awaitReplication(&txn, time1, majorityWriteConcern); ASSERT_OK(statusAndDur.status); @@ -979,8 +1069,10 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientSetOfNodesHav ASSERT_OK(statusAndDur.status); // multiDC satisfied but not majority or multiRack - getReplCoord()->setMyLastOptime(time2); - getReplCoord()->setLastOptime_forTest(2, 3, time2); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + getReplCoord()->setLastAppliedOptime_forTest(2, 3, time2); + getReplCoord()->setLastDurableOptime_forTest(2, 3, time2); statusAndDur = getReplCoord()->awaitReplication(&txn, time2, majorityWriteConcern); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); @@ -1009,7 +1101,7 @@ public: _optime = ot; } - void setWriteConcern(const WriteConcernOptions& wc) { + void setWriteConcern(WriteConcernOptions wc) { _writeConcern = wc; } @@ -1061,7 +1153,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie << "_id" << 2))), HostAndPort("node1", 12345)); 
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); ReplicationAwaiter awaiter(getReplCoord(), &txn); @@ -1077,8 +1170,9 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie awaiter.setOpTime(time1); awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); - getReplCoord()->setMyLastOptime(time1); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult(); ASSERT_OK(statusAndDur.status); awaiter.reset(); @@ -1086,8 +1180,9 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie // 2 nodes waiting for time2 awaiter.setOpTime(time2); awaiter.start(&txn); - getReplCoord()->setMyLastOptime(time2); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time2)); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time2)); statusAndDur = awaiter.getResult(); ASSERT_OK(statusAndDur.status); awaiter.reset(); @@ -1096,7 +1191,7 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie writeConcern.wNumNodes = 3; awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2)); statusAndDur = awaiter.getResult(); ASSERT_OK(statusAndDur.status); awaiter.reset(); @@ -1117,7 +1212,8 @@ TEST_F(ReplCoordTest, 
NodeReturnsWriteConcernFailedWhenAWriteConcernTimesOutBefo << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); ReplicationAwaiter awaiter(getReplCoord(), &txn); @@ -1133,8 +1229,9 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedWhenAWriteConcernTimesOutBefo awaiter.setOpTime(time2); awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); - getReplCoord()->setMyLastOptime(time2); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult(); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status); awaiter.reset(); @@ -1156,7 +1253,8 @@ TEST_F(ReplCoordTest, << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); ReplicationAwaiter awaiter(getReplCoord(), &txn); @@ -1172,8 +1270,8 @@ TEST_F(ReplCoordTest, awaiter.setOpTime(time2); awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1)); shutdown(); ReplicationCoordinator::StatusAndDuration 
statusAndDur = awaiter.getResult(); ASSERT_EQUALS(ErrorCodes::ShutdownInProgress, statusAndDur.status); @@ -1197,7 +1295,8 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenSteppingDownBeforeSatisfyingAWrite << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); ReplicationAwaiter awaiter(getReplCoord(), &txn); @@ -1213,8 +1312,8 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenSteppingDownBeforeSatisfyingAWrite awaiter.setOpTime(time2); awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1)); getReplCoord()->stepDown(&txn, true, Milliseconds(0), Milliseconds(1000)); ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult(); ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status); @@ -1236,7 +1335,8 @@ TEST_F(ReplCoordTest, << "node3"))), HostAndPort("node1")); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); ReplicationAwaiter awaiter(getReplCoord(), &txn); @@ -1253,8 +1353,8 @@ TEST_F(ReplCoordTest, awaiter.setOpTime(time2); awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time1)); + 
ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1)); txn.setCheckForInterruptStatus(kInterruptedStatus); getReplCoord()->interrupt(opID); @@ -1310,7 +1410,8 @@ TEST_F(ReplCoordTest, NodeChangesTermAndStepsDownWhenAndOnlyWhenUpdateTermSuppli << "test3:1234")) << "protocolVersion" << 1), HostAndPort("test1", 1234)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); ASSERT_TRUE(getReplCoord()->getMemberState().secondary()); @@ -1356,7 +1457,8 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha << "test3:1234")) << "protocolVersion" << 1), HostAndPort("test1", 1234)); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 1), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 1), 0)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); ASSERT_TRUE(getReplCoord()->getMemberState().secondary()); @@ -1418,9 +1520,10 @@ TEST_F(StepDownTest, NodeReturnsNotMasterWhenAskedToStepDownAsANonPrimaryNode) { OperationContextReplMock txn; OpTimeWithTermZero optime1(100, 1); // All nodes are caught up - getReplCoord()->setMyLastOptime(optime1); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1)); + getReplCoord()->setMyLastAppliedOpTime(optime1); + getReplCoord()->setMyLastDurableOpTime(optime1); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1)); Status status = getReplCoord()->stepDown(&txn, false, Milliseconds(0), Milliseconds(0)); 
ASSERT_EQUALS(ErrorCodes::NotMaster, status); @@ -1432,9 +1535,10 @@ TEST_F(StepDownTest, OperationContextReplMock txn; OpTimeWithTermZero optime1(100, 1); // All nodes are caught up - getReplCoord()->setMyLastOptime(optime1); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1)); + getReplCoord()->setMyLastAppliedOpTime(optime1); + getReplCoord()->setMyLastDurableOpTime(optime1); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1)); simulateSuccessfulV1Election(); @@ -1451,9 +1555,10 @@ TEST_F(StepDownTest, OperationContextReplMock txn; OpTimeWithTermZero optime1(100, 1); // All nodes are caught up - getReplCoord()->setMyLastOptime(optime1); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1)); + getReplCoord()->setMyLastAppliedOpTime(optime1); + getReplCoord()->setMyLastDurableOpTime(optime1); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1)); simulateSuccessfulV1Election(); @@ -1469,7 +1574,8 @@ TEST_F(StepDownTest, hbResp.setSetName(hbArgs.getSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(hbArgs.getConfigVersion()); - hbResp.setOpTime(optime1); + hbResp.setDurableOpTime(optime1); + hbResp.setAppliedOpTime(optime1); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); @@ -1525,9 +1631,10 @@ TEST_F(StepDownTest, OpTimeWithTermZero optime2(100, 2); // No secondary is caught up auto repl = getReplCoord(); - repl->setMyLastOptime(optime2); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime1)); + repl->setMyLastAppliedOpTime(optime2); + repl->setMyLastDurableOpTime(optime2); + 
ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime1)); simulateSuccessfulV1Election(); @@ -1559,9 +1666,10 @@ TEST_F(StepDownTest, OpTimeWithTermZero optime2(100, 2); // No secondary is caught up auto repl = getReplCoord(); - repl->setMyLastOptime(optime2); - ASSERT_OK(repl->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(repl->setLastOptime_forTest(1, 2, optime1)); + repl->setMyLastAppliedOpTime(optime2); + repl->setMyLastDurableOpTime(optime2); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, optime1)); simulateSuccessfulV1Election(); @@ -1589,7 +1697,8 @@ TEST_F(StepDownTest, hbResp.setSetName(hbArgs.getSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(hbArgs.getConfigVersion()); - hbResp.setOpTime(optime2); + hbResp.setAppliedOpTime(optime2); + hbResp.setDurableOpTime(optime2); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); @@ -1615,9 +1724,10 @@ TEST_F(StepDownTest, OpTimeWithTermZero optime2(100, 2); // No secondary is caught up auto repl = getReplCoord(); - repl->setMyLastOptime(optime2); - ASSERT_OK(repl->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(repl->setLastOptime_forTest(1, 2, optime1)); + repl->setMyLastAppliedOpTime(optime2); + repl->setMyLastDurableOpTime(optime2); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, optime1)); simulateSuccessfulV1Election(); @@ -1673,7 +1783,8 @@ TEST_F(StepDownTest, hbResp.setSetName(hbArgs.getSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(hbArgs.getConfigVersion()); - hbResp.setOpTime(optime2); + hbResp.setAppliedOpTime(optime2); + hbResp.setDurableOpTime(optime2); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); @@ -1697,9 +1808,10 @@ TEST_F(StepDownTest, 
NodeReturnsInterruptedWhenInterruptedDuringStepDown) { OpTimeWithTermZero optime2(100, 2); // No secondary is caught up auto repl = getReplCoord(); - repl->setMyLastOptime(optime2); - ASSERT_OK(repl->setLastOptime_forTest(1, 1, optime1)); - ASSERT_OK(repl->setLastOptime_forTest(1, 2, optime1)); + repl->setMyLastAppliedOpTime(optime2); + repl->setMyLastDurableOpTime(optime2); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 1, optime1)); + ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, optime1)); simulateSuccessfulV1Election(); ASSERT_TRUE(repl->getMemberState().primary()); @@ -1770,18 +1882,92 @@ TEST_F(ReplCoordTest, NodeIncludesOtherMembersProgressInUpdatePositionCommand) { << "test1:1234") << BSON("_id" << 1 << "host" << "test2:1234") << BSON("_id" << 2 << "host" + << "test3:1234") + << BSON("_id" << 3 << "host" + << "test4:1234"))), + HostAndPort("test1", 1234)); + OpTime optime1({2, 1}, 1); + OpTime optime2({100, 1}, 1); + OpTime optime3({100, 2}, 1); + getReplCoord()->setMyLastAppliedOpTime(optime1); + getReplCoord()->setMyLastDurableOpTime(optime1); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime2)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime3)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 2, optime3)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 3, optime3)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 3, optime1)); + + // Check that the proper BSON is generated for the replSetUpdatePositionCommand + BSONObjBuilder cmdBuilder; + getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder); + BSONObj cmd = cmdBuilder.done(); + + ASSERT_EQUALS(2, cmd.nFields()); + ASSERT_EQUALS("replSetUpdatePosition", cmd.firstElement().fieldNameStringData()); + + std::set<long long> memberIds; + BSONForEach(entryElement, cmd["optimes"].Obj()) { + OpTime durableOpTime; + OpTime appliedOpTime; + BSONObj entry = entryElement.Obj(); + long long memberId = 
entry["memberId"].Number(); + memberIds.insert(memberId); + if (memberId == 0) { + log() << 0; + ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime)); + ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime)); + ASSERT_EQUALS(optime1, appliedOpTime); + ASSERT_EQUALS(optime1, durableOpTime); + } else if (memberId == 1) { + log() << 1; + ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime)); + ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime)); + ASSERT_EQUALS(optime2, appliedOpTime); + ASSERT_EQUALS(OpTime(), durableOpTime); + } else if (memberId == 2) { + log() << 2; + ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime)); + ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime)); + ASSERT_EQUALS(optime3, appliedOpTime); + ASSERT_EQUALS(optime3, durableOpTime); + } else { + log() << 3; + ASSERT_EQUALS(3, memberId); + ASSERT_OK(bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime)); + ASSERT_OK(bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime)); + ASSERT_EQUALS(optime3, appliedOpTime); + ASSERT_EQUALS(optime1, durableOpTime); + } + } + ASSERT_EQUALS(4U, memberIds.size()); // Make sure we saw all 4 nodes +} + +TEST_F(ReplCoordTest, NodeIncludesOtherMembersProgressInOldUpdatePositionCommand) { + OperationContextNoop txn; + init("mySet/test1:1234,test2:1234,test3:1234"); + assertStartSuccess( + BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234") + << BSON("_id" << 1 << "host" + << "test2:1234") << BSON("_id" << 2 << "host" << "test3:1234"))), HostAndPort("test1", 1234)); OpTimeWithTermZero optime1(100, 1); OpTimeWithTermZero optime2(100, 2); OpTimeWithTermZero optime3(2, 1); - getReplCoord()->setMyLastOptime(optime1); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime2)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 2, optime3)); + 
getReplCoord()->setMyLastAppliedOpTime(optime1); + getReplCoord()->setMyLastDurableOpTime(optime1); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime2)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 1, optime2)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 2, optime3)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 2, optime3)); // Check that the proper BSON is generated for the replSetUpdatePositionCommand BSONObjBuilder cmdBuilder; - getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder); + getReplCoord()->prepareOldReplSetUpdatePositionCommand(&cmdBuilder); BSONObj cmd = cmdBuilder.done(); ASSERT_EQUALS(2, cmd.nFields()); @@ -1821,7 +2007,8 @@ TEST_F(ReplCoordTest, HostAndPort("test2", 1234)); OperationContextNoop txn; getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); // Can't unset maintenance mode if it was never set to begin with. 
Status status = getReplCoord()->setMaintenanceMode(false); @@ -1844,7 +2031,8 @@ TEST_F(ReplCoordTest, HostAndPort("test2", 1234)); OperationContextNoop txn; getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); // valid set ASSERT_OK(getReplCoord()->setMaintenanceMode(true)); ASSERT_TRUE(getReplCoord()->getMemberState().recovering()); @@ -1872,7 +2060,8 @@ TEST_F(ReplCoordTest, AllowAsManyUnsetMaintenanceModesAsThereHaveBeenSetMaintena HostAndPort("test2", 1234)); OperationContextNoop txn; getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); // Can set multiple times ASSERT_OK(getReplCoord()->setMaintenanceMode(true)); ASSERT_OK(getReplCoord()->setMaintenanceMode(true)); @@ -1902,7 +2091,8 @@ TEST_F(ReplCoordTest, SettingAndUnsettingMaintenanceModeShouldNotAffectRollbackS HostAndPort("test2", 1234)); OperationContextNoop txn; getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); // From rollback, entering and exiting maintenance mode doesn't change perceived // state. 
@@ -1940,7 +2130,8 @@ TEST_F(ReplCoordTest, DoNotAllowMaintenanceModeWhilePrimary) { HostAndPort("test2", 1234)); OperationContextNoop txn; getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); // Can't modify maintenance mode when PRIMARY simulateSuccessfulV1Election(); @@ -1972,7 +2163,8 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection) HostAndPort("test2", 1234)); OperationContextNoop txn; getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); // TODO this election shouldn't have to happen. simulateSuccessfulV1Election(); @@ -2018,6 +2210,50 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection) } TEST_F(ReplCoordTest, + NodeReturnsACompleteListOfNodesWeKnowHaveTheWriteDurablyInResponseToGetHostsWrittenTo) { + HostAndPort myHost("node1:12345"); + HostAndPort client1Host("node2:12345"); + HostAndPort client2Host("node3:12345"); + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 2 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" << myHost.toString()) + << BSON("_id" << 1 << "host" << client1Host.toString()) + << BSON("_id" << 2 << "host" << client2Host.toString()))), + HostAndPort("node1", 12345)); + OperationContextNoop txn; + + OpTimeWithTermZero time1(100, 1); + OpTimeWithTermZero time2(100, 2); + + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time1)); + + std::vector<HostAndPort> 
caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, true); + ASSERT_EQUALS(1U, caughtUpHosts.size()); + ASSERT_EQUALS(myHost, caughtUpHosts[0]); + + // Ensure updating applied does not affect the results for getHostsWritten durably. + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2)); + caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, true); + ASSERT_EQUALS(1U, caughtUpHosts.size()); + ASSERT_EQUALS(myHost, caughtUpHosts[0]); + + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time2)); + caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, true); + ASSERT_EQUALS(2U, caughtUpHosts.size()); + if (myHost == caughtUpHosts[0]) { + ASSERT_EQUALS(client2Host, caughtUpHosts[1]); + } else { + ASSERT_EQUALS(client2Host, caughtUpHosts[0]); + ASSERT_EQUALS(myHost, caughtUpHosts[1]); + } +} + +TEST_F(ReplCoordTest, NodeReturnsACompleteListOfNodesWeKnowHaveTheWriteInResponseToGetHostsWrittenTo) { HostAndPort myHost("node1:12345"); HostAndPort client1Host("node2:12345"); @@ -2034,15 +2270,16 @@ TEST_F(ReplCoordTest, OpTimeWithTermZero time1(100, 1); OpTimeWithTermZero time2(100, 2); - getReplCoord()->setMyLastOptime(time2); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time1)); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time1)); - std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2); + std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false); ASSERT_EQUALS(1U, caughtUpHosts.size()); ASSERT_EQUALS(myHost, caughtUpHosts[0]); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time2)); - caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time2)); + caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, 
false); ASSERT_EQUALS(2U, caughtUpHosts.size()); if (myHost == caughtUpHosts[0]) { ASSERT_EQUALS(client2Host, caughtUpHosts[1]); @@ -2068,14 +2305,15 @@ TEST_F(ReplCoordTest, NodeDoesNotIncludeItselfWhenRunningGetHostsWrittenToInMast ASSERT_OK(handshake.initialize(BSON("handshake" << client))); ASSERT_OK(getReplCoord()->processHandshake(&txn, handshake)); - getReplCoord()->setMyLastOptime(time2); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time1.timestamp)); - std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2); + std::vector<HostAndPort> caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false); ASSERT_EQUALS(0U, caughtUpHosts.size()); // self doesn't get included in master-slave ASSERT_OK(getReplCoord()->setLastOptimeForSlave(client, time2.timestamp)); - caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2); + caughtUpHosts = getReplCoord()->getHostsWrittenTo(time2, false); ASSERT_EQUALS(1U, caughtUpHosts.size()); ASSERT_EQUALS(clientHost, caughtUpHosts[0]); } @@ -2208,13 +2446,61 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenUpdatePositionContainsInfoAboutSelf) { << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); + simulateSuccessfulV1Election(); + + OpTime time1({100, 1}, 2); + OpTime time2({100, 2}, 2); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); + + WriteConcernOptions writeConcern; + writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; + writeConcern.wNumNodes = 1; + + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, + getReplCoord()->awaitReplication(&txn, time2, writeConcern).status); + + // 
receive updatePosition containing ourself, should not process the update for self + UpdatePositionArgs args; + ASSERT_OK(args.initialize( + BSON("replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 0 << "durableOpTime" + << BSON("ts" << time2.getTimestamp() << "t" << 2) + << "appliedOpTime" + << BSON("ts" << time2.getTimestamp() << "t" << 2)))))); + + ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args, 0)); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, + getReplCoord()->awaitReplication(&txn, time2, writeConcern).status); +} + +TEST_F(ReplCoordTest, DoNotProcessSelfWhenOldUpdatePositionContainsInfoAboutSelf) { + OperationContextNoop txn; + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 2 << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" << 0) + << BSON("host" + << "node2:12345" + << "_id" << 1) << BSON("host" + << "node3:12345" + << "_id" << 2))), + HostAndPort("node1", 12345)); + ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); OpTimeWithTermZero time1(100, 1); OpTimeWithTermZero time2(100, 2); OpTimeWithTermZero staleTime(10, 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); WriteConcernOptions writeConcern; writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; @@ -2224,7 +2510,7 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenUpdatePositionContainsInfoAboutSelf) { getReplCoord()->awaitReplication(&txn, time2, writeConcern).status); // receive updatePosition containing ourself, should not process the update for self - UpdatePositionArgs args; + OldUpdatePositionArgs args; ASSERT_OK(args.initialize(BSON("replSetUpdatePosition" << 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 0 
<< "optime" @@ -2250,20 +2536,67 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionWhenItsConfigVersionIsIncorrect) << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); + simulateSuccessfulV1Election(); + + OpTime time1({100, 1}, 3); + OpTime time2({100, 2}, 3); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); + + WriteConcernOptions writeConcern; + writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; + writeConcern.wNumNodes = 1; + + // receive updatePosition with incorrect config version + UpdatePositionArgs args; + ASSERT_OK(args.initialize( + BSON("replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY(BSON("cfgver" << 3 << "memberId" << 1 << "durableOpTime" + << BSON("ts" << time2.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << time2.getTimestamp() << "t" << 3)))))); + + long long cfgver; + ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, + getReplCoord()->processReplSetUpdatePosition(args, &cfgver)); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, + getReplCoord()->awaitReplication(&txn, time2, writeConcern).status); +} + +TEST_F(ReplCoordTest, DoNotProcessOldUpdatePositionWhenItsConfigVersionIsIncorrect) { + OperationContextNoop txn; + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 2 << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" << 0) + << BSON("host" + << "node2:12345" + << "_id" << 1) << BSON("host" + << "node3:12345" + << "_id" << 2))), + HostAndPort("node1", 12345)); + ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); 
simulateSuccessfulV1Election(); OpTimeWithTermZero time1(100, 1); OpTimeWithTermZero time2(100, 2); OpTimeWithTermZero staleTime(10, 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); WriteConcernOptions writeConcern; writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; writeConcern.wNumNodes = 1; // receive updatePosition with incorrect config version - UpdatePositionArgs args; + OldUpdatePositionArgs args; ASSERT_OK(args.initialize(BSON("replSetUpdatePosition" << 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 3 << "memberId" << 1 << "optime" @@ -2291,20 +2624,65 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionOfMembersWhoseIdsAreNotInTheConf << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); + simulateSuccessfulV1Election(); + + OpTime time1({100, 1}, 2); + OpTime time2({100, 2}, 2); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); + + WriteConcernOptions writeConcern; + writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; + writeConcern.wNumNodes = 1; + + // receive updatePosition with nonexistent member id + UpdatePositionArgs args; + ASSERT_OK(args.initialize( + BSON("replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 9 << "durableOpTime" + << BSON("ts" << time2.getTimestamp() << "t" << 2) + << "appliedOpTime" + << BSON("ts" << time2.getTimestamp() << "t" << 2)))))); + + ASSERT_EQUALS(ErrorCodes::NodeNotFound, getReplCoord()->processReplSetUpdatePosition(args, 0)); + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, + getReplCoord()->awaitReplication(&txn, time2, writeConcern).status); +} + 
+TEST_F(ReplCoordTest, DoNotProcessOldUpdatePositionOfMembersWhoseIdsAreNotInTheConfig) { + OperationContextNoop txn; + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 2 << "members" + << BSON_ARRAY(BSON("host" + << "node1:12345" + << "_id" << 0) + << BSON("host" + << "node2:12345" + << "_id" << 1) << BSON("host" + << "node3:12345" + << "_id" << 2))), + HostAndPort("node1", 12345)); + ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); OpTimeWithTermZero time1(100, 1); OpTimeWithTermZero time2(100, 2); OpTimeWithTermZero staleTime(10, 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); WriteConcernOptions writeConcern; writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; writeConcern.wNumNodes = 1; // receive updatePosition with nonexistent member id - UpdatePositionArgs args; + OldUpdatePositionArgs args; ASSERT_OK(args.initialize(BSON("replSetUpdatePosition" << 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 9 << "optime" @@ -2331,21 +2709,24 @@ TEST_F(ReplCoordTest, << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); simulateSuccessfulV1Election(); OpTimeWithTermZero time1(100, 1); OpTimeWithTermZero time2(100, 2); OpTimeWithTermZero staleTime(10, 0); - getReplCoord()->setMyLastOptime(time1); + getReplCoord()->setMyLastAppliedOpTime(time1); + getReplCoord()->setMyLastDurableOpTime(time1); WriteConcernOptions writeConcern; writeConcern.wTimeout = WriteConcernOptions::kNoWaiting; 
writeConcern.wNumNodes = 1; // receive a good update position - getReplCoord()->setMyLastOptime(time2); - UpdatePositionArgs args; + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + OldUpdatePositionArgs args; ASSERT_OK(args.initialize( BSON("replSetUpdatePosition" << 1 << "optimes" @@ -2393,7 +2774,8 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) { << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 2)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 2)); simulateSuccessfulV1Election(); OpTimeWithTermZero time(100, 2); @@ -2408,6 +2790,12 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) { awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); + ReplicationAwaiter awaiterJournaled(getReplCoord(), &txn); + writeConcern.wMode = WriteConcernOptions::kMajority; + awaiterJournaled.setOpTime(time); + awaiterJournaled.setWriteConcern(writeConcern); + awaiterJournaled.start(&txn); + // reconfig Status status(ErrorCodes::InternalError, "Not Set"); stdx::thread reconfigThread(stdx::bind(doReplSetReconfig, getReplCoord(), &status)); @@ -2417,12 +2805,22 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) { ASSERT_OK(status); // satisfy write concern - ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 0, time)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 1, time)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(3, 2, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 0, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 1, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 2, time)); ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult(); 
ASSERT_OK(statusAndDur.status); awaiter.reset(); + + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 0, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(3, 0, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 1, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(3, 1, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(3, 2, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(3, 2, time)); + ReplicationCoordinator::StatusAndDuration statusAndDurJournaled = awaiterJournaled.getResult(); + ASSERT_OK(statusAndDurJournaled.status); + awaiterJournaled.reset(); } void doReplSetReconfigToFewer(ReplicationCoordinatorImpl* replCoord, Status* status) { @@ -2457,7 +2855,8 @@ TEST_F( << "_id" << 2))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 2)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 2)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 2)); simulateSuccessfulV1Election(); OpTimeWithTermZero time(100, 2); @@ -2472,6 +2871,12 @@ TEST_F( awaiter.setWriteConcern(writeConcern); awaiter.start(&txn); + ReplicationAwaiter awaiterJournaled(getReplCoord(), &txn); + writeConcern.wMode = WriteConcernOptions::kMajority; + awaiterJournaled.setOpTime(time); + awaiterJournaled.setWriteConcern(writeConcern); + awaiterJournaled.start(&txn); + // reconfig to fewer nodes Status status(ErrorCodes::InternalError, "Not Set"); stdx::thread reconfigThread(stdx::bind(doReplSetReconfigToFewer, getReplCoord(), &status)); @@ -2485,6 +2890,9 @@ TEST_F( ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult(); ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDur.status); awaiter.reset(); + ReplicationCoordinator::StatusAndDuration statusAndDurJournaled = awaiterJournaled.getResult(); + 
ASSERT_EQUALS(ErrorCodes::CannotSatisfyWriteConcern, statusAndDurJournaled.status); + awaiterJournaled.reset(); } TEST_F(ReplCoordTest, @@ -2508,14 +2916,16 @@ TEST_F(ReplCoordTest, << "_id" << 4))), HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 1)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 1)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 1)); simulateSuccessfulV1Election(); OpTime time(Timestamp(100, 2), 1); - getReplCoord()->setMyLastOptime(time); + getReplCoord()->setMyLastAppliedOpTime(time); + getReplCoord()->setMyLastDurableOpTime(time); getReplCoord()->onSnapshotCreate(time, SnapshotName(1)); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time)); // majority nodes waiting for time @@ -2550,7 +2960,7 @@ TEST_F(ReplCoordTest, } TEST_F(ReplCoordTest, - NodeReturnsFromMajorityWriteConcernOnlyOnceAMajorityOfVotingNodesHaveReceivedTheWrite) { + NodeReturnsFromMajorityWriteConcernOnlyOnceTheWriteAppearsInACommittedSnapShot) { // Test that we can satisfy majority write concern can only be // satisfied by voting data-bearing members. 
OperationContextNoop txn; @@ -2574,26 +2984,31 @@ TEST_F(ReplCoordTest, HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); OpTime time(Timestamp(100, 0), 1); - getReplCoord()->setMyLastOptime(time); + getReplCoord()->setMyLastAppliedOpTime(time); + getReplCoord()->setMyLastDurableOpTime(time); simulateSuccessfulV1Election(); WriteConcernOptions majorityWriteConcern; majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting; majorityWriteConcern.wMode = WriteConcernOptions::kMajority; + majorityWriteConcern.syncMode = WriteConcernOptions::SyncMode::JOURNAL; ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time)); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status); // this member does not vote and as a result should not count towards write concern - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, time)); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time)); ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status); @@ -2626,30 +3041,38 @@ TEST_F(ReplCoordTest, ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); OpTime zero(Timestamp(0, 0), 0); OpTime time(Timestamp(100, 0), 1); - 
getReplCoord()->setMyLastOptime(time); + getReplCoord()->setMyLastAppliedOpTime(time); + getReplCoord()->setMyLastDurableOpTime(time); simulateSuccessfulV1Election(); ASSERT_EQUALS(zero, getReplCoord()->getLastCommittedOpTime()); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, time)); ASSERT_EQUALS(zero, getReplCoord()->getLastCommittedOpTime()); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, time)); ASSERT_EQUALS(zero, getReplCoord()->getLastCommittedOpTime()); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, time)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, time)); ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime()); // Set a new, later OpTime. OpTime newTime(Timestamp(100, 1), 1); - getReplCoord()->setMyLastOptime(newTime); + getReplCoord()->setMyLastAppliedOpTime(newTime); + getReplCoord()->setMyLastDurableOpTime(newTime); ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime()); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 3, newTime)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 3, newTime)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 3, newTime)); ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime()); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 2, newTime)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, newTime)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 2, newTime)); // Reached majority of voting nodes with newTime. 
ASSERT_EQUALS(time, getReplCoord()->getLastCommittedOpTime()); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(2, 1, newTime)); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 1, newTime)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(2, 1, newTime)); ASSERT_EQUALS(newTime, getReplCoord()->getLastCommittedOpTime()); } @@ -2662,7 +3085,8 @@ TEST_F(ReplCoordTest, NodeReturnsShutdownInProgressWhenWaitingUntilAnOpTimeDurin << "_id" << 0))), HostAndPort("node1", 12345)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(10, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(10, 0)); shutdown(); @@ -2682,7 +3106,8 @@ TEST_F(ReplCoordTest, NodeReturnsInterruptedWhenWaitingUntilAnOpTimeIsInterrupte << "_id" << 0))), HostAndPort("node1", 12345)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(10, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(10, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(10, 0)); txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "test")); @@ -2717,7 +3142,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTi << "_id" << 0))), HostAndPort("node1", 12345)); - getReplCoord()->setMyLastOptime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0)); + getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0)); auto result = getReplCoord()->waitUntilOpTime( &txn, ReadConcernArgs(OpTimeWithTermZero(50, 0), ReadConcernLevel::kLocalReadConcern)); @@ -2736,7 +3162,8 @@ TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTi OpTimeWithTermZero time(100, 0); - getReplCoord()->setMyLastOptime(time); + getReplCoord()->setMyLastAppliedOpTime(time); + getReplCoord()->setMyLastDurableOpTime(time); auto result = getReplCoord()->waitUntilOpTime( &txn, ReadConcernArgs(time, 
ReadConcernLevel::kLocalReadConcern)); @@ -2779,7 +3206,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedWhileShutdown) { HostAndPort("node1", 12345)); runSingleNodeElection(getReplCoord()); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0)); shutdown(); @@ -2800,7 +3228,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedInterrupted) { HostAndPort("node1", 12345)); runSingleNodeElection(getReplCoord()); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(10, 0), 0)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(10, 0), 0)); txn.setCheckForInterruptStatus(Status(ErrorCodes::Interrupted, "test")); @@ -2821,7 +3250,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedGreaterOpTime) { HostAndPort("node1", 12345)); runSingleNodeElection(getReplCoord()); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(100, 0), 1)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 1)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 1)); getReplCoord()->onSnapshotCreate(OpTime(Timestamp(100, 0), 1), SnapshotName(1)); auto result = getReplCoord()->waitUntilOpTime( &txn, ReadConcernArgs(OpTime(Timestamp(50, 0), 1), ReadConcernLevel::kMajorityReadConcern)); @@ -2840,7 +3270,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedEqualOpTime) { HostAndPort("node1", 12345)); runSingleNodeElection(getReplCoord()); OpTime time(Timestamp(100, 0), 1); - getReplCoord()->setMyLastOptime(time); + getReplCoord()->setMyLastAppliedOpTime(time); + getReplCoord()->setMyLastDurableOpTime(time); getReplCoord()->onSnapshotCreate(time, SnapshotName(1)); auto result = getReplCoord()->waitUntilOpTime( &txn, ReadConcernArgs(time, ReadConcernLevel::kMajorityReadConcern)); @@ -2858,13 +3289,15 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredGreaterOpTime) { << "_id" 
<< 0))), HostAndPort("node1", 12345)); runSingleNodeElection(getReplCoord()); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(0, 0), 1)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1)); OpTime committedOpTime(Timestamp(200, 0), 1); auto pseudoLogOp = stdx::async(stdx::launch::async, [this, &committedOpTime]() { // Not guaranteed to be scheduled after waitUntil blocks... - getReplCoord()->setMyLastOptime(committedOpTime); + getReplCoord()->setMyLastAppliedOpTime(committedOpTime); + getReplCoord()->setMyLastDurableOpTime(committedOpTime); getReplCoord()->onSnapshotCreate(committedOpTime, SnapshotName(1)); }); @@ -2886,7 +3319,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) { << "_id" << 0))), HostAndPort("node1", 12345)); runSingleNodeElection(getReplCoord()); - getReplCoord()->setMyLastOptime(OpTime(Timestamp(0, 0), 1)); + getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(0, 0), 1)); + getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(0, 0), 1)); OpTime opTimeToWait(Timestamp(100, 0), 1); @@ -2894,7 +3328,8 @@ TEST_F(ReplCoordTest, ReadAfterCommittedDeferredEqualOpTime) { stdx::async(stdx::launch::async, [this, &opTimeToWait]() { // Not guaranteed to be scheduled after waitUntil blocks... 
- getReplCoord()->setMyLastOptime(opTimeToWait); + getReplCoord()->setMyLastAppliedOpTime(opTimeToWait); + getReplCoord()->setMyLastDurableOpTime(opTimeToWait); getReplCoord()->onSnapshotCreate(opTimeToWait, SnapshotName(1)); }); @@ -3373,9 +3808,11 @@ TEST_F(ReplCoordTest, AdvanceCommittedSnapshotToMostRecentSnapshotPriorToOpTimeW getReplCoord()->onSnapshotCreate(time5, SnapshotName(3)); // ensure current snapshot follows price is right rules (closest but not greater than) - getReplCoord()->setMyLastOptime(time3); + getReplCoord()->setMyLastAppliedOpTime(time3); + getReplCoord()->setMyLastDurableOpTime(time3); ASSERT_EQUALS(time2, getReplCoord()->getCurrentCommittedSnapshotOpTime()); - getReplCoord()->setMyLastOptime(time4); + getReplCoord()->setMyLastAppliedOpTime(time4); + getReplCoord()->setMyLastDurableOpTime(time4); ASSERT_EQUALS(time2, getReplCoord()->getCurrentCommittedSnapshotOpTime()); } @@ -3403,7 +3840,8 @@ TEST_F(ReplCoordTest, DoNotAdvanceCommittedSnapshotWhenAnOpTimeIsNewerThanOurLat getReplCoord()->onSnapshotCreate(time5, SnapshotName(3)); // ensure current snapshot will not advance beyond existing snapshots - getReplCoord()->setMyLastOptime(time6); + getReplCoord()->setMyLastAppliedOpTime(time6); + getReplCoord()->setMyLastDurableOpTime(time6); ASSERT_EQUALS(time5, getReplCoord()->getCurrentCommittedSnapshotOpTime()); } @@ -3431,7 +3869,8 @@ TEST_F(ReplCoordTest, getReplCoord()->onSnapshotCreate(time2, SnapshotName(2)); getReplCoord()->onSnapshotCreate(time5, SnapshotName(3)); - getReplCoord()->setMyLastOptime(time6); + getReplCoord()->setMyLastAppliedOpTime(time6); + getReplCoord()->setMyLastDurableOpTime(time6); ASSERT_EQUALS(time5, getReplCoord()->getCurrentCommittedSnapshotOpTime()); // ensure current snapshot updates on new snapshot if we are that far @@ -3467,7 +3906,34 @@ TEST_F(ReplCoordTest, ZeroCommittedSnapshotWhenAllSnapshotsAreDropped) { ASSERT_EQUALS(OpTime(), getReplCoord()->getCurrentCommittedSnapshotOpTime()); } 
-TEST_F(ReplCoordTest, NodeChangesMyLastOpTimeWhenAndOnlyWhenSetMyLastOpTimeReceivesANewerOpTime) { +TEST_F(ReplCoordTest, DoNotAdvanceCommittedSnapshotWhenAppliedOpTimeChanges) { + init("mySet"); + + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234"))), + HostAndPort("test1", 1234)); + OperationContextReplMock txn; + runSingleNodeElection(getReplCoord()); + + OpTime time1(Timestamp(100, 1), 1); + OpTime time2(Timestamp(100, 2), 1); + + getReplCoord()->onSnapshotCreate(time1, SnapshotName(1)); + + getReplCoord()->setMyLastAppliedOpTime(time1); + ASSERT_EQUALS(OpTime(), getReplCoord()->getCurrentCommittedSnapshotOpTime()); + getReplCoord()->setMyLastAppliedOpTime(time2); + ASSERT_EQUALS(OpTime(), getReplCoord()->getCurrentCommittedSnapshotOpTime()); + getReplCoord()->setMyLastAppliedOpTime(time2); + getReplCoord()->setMyLastDurableOpTime(time2); + ASSERT_EQUALS(time1, getReplCoord()->getCurrentCommittedSnapshotOpTime()); +} + +TEST_F(ReplCoordTest, + NodeChangesMyLastOpTimeWhenAndOnlyWhensetMyLastDurableOpTimeReceivesANewerOpTime) { assertStartSuccess(BSON("_id" << "mySet" << "version" << 2 << "members" << BSON_ARRAY(BSON("host" @@ -3480,12 +3946,13 @@ TEST_F(ReplCoordTest, NodeChangesMyLastOpTimeWhenAndOnlyWhenSetMyLastOpTimeRecei OpTime time2(Timestamp(100, 2), 1); OpTime time3(Timestamp(100, 3), 1); - getReplCoord()->setMyLastOptime(time1); - ASSERT_EQUALS(time1, getReplCoord()->getMyLastOptime()); - getReplCoord()->setMyLastOptimeForward(time3); - ASSERT_EQUALS(time3, getReplCoord()->getMyLastOptime()); - getReplCoord()->setMyLastOptimeForward(time2); - ASSERT_EQUALS(time3, getReplCoord()->getMyLastOptime()); + getReplCoord()->setMyLastAppliedOpTime(time1); + ASSERT_EQUALS(time1, getReplCoord()->getMyLastAppliedOpTime()); + getReplCoord()->setMyLastAppliedOpTimeForward(time3); + ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime()); + 
getReplCoord()->setMyLastAppliedOpTimeForward(time2); + getReplCoord()->setMyLastDurableOpTimeForward(time2); + ASSERT_EQUALS(time3, getReplCoord()->getMyLastAppliedOpTime()); } TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelievedToBeUp) { @@ -3502,8 +3969,10 @@ TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelieve << BSON("electionTimeoutMillis" << 2000 << "heartbeatIntervalMillis" << 40000)), HostAndPort("test1", 1234)); OpTime optime(Timestamp(100, 2), 0); - getReplCoord()->setMyLastOptime(optime); - ASSERT_OK(getReplCoord()->setLastOptime_forTest(1, 1, optime)); + getReplCoord()->setMyLastAppliedOpTime(optime); + getReplCoord()->setMyLastDurableOpTime(optime); + ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime)); + ASSERT_OK(getReplCoord()->setLastDurableOptime_forTest(1, 1, optime)); // Check that we have two entries in our UpdatePosition (us and node 1). BSONObjBuilder cmdBuilder; @@ -3514,11 +3983,29 @@ TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelieve BSONObj entry = entryElement.Obj(); long long memberId = entry["memberId"].Number(); memberIds.insert(memberId); + OpTime appliedOpTime; + OpTime durableOpTime; + bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime); + ASSERT_EQUALS(optime, appliedOpTime); + bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime); + ASSERT_EQUALS(optime, durableOpTime); + } + ASSERT_EQUALS(2U, memberIds.size()); + + // Check that this true for old style (pre-3.2.2) UpdatePosition as well. 
+ BSONObjBuilder cmdBuilder2; + getReplCoord()->prepareOldReplSetUpdatePositionCommand(&cmdBuilder2); + BSONObj cmd2 = cmdBuilder2.done(); + std::set<long long> memberIds2; + BSONForEach(entryElement, cmd2["optimes"].Obj()) { + BSONObj entry = entryElement.Obj(); + long long memberId = entry["memberId"].Number(); + memberIds2.insert(memberId); OpTime entryOpTime; bsonExtractOpTimeField(entry, "optime", &entryOpTime); ASSERT_EQUALS(optime, entryOpTime); } - ASSERT_EQUALS(2U, memberIds.size()); + ASSERT_EQUALS(2U, memberIds2.size()); // Advance the clock far enough to cause the other node to be marked as DOWN. const Date_t startDate = getNet()->now(); @@ -3534,19 +4021,37 @@ TEST_F(ReplCoordTest, OnlyForwardSyncProgressForOtherNodesWhenTheNodesAreBelieve // Check there is one entry in our UpdatePosition, since we shouldn't forward for a // DOWN node. - BSONObjBuilder cmdBuilder2; - getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder2); - BSONObj cmd2 = cmdBuilder2.done(); - std::set<long long> memberIds2; - BSONForEach(entryElement, cmd2["optimes"].Obj()) { + BSONObjBuilder cmdBuilder3; + getReplCoord()->prepareReplSetUpdatePositionCommand(&cmdBuilder3); + BSONObj cmd3 = cmdBuilder3.done(); + std::set<long long> memberIds3; + BSONForEach(entryElement, cmd3["optimes"].Obj()) { BSONObj entry = entryElement.Obj(); long long memberId = entry["memberId"].Number(); - memberIds2.insert(memberId); + memberIds3.insert(memberId); + OpTime appliedOpTime; + OpTime durableOpTime; + bsonExtractOpTimeField(entry, "appliedOpTime", &appliedOpTime); + ASSERT_EQUALS(optime, appliedOpTime); + bsonExtractOpTimeField(entry, "durableOpTime", &durableOpTime); + ASSERT_EQUALS(optime, durableOpTime); + } + ASSERT_EQUALS(1U, memberIds3.size()); + + // Check that this true for old style (pre-3.2.2) UpdatePosition as well. 
+ BSONObjBuilder cmdBuilder4; + getReplCoord()->prepareOldReplSetUpdatePositionCommand(&cmdBuilder4); + BSONObj cmd4 = cmdBuilder4.done(); + std::set<long long> memberIds4; + BSONForEach(entryElement, cmd4["optimes"].Obj()) { + BSONObj entry = entryElement.Obj(); + long long memberId = entry["memberId"].Number(); + memberIds4.insert(memberId); OpTime entryOpTime; bsonExtractOpTimeField(entry, "optime", &entryOpTime); ASSERT_EQUALS(optime, entryOpTime); } - ASSERT_EQUALS(1U, memberIds2.size()); + ASSERT_EQUALS(1U, memberIds4.size()); } TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNodesDown) { @@ -3571,10 +4076,11 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod HostAndPort("node1", 12345)); ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); OpTime startingOpTime = OpTime(Timestamp(100, 1), 0); - getReplCoord()->setMyLastOptime(startingOpTime); + getReplCoord()->setMyLastAppliedOpTime(startingOpTime); + getReplCoord()->setMyLastDurableOpTime(startingOpTime); // Receive notification that every node is up. - UpdatePositionArgs args; + OldUpdatePositionArgs args; ASSERT_OK(args.initialize( BSON("replSetUpdatePosition" << 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime" @@ -3591,7 +4097,7 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod simulateSuccessfulV1Election(); // Keep two nodes alive. - UpdatePositionArgs args1; + OldUpdatePositionArgs args1; ASSERT_OK(args1.initialize( BSON("replSetUpdatePosition" << 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime" @@ -3608,7 +4114,7 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod ASSERT_EQUALS(MemberState::RS_PRIMARY, getReplCoord()->getMemberState().s); // Keep one node alive via two methods (UpdatePosition and requestHeartbeat). 
- UpdatePositionArgs args2; + OldUpdatePositionArgs args2; ASSERT_OK(args2.initialize( BSON("replSetUpdatePosition" << 1 << "optimes" << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "optime" @@ -3654,7 +4160,8 @@ TEST_F(ReplCoordTest, WaitForMemberState) { HostAndPort("test1", 1234)); auto replCoord = getReplCoord(); auto initialTerm = replCoord->getTerm(); - replCoord->setMyLastOptime(OpTime(Timestamp(1, 0), 0)); + replCoord->setMyLastAppliedOpTime(OpTime(Timestamp(1, 0), 0)); + replCoord->setMyLastDurableOpTime(OpTime(Timestamp(1, 0), 0)); ASSERT_TRUE(replCoord->setFollowerMode(MemberState::RS_SECONDARY)); // Successful dry run election increases term. @@ -3688,7 +4195,8 @@ TEST_F(ReplCoordTest, WaitForDrainFinish) { HostAndPort("test1", 1234)); auto replCoord = getReplCoord(); auto initialTerm = replCoord->getTerm(); - replCoord->setMyLastOptime(OpTime(Timestamp(1, 0), 0)); + replCoord->setMyLastAppliedOpTime(OpTime(Timestamp(1, 0), 0)); + replCoord->setMyLastDurableOpTime(OpTime(Timestamp(1, 0), 0)); ASSERT_TRUE(replCoord->setFollowerMode(MemberState::RS_SECONDARY)); // Successful dry run election increases term. 
@@ -3713,8 +4221,168 @@ TEST_F(ReplCoordTest, WaitForDrainFinish) { ASSERT_OK(replCoord->waitForDrainFinish(Milliseconds(0))); } -// TODO(schwerin): Unit test election id updating +TEST_F(ReplCoordTest, UpdatePositionArgsReturnsNoSuchKeyWhenParsingOldUpdatePositionArgs) { + OldUpdatePositionArgs args; + UpdatePositionArgs args2; + OpTime opTime = OpTime(Timestamp(100, 1), 0); + ASSERT_EQUALS( + ErrorCodes::NoSuchKey, + args2.initialize(BSON( + "replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY( + BSON("cfgver" << 2 << "memberId" << 1 << "optime" << opTime.getTimestamp()) + << BSON("cfgver" << 2 << "memberId" << 2 << "optime" << opTime.getTimestamp()) + << BSON("cfgver" << 2 << "memberId" << 3 << "optime" << opTime.getTimestamp()) + << BSON("cfgver" << 2 << "memberId" << 4 << "optime" + << opTime.getTimestamp()))))); + + ASSERT_OK(args.initialize( + BSON("replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY( + BSON("cfgver" << 2 << "memberId" << 1 << "optime" << opTime.getTimestamp()) + << BSON("cfgver" << 2 << "memberId" << 2 << "optime" << opTime.getTimestamp()) + << BSON("cfgver" << 2 << "memberId" << 3 << "optime" << opTime.getTimestamp()) + << BSON("cfgver" << 2 << "memberId" << 4 << "optime" + << opTime.getTimestamp()))))); +} + + +TEST_F(ReplCoordTest, OldUpdatePositionArgsReturnsBadValueWhenParsingUpdatePositionArgs) { + OldUpdatePositionArgs args; + UpdatePositionArgs args2; + OpTime opTime = OpTime(Timestamp(100, 1), 0); + ASSERT_EQUALS( + ErrorCodes::BadValue, + args.initialize(BSON( + "replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)) + << BSON("cfgver" << 2 << "memberId" << 2 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)) + << BSON("cfgver" << 2 << 
"memberId" << 3 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)) + << BSON("cfgver" << 2 << "memberId" << 4 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)))))); + ASSERT_OK(args2.initialize( + BSON("replSetUpdatePosition" + << 1 << "optimes" + << BSON_ARRAY(BSON("cfgver" << 2 << "memberId" << 1 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)) + << BSON("cfgver" << 2 << "memberId" << 2 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)) + << BSON("cfgver" << 2 << "memberId" << 3 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)) + << BSON("cfgver" << 2 << "memberId" << 4 << "durableOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3) + << "appliedOpTime" + << BSON("ts" << opTime.getTimestamp() << "t" << 3)))))); +} + +TEST_F( + ReplCoordTest, + PopulateUnsetWriteConcernOptionsSyncModeReturnsInputWithSyncModeNoneIfUnsetAndWriteConcernMajorityJournalDefaultIsFalse) { + init("mySet"); + + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234")) + << "writeConcernMajorityJournalDefault" << false), + HostAndPort("test1", 1234)); + + WriteConcernOptions wc; + wc.wMode = WriteConcernOptions::kMajority; + wc.syncMode = WriteConcernOptions::SyncMode::UNSET; + ASSERT(WriteConcernOptions::SyncMode::NONE == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); +} + +TEST_F( + ReplCoordTest, + 
PopulateUnsetWriteConcernOptionsSyncModeReturnsInputWithSyncModeJournalIfUnsetAndWriteConcernMajorityJournalDefaultIsTrue) { + init("mySet"); + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234")) + << "writeConcernMajorityJournalDefault" << true), + HostAndPort("test1", 1234)); + + WriteConcernOptions wc; + wc.wMode = WriteConcernOptions::kMajority; + wc.syncMode = WriteConcernOptions::SyncMode::UNSET; + ASSERT(WriteConcernOptions::SyncMode::JOURNAL == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); +} + +TEST_F(ReplCoordTest, PopulateUnsetWriteConcernOptionsSyncModeReturnsInputIfSyncModeIsNotUnset) { + init("mySet"); + + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234")) + << "writeConcernMajorityJournalDefault" << false), + HostAndPort("test1", 1234)); + + WriteConcernOptions wc; + wc.wMode = WriteConcernOptions::kMajority; + ASSERT(WriteConcernOptions::SyncMode::NONE == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); + + wc.syncMode = WriteConcernOptions::SyncMode::JOURNAL; + ASSERT(WriteConcernOptions::SyncMode::JOURNAL == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); + + wc.syncMode = WriteConcernOptions::SyncMode::FSYNC; + ASSERT(WriteConcernOptions::SyncMode::FSYNC == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); +} + +TEST_F(ReplCoordTest, PopulateUnsetWriteConcernOptionsSyncModeReturnsInputIfWModeIsNotMajority) { + init("mySet"); + + assertStartSuccess(BSON("_id" + << "mySet" + << "version" << 1 << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "test1:1234")) + << "writeConcernMajorityJournalDefault" << false), + HostAndPort("test1", 1234)); + + WriteConcernOptions wc; + wc.syncMode = WriteConcernOptions::SyncMode::UNSET; + wc.wMode = "not the value of kMajority"; + 
ASSERT(WriteConcernOptions::SyncMode::NONE == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); + + wc.wMode = "like literally anythingelse"; + ASSERT(WriteConcernOptions::SyncMode::NONE == + getReplCoord()->populateUnsetWriteConcernOptionsSyncMode(wc).syncMode); +} + +// TODO(schwerin): Unit test election id updating } // namespace } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index acacb6c9584..2ceb947cb8e 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -148,22 +148,36 @@ void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) { // TODO } -void ReplicationCoordinatorMock::setMyLastOptime(const OpTime& opTime) { - _myLastOpTime = opTime; +void ReplicationCoordinatorMock::setMyLastAppliedOpTime(const OpTime& opTime) { + _myLastAppliedOpTime = opTime; } -void ReplicationCoordinatorMock::setMyLastOptimeForward(const OpTime& opTime) { - if (opTime > _myLastOpTime) { - _myLastOpTime = opTime; +void ReplicationCoordinatorMock::setMyLastDurableOpTime(const OpTime& opTime) { + _myLastDurableOpTime = opTime; +} + +void ReplicationCoordinatorMock::setMyLastAppliedOpTimeForward(const OpTime& opTime) { + if (opTime > _myLastAppliedOpTime) { + _myLastAppliedOpTime = opTime; + } +} + +void ReplicationCoordinatorMock::setMyLastDurableOpTimeForward(const OpTime& opTime) { + if (opTime > _myLastDurableOpTime) { + _myLastDurableOpTime = opTime; } } -void ReplicationCoordinatorMock::resetMyLastOptime() { - _myLastOpTime = OpTime(); +void ReplicationCoordinatorMock::resetMyLastOpTimes() { + _myLastDurableOpTime = OpTime(); } -OpTime ReplicationCoordinatorMock::getMyLastOptime() const { - return _myLastOpTime; +OpTime ReplicationCoordinatorMock::getMyLastAppliedOpTime() const { + return _myLastAppliedOpTime; +} + +OpTime 
ReplicationCoordinatorMock::getMyLastDurableOpTime() const { + return _myLastDurableOpTime; } ReadConcernResponse ReplicationCoordinatorMock::waitUntilOpTime(OperationContext* txn, @@ -203,6 +217,12 @@ Status ReplicationCoordinatorMock::waitForDrainFinish(Milliseconds timeout) { void ReplicationCoordinatorMock::signalUpstreamUpdater() {} +bool ReplicationCoordinatorMock::prepareOldReplSetUpdatePositionCommand( + BSONObjBuilder* cmdBuilder) { + cmdBuilder->append("replSetUpdatePosition", 1); + return true; +} + bool ReplicationCoordinatorMock::prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) { cmdBuilder->append("replSetUpdatePosition", 1); return true; @@ -284,6 +304,12 @@ Status ReplicationCoordinatorMock::processReplSetElect(const ReplSetElectArgs& a return Status::OK(); } +Status ReplicationCoordinatorMock::processReplSetUpdatePosition( + const OldUpdatePositionArgs& updates, long long* configVersion) { + // TODO + return Status::OK(); +} + Status ReplicationCoordinatorMock::processReplSetUpdatePosition(const UpdatePositionArgs& updates, long long* configVersion) { // TODO @@ -300,7 +326,8 @@ bool ReplicationCoordinatorMock::buildsIndexes() { return true; } -std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op) { +std::vector<HostAndPort> ReplicationCoordinatorMock::getHostsWrittenTo(const OpTime& op, + bool durablyWritten) { return std::vector<HostAndPort>(); } @@ -328,7 +355,7 @@ HostAndPort ReplicationCoordinatorMock::chooseNewSyncSource(const Timestamp& las void ReplicationCoordinatorMock::blacklistSyncSource(const HostAndPort& host, Date_t until) {} -void ReplicationCoordinatorMock::resetLastOpTimeFromOplog(OperationContext* txn) { +void ReplicationCoordinatorMock::resetLastOpTimesFromOplog(OperationContext* txn) { invariant(false); } @@ -367,6 +394,10 @@ bool ReplicationCoordinatorMock::isV1ElectionProtocol() { return true; } +bool ReplicationCoordinatorMock::getWriteConcernMajorityShouldJournal() { + 
return true; +} + void ReplicationCoordinatorMock::summarizeAsHtml(ReplSetHtmlSummary* output) {} long long ReplicationCoordinatorMock::getTerm() { @@ -398,5 +429,13 @@ size_t ReplicationCoordinatorMock::getNumUncommittedSnapshots() { return 0; } +WriteConcernOptions ReplicationCoordinatorMock::populateUnsetWriteConcernOptionsSyncMode( + WriteConcernOptions wc) { + if (wc.syncMode == WriteConcernOptions::SyncMode::UNSET) { + wc.syncMode = WriteConcernOptions::SyncMode::JOURNAL; + } + return wc; +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index 60f1b1b23c4..4a21d9ad705 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -103,15 +103,18 @@ public: virtual Status setLastOptimeForSlave(const OID& rid, const Timestamp& ts); - virtual void setMyLastOptime(const OpTime& opTime); + virtual void setMyLastAppliedOpTime(const OpTime& opTime); + virtual void setMyLastDurableOpTime(const OpTime& opTime); - virtual void setMyLastOptimeForward(const OpTime& opTime); + virtual void setMyLastAppliedOpTimeForward(const OpTime& opTime); + virtual void setMyLastDurableOpTimeForward(const OpTime& opTime); - virtual void resetMyLastOptime(); + virtual void resetMyLastOpTimes(); virtual void setMyHeartbeatMessage(const std::string& msg); - virtual OpTime getMyLastOptime() const; + virtual OpTime getMyLastAppliedOpTime() const; + virtual OpTime getMyLastDurableOpTime() const; virtual ReadConcernResponse waitUntilOpTime(OperationContext* txn, const ReadConcernArgs& settings) override; @@ -132,6 +135,7 @@ public: virtual void signalUpstreamUpdater(); + virtual bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder); virtual bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder); virtual Status processReplSetGetStatus(BSONObjBuilder* result); @@ -177,6 +181,8 @@ public: virtual 
Status processReplSetElect(const ReplSetElectArgs& args, BSONObjBuilder* resultObj); + virtual Status processReplSetUpdatePosition(const OldUpdatePositionArgs& updates, + long long* configVersion); virtual Status processReplSetUpdatePosition(const UpdatePositionArgs& updates, long long* configVersion); @@ -184,7 +190,7 @@ public: virtual bool buildsIndexes(); - virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op); + virtual std::vector<HostAndPort> getHostsWrittenTo(const OpTime& op, bool durablyWritten); virtual std::vector<HostAndPort> getOtherNodesInReplSet() const; @@ -196,7 +202,7 @@ public: virtual void blacklistSyncSource(const HostAndPort& host, Date_t until); - virtual void resetLastOpTimeFromOplog(OperationContext* txn); + virtual void resetLastOpTimesFromOplog(OperationContext* txn); virtual bool shouldChangeSyncSource(const HostAndPort& currentSource, const OpTime& syncSourceLastOpTime, @@ -220,6 +226,8 @@ public: virtual bool isV1ElectionProtocol(); + virtual bool getWriteConcernMajorityShouldJournal(); + virtual void summarizeAsHtml(ReplSetHtmlSummary* output); virtual long long getTerm(); @@ -241,11 +249,15 @@ public: virtual size_t getNumUncommittedSnapshots() override; + virtual WriteConcernOptions populateUnsetWriteConcernOptionsSyncMode( + WriteConcernOptions wc) override; + private: AtomicUInt64 _snapshotNameGenerator; const ReplSettings _settings; MemberState _memberState; - OpTime _myLastOpTime; + OpTime _myLastDurableOpTime; + OpTime _myLastAppliedOpTime; }; } // namespace repl diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp index ed30ceacde8..cb45baf086f 100644 --- a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp +++ b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp @@ -113,12 +113,13 @@ void ReplCoordTest::init() { TopologyCoordinatorImpl::Options settings; _topo = new TopologyCoordinatorImpl(settings); + 
stdx::function<bool()> _durablityLambda = []() -> bool { return true; }; _net = new NetworkInterfaceMock; _storage = new StorageInterfaceMock; _replExec.reset(new ReplicationExecutor(_net, _storage, seed)); _externalState = new ReplicationCoordinatorExternalStateMock; - _repl.reset( - new ReplicationCoordinatorImpl(_settings, _externalState, _topo, _replExec.get(), seed)); + _repl.reset(new ReplicationCoordinatorImpl( + _settings, _externalState, _topo, _replExec.get(), seed, &_durablityLambda)); } void ReplCoordTest::init(const ReplSettings& settings) { diff --git a/src/mongo/db/repl/replication_info.cpp b/src/mongo/db/repl/replication_info.cpp index 211ba0ff89a..a5da7e24258 100644 --- a/src/mongo/db/repl/replication_info.cpp +++ b/src/mongo/db/repl/replication_info.cpp @@ -178,7 +178,7 @@ public: BSONObjBuilder result; // TODO(siyuan) Output term of OpTime - result.append("latestOptime", replCoord->getMyLastOptime().getTimestamp()); + result.append("latestOptime", replCoord->getMyLastAppliedOpTime().getTimestamp()); const std::string& oplogNS = replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp index a81d614b05a..b48f2f7d9a6 100644 --- a/src/mongo/db/repl/replset_commands.cpp +++ b/src/mongo/db/repl/replset_commands.cpp @@ -45,6 +45,7 @@ #include "mongo/db/lasterror.h" #include "mongo/db/op_observer.h" #include "mongo/db/repl/initial_sync.h" +#include "mongo/db/repl/old_update_position_args.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/repl_set_heartbeat_args_v1.h" #include "mongo/db/repl/repl_set_heartbeat_args.h" @@ -663,25 +664,43 @@ public: // accept and ignore handshakes sent from old (3.0-series) nodes without erroring to // enable mixed-version operation, since we no longer use the handshakes - if (cmdObj.hasField("handshake")) { + if (cmdObj.hasField("handshake")) return true; - } + + // In the case of an update from a member with 
an invalid replica set config, + // we return our current config version. + long long configVersion = -1; UpdatePositionArgs args; + status = args.initialize(cmdObj); - if (!status.isOK()) + if (status.isOK()) { + // v3.2.2+ style replSetUpdatePosition command. + status = getGlobalReplicationCoordinator()->processReplSetUpdatePosition( + args, &configVersion); + + if (status == ErrorCodes::InvalidReplicaSetConfig) { + result.append("configVersion", configVersion); + } return appendCommandStatus(result, status); + } else if (status == ErrorCodes::NoSuchKey) { + // Pre-3.2.2 style replSetUpdatePosition command. + OldUpdatePositionArgs oldArgs; + status = oldArgs.initialize(cmdObj); + if (!status.isOK()) + return appendCommandStatus(result, status); - // in the case of an update from a member with an invalid replica set config, - // we return our current config version - long long configVersion = -1; - status = - getGlobalReplicationCoordinator()->processReplSetUpdatePosition(args, &configVersion); + status = getGlobalReplicationCoordinator()->processReplSetUpdatePosition( + oldArgs, &configVersion); - if (status == ErrorCodes::InvalidReplicaSetConfig) { - result.append("configVersion", configVersion); + if (status == ErrorCodes::InvalidReplicaSetConfig) { + result.append("configVersion", configVersion); + } + return appendCommandStatus(result, status); + } else { + // Parsing error from UpdatePositionArgs. 
+ return appendCommandStatus(result, status); } - return appendCommandStatus(result, status); } } cmdReplSetUpdatePosition; diff --git a/src/mongo/db/repl/reporter.cpp b/src/mongo/db/repl/reporter.cpp index fbbc4d64d33..3a6cfb81e82 100644 --- a/src/mongo/db/repl/reporter.cpp +++ b/src/mongo/db/repl/reporter.cpp @@ -41,10 +41,10 @@ namespace repl { using executor::RemoteCommandRequest; Reporter::Reporter(ReplicationExecutor* executor, - PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn, + PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn, const HostAndPort& target) : _executor(executor), - _prepareReplSetUpdatePositionCommandFn(prepareReplSetUpdatePositionCommandFn), + _prepareOldReplSetUpdatePositionCommandFn(prepareOldReplSetUpdatePositionCommandFn), _target(target), _status(Status::OK()), _willRunAgain(false), @@ -52,7 +52,7 @@ Reporter::Reporter(ReplicationExecutor* executor, uassert(ErrorCodes::BadValue, "null replication executor", executor); uassert(ErrorCodes::BadValue, "null function to create replSetUpdatePosition command object", - prepareReplSetUpdatePositionCommandFn); + prepareOldReplSetUpdatePositionCommandFn); uassert(ErrorCodes::BadValue, "target name cannot be empty", !target.empty()); } @@ -105,11 +105,11 @@ Status Reporter::_schedule_inlock() { LOG(2) << "Reporter scheduling report to : " << _target; - auto prepareResult = _prepareReplSetUpdatePositionCommandFn(); + auto prepareResult = _prepareOldReplSetUpdatePositionCommandFn(); if (!prepareResult.isOK()) { // Returning NodeNotFound because currently this is the only way - // prepareReplSetUpdatePositionCommand() can fail in production. + // prepareOldReplSetUpdatePositionCommand() can fail in production. 
return Status(ErrorCodes::NodeNotFound, "Reporter failed to create replSetUpdatePositionCommand command."); } diff --git a/src/mongo/db/repl/reporter.h b/src/mongo/db/repl/reporter.h index 585539454c1..5fba25e5ba0 100644 --- a/src/mongo/db/repl/reporter.h +++ b/src/mongo/db/repl/reporter.h @@ -50,7 +50,7 @@ public: using PrepareReplSetUpdatePositionCommandFn = stdx::function<StatusWith<BSONObj>()>; Reporter(ReplicationExecutor* executor, - PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn, + PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn, const HostAndPort& target); virtual ~Reporter(); @@ -105,7 +105,7 @@ private: ReplicationExecutor* _executor; // Prepares update command object. - PrepareReplSetUpdatePositionCommandFn _prepareReplSetUpdatePositionCommandFn; + PrepareReplSetUpdatePositionCommandFn _prepareOldReplSetUpdatePositionCommandFn; // Host to whom the Reporter sends updates. HostAndPort _target; diff --git a/src/mongo/db/repl/reporter_test.cpp b/src/mongo/db/repl/reporter_test.cpp index c5533d7adb2..01904e81c92 100644 --- a/src/mongo/db/repl/reporter_test.cpp +++ b/src/mongo/db/repl/reporter_test.cpp @@ -52,7 +52,7 @@ public: _result = newResult; } - bool prepareReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) { + bool prepareOldReplSetUpdatePositionCommand(BSONObjBuilder* cmdBuilder) { if (!_result) { return _result; } @@ -85,7 +85,7 @@ protected: std::unique_ptr<Reporter> reporter; std::unique_ptr<MockProgressManager> posUpdater; - Reporter::PrepareReplSetUpdatePositionCommandFn prepareReplSetUpdatePositionCommandFn; + Reporter::PrepareReplSetUpdatePositionCommandFn prepareOldReplSetUpdatePositionCommandFn; }; ReporterTest::ReporterTest() {} @@ -93,16 +93,16 @@ ReporterTest::ReporterTest() {} void ReporterTest::setUp() { ReplicationExecutorTest::setUp(); posUpdater.reset(new MockProgressManager()); - prepareReplSetUpdatePositionCommandFn = [this]() -> StatusWith<BSONObj> { + 
prepareOldReplSetUpdatePositionCommandFn = [this]() -> StatusWith<BSONObj> { BSONObjBuilder bob; - if (posUpdater->prepareReplSetUpdatePositionCommand(&bob)) { + if (posUpdater->prepareOldReplSetUpdatePositionCommand(&bob)) { return bob.obj(); } return Status(ErrorCodes::OperationFailed, "unable to prepare replSetUpdatePosition command object"); }; reporter.reset(new Reporter(&getReplExecutor(), - [this]() { return prepareReplSetUpdatePositionCommandFn(); }, + [this]() { return prepareOldReplSetUpdatePositionCommandFn(); }, HostAndPort("h1"))); launchExecutorThread(); } @@ -138,12 +138,12 @@ TEST_F(ReporterTest, InvalidConstruction) { UserException); // null ReplicationExecutor - ASSERT_THROWS(Reporter(nullptr, prepareReplSetUpdatePositionCommandFn, HostAndPort("h1")), + ASSERT_THROWS(Reporter(nullptr, prepareOldReplSetUpdatePositionCommandFn, HostAndPort("h1")), UserException); // empty HostAndPort ASSERT_THROWS( - Reporter(&getReplExecutor(), prepareReplSetUpdatePositionCommandFn, HostAndPort()), + Reporter(&getReplExecutor(), prepareOldReplSetUpdatePositionCommandFn, HostAndPort()), UserException); } diff --git a/src/mongo/db/repl/rs_initialsync.cpp b/src/mongo/db/repl/rs_initialsync.cpp index 569480452d1..3d62914b081 100644 --- a/src/mongo/db/repl/rs_initialsync.cpp +++ b/src/mongo/db/repl/rs_initialsync.cpp @@ -88,11 +88,11 @@ void truncateAndResetOplog(OperationContext* txn, // Note: the following order is important. // The bgsync thread uses an empty optime as a sentinel to know to wait // for initial sync; thus, we must - // ensure the lastAppliedOptime is empty before restarting the bgsync thread + // ensure the lastAppliedOpTime is empty before restarting the bgsync thread // via stop(). // We must clear the sync source blacklist after calling stop() // because the bgsync thread, while running, may update the blacklist. 
- replCoord->resetMyLastOptime(); + replCoord->resetMyLastOpTimes(); bgsync->stop(); bgsync->clearBuffer(); @@ -214,7 +214,7 @@ bool _initialSyncClone(OperationContext* txn, * @return if applying the oplog succeeded. */ bool _initialSyncApplyOplog(OperationContext* ctx, repl::InitialSync* syncer, OplogReader* r) { - const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastOptime(); + const OpTime startOpTime = getGlobalReplicationCoordinator()->getMyLastAppliedOpTime(); BSONObj lastOp; // If the fail point is set, exit failing. @@ -370,7 +370,7 @@ Status _initialSync() { // prime oplog, but don't need to actually apply the op as the cloned data already reflects it. OpTime lastOptime = writeOpsToOplog(&txn, {lastOp}); ReplClientInfo::forClient(txn.getClient()).setLastOp(lastOptime); - replCoord->setMyLastOptime(lastOptime); + replCoord->setMyLastAppliedOpTime(lastOptime); setNewTimestamp(lastOptime.getTimestamp()); std::string msg = "oplog sync 1 of 3"; @@ -425,7 +425,7 @@ Status _initialSync() { { ScopedTransaction scopedXact(&txn, MODE_IX); AutoGetDb autodb(&txn, "local", MODE_X); - OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastOptime()); + OpTime lastOpTimeWritten(getGlobalReplicationCoordinator()->getMyLastAppliedOpTime()); log() << "set minValid=" << lastOpTimeWritten; // Initial sync is now complete. Flag this by setting minValid to the last thing diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index a9073f49524..076507b4106 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -791,9 +791,9 @@ void syncFixUp(OperationContext* txn, warn = true; } - // Reload the lastOpTimeApplied value in the replcoord and the lastAppliedHash value in - // bgsync to reflect our new last op. 
- replCoord->resetLastOpTimeFromOplog(txn); + // Reload the lastAppliedOpTime and lastDurableOpTime value in the replcoord and the + // lastAppliedHash value in bgsync to reflect our new last op. + replCoord->resetLastOpTimesFromOplog(txn); // done if (warn) diff --git a/src/mongo/db/repl/rs_rollback_test.cpp b/src/mongo/db/repl/rs_rollback_test.cpp index 04324090a15..84dd923eaef 100644 --- a/src/mongo/db/repl/rs_rollback_test.cpp +++ b/src/mongo/db/repl/rs_rollback_test.cpp @@ -77,13 +77,13 @@ ReplSettings createReplSettings() { class ReplicationCoordinatorRollbackMock : public ReplicationCoordinatorMock { public: ReplicationCoordinatorRollbackMock(); - void resetLastOpTimeFromOplog(OperationContext* txn) override; + void resetLastOpTimesFromOplog(OperationContext* txn) override; }; ReplicationCoordinatorRollbackMock::ReplicationCoordinatorRollbackMock() : ReplicationCoordinatorMock(createReplSettings()) {} -void ReplicationCoordinatorRollbackMock::resetLastOpTimeFromOplog(OperationContext* txn) {} +void ReplicationCoordinatorRollbackMock::resetLastOpTimesFromOplog(OperationContext* txn) {} class RollbackSourceMock : public RollbackSource { public: diff --git a/src/mongo/db/repl/rs_sync.cpp b/src/mongo/db/repl/rs_sync.cpp index 83dd0131567..9e5c0e7e344 100644 --- a/src/mongo/db/repl/rs_sync.cpp +++ b/src/mongo/db/repl/rs_sync.cpp @@ -107,7 +107,7 @@ void runSyncThread() { // 1. If the oplog is empty, do an initial sync // 2. If minValid has _initialSyncFlag set, do an initial sync // 3. If initialSyncRequested is true - if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() || + if (getGlobalReplicationCoordinator()->getMyLastAppliedOpTime().isNull() || getInitialSyncFlag() || initialSyncRequested) { syncDoInitialSync(); continue; // start from top again in case sync failed. 
diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp index 7c08a04be27..43b84f45e38 100644 --- a/src/mongo/db/repl/sync_source_feedback.cpp +++ b/src/mongo/db/repl/sync_source_feedback.cpp @@ -60,6 +60,7 @@ namespace repl { void SyncSourceFeedback::_resetConnection() { LOG(1) << "resetting connection in sync source feedback"; _connection.reset(); + _fallBackToOldUpdatePosition = false; } bool SyncSourceFeedback::replAuthenticate() { @@ -105,18 +106,24 @@ void SyncSourceFeedback::forwardSlaveProgress() { _cond.notify_all(); } -Status SyncSourceFeedback::updateUpstream(OperationContext* txn) { +Status SyncSourceFeedback::updateUpstream(OperationContext* txn, bool oldStyle) { auto replCoord = repl::ReplicationCoordinator::get(txn); if (replCoord->getMemberState().primary()) { - // primary has no one to update to + // Primary has no one to send updates to. return Status::OK(); } BSONObjBuilder cmd; { stdx::unique_lock<stdx::mutex> lock(_mtx); - // the command could not be created, likely because the node was removed from the set - if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) { - return Status::OK(); + // The command could not be created, likely because this node was removed from the set. + if (!oldStyle) { + if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) { + return Status::OK(); + } + } else { + if (!replCoord->prepareOldReplSetUpdatePositionCommand(&cmd)) { + return Status::OK(); + } } } BSONObj res; @@ -125,8 +132,9 @@ Status SyncSourceFeedback::updateUpstream(OperationContext* txn) { try { _connection->runCommand("admin", cmd.obj(), res); } catch (const DBException& e) { - log() << "SyncSourceFeedback error sending update: " << e.what() << endl; - // blacklist sync target for .5 seconds and find a new one + log() << "SyncSourceFeedback error sending " << (oldStyle ? "old style " : "") + << "update: " << e.what(); + // Blacklist sync target for .5 seconds and find a new one. 
replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500)); BackgroundSync::get()->clearSyncTarget(); _resetConnection(); @@ -135,11 +143,15 @@ Status SyncSourceFeedback::updateUpstream(OperationContext* txn) { Status status = Command::getStatusFromCommandResult(res); if (!status.isOK()) { - log() << "SyncSourceFeedback error sending update, response: " << res.toString() << endl; - // blacklist sync target for .5 seconds and find a new one, unless we were rejected due - // to the syncsource having a newer config - if (status != ErrorCodes::InvalidReplicaSetConfig || res["configVersion"].eoo() || - res["configVersion"].numberLong() < replCoord->getConfig().getConfigVersion()) { + log() << "SyncSourceFeedback error sending " << (oldStyle ? "old style " : "") + << "update, response: " << res.toString(); + if (status == ErrorCodes::BadValue && !oldStyle) { + log() << "SyncSourceFeedback falling back to old style UpdatePosition command"; + _fallBackToOldUpdatePosition = true; + } else if (status != ErrorCodes::InvalidReplicaSetConfig || res["configVersion"].eoo() || + res["configVersion"].numberLong() < replCoord->getConfig().getConfigVersion()) { + // Blacklist sync target for .5 seconds and find a new one, unless we were rejected due + // to the syncsource having a newer config. replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500)); BackgroundSync::get()->clearSyncTarget(); _resetConnection(); @@ -195,9 +207,16 @@ void SyncSourceFeedback::run() { continue; } } - Status status = updateUpstream(txn.get()); + bool oldFallBackValue = _fallBackToOldUpdatePosition; + Status status = updateUpstream(txn.get(), _fallBackToOldUpdatePosition); if (!status.isOK()) { - log() << "updateUpstream failed: " << status << ", will retry"; + if (_fallBackToOldUpdatePosition != oldFallBackValue) { + stdx::unique_lock<stdx::mutex> lock(_mtx); + _positionChanged = true; + } else { + log() << (_fallBackToOldUpdatePosition ? 
"old style " : "") << "updateUpstream" + << " failed: " << status << ", will retry"; + } } } } diff --git a/src/mongo/db/repl/sync_source_feedback.h b/src/mongo/db/repl/sync_source_feedback.h index d1dc13444e1..ed45b59a752 100644 --- a/src/mongo/db/repl/sync_source_feedback.h +++ b/src/mongo/db/repl/sync_source_feedback.h @@ -68,8 +68,10 @@ private: /* Inform the sync target of our current position in the oplog, as well as the positions * of all secondaries chained through us. + * "oldStyle" indicates whether or not the upstream node is pre-3.2.2 and needs the older style + * ReplSetUpdatePosition commands as a result. */ - Status updateUpstream(OperationContext* txn); + Status updateUpstream(OperationContext* txn, bool oldStyle); bool hasConnection() { return _connection.get(); @@ -92,6 +94,8 @@ private: bool _positionChanged = false; // Once this is set to true the _run method will terminate bool _shutdownSignaled = false; + // Indicates whether our syncSource can't accept the new version of the UpdatePosition command. + bool _fallBackToOldUpdatePosition = false; }; } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index c5369159245..8c517666909 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -217,13 +217,9 @@ ApplyBatchFinalizer::~ApplyBatchFinalizer() { } void ApplyBatchFinalizer::record(OpTime newOp) { - const bool mustWaitUntilDurable = _replCoord->isV1ElectionProtocol(); - if (!mustWaitUntilDurable) { - // We have to use setMyLastOptimeForward since this thread races with - // logTransitionToPrimaryToOplog. - _replCoord->setMyLastOptimeForward(newOp); - return; - } + // We have to use setMyLastAppliedOpTimeForward since this thread races with + // logTransitionToPrimaryToOplog. 
+ _replCoord->setMyLastAppliedOpTimeForward(newOp); stdx::unique_lock<stdx::mutex> lock(_mutex); _latestOpTime = newOp; @@ -253,9 +249,9 @@ void ApplyBatchFinalizer::_run() { auto txn = cc().makeOperationContext(); txn->recoveryUnit()->goingToWaitUntilDurable(); txn->recoveryUnit()->waitUntilDurable(); - // We have to use setMyLastOptimeForward since this thread races with + // We have to use setMyLastDurableOpTimeForward since this thread races with // logTransitionToPrimaryToOplog. - _replCoord->setMyLastOptimeForward(latestOpTime); + _replCoord->setMyLastDurableOpTimeForward(latestOpTime); } } } // anonymous namespace containing ApplyBatchFinalizer definitions. @@ -717,7 +713,7 @@ void SyncTail::oplogApplication() { auto minValidBoundaries = getMinValid(&txn); OpTime originalEndOpTime(minValidBoundaries.end); - OpTime lastWriteOpTime{replCoord->getMyLastOptime()}; + OpTime lastWriteOpTime{replCoord->getMyLastAppliedOpTime()}; while (!inShutdown()) { OpQueue ops; @@ -750,7 +746,7 @@ void SyncTail::oplogApplication() { } // Reset when triggered in case it was from a rollback, safe to do at any time. - lastWriteOpTime = replCoord->getMyLastOptime(); + lastWriteOpTime = replCoord->getMyLastAppliedOpTime(); continue; // This wasn't a real op. Don't try to apply it. 
} diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index 38a1dca8607..c31ab4f3881 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -233,6 +233,7 @@ public: const ReplSetHeartbeatArgs& args, const std::string& ourSetName, const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) = 0; // produce a reply to a V1 heartbeat @@ -240,6 +241,7 @@ public: const ReplSetHeartbeatArgsV1& args, const std::string& ourSetName, const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) = 0; // produce a reply to a status request diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp index a44b933208d..dd0ce2f766f 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl.cpp @@ -204,7 +204,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, // Find primary's oplog time. Reject sync candidates that are more than // _options.maxSyncSourceLagSecs seconds behind. if (_currentPrimaryIndex != -1) { - OpTime primaryOpTime = _hbdata[_currentPrimaryIndex].getOpTime(); + OpTime primaryOpTime = _hbdata[_currentPrimaryIndex].getAppliedOpTime(); // Check if primaryOpTime is still close to 0 because we haven't received // our first heartbeat from a new primary yet. @@ -257,7 +257,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, continue; } // Candidates cannot be excessively behind. - if (it->getOpTime() < oldestSyncOpTime) { + if (it->getAppliedOpTime() < oldestSyncOpTime) { continue; } // Candidate must not have a configured delay larger than ours. 
@@ -272,7 +272,7 @@ HostAndPort TopologyCoordinatorImpl::chooseNewSyncSource(Date_t now, } } // only consider candidates that are ahead of where we are - if (it->getOpTime().getTimestamp() <= lastTimestampApplied) { + if (it->getAppliedOpTime().getTimestamp() <= lastTimestampApplied) { continue; } // Candidate cannot be more latent than anything we've already considered. @@ -421,10 +421,10 @@ void TopologyCoordinatorImpl::prepareSyncFromResponse(const ReplicationExecutor: str::stream() << "I cannot reach the requested member: " << target.toString()); return; } - if (hbdata.getOpTime().getSecs() + 10 < lastOpApplied.getSecs()) { + if (hbdata.getAppliedOpTime().getSecs() + 10 < lastOpApplied.getSecs()) { warning() << "attempting to sync from " << target << ", but its latest opTime is " - << hbdata.getOpTime().getSecs() << " and ours is " << lastOpApplied.getSecs() - << " so this may not work"; + << hbdata.getAppliedOpTime().getSecs() << " and ours is " + << lastOpApplied.getSecs() << " so this may not work"; response->append("warning", str::stream() << "requested member \"" << target.toString() << "\" is more than 10 seconds behind us"); @@ -518,7 +518,7 @@ bool TopologyCoordinatorImpl::_shouldVetoMember( return true; } - if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getOpTime()) { + if (_iAmPrimary() && lastOpApplied >= _hbdata[hopefulIndex].getAppliedOpTime()) { // hbinfo is not updated for ourself, so if we are primary we have to check the // primary's last optime separately *errmsg = str::stream() << "I am already primary, " @@ -528,7 +528,8 @@ bool TopologyCoordinatorImpl::_shouldVetoMember( } if (_currentPrimaryIndex != -1 && (hopefulIndex != _currentPrimaryIndex) && - (_hbdata[_currentPrimaryIndex].getOpTime() >= _hbdata[hopefulIndex].getOpTime())) { + (_hbdata[_currentPrimaryIndex].getAppliedOpTime() >= + _hbdata[hopefulIndex].getAppliedOpTime())) { // other members might be aware of more up-to-date nodes *errmsg = str::stream() << 
_rsConfig.getMemberAt(hopefulIndex).getHostAndPort().toString() @@ -646,6 +647,7 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now, const ReplSetHeartbeatArgs& args, const std::string& ourSetName, const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) { if (args.getProtocolVersion() != 1) { return Status(ErrorCodes::BadValue, @@ -694,7 +696,8 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponse(Date_t now, // Heartbeat status message response->setHbMsg(_getHbmsg(now)); response->setTime(duration_cast<Seconds>(now - Date_t{})); - response->setOpTime(lastOpApplied); + response->setAppliedOpTime(lastOpApplied); + response->setDurableOpTime(lastOpDurable); if (!_syncSource.empty()) { response->setSyncingTo(_syncSource); @@ -737,6 +740,7 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now, const ReplSetHeartbeatArgsV1& args, const std::string& ourSetName, const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response) { // Verify that replica set names match const std::string rshb = args.getSetName(); @@ -770,7 +774,8 @@ Status TopologyCoordinatorImpl::prepareHeartbeatResponseV1(Date_t now, response->setElectionTime(_electionTime); } - response->setOpTime(lastOpApplied); + response->setAppliedOpTime(lastOpApplied); + response->setDurableOpTime(lastOpDurable); if (_currentPrimaryIndex != -1) { response->setPrimaryId(_rsConfig.getMemberAt(_currentPrimaryIndex).getId()); @@ -1148,7 +1153,7 @@ HeartbeatResponseAction TopologyCoordinatorImpl::_updatePrimaryFromHBData( const MemberConfig& highestPriorityMember = _rsConfig.getMemberAt(highestPriorityIndex); const OpTime highestPriorityMemberOptime = highestPriorityIndex == _selfIndex ? 
lastOpApplied - : _hbdata[highestPriorityIndex].getOpTime(); + : _hbdata[highestPriorityIndex].getAppliedOpTime(); if ((highestPriorityMember.getPriority() > currentPrimaryMember.getPriority()) && _isOpTimeCloseEnoughToLatestToElect(highestPriorityMemberOptime, lastOpApplied)) { @@ -1378,7 +1383,7 @@ OpTime TopologyCoordinatorImpl::_latestKnownOpTime(const OpTime& ourLastOpApplie continue; } - OpTime optime = it->getOpTime(); + OpTime optime = it->getAppliedOpTime(); if (optime > latest) { latest = optime; @@ -1467,7 +1472,7 @@ void TopologyCoordinatorImpl::_setCurrentPrimaryForTest(int primaryIndex) { ReplSetHeartbeatResponse hbResponse; hbResponse.setState(MemberState::RS_PRIMARY); hbResponse.setElectionTime(Timestamp()); - hbResponse.setOpTime(_hbdata[primaryIndex].getOpTime()); + hbResponse.setAppliedOpTime(_hbdata[primaryIndex].getAppliedOpTime()); hbResponse.setSyncingTo(HostAndPort()); hbResponse.setHbMsg(""); _hbdata[primaryIndex].setUpValues(_hbdata[primaryIndex].getLastHeartbeat(), @@ -1599,15 +1604,16 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplicationExecutor::C if (!itConfig.isArbiter()) { if (_rsConfig.getProtocolVersion() == 1) { BSONObjBuilder opTime(bb.subobjStart("optime")); - opTime.append("ts", it->getOpTime().getTimestamp()); - opTime.append("t", it->getOpTime().getTerm()); + opTime.append("ts", it->getAppliedOpTime().getTimestamp()); + opTime.append("t", it->getAppliedOpTime().getTerm()); opTime.done(); } else { - bb.append("optime", it->getOpTime().getTimestamp()); + bb.append("optime", it->getAppliedOpTime().getTimestamp()); } - bb.appendDate("optimeDate", - Date_t::fromDurationSinceEpoch(Seconds(it->getOpTime().getSecs()))); + bb.appendDate( + "optimeDate", + Date_t::fromDurationSinceEpoch(Seconds(it->getAppliedOpTime().getSecs()))); } bb.appendDate("lastHeartbeat", it->getLastHeartbeat()); bb.appendDate("lastHeartbeatRecv", it->getLastHeartbeatRecv()); @@ -1917,7 +1923,7 @@ 
TopologyCoordinatorImpl::UnelectableReasonMask TopologyCoordinatorImpl::_getUnel result |= NotSecondary; } if (_rsConfig.getProtocolVersion() == 0 && - !_isOpTimeCloseEnoughToLatestToElect(hbData.getOpTime(), lastOpApplied)) { + !_isOpTimeCloseEnoughToLatestToElect(hbData.getAppliedOpTime(), lastOpApplied)) { result |= NotCloseEnoughToLatestOptime; } if (hbData.up() && hbData.isUnelectable()) { @@ -2178,7 +2184,7 @@ bool TopologyCoordinatorImpl::stepDown(Date_t until, bool force, const OpTime& l continue; } UnelectableReasonMask reason = _getUnelectableReason(i, lastOpApplied); - if (!reason && _hbdata[i].getOpTime() >= lastOpApplied) { + if (!reason && _hbdata[i].getAppliedOpTime() >= lastOpApplied) { canStepDown = true; } } @@ -2312,7 +2318,7 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentS invariant(currentSourceIndex != _selfIndex); OpTime currentSourceOpTime = - std::max(syncSourceLastOpTime, _hbdata[currentSourceIndex].getOpTime()); + std::max(syncSourceLastOpTime, _hbdata[currentSourceIndex].getAppliedOpTime()); if (currentSourceOpTime.isNull()) { // Haven't received a heartbeat from the sync source yet, so can't tell if we should @@ -2336,12 +2342,12 @@ bool TopologyCoordinatorImpl::shouldChangeSyncSource(const HostAndPort& currentS if (it->up() && (candidateConfig.isVoter() || !_selfConfig().isVoter()) && (candidateConfig.shouldBuildIndexes() || !_selfConfig().shouldBuildIndexes()) && it->getState().readable() && !_memberIsBlacklisted(candidateConfig, now) && - goalSecs < it->getOpTime().getSecs()) { + goalSecs < it->getAppliedOpTime().getSecs()) { log() << "re-evaluating sync source because our current sync source's most recent " << "OpTime is " << currentSourceOpTime.toString() << " which is more than " << _options.maxSyncSourceLagSecs << " behind member " << candidateConfig.getHostAndPort().toString() << " whose most recent OpTime is " - << it->getOpTime().toString(); + << it->getAppliedOpTime().toString(); 
invariant(itIndex != _selfIndex); return true; } diff --git a/src/mongo/db/repl/topology_coordinator_impl.h b/src/mongo/db/repl/topology_coordinator_impl.h index c335c12586b..9cea2c7e0b6 100644 --- a/src/mongo/db/repl/topology_coordinator_impl.h +++ b/src/mongo/db/repl/topology_coordinator_impl.h @@ -183,11 +183,13 @@ public: const ReplSetHeartbeatArgs& args, const std::string& ourSetName, const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response); virtual Status prepareHeartbeatResponseV1(Date_t now, const ReplSetHeartbeatArgsV1& args, const std::string& ourSetName, const OpTime& lastOpApplied, + const OpTime& lastOpDurable, ReplSetHeartbeatResponse* response); virtual void prepareStatusResponse(const ReplicationExecutor::CallbackArgs& data, Date_t now, diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp index 027b61d0443..3cba188212c 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp @@ -207,7 +207,8 @@ private: ReplSetHeartbeatResponse hb; hb.setConfigVersion(1); hb.setState(memberState); - hb.setOpTime(lastOpTimeSender); + hb.setDurableOpTime(lastOpTimeSender); + hb.setAppliedOpTime(lastOpTimeSender); hb.setElectionTime(electionTime); StatusWith<ReplSetHeartbeatResponse> hbResponse = responseStatus.isOK() @@ -1328,7 +1329,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { hb.setState(MemberState::RS_SECONDARY); hb.setElectionTime(electionTime); hb.setHbMsg("READY"); - hb.setOpTime(oplogProgress); + hb.setAppliedOpTime(oplogProgress); + hb.setDurableOpTime(oplogProgress); StatusWith<ReplSetHeartbeatResponse> hbResponseGood = StatusWith<ReplSetHeartbeatResponse>(hb); updateConfig( @@ -2972,7 +2974,8 @@ TEST_F( hbArgs.setSenderId(1); hbArgs.setSenderHost(HostAndPort("host3", 27017)); ReplSetHeartbeatResponse hbResp; - ASSERT_OK(getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", 
election, &hbResp)); + ASSERT_OK( + getTopoCoord().prepareHeartbeatResponse(now(), hbArgs, "rs0", election, election, &hbResp)); ASSERT(!hbResp.hasIsElectable() || hbResp.isElectable()) << hbResp.toString(); } @@ -4258,8 +4261,8 @@ public: OpTime lastOpApplied, ReplSetHeartbeatResponse* response, Status* result) { - *result = - getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", lastOpApplied, response); + *result = getTopoCoord().prepareHeartbeatResponse( + now()++, args, "rs0", lastOpApplied, lastOpApplied, response); } }; @@ -4332,7 +4335,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4356,7 +4359,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4381,7 +4384,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4406,7 +4409,7 @@ TEST_F(PrepareHeartbeatResponseTest, 
ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4430,7 +4433,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4457,7 +4460,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_TRUE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4473,13 +4476,13 @@ TEST_F(TopoCoordTest, SetConfigVersionToNegativeTwoInHeartbeatResponseWhenNoConf args.setSenderId(20); ReplSetHeartbeatResponse response; // prepare response and check the results - Status result = - getTopoCoord().prepareHeartbeatResponse(now()++, args, "rs0", OpTime(), &response); + Status result = getTopoCoord().prepareHeartbeatResponse( + now()++, args, "rs0", OpTime(), OpTime(), &response); ASSERT_OK(result); ASSERT_FALSE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + 
ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); @@ -4506,7 +4509,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_TRUE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s); - ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getDurableOpTime()); ASSERT_EQUALS(Timestamp(10, 0), response.getElectionTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); ASSERT_EQUALS("", response.getHbMsg()); @@ -4540,7 +4543,7 @@ TEST_F(PrepareHeartbeatResponseTest, ASSERT_TRUE(response.isElectable()); ASSERT_TRUE(response.isReplSet()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getDurableOpTime()); ASSERT_EQUALS(0, durationCount<Seconds>(response.getTime())); // changed to a syncing message because our sync source changed recently ASSERT_EQUALS("syncing from: h2:27017", response.getHbMsg()); @@ -4867,7 +4870,7 @@ TEST_F(HeartbeatResponseTest, ReconfigBetweenHeartbeatRequestAndRepsonse) { ReplSetHeartbeatResponse hb; hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0); - hb.setOpTime(lastOpTimeApplied); + hb.setDurableOpTime(lastOpTimeApplied); hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( @@ -4916,7 +4919,7 @@ TEST_F(HeartbeatResponseTest, ReconfigNodeRemovedBetweenHeartbeatRequestAndRepso ReplSetHeartbeatResponse hb; hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0); - hb.setOpTime(lastOpTimeApplied); + 
hb.setDurableOpTime(lastOpTimeApplied); hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp index 2476d4ed5ab..aa0ffeaaf2c 100644 --- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp @@ -209,7 +209,8 @@ private: ReplSetHeartbeatResponse hb; hb.setConfigVersion(1); hb.setState(memberState); - hb.setOpTime(lastOpTimeSender); + hb.setDurableOpTime(lastOpTimeSender); + hb.setAppliedOpTime(lastOpTimeSender); hb.setElectionTime(electionTime); hb.setTerm(getTopoCoord().getTerm()); @@ -1321,7 +1322,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) { hb.setState(MemberState::RS_SECONDARY); hb.setElectionTime(electionTime); hb.setHbMsg("READY"); - hb.setOpTime(oplogProgress); + hb.setDurableOpTime(oplogProgress); + hb.setAppliedOpTime(oplogProgress); StatusWith<ReplSetHeartbeatResponse> hbResponseGood = StatusWith<ReplSetHeartbeatResponse>(hb); updateConfig( @@ -1554,7 +1556,7 @@ public: ReplSetHeartbeatResponse* response, Status* result) { *result = getTopoCoord().prepareHeartbeatResponseV1( - now()++, args, "rs0", lastOpApplied, response); + now()++, args, "rs0", lastOpApplied, lastOpApplied, response); } }; @@ -1626,13 +1628,13 @@ TEST_F(TopoCoordTest, SetConfigVersionToNegativeTwoInHeartbeatResponseWhenNoConf args.setSenderId(20); ReplSetHeartbeatResponse response; // prepare response and check the results - Status result = - getTopoCoord().prepareHeartbeatResponseV1(now()++, args, "rs0", OpTime(), &response); + Status result = getTopoCoord().prepareHeartbeatResponseV1( + now()++, args, "rs0", OpTime(), OpTime(), &response); ASSERT_OK(result); // this change to true because we can now see a majority, unlike in the previous 
cases ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_STARTUP, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); // default term of topology coordinator is -1 ASSERT_EQUALS(-1, response.getTerm()); ASSERT_EQUALS(-2, response.getConfigVersion()); @@ -1652,7 +1654,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, ASSERT_OK(result); ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, response.getTerm()); ASSERT_EQUALS(1, response.getConfigVersion()); } @@ -1672,7 +1674,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, ASSERT_OK(result); ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, response.getTerm()); ASSERT_EQUALS(1, response.getConfigVersion()); } @@ -1693,7 +1695,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, ASSERT_TRUE(response.hasConfig()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, response.getTerm()); ASSERT_EQUALS(1, response.getConfigVersion()); } @@ -1714,7 +1716,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, ASSERT_FALSE(response.hasConfig()); ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); - ASSERT_EQUALS(OpTime(), response.getOpTime()); + ASSERT_EQUALS(OpTime(), response.getDurableOpTime()); ASSERT_EQUALS(0, response.getTerm()); ASSERT_EQUALS(1, response.getConfigVersion()); } @@ -1737,7 +1739,7 @@ 
TEST_F(PrepareHeartbeatResponseV1Test, SetStatePrimaryInHeartbeatResponseWhenPri ASSERT_EQUALS(MemberState::RS_PRIMARY, response.getState().s); ASSERT_TRUE(response.hasElectionTime()); ASSERT_EQUALS(getTopoCoord().getElectionTime(), response.getElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(11, 0), 0), response.getDurableOpTime()); ASSERT_EQUALS(0, response.getTerm()); ASSERT_EQUALS(1, response.getConfigVersion()); } @@ -1768,7 +1770,7 @@ TEST_F(PrepareHeartbeatResponseV1Test, ASSERT_EQUALS("rs0", response.getReplicaSetName()); ASSERT_EQUALS(MemberState::RS_SECONDARY, response.getState().s); ASSERT_FALSE(response.hasElectionTime()); - ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getOpTime()); + ASSERT_EQUALS(OpTime(Timestamp(100, 0), 0), response.getDurableOpTime()); ASSERT_EQUALS(0, response.getTerm()); ASSERT_EQUALS(1, response.getConfigVersion()); ASSERT_EQUALS(HostAndPort("h2"), response.getSyncingTo()); @@ -3134,7 +3136,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigNodeRemovedBetweenHeartbeatRequestAndRep ReplSetHeartbeatResponse hb; hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0); - hb.setOpTime(lastOpTimeApplied); + hb.setDurableOpTime(lastOpTimeApplied); hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = getTopoCoord().processHeartbeatResponse( @@ -3183,7 +3185,7 @@ TEST_F(HeartbeatResponseTestV1, ReconfigBetweenHeartbeatRequestAndRepsonse) { ReplSetHeartbeatResponse hb; hb.initialize(BSON("ok" << 1 << "v" << 1 << "state" << MemberState::RS_PRIMARY), 0); - hb.setOpTime(lastOpTimeApplied); + hb.setDurableOpTime(lastOpTimeApplied); hb.setElectionTime(election.getTimestamp()); StatusWith<ReplSetHeartbeatResponse> hbResponse = StatusWith<ReplSetHeartbeatResponse>(hb); HeartbeatResponseAction action = 
getTopoCoord().processHeartbeatResponse( diff --git a/src/mongo/db/repl/update_position_args.cpp b/src/mongo/db/repl/update_position_args.cpp index 6ccddfa96aa..c4e2688fdd1 100644 --- a/src/mongo/db/repl/update_position_args.cpp +++ b/src/mongo/db/repl/update_position_args.cpp @@ -1,5 +1,5 @@ /** - * Copyright 2014 MongoDB Inc. + * Copyright 2016 MongoDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, @@ -39,11 +39,11 @@ namespace mongo { namespace repl { -UpdatePositionArgs::UpdateInfo::UpdateInfo(const OID& anRid, - const OpTime& aTs, +UpdatePositionArgs::UpdateInfo::UpdateInfo(const OpTime& applied, + const OpTime& durable, long long aCfgver, long long aMemberId) - : rid(anRid), ts(aTs), cfgver(aCfgver), memberId(aMemberId) {} + : appliedOpTime(applied), durableOpTime(durable), cfgver(aCfgver), memberId(aMemberId) {} namespace { @@ -54,32 +54,21 @@ const std::string kLegalUpdatePositionFieldNames[] = { kCommandFieldName, kUpdateArrayFieldName, }; -const std::string kMemberRIDFieldName = "_id"; -const std::string kMemberConfigFieldName = "config"; -const std::string kOpTimeFieldName = "optime"; +const std::string kAppliedOpTimeFieldName = "appliedOpTime"; +const std::string kDurableOpTimeFieldName = "durableOpTime"; const std::string kMemberIdFieldName = "memberId"; const std::string kConfigVersionFieldName = "cfgver"; const std::string kLegalUpdateInfoFieldNames[] = { - kMemberConfigFieldName, - kMemberRIDFieldName, - kOpTimeFieldName, - kMemberIdFieldName, - kConfigVersionFieldName, + kAppliedOpTimeFieldName, kDurableOpTimeFieldName, kMemberIdFieldName, kConfigVersionFieldName, }; } // namespace Status UpdatePositionArgs::initialize(const BSONObj& argsObj) { - Status status = - bsonCheckOnlyHasFields("UpdatePositionArgs", argsObj, kLegalUpdatePositionFieldNames); - - if (!status.isOK()) - return status; - // grab the array of changes BSONElement updateArray; 
- status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray); + Status status = bsonExtractTypedField(argsObj, kUpdateArrayFieldName, Array, &updateArray); if (!status.isOK()) return status; @@ -87,23 +76,14 @@ Status UpdatePositionArgs::initialize(const BSONObj& argsObj) { BSONObjIterator i(updateArray.Obj()); while (i.more()) { BSONObj entry = i.next().Obj(); - status = bsonCheckOnlyHasFields("UpdateInfoArgs", entry, kLegalUpdateInfoFieldNames); + + OpTime appliedOpTime; + status = bsonExtractOpTimeField(entry, kAppliedOpTimeFieldName, &appliedOpTime); if (!status.isOK()) return status; - OpTime opTime; - if (entry[kOpTimeFieldName].isABSONObj()) { - // In protocol version 1, { ts: <timestamp>, t: term } - Status status = bsonExtractOpTimeField(entry, kOpTimeFieldName, &opTime); - if (!status.isOK()) - return status; - } else { - Timestamp ts; - status = bsonExtractTimestampField(entry, kOpTimeFieldName, &ts); - if (!status.isOK()) - return status; - opTime = OpTime(ts, OpTime::kUninitializedTerm); - } + OpTime durableOpTime; + status = bsonExtractOpTimeField(entry, kDurableOpTimeFieldName, &durableOpTime); if (!status.isOK()) return status; @@ -114,17 +94,12 @@ Status UpdatePositionArgs::initialize(const BSONObj& argsObj) { if (!status.isOK()) return status; - OID rid; - status = bsonExtractOIDFieldWithDefault(entry, kMemberRIDFieldName, OID(), &rid); - if (!status.isOK()) - return status; - long long memberID; status = bsonExtractIntegerFieldWithDefault(entry, kMemberIdFieldName, -1, &memberID); if (!status.isOK()) return status; - _updates.push_back(UpdateInfo(rid, opTime, cfgver, memberID)); + _updates.push_back(UpdateInfo(appliedOpTime, durableOpTime, cfgver, memberID)); } return Status::OK(); @@ -140,10 +115,11 @@ BSONObj UpdatePositionArgs::toBSON() const { BSONArrayBuilder updateArray(builder.subarrayStart(kUpdateArrayFieldName)); for (UpdatePositionArgs::UpdateIterator update = updatesBegin(); update != updatesEnd(); ++update) { 
- updateArray.append(BSON(kMemberRIDFieldName << update->rid << kOpTimeFieldName - << update->ts.getTimestamp() - << kConfigVersionFieldName << update->cfgver - << kMemberIdFieldName << update->memberId)); + BSONObjBuilder updateEntry(updateArray.subobjStart()); + updateEntry.append(kConfigVersionFieldName, update->cfgver); + updateEntry.append(kMemberIdFieldName, update->memberId); + update->durableOpTime.append(&updateEntry, kDurableOpTimeFieldName); + update->appliedOpTime.append(&updateEntry, kAppliedOpTimeFieldName); } updateArray.doneFast(); } diff --git a/src/mongo/db/repl/update_position_args.h b/src/mongo/db/repl/update_position_args.h index ecaf9ec5d4e..823a775588e 100644 --- a/src/mongo/db/repl/update_position_args.h +++ b/src/mongo/db/repl/update_position_args.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2014 MongoDB Inc. + * Copyright (C) 2016 MongoDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, @@ -45,10 +45,13 @@ namespace repl { class UpdatePositionArgs { public: struct UpdateInfo { - UpdateInfo(const OID& anRid, const OpTime& aTs, long long aCfgver, long long aMemberId); + UpdateInfo(const OpTime& applied, + const OpTime& durable, + long long aCfgver, + long long aMemberId); - OID rid; - OpTime ts; + OpTime appliedOpTime; + OpTime durableOpTime; long long cfgver; long long memberId; }; diff --git a/src/mongo/db/s/migration_impl.cpp b/src/mongo/db/s/migration_impl.cpp index d33fc302d9b..e761f27a78d 100644 --- a/src/mongo/db/s/migration_impl.cpp +++ b/src/mongo/db/s/migration_impl.cpp @@ -64,7 +64,7 @@ Tee* const migrateLog = RamLog::get("migrate"); const int kDefaultWriteTimeoutForMigrationMs = 60 * 1000; const WriteConcernOptions DefaultWriteConcernForMigration(2, - WriteConcernOptions::NONE, + WriteConcernOptions::SyncMode::NONE, kDefaultWriteTimeoutForMigrationMs); WriteConcernOptions getDefaultWriteConcernForMigration() { @@ -77,7 +77,7 @@ 
WriteConcernOptions getDefaultWriteConcernForMigration() { } } - return WriteConcernOptions(1, WriteConcernOptions::NONE, 0); + return WriteConcernOptions(1, WriteConcernOptions::SyncMode::NONE, 0); } BSONObj createRecvChunkCommitRequest(const MigrationSessionId& sessionId) { diff --git a/src/mongo/db/s/sharding_state_recovery.cpp b/src/mongo/db/s/sharding_state_recovery.cpp index a55ac8dd826..69c7dfe5596 100644 --- a/src/mongo/db/s/sharding_state_recovery.cpp +++ b/src/mongo/db/s/sharding_state_recovery.cpp @@ -64,7 +64,7 @@ const char kMinOpTimeUpdaters[] = "minOpTimeUpdaters"; const Seconds kWriteTimeout(15); const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority, - WriteConcernOptions::NONE, + WriteConcernOptions::SyncMode::UNSET, kWriteTimeout); MONGO_EXPORT_STARTUP_SERVER_PARAMETER(recoverShardingState, bool, true); diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp index 6f447d8b666..3c9086ca39a 100644 --- a/src/mongo/db/write_concern.cpp +++ b/src/mongo/db/write_concern.cpp @@ -26,6 +26,8 @@ * it in the license file. 
*/ +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kWrite + #include "mongo/platform/basic.h" #include "mongo/db/write_concern.h" @@ -43,6 +45,7 @@ #include "mongo/db/storage/storage_engine.h" #include "mongo/db/write_concern_options.h" #include "mongo/rpc/protocol.h" +#include "mongo/util/log.h" namespace mongo { @@ -59,23 +62,13 @@ static ServerStatusMetricField<Counter64> gleWtimeoutsDisplay("getLastError.wtim void setupSynchronousCommit(OperationContext* txn) { const WriteConcernOptions& writeConcern = txn->getWriteConcern(); - if (writeConcern.syncMode == WriteConcernOptions::JOURNAL || - writeConcern.syncMode == WriteConcernOptions::FSYNC) { + if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL || + writeConcern.syncMode == WriteConcernOptions::SyncMode::FSYNC) { txn->recoveryUnit()->goingToWaitUntilDurable(); } } namespace { -// The consensus protocol requires that w: majority implies j: true on all nodes. -void addJournalSyncForWMajority(WriteConcernOptions* writeConcern) { - if (repl::getGlobalReplicationCoordinator()->isV1ElectionProtocol() && - writeConcern->wMode == WriteConcernOptions::kMajority && - writeConcern->syncMode == WriteConcernOptions::NONE && - getGlobalServiceContext()->getGlobalStorageEngine()->isDurable()) { - writeConcern->syncMode = WriteConcernOptions::JOURNAL; - } -} - const std::string kLocalDB = "local"; } // namespace @@ -89,8 +82,6 @@ StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn, if (writeConcern.wNumNodes == 0 && writeConcern.wMode.empty()) { writeConcern.wNumNodes = 1; } - // Upgrade default write concern if necessary. - addJournalSyncForWMajority(&writeConcern); BSONElement writeConcernElement; Status wcStatus = bsonExtractTypedField(cmdObj, "writeConcern", Object, &writeConcernElement); @@ -118,17 +109,15 @@ StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn, return wcStatus; } - // Upgrade parsed write concern if necessary. 
- addJournalSyncForWMajority(&writeConcern); - return writeConcern; } + Status validateWriteConcern(OperationContext* txn, const WriteConcernOptions& writeConcern, const std::string& dbName) { const bool isJournalEnabled = getGlobalServiceContext()->getGlobalStorageEngine()->isDurable(); - if (writeConcern.syncMode == WriteConcernOptions::JOURNAL && !isJournalEnabled) { + if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL && !isJournalEnabled) { return Status(ErrorCodes::BadValue, "cannot use 'j' option when a host does not have journaling enabled"); } @@ -220,7 +209,7 @@ void WriteConcernResult::appendTo(const WriteConcernOptions& writeConcern, // GLE, but with journaling we don't actually need to run the fsync (fsync command is // preferred in 2.6). So we add a "waited" field if one doesn't exist. - if (writeConcern.syncMode == WriteConcernOptions::FSYNC) { + if (writeConcern.syncMode == WriteConcernOptions::SyncMode::FSYNC) { if (fsyncFiles < 0 && (wTime < 0 || !wTimedOut)) { dassert(result->asTempObj()["waited"].eoo()); result->appendNumber("waited", syncMillis); @@ -248,11 +237,18 @@ Status waitForWriteConcern(OperationContext* txn, // Next handle blocking on disk Timer syncTimer; + auto replCoord = repl::getGlobalReplicationCoordinator(); + WriteConcernOptions writeConcernWithPopulatedSyncMode = + replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern); - switch (writeConcern.syncMode) { - case WriteConcernOptions::NONE: + + switch (writeConcernWithPopulatedSyncMode.syncMode) { + case WriteConcernOptions::SyncMode::UNSET: + severe() << "Attempting to wait on a WriteConcern with an unset sync option"; + fassertFailed(34410); + case WriteConcernOptions::SyncMode::NONE: break; - case WriteConcernOptions::FSYNC: { + case WriteConcernOptions::SyncMode::FSYNC: { StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine(); if (!storageEngine->isDurable()) { result->fsyncFiles = storageEngine->flushAllFiles(true); @@ 
-262,8 +258,16 @@ Status waitForWriteConcern(OperationContext* txn, } break; } - case WriteConcernOptions::JOURNAL: - txn->recoveryUnit()->waitUntilDurable(); + case WriteConcernOptions::SyncMode::JOURNAL: + if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::Mode::modeNone) { + // Wait for ops to become durable then update replication system's + // knowledge of this. + OpTime appliedOpTime = replCoord->getMyLastAppliedOpTime(); + txn->recoveryUnit()->waitUntilDurable(); + replCoord->setMyLastDurableOpTimeForward(appliedOpTime); + } else { + txn->recoveryUnit()->waitUntilDurable(); + } break; } @@ -277,7 +281,8 @@ Status waitForWriteConcern(OperationContext* txn, } // needed to avoid incrementing gleWtimeStats SERVER-9005 - if (writeConcern.wNumNodes <= 1 && writeConcern.wMode.empty()) { + if (writeConcernWithPopulatedSyncMode.wNumNodes <= 1 && + writeConcernWithPopulatedSyncMode.wMode.empty()) { // no desired replication check return Status::OK(); } @@ -285,14 +290,17 @@ Status waitForWriteConcern(OperationContext* txn, // Now we wait for replication // Note that replica set stepdowns and gle mode changes are thrown as errors repl::ReplicationCoordinator::StatusAndDuration replStatus = - repl::getGlobalReplicationCoordinator()->awaitReplication(txn, replOpTime, writeConcern); + repl::getGlobalReplicationCoordinator()->awaitReplication( + txn, replOpTime, writeConcernWithPopulatedSyncMode); if (replStatus.status == ErrorCodes::WriteConcernFailed) { gleWtimeouts.increment(); result->err = "timeout"; result->wTimedOut = true; } // Add stats - result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(replOpTime); + result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo( + replOpTime, + writeConcernWithPopulatedSyncMode.syncMode == WriteConcernOptions::SyncMode::JOURNAL); gleWtimeStats.recordMillis(durationCount<Milliseconds>(replStatus.duration)); result->wTime = 
durationCount<Milliseconds>(replStatus.duration); diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp index b099d868a96..58af7b36a9d 100644 --- a/src/mongo/db/write_concern_options.cpp +++ b/src/mongo/db/write_concern_options.cpp @@ -72,6 +72,7 @@ WriteConcernOptions::WriteConcernOptions(const std::string& mode, : syncMode(sync), wNumNodes(0), wMode(mode), wTimeout(durationCount<Milliseconds>(timeout)) {} Status WriteConcernOptions::parse(const BSONObj& obj) { + reset(); if (obj.isEmpty()) { return Status(ErrorCodes::FailedToParse, "write concern object cannot be empty"); } @@ -94,10 +95,11 @@ Status WriteConcernOptions::parse(const BSONObj& obj) { return Status(ErrorCodes::FailedToParse, "fsync and j options cannot be used together"); if (j) { - syncMode = JOURNAL; - } - if (fsync) { - syncMode = FSYNC; + syncMode = SyncMode::JOURNAL; + } else if (fsync) { + syncMode = SyncMode::FSYNC; + } else if (!jEl.eoo()) { + syncMode = SyncMode::NONE; } BSONElement e = obj["w"]; @@ -172,10 +174,12 @@ BSONObj WriteConcernOptions::toBSON() const { builder.append("w", wMode); } - if (syncMode == FSYNC) { + if (syncMode == SyncMode::FSYNC) { builder.append("fsync", true); - } else if (syncMode == JOURNAL) { + } else if (syncMode == SyncMode::JOURNAL) { builder.append("j", true); + } else if (syncMode == SyncMode::NONE) { + builder.append("j", false); } builder.append("wtimeout", wTimeout); diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h index 1bac963f16f..5acc54e5294 100644 --- a/src/mongo/db/write_concern_options.h +++ b/src/mongo/db/write_concern_options.h @@ -37,7 +37,7 @@ class Status; struct WriteConcernOptions { public: - enum SyncMode { NONE, FSYNC, JOURNAL }; + enum class SyncMode { UNSET, NONE, FSYNC, JOURNAL }; static const int kNoTimeout = 0; static const int kNoWaiting = -1; @@ -51,6 +51,9 @@ public: WriteConcernOptions() { reset(); + // We set syncMode to NONE to avoid having an UNSET 
syncMode in default WriteConcernOptions + // since that can cause invariants to trigger. + syncMode = SyncMode::NONE; } WriteConcernOptions(int numNodes, SyncMode sync, int timeout); @@ -94,7 +97,7 @@ public: bool validForConfigServers() const; void reset() { - syncMode = NONE; + syncMode = SyncMode::UNSET; wNumNodes = 0; wMode = ""; wTimeout = 0; |