From e05ffdc10eb680dfbbf043678779aa4aac92bb0c Mon Sep 17 00:00:00 2001 From: Gregory Wlodarek Date: Wed, 30 Jan 2019 17:53:29 -0500 Subject: SERVER-37639 Add checkIfCommitQuorumCanBeSatisfied() and checkIfCommitQuorumIsSatisfied() to the replication interface --- src/mongo/base/error_codes.err | 1 + src/mongo/db/SConscript | 3 + src/mongo/db/repl/SConscript | 3 + src/mongo/db/repl/replication_coordinator.h | 21 +++ src/mongo/db/repl/replication_coordinator_impl.cpp | 46 +++++++ src/mongo/db/repl/replication_coordinator_impl.h | 10 ++ src/mongo/db/repl/replication_coordinator_mock.cpp | 11 ++ src/mongo/db/repl/replication_coordinator_mock.h | 6 + src/mongo/db/repl/topology_coordinator.cpp | 61 +++++++++ src/mongo/db/repl/topology_coordinator.h | 18 +++ src/mongo/db/repl/topology_coordinator_v1_test.cpp | 149 +++++++++++++++++++++ .../embedded/replication_coordinator_embedded.cpp | 11 ++ .../embedded/replication_coordinator_embedded.h | 7 + 13 files changed, 347 insertions(+) diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 9f70c55617d..b0be7a0303d 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -275,6 +275,7 @@ error_code("ProducerConsumerQueueConsumed", 274) error_code("ExchangePassthrough", 275) # For exchange execution in aggregation. Do not reuse. error_code("IndexBuildAborted", 276) error_code("AlarmAlreadyFulfilled", 277) +error_code("UnsatisfiableCommitQuorum", 278) # Error codes 4000-8999 are reserved. 
# Non-sequential error codes (for compatibility only) diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index aca5070ff88..4ece86d1ee3 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -931,6 +931,9 @@ env.Library( "index_builds_coordinator_interface", "$BUILD_DIR/mongo/util/concurrency/thread_pool", ], + LIBDEPS_PRIVATE=[ + "$BUILD_DIR/mongo/db/catalog/commit_quorum_options", + ], ) env.Library( diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index f78940a728d..03481453497 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -790,6 +790,9 @@ env.Library('topology_coordinator', 'repl_settings', 'rslog', 'repl_coordinator_interface', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/catalog/commit_quorum_options', ]) env.CppUnitTest('repl_set_heartbeat_response_test', diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 229ee7e5df3..9c310e41025 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -46,6 +46,7 @@ namespace mongo { class BSONObj; class BSONObjBuilder; +class CommitQuorumOptions; class IndexDescriptor; class NamespaceString; class OperationContext; @@ -250,6 +251,26 @@ public: virtual Status checkIfWriteConcernCanBeSatisfied( const WriteConcernOptions& writeConcern) const = 0; + /** + * Checks if the 'commitQuorum' can be satisfied by all the members in the replica set; if it + * cannot be satisfied, then the 'UnsatisfiableCommitQuorum' error code is returned. + * + * Returns the 'NoReplicationEnabled' error code if this is called without replication enabled. + */ + virtual Status checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum) const = 0; + + /** + * Checks if the 'commitQuorum' has been satisfied by the 'commitReadyMembers', if it has been + * satisfied, return true. 
+ * + * Prior to checking if the 'commitQuorum' is satisfied by 'commitReadyMembers', it calls + * 'checkIfCommitQuorumCanBeSatisfied()' with all the replica set members. + */ + virtual StatusWith checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const = 0; + /** * Returns Status::OK() if it is valid for this node to serve reads on the given collection * and an errorcode indicating why the node cannot if it cannot. diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 560e2d61338..9da656c3138 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -42,6 +42,7 @@ #include "mongo/base/status.h" #include "mongo/client/fetcher.h" #include "mongo/db/audit.h" +#include "mongo/db/catalog/commit_quorum_options.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" #include "mongo/db/commands/test_commands_enabled.h" @@ -3118,6 +3119,51 @@ Status ReplicationCoordinatorImpl::_checkIfWriteConcernCanBeSatisfied_inlock( return _rsConfig.checkIfWriteConcernCanBeSatisfied(writeConcern); } +Status ReplicationCoordinatorImpl::checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum) const { + stdx::lock_guard lock(_mutex); + return _checkIfCommitQuorumCanBeSatisfied(lock, commitQuorum); +} + +Status ReplicationCoordinatorImpl::_checkIfCommitQuorumCanBeSatisfied( + WithLock, const CommitQuorumOptions& commitQuorum) const { + if (getReplicationMode() == modeNone) { + return Status(ErrorCodes::NoReplicationEnabled, + "No replication enabled when checking if commit quorum can be satisfied"); + } + + invariant(getReplicationMode() == modeReplSet); + + std::vector memberConfig(_rsConfig.membersBegin(), _rsConfig.membersEnd()); + + // We need to ensure that the 'commitQuorum' can be satisfied by all the members of this + // replica set. 
+ bool commitQuorumCanBeSatisfied = + _topCoord->checkIfCommitQuorumCanBeSatisfied(commitQuorum, memberConfig); + if (!commitQuorumCanBeSatisfied) { + return Status(ErrorCodes::UnsatisfiableCommitQuorum, + str::stream() << "Commit quorum cannot be satisfied with the current replica " + << "set configuration"); + } + return Status::OK(); +} + +StatusWith ReplicationCoordinatorImpl::checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const { + // If the 'commitQuorum' cannot be satisfied with all the members of this replica set, we + // need to inform the caller to avoid hanging while waiting for satisfiability of the + // 'commitQuorum' with 'commitReadyMembers' due to replica set reconfigurations. + stdx::lock_guard lock(_mutex); + Status status = _checkIfCommitQuorumCanBeSatisfied(lock, commitQuorum); + if (!status.isOK()) { + return status; + } + + // Return whether or not the 'commitQuorum' is satisfied by the 'commitReadyMembers'. 
+ return _topCoord->checkIfCommitQuorumIsSatisfied(commitQuorum, commitReadyMembers); +} + WriteConcernOptions ReplicationCoordinatorImpl::getGetLastErrorDefault() { stdx::lock_guard lock(_mutex); if (_rsConfig.isInitialized()) { diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index be46d90817d..04aeb218525 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -137,6 +137,13 @@ public: virtual Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const; + virtual Status checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum) const override; + + virtual StatusWith checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const override; + virtual Status checkCanServeReadsFor(OperationContext* opCtx, const NamespaceString& ns, bool slaveOk); @@ -742,6 +749,9 @@ private: Status _checkIfWriteConcernCanBeSatisfied_inlock(const WriteConcernOptions& writeConcern) const; + Status _checkIfCommitQuorumCanBeSatisfied(WithLock, + const CommitQuorumOptions& commitQuorum) const; + bool _canAcceptWritesFor_inlock(const NamespaceString& ns); int _getMyId_inlock() const; diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 040acc9c965..a2961c4fdfd 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -380,6 +380,17 @@ Status ReplicationCoordinatorMock::checkIfWriteConcernCanBeSatisfied( return Status::OK(); } +Status ReplicationCoordinatorMock::checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum) const { + return Status::OK(); +} + +StatusWith ReplicationCoordinatorMock::checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) 
const { + return true; +} + WriteConcernOptions ReplicationCoordinatorMock::getGetLastErrorDefault() { return WriteConcernOptions(); } diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index ed18860e16c..f79dadbc776 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -109,6 +109,12 @@ public: virtual Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions& writeConcern) const; + virtual Status checkIfCommitQuorumCanBeSatisfied(const CommitQuorumOptions& commitQuorum) const; + + virtual StatusWith checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const; + virtual Status checkCanServeReadsFor(OperationContext* opCtx, const NamespaceString& ns, bool slaveOk); diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 54724c9683d..df4e115f999 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -41,6 +41,7 @@ #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/audit.h" +#include "mongo/db/catalog/commit_quorum_options.h" #include "mongo/db/client.h" #include "mongo/db/mongod_options.h" #include "mongo/db/operation_context.h" @@ -2791,5 +2792,65 @@ TopologyCoordinator::latestKnownOpTimeSinceHeartbeatRestartPerMember() const { return opTimesPerMember; } +bool TopologyCoordinator::checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum, const std::vector& members) const { + if (!commitQuorum.mode.empty() && commitQuorum.mode != CommitQuorumOptions::kMajority) { + StatusWith tagPatternStatus = + _rsConfig.findCustomWriteMode(commitQuorum.mode); + if (!tagPatternStatus.isOK()) { + return false; + } + + ReplSetTagMatch matcher(tagPatternStatus.getValue()); + for (auto&& member : members) { + for (MemberConfig::TagIterator it = 
member.tagsBegin(); it != member.tagsEnd(); ++it) { + if (matcher.update(*it)) { + return true; + } + } + } + + // Even if all the nodes in the set had a given write it still would not satisfy this + // commit quorum. + return false; + } else { + int nodesRemaining = 0; + if (!commitQuorum.mode.empty()) { + invariant(commitQuorum.mode == CommitQuorumOptions::kMajority); + nodesRemaining = _rsConfig.getWriteMajority(); + } else { + nodesRemaining = commitQuorum.numNodes; + } + + for (auto&& member : members) { + if (!member.isArbiter()) { // Only count data-bearing nodes + --nodesRemaining; + if (nodesRemaining <= 0) { + return true; + } + } + } + return false; + } +} + +bool TopologyCoordinator::checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const { + std::vector commitReadyMemberConfigs; + for (auto& commitReadyMember : commitReadyMembers) { + const MemberConfig* memberConfig = _rsConfig.findMemberByHostAndPort(commitReadyMember); + + invariant(memberConfig); + commitReadyMemberConfigs.push_back(*memberConfig); + } + + // Calling this with commit ready members only is the same as checking if the commit quorum is + // satisfied. Because the 'commitQuorum' is based on the participation of all the replica set + // members, and if the 'commitQuorum' can be satisfied with all the commit ready members, then + // the commit quorum is satisfied in this replica set configuration. 
+ return checkIfCommitQuorumCanBeSatisfied(commitQuorum, commitReadyMemberConfigs); +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h index b2c4f44fea4..04656beaa26 100644 --- a/src/mongo/db/repl/topology_coordinator.h +++ b/src/mongo/db/repl/topology_coordinator.h @@ -44,6 +44,7 @@ #include "mongo/util/time_support.h" namespace mongo { +class CommitQuorumOptions; class Timestamp; namespace repl { @@ -659,6 +660,23 @@ public: */ std::map> latestKnownOpTimeSinceHeartbeatRestartPerMember() const; + /** + * Checks if the 'commitQuorum' can be satisfied by 'members'. Returns true if it can be + * satisfied. + * + * 'members' must be part of the replica set configuration. + */ + bool checkIfCommitQuorumCanBeSatisfied(const CommitQuorumOptions& commitQuorum, + const std::vector& members) const; + + /** + * Returns 'true' if the 'commitQuorum' is satisfied by the 'commitReadyMembers'. + * + * 'commitReadyMembers' must be part of the replica set configuration. 
+ */ + bool checkIfCommitQuorumIsSatisfied(const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const; + //////////////////////////////////////////////////////////// // // Test support methods diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index c44f572fe3b..57fcabefdee 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -33,6 +33,7 @@ #include #include "mongo/bson/json.h" +#include "mongo/db/catalog/commit_quorum_options.h" #include "mongo/db/repl/heartbeat_response_action.h" #include "mongo/db/repl/repl_set_heartbeat_args_v1.h" #include "mongo/db/repl/repl_set_heartbeat_response.h" @@ -5101,6 +5102,154 @@ TEST_F(TopoCoordTest, ArbitersNotIncludedInW2WriteInPSSAAReplSet) { caughtUpOpTime, 2 /* numNodes */, false /* durablyWritten */)); } +TEST_F(TopoCoordTest, CheckIfCommitQuorumCanBeSatisfied) { + ReplSetConfig configA; + ASSERT_OK(configA.initialize(BSON("_id" + << "rs0" + << "version" + << 1 + << "protocolVersion" + << 1 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "node0" + << "tags" + << BSON("dc" + << "NA" + << "rack" + << "rackNA1")) + << BSON("_id" << 1 << "host" + << "node1" + << "tags" + << BSON("dc" + << "NA" + << "rack" + << "rackNA2")) + << BSON("_id" << 2 << "host" + << "node2" + << "tags" + << BSON("dc" + << "NA" + << "rack" + << "rackNA3")) + << BSON("_id" << 3 << "host" + << "node3" + << "tags" + << BSON("dc" + << "EU" + << "rack" + << "rackEU1")) + << BSON("_id" << 4 << "host" + << "node4" + << "tags" + << BSON("dc" + << "EU" + << "rack" + << "rackEU2")) + << BSON("_id" << 5 << "host" + << "node5" + << "arbiterOnly" + << true)) + << "settings" + << BSON("getLastErrorModes" + << BSON("valid" << BSON("dc" << 2 << "rack" << 3) + << "invalidNotEnoughValues" + << BSON("dc" << 3) + << "invalidNotEnoughNodes" + << BSON("rack" << 6)))))); + 
getTopoCoord().updateConfig(configA, -1, Date_t()); + + std::vector memberConfig; + for (auto it = configA.membersBegin(); it != configA.membersEnd(); it++) { + memberConfig.push_back(*it); + } + + // Consider all the replica set members. + { + CommitQuorumOptions validNumberWC; + validNumberWC.numNodes = 5; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(validNumberWC, memberConfig)); + + CommitQuorumOptions invalidNumberWC; + invalidNumberWC.numNodes = 6; + ASSERT_FALSE( + getTopoCoord().checkIfCommitQuorumCanBeSatisfied(invalidNumberWC, memberConfig)); + + CommitQuorumOptions majorityWC; + majorityWC.mode = "majority"; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(majorityWC, memberConfig)); + + CommitQuorumOptions validModeWC; + validModeWC.mode = "valid"; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(validModeWC, memberConfig)); + + CommitQuorumOptions invalidModeWC; + invalidModeWC.mode = "invalidNotEnoughNodes"; + ASSERT_FALSE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(invalidModeWC, memberConfig)); + + CommitQuorumOptions fakeModeWC; + fakeModeWC.mode = "fake"; + ASSERT_FALSE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(fakeModeWC, memberConfig)); + } + + // Use a list of commit ready members that is not a majority. 
+ { + std::vector commitReadyMembersNoMajority; + commitReadyMembersNoMajority.push_back(*configA.findMemberByID(0)); + commitReadyMembersNoMajority.push_back(*configA.findMemberByID(1)); + commitReadyMembersNoMajority.push_back(*configA.findMemberByID(2)); + + CommitQuorumOptions validNumberWC; + validNumberWC.numNodes = 3; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(validNumberWC, + commitReadyMembersNoMajority)); + + CommitQuorumOptions invalidNumberWC; + invalidNumberWC.numNodes = 4; + ASSERT_FALSE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied( + invalidNumberWC, commitReadyMembersNoMajority)); + + CommitQuorumOptions majorityWC; + majorityWC.mode = "majority"; + ASSERT_FALSE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied( + majorityWC, commitReadyMembersNoMajority)); + + CommitQuorumOptions invalidModeWC; + invalidModeWC.mode = "valid"; + ASSERT_FALSE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied( + invalidModeWC, commitReadyMembersNoMajority)); + } + + // Use a list of commit ready members that is a majority. 
+ { + std::vector commitReadyMembersMajority; + commitReadyMembersMajority.push_back(*configA.findMemberByID(0)); + commitReadyMembersMajority.push_back(*configA.findMemberByID(1)); + commitReadyMembersMajority.push_back(*configA.findMemberByID(2)); + commitReadyMembersMajority.push_back(*configA.findMemberByID(3)); + + CommitQuorumOptions validNumberWC; + validNumberWC.numNodes = 4; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(validNumberWC, + commitReadyMembersMajority)); + + CommitQuorumOptions invalidNumberWC; + invalidNumberWC.numNodes = 5; + ASSERT_FALSE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(invalidNumberWC, + commitReadyMembersMajority)); + + CommitQuorumOptions majorityWC; + majorityWC.mode = "majority"; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(majorityWC, + commitReadyMembersMajority)); + + CommitQuorumOptions invalidModeWC; + invalidModeWC.mode = "valid"; + ASSERT_TRUE(getTopoCoord().checkIfCommitQuorumCanBeSatisfied(invalidModeWC, + commitReadyMembersMajority)); + } +} + TEST_F(HeartbeatResponseTestV1, ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) { updateConfig(BSON("_id" diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index 199bebd9d18..ae4ddee7270 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -353,6 +353,17 @@ Status ReplicationCoordinatorEmbedded::checkIfWriteConcernCanBeSatisfied( UASSERT_NOT_IMPLEMENTED; } +Status ReplicationCoordinatorEmbedded::checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum) const { + UASSERT_NOT_IMPLEMENTED; +} + +StatusWith ReplicationCoordinatorEmbedded::checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const { + UASSERT_NOT_IMPLEMENTED; +} + Status 
ReplicationCoordinatorEmbedded::checkReplEnabledForCommand(BSONObjBuilder*) { return Status(ErrorCodes::NoReplicationEnabled, "no replication on embedded"); } diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index 9e185f62a08..d29fd729b6a 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -104,6 +104,13 @@ public: Status checkIfWriteConcernCanBeSatisfied(const WriteConcernOptions&) const override; + Status checkIfCommitQuorumCanBeSatisfied( + const CommitQuorumOptions& commitQuorum) const override; + + StatusWith checkIfCommitQuorumIsSatisfied( + const CommitQuorumOptions& commitQuorum, + const std::vector& commitReadyMembers) const override; + void setMyLastAppliedOpTime(const repl::OpTime&) override; void setMyLastDurableOpTime(const repl::OpTime&) override; -- cgit v1.2.1