diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2019-01-14 19:22:35 -0500 |
---|---|---|
committer | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2019-02-27 14:35:10 -0500 |
commit | 5b6ae4ca09c36175186c7c0028758b9d9cdfc93e (patch) | |
tree | 3ba9db08761f1e5e719495a950f45375fc4c868e | |
parent | f35415816415b9ff93bb963690ac4f8c9f6bc453 (diff) | |
download | mongo-5b6ae4ca09c36175186c7c0028758b9d9cdfc93e.tar.gz |
SERVER-37846 Disallow using arbiters to satisfy numeric write concern when writes commit
(cherry picked from commit b023cfd4db379092f7642dd825d79652d905f847)
-rw-r--r-- | jstests/replsets/arbiters_not_included_in_w2_wc.js | 53 | ||||
-rw-r--r-- | jstests/replsets/arbiters_not_included_in_w3_wc.js | 51 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 96 |
4 files changed, 208 insertions, 0 deletions
diff --git a/jstests/replsets/arbiters_not_included_in_w2_wc.js b/jstests/replsets/arbiters_not_included_in_w2_wc.js new file mode 100644 index 00000000000..6ea19cc55a5 --- /dev/null +++ b/jstests/replsets/arbiters_not_included_in_w2_wc.js @@ -0,0 +1,53 @@ +/** + * Tests that arbiters cannot be used to acknowledge w:number writes even when some of the + * required secondaries are unavailable. After a write commits, the corresponding + * lastCommittedOpTime becomes the lastAppliedOpTime on arbiters in the set. This particular test + * uses a PSSAA set where both of the secondaries are non-voting, which makes the majority only one + * node. The effect of this configuration is that writes only need to be on the primary to commit. + * A w:2 write would thus require only one additional member to fully satisfy the write concern + * after committing. This test therefore shuts down the both secondaries and verifies that neither + * of the arbiters gets picked in its place and the w:2 write times out instead. + */ + +(function() { + "use strict"; + + const name = "arbiters_not_included_in_w2_wc"; + const rst = new ReplSetTest({name: name, nodes: 5}); + const nodes = rst.nodeList(); + + rst.startSet(); + rst.initiate({ + "_id": name, + "members": [ + {"_id": 0, "host": nodes[0]}, + {"_id": 1, "host": nodes[1], priority: 0, votes: 0}, + {"_id": 2, "host": nodes[2], priority: 0, votes: 0}, + {"_id": 3, "host": nodes[3], "arbiterOnly": true}, + {"_id": 4, "host": nodes[4], "arbiterOnly": true} + ] + }); + + const dbName = "test"; + const collName = name; + + const primary = rst.getPrimary(); + const testDB = primary.getDB(dbName); + const testColl = testDB.getCollection(collName); + + assert.commandWorked( + testColl.insert({"a": 1}, {writeConcern: {w: 2, wtimeout: ReplSetTest.kDefaultTimeoutMS}})); + + jsTestLog("Shutting down both secondaries"); + + rst.stop(1); + rst.stop(2); + + jsTestLog("Issuing a w:2 write and confirming that it times out"); + + assert.commandFailedWithCode( + testColl.insert({"b": 2}, {writeConcern: {w: 2, wtimeout: 5 * 1000}}), + ErrorCodes.WriteConcernFailed); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/jstests/replsets/arbiters_not_included_in_w3_wc.js b/jstests/replsets/arbiters_not_included_in_w3_wc.js new file mode 100644 index 00000000000..8e6cbf360f7 --- /dev/null +++ b/jstests/replsets/arbiters_not_included_in_w3_wc.js @@ -0,0 +1,51 @@ +/** + * Tests that arbiters cannot be used to acknowledge w:number writes even when some of the + * required secondaries are unavailable. After a write commits, the corresponding + * lastCommittedOpTime becomes the lastAppliedOpTime on arbiters in the set. This particular test + * uses a PSSA set where one of the secondaries is non-voting, which makes the majority 2 nodes. + * This means that a write needs to replicate to only one of the secondaries to commit. A w:3 + * write would thus require only one additional member to fully satisfy the write concern after + * committing. This test therefore shuts down the last secondary and verifies that the arbiter does + * *not* get picked in its place and the w:3 write times out instead. + */ + +(function() { + "use strict"; + + const name = "arbiters_not_included_in_w3_wc"; + const rst = new ReplSetTest({name: name, nodes: 4}); + const nodes = rst.nodeList(); + + rst.startSet(); + rst.initiate({ + "_id": name, + "members": [ + {"_id": 0, "host": nodes[0]}, + {"_id": 1, "host": nodes[1], priority: 0}, + {"_id": 2, "host": nodes[2], priority: 0, votes: 0}, + {"_id": 3, "host": nodes[3], "arbiterOnly": true} + ] + }); + + const dbName = "test"; + const collName = name; + + const primary = rst.getPrimary(); + const testDB = primary.getDB(dbName); + const testColl = testDB.getCollection(collName); + + assert.commandWorked( + testColl.insert({"a": 1}, {writeConcern: {w: 3, wtimeout: ReplSetTest.kDefaultTimeoutMS}})); + + jsTestLog("Shutting down the non-voting secondary"); + + rst.stop(2); + + jsTestLog("Issuing a w:3 write and confirming that it times out"); + + assert.commandFailedWithCode( + testColl.insert({"b": 2}, {writeConcern: {w: 3, wtimeout: 5 * 1000}}), + ErrorCodes.WriteConcernFailed); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index 3416709c24e..9fbf9dcfa51 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -1183,8 +1183,16 @@ bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime, } for (auto&& memberData : _memberData) { + const auto isArbiter = _rsConfig.getMemberAt(memberData.getMemberId()).isArbiter(); + + // We do not count arbiters towards the write concern. + if (isArbiter) { + continue; + } + const OpTime& memberOpTime = durablyWritten ? memberData.getLastDurableOpTime() : memberData.getLastAppliedOpTime(); + if (memberOpTime >= targetOpTime) { --numNodes; } diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 1882d894ebe..e60d324dd5d 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -5069,6 +5069,102 @@ TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffEqualPriorityResolveByMe ASSERT_EQUALS(1, getTopoCoord().chooseElectionHandoffCandidate()); } +TEST_F(TopoCoordTest, ArbiterNotIncludedInW3WriteInPSSAReplSet) { + // In a PSSA set, a w:3 write should only be acknowledged if both secondaries can satisfy it. + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017") + << BSON("_id" << 2 << "host" + << "host2:27017" + << "priority" + << 0 + << "votes" + << 0) + << BSON("_id" << 3 << "host" + << "host3:27017" + << "arbiterOnly" + << true))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + + auto caughtUpOpTime = OpTime(Timestamp(100, 0), term); + auto laggedOpTime = OpTime(Timestamp(50, 0), term); + + setMyOpTime(caughtUpOpTime); + + // One secondary is caught up. + heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, caughtUpOpTime); + + // The other is not. + heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, laggedOpTime); + + // The arbiter is caught up, but should not count towards the w:3. + heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime); + + ASSERT_FALSE(getTopoCoord().haveNumNodesReachedOpTime( + caughtUpOpTime, 3 /* numNodes */, false /* durablyWritten */)); +} + +TEST_F(TopoCoordTest, ArbitersNotIncludedInW2WriteInPSSAAReplSet) { + // In a PSSAA set, a w:2 write should only be acknowledged if at least one of the secondaries + // can satisfy it. + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017" + << "priority" + << 0 + << "votes" + << 0) + << BSON("_id" << 2 << "host" + << "host2:27017" + << "priority" + << 0 + << "votes" + << 0) + << BSON("_id" << 3 << "host" + << "host3:27017" + << "arbiterOnly" + << true) + << BSON("_id" << 4 << "host" + << "host4:27017" + << "arbiterOnly" + << true))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + + auto caughtUpOpTime = OpTime(Timestamp(100, 0), term); + auto laggedOpTime = OpTime(Timestamp(50, 0), term); + + setMyOpTime(caughtUpOpTime); + + // Neither secondary is caught up. + heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, laggedOpTime); + heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, laggedOpTime); + + // Both arbiters arae caught up, but neither should count towards the w:2. + heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime); + heartbeatFromMember(HostAndPort("host4"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime); + + ASSERT_FALSE(getTopoCoord().haveNumNodesReachedOpTime( + caughtUpOpTime, 2 /* numNodes */, false /* durablyWritten */)); +} + TEST_F(HeartbeatResponseTestV1, ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) { updateConfig(BSON("_id" |