diff options
-rw-r--r-- | jstests/noPassthrough/member_id_too_large.js | 37 | ||||
-rw-r--r-- | jstests/replsets/arbiters_not_included_in_w2_wc.js | 54 | ||||
-rw-r--r-- | jstests/replsets/arbiters_not_included_in_w3_wc.js | 52 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/topology_coordinator_v1_test.cpp | 96 |
5 files changed, 249 insertions, 0 deletions
diff --git a/jstests/noPassthrough/member_id_too_large.js b/jstests/noPassthrough/member_id_too_large.js new file mode 100644 index 00000000000..a7c8e2b56af --- /dev/null +++ b/jstests/noPassthrough/member_id_too_large.js @@ -0,0 +1,37 @@ +// Tests replSetInitiate and replSetReconfig with member _id values greater than the number of +// members in the set, followed by waiting for writeConcern with "w" values equal to size of set. +// @tags: [requires_replication] +(function() { + "use strict"; + + const rst = new ReplSetTest({nodes: 2}); + rst.startSet(); + + jsTestLog("replSetInitiate with member _id greater than number of members"); + + let conf = rst.getReplSetConfig(); + conf.members[1]._id = 2; + + rst.initiate(conf); + + const dbName = "test"; + const collName = "test"; + const primary = rst.getPrimary(); + const testColl = primary.getDB(dbName).getCollection(collName); + const doc = {a: 1}; + + assert.writeOK(testColl.insert(doc, {writeConcern: {w: 2}})); + + jsTestLog("replSetReconfig with member _id greater than number of members"); + + let secondary2 = MongoRunner.runMongod({replSet: rst.name}); + conf = rst.getReplSetConfigFromNode(); + conf.version++; + conf.members.push({_id: 5, host: secondary2.host}); + assert.commandWorked(primary.getDB("admin").runCommand({replSetReconfig: conf})); + assert.writeOK(testColl.insert(doc, {writeConcern: {w: 2}})); + assert.writeOK(testColl.insert(doc, {writeConcern: {w: 3}})); + + MongoRunner.stopMongod(secondary2); + rst.stopSet(); +})(); diff --git a/jstests/replsets/arbiters_not_included_in_w2_wc.js b/jstests/replsets/arbiters_not_included_in_w2_wc.js new file mode 100644 index 00000000000..8f792bfdc79 --- /dev/null +++ b/jstests/replsets/arbiters_not_included_in_w2_wc.js @@ -0,0 +1,54 @@ +/** + * Tests that arbiters cannot be used to acknowledge w:number writes even when some of the + * required secondaries are unavailable. After a write commits, the corresponding + * lastCommittedOpTime becomes the lastAppliedOpTime on arbiters in the set. This particular test + * uses a PSSAA set where both of the secondaries are non-voting, which makes the majority only one + * node. The effect of this configuration is that writes only need to be on the primary to commit. + * A w:2 write would thus require only one additional member to fully satisfy the write concern + * after committing. This test therefore shuts down both secondaries and verifies that neither of + * the arbiters gets picked in its place and the w:2 write times out instead. + */ + +(function() { + "use strict"; + + const name = "arbiters_not_included_in_w2_wc"; + const rst = new ReplSetTest({name: name, nodes: 5}); + const nodes = rst.nodeList(); + + rst.startSet(); + rst.initiate({ + "_id": name, + "members": [ + {"_id": 0, "host": nodes[0]}, + {"_id": 1, "host": nodes[1], priority: 0, votes: 0}, + {"_id": 2, "host": nodes[2], priority: 0, votes: 0}, + {"_id": 3, "host": nodes[3], "arbiterOnly": true}, + {"_id": 4, "host": nodes[4], "arbiterOnly": true} + ] + }); + + const dbName = "test"; + const collName = name; + + const primary = rst.getPrimary(); + const testDB = primary.getDB(dbName); + const testColl = testDB.getCollection(collName); + + assert.writeOK(testColl.insert({"a": 1}, {writeConcern: {w: 2}})); + + jsTestLog("Shutting down both secondaries"); + + rst.stop(1); + rst.stop(2); + + jsTestLog("Issuing a w:2 write and confirming that it times out"); + + const res = testDB.runCommand( + {insert: collName, documents: [{"b": 2}], writeConcern: {w: 2, wtimeout: 5 * 1000}}); + assert.commandWorked(res); // Commands with write concern errors still report success. + assert(res.writeConcernError); + assert.eq(ErrorCodes.WriteConcernFailed, res.writeConcernError.code); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/jstests/replsets/arbiters_not_included_in_w3_wc.js b/jstests/replsets/arbiters_not_included_in_w3_wc.js new file mode 100644 index 00000000000..8aab8e47286 --- /dev/null +++ b/jstests/replsets/arbiters_not_included_in_w3_wc.js @@ -0,0 +1,52 @@ +/** + * Tests that arbiters cannot be used to acknowledge w:number writes even when some of the + * required secondaries are unavailable. After a write commits, the corresponding + * lastCommittedOpTime becomes the lastAppliedOpTime on arbiters in the set. This particular test + * uses a PSSA set where one of the secondaries is non-voting, which makes the majority 2 nodes. + * This means that a write needs to replicate to only one of the secondaries to commit. A w:3 + * write would thus require only one additional member to fully satisfy the write concern after + * committing. This test therefore shuts down the last secondary and verifies that the arbiter does + * *not* get picked in its place and the w:3 write times out instead. + */ + +(function() { + "use strict"; + + const name = "arbiters_not_included_in_w3_wc"; + const rst = new ReplSetTest({name: name, nodes: 4}); + const nodes = rst.nodeList(); + + rst.startSet(); + rst.initiate({ + "_id": name, + "members": [ + {"_id": 0, "host": nodes[0]}, + {"_id": 1, "host": nodes[1], priority: 0}, + {"_id": 2, "host": nodes[2], priority: 0, votes: 0}, + {"_id": 3, "host": nodes[3], "arbiterOnly": true} + ] + }); + + const dbName = "test"; + const collName = name; + + const primary = rst.getPrimary(); + const testDB = primary.getDB(dbName); + const testColl = testDB.getCollection(collName); + + assert.writeOK(testColl.insert({"a": 1}, {writeConcern: {w: 3}})); + + jsTestLog("Shutting down the non-voting secondary"); + + rst.stop(2); + + jsTestLog("Issuing a w:3 write and confirming that it times out"); + + const res = testDB.runCommand( + {insert: collName, documents: [{"b": 2}], writeConcern: {w: 3, wtimeout: 5 * 1000}}); + assert.commandWorked(res); // Commands with write concern errors still report success. + assert(res.writeConcernError); + assert.eq(ErrorCodes.WriteConcernFailed, res.writeConcernError.code); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp index d6609d297b5..1247710891e 100644 --- a/src/mongo/db/repl/topology_coordinator.cpp +++ b/src/mongo/db/repl/topology_coordinator.cpp @@ -1177,8 +1177,18 @@ bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime, } for (auto&& memberData : _memberData) { + // The index in the config is -1 for master-slave nodes. + const auto configIndex = memberData.getConfigIndex(); + if (configIndex >= 0) { + // We do not count arbiters towards the write concern. + if (_rsConfig.getMemberAt(configIndex).isArbiter()) { + continue; + } + } + const OpTime& memberOpTime = durablyWritten ? memberData.getLastDurableOpTime() : memberData.getLastAppliedOpTime(); + if (memberOpTime >= targetOpTime) { --numNodes; } diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp index 6d340b83546..3088da7dae0 100644 --- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp +++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp @@ -5005,6 +5005,102 @@ TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffEqualPriorityResolveByMe ASSERT_EQUALS(1, getTopoCoord().chooseElectionHandoffCandidate()); } +TEST_F(TopoCoordTest, ArbiterNotIncludedInW3WriteInPSSAReplSet) { + // In a PSSA set, a w:3 write should only be acknowledged if both secondaries can satisfy it. + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017") + << BSON("_id" << 2 << "host" + << "host2:27017" + << "priority" + << 0 + << "votes" + << 0) + << BSON("_id" << 3 << "host" + << "host3:27017" + << "arbiterOnly" + << true))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + + auto caughtUpOpTime = OpTime(Timestamp(100, 0), term); + auto laggedOpTime = OpTime(Timestamp(50, 0), term); + + setMyOpTime(caughtUpOpTime); + + // One secondary is caught up. + heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, caughtUpOpTime); + + // The other is not. + heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, laggedOpTime); + + // The arbiter is caught up, but should not count towards the w:3. + heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime); + + ASSERT_FALSE(getTopoCoord().haveNumNodesReachedOpTime( + caughtUpOpTime, 3 /* numNodes */, false /* durablyWritten */)); +} + +TEST_F(TopoCoordTest, ArbitersNotIncludedInW2WriteInPSSAAReplSet) { + // In a PSSAA set, a w:2 write should only be acknowledged if at least one of the secondaries + // can satisfy it. + updateConfig(BSON("_id" + << "rs0" + << "version" + << 2 + << "members" + << BSON_ARRAY(BSON("_id" << 0 << "host" + << "host0:27017") + << BSON("_id" << 1 << "host" + << "host1:27017" + << "priority" + << 0 + << "votes" + << 0) + << BSON("_id" << 2 << "host" + << "host2:27017" + << "priority" + << 0 + << "votes" + << 0) + << BSON("_id" << 3 << "host" + << "host3:27017" + << "arbiterOnly" + << true) + << BSON("_id" << 4 << "host" + << "host4:27017" + << "arbiterOnly" + << true))), + 0); + + const auto term = getTopoCoord().getTerm(); + makeSelfPrimary(); + + auto caughtUpOpTime = OpTime(Timestamp(100, 0), term); + auto laggedOpTime = OpTime(Timestamp(50, 0), term); + + setMyOpTime(caughtUpOpTime); + + // Neither secondary is caught up. + heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, laggedOpTime); + heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, laggedOpTime); + + // Both arbiters arae caught up, but neither should count towards the w:2. + heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime); + heartbeatFromMember(HostAndPort("host4"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime); + + ASSERT_FALSE(getTopoCoord().haveNumNodesReachedOpTime( + caughtUpOpTime, 2 /* numNodes */, false /* durablyWritten */)); +} + TEST_F(HeartbeatResponseTestV1, ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) { updateConfig(BSON("_id" |