summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVesselina Ratcheva <vesselina.ratcheva@10gen.com>2019-01-14 19:22:35 -0500
committerVesselina Ratcheva <vesselina.ratcheva@10gen.com>2019-02-27 14:35:10 -0500
commit5b6ae4ca09c36175186c7c0028758b9d9cdfc93e (patch)
tree3ba9db08761f1e5e719495a950f45375fc4c868e
parentf35415816415b9ff93bb963690ac4f8c9f6bc453 (diff)
downloadmongo-5b6ae4ca09c36175186c7c0028758b9d9cdfc93e.tar.gz
SERVER-37846 Disallow using arbiters to satisfy numeric write concern when writes commit
(cherry picked from commit b023cfd4db379092f7642dd825d79652d905f847)
-rw-r--r--jstests/replsets/arbiters_not_included_in_w2_wc.js53
-rw-r--r--jstests/replsets/arbiters_not_included_in_w3_wc.js51
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp8
-rw-r--r--src/mongo/db/repl/topology_coordinator_v1_test.cpp96
4 files changed, 208 insertions, 0 deletions
diff --git a/jstests/replsets/arbiters_not_included_in_w2_wc.js b/jstests/replsets/arbiters_not_included_in_w2_wc.js
new file mode 100644
index 00000000000..6ea19cc55a5
--- /dev/null
+++ b/jstests/replsets/arbiters_not_included_in_w2_wc.js
@@ -0,0 +1,53 @@
+/**
+ * Tests that arbiters cannot be used to acknowledge w:number writes even when some of the
+ * required secondaries are unavailable. After a write commits, the corresponding
+ * lastCommittedOpTime becomes the lastAppliedOpTime on arbiters in the set. This particular test
+ * uses a PSSAA set where both of the secondaries are non-voting, which makes the majority only one
+ * node. The effect of this configuration is that writes only need to be on the primary to commit.
+ * A w:2 write would thus require only one additional member to fully satisfy the write concern
+ * after committing. This test therefore shuts down the both secondaries and verifies that neither
+ * of the arbiters gets picked in its place and the w:2 write times out instead.
+ */
+
+(function() {
+ "use strict";
+
+ const name = "arbiters_not_included_in_w2_wc";
+ const rst = new ReplSetTest({name: name, nodes: 5});
+ const nodes = rst.nodeList();
+
+ rst.startSet();
+ rst.initiate({
+ "_id": name,
+ "members": [
+ {"_id": 0, "host": nodes[0]},
+ {"_id": 1, "host": nodes[1], priority: 0, votes: 0},
+ {"_id": 2, "host": nodes[2], priority: 0, votes: 0},
+ {"_id": 3, "host": nodes[3], "arbiterOnly": true},
+ {"_id": 4, "host": nodes[4], "arbiterOnly": true}
+ ]
+ });
+
+ const dbName = "test";
+ const collName = name;
+
+ const primary = rst.getPrimary();
+ const testDB = primary.getDB(dbName);
+ const testColl = testDB.getCollection(collName);
+
+ assert.commandWorked(
+ testColl.insert({"a": 1}, {writeConcern: {w: 2, wtimeout: ReplSetTest.kDefaultTimeoutMS}}));
+
+ jsTestLog("Shutting down both secondaries");
+
+ rst.stop(1);
+ rst.stop(2);
+
+ jsTestLog("Issuing a w:2 write and confirming that it times out");
+
+ assert.commandFailedWithCode(
+ testColl.insert({"b": 2}, {writeConcern: {w: 2, wtimeout: 5 * 1000}}),
+ ErrorCodes.WriteConcernFailed);
+
+ rst.stopSet();
+})(); \ No newline at end of file
diff --git a/jstests/replsets/arbiters_not_included_in_w3_wc.js b/jstests/replsets/arbiters_not_included_in_w3_wc.js
new file mode 100644
index 00000000000..8e6cbf360f7
--- /dev/null
+++ b/jstests/replsets/arbiters_not_included_in_w3_wc.js
@@ -0,0 +1,51 @@
+/**
+ * Tests that arbiters cannot be used to acknowledge w:number writes even when some of the
+ * required secondaries are unavailable. After a write commits, the corresponding
+ * lastCommittedOpTime becomes the lastAppliedOpTime on arbiters in the set. This particular test
+ * uses a PSSA set where one of the secondaries is non-voting, which makes the majority 2 nodes.
+ * This means that a write needs to replicate to only one of the secondaries to commit. A w:3
+ * write would thus require only one additional member to fully satisfy the write concern after
+ * committing. This test therefore shuts down the last secondary and verifies that the arbiter does
+ * *not* get picked in its place and the w:3 write times out instead.
+ */
+
+(function() {
+ "use strict";
+
+ const name = "arbiters_not_included_in_w3_wc";
+ const rst = new ReplSetTest({name: name, nodes: 4});
+ const nodes = rst.nodeList();
+
+ rst.startSet();
+ rst.initiate({
+ "_id": name,
+ "members": [
+ {"_id": 0, "host": nodes[0]},
+ {"_id": 1, "host": nodes[1], priority: 0},
+ {"_id": 2, "host": nodes[2], priority: 0, votes: 0},
+ {"_id": 3, "host": nodes[3], "arbiterOnly": true}
+ ]
+ });
+
+ const dbName = "test";
+ const collName = name;
+
+ const primary = rst.getPrimary();
+ const testDB = primary.getDB(dbName);
+ const testColl = testDB.getCollection(collName);
+
+ assert.commandWorked(
+ testColl.insert({"a": 1}, {writeConcern: {w: 3, wtimeout: ReplSetTest.kDefaultTimeoutMS}}));
+
+ jsTestLog("Shutting down the non-voting secondary");
+
+ rst.stop(2);
+
+ jsTestLog("Issuing a w:3 write and confirming that it times out");
+
+ assert.commandFailedWithCode(
+ testColl.insert({"b": 2}, {writeConcern: {w: 3, wtimeout: 5 * 1000}}),
+ ErrorCodes.WriteConcernFailed);
+
+ rst.stopSet();
+})(); \ No newline at end of file
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 3416709c24e..9fbf9dcfa51 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1183,8 +1183,16 @@ bool TopologyCoordinator::haveNumNodesReachedOpTime(const OpTime& targetOpTime,
}
for (auto&& memberData : _memberData) {
+ const auto isArbiter = _rsConfig.getMemberAt(memberData.getMemberId()).isArbiter();
+
+ // We do not count arbiters towards the write concern.
+ if (isArbiter) {
+ continue;
+ }
+
const OpTime& memberOpTime =
durablyWritten ? memberData.getLastDurableOpTime() : memberData.getLastAppliedOpTime();
+
if (memberOpTime >= targetOpTime) {
--numNodes;
}
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index 1882d894ebe..e60d324dd5d 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -5069,6 +5069,102 @@ TEST_F(TopoCoordTest, TwoNodesEligibleForElectionHandoffEqualPriorityResolveByMe
ASSERT_EQUALS(1, getTopoCoord().chooseElectionHandoffCandidate());
}
+TEST_F(TopoCoordTest, ArbiterNotIncludedInW3WriteInPSSAReplSet) {
+ // In a PSSA set, a w:3 write should only be acknowledged if both secondaries can satisfy it.
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0:27017")
+ << BSON("_id" << 1 << "host"
+ << "host1:27017")
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"
+ << "priority"
+ << 0
+ << "votes"
+ << 0)
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"
+ << "arbiterOnly"
+ << true))),
+ 0);
+
+ const auto term = getTopoCoord().getTerm();
+ makeSelfPrimary();
+
+ auto caughtUpOpTime = OpTime(Timestamp(100, 0), term);
+ auto laggedOpTime = OpTime(Timestamp(50, 0), term);
+
+ setMyOpTime(caughtUpOpTime);
+
+ // One secondary is caught up.
+ heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, caughtUpOpTime);
+
+ // The other is not.
+ heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, laggedOpTime);
+
+ // The arbiter is caught up, but should not count towards the w:3.
+ heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime);
+
+ ASSERT_FALSE(getTopoCoord().haveNumNodesReachedOpTime(
+ caughtUpOpTime, 3 /* numNodes */, false /* durablyWritten */));
+}
+
+TEST_F(TopoCoordTest, ArbitersNotIncludedInW2WriteInPSSAAReplSet) {
+ // In a PSSAA set, a w:2 write should only be acknowledged if at least one of the secondaries
+ // can satisfy it.
+ updateConfig(BSON("_id"
+ << "rs0"
+ << "version"
+ << 2
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 0 << "host"
+ << "host0:27017")
+ << BSON("_id" << 1 << "host"
+ << "host1:27017"
+ << "priority"
+ << 0
+ << "votes"
+ << 0)
+ << BSON("_id" << 2 << "host"
+ << "host2:27017"
+ << "priority"
+ << 0
+ << "votes"
+ << 0)
+ << BSON("_id" << 3 << "host"
+ << "host3:27017"
+ << "arbiterOnly"
+ << true)
+ << BSON("_id" << 4 << "host"
+ << "host4:27017"
+ << "arbiterOnly"
+ << true))),
+ 0);
+
+ const auto term = getTopoCoord().getTerm();
+ makeSelfPrimary();
+
+ auto caughtUpOpTime = OpTime(Timestamp(100, 0), term);
+ auto laggedOpTime = OpTime(Timestamp(50, 0), term);
+
+ setMyOpTime(caughtUpOpTime);
+
+ // Neither secondary is caught up.
+ heartbeatFromMember(HostAndPort("host1"), "rs0", MemberState::RS_SECONDARY, laggedOpTime);
+ heartbeatFromMember(HostAndPort("host2"), "rs0", MemberState::RS_SECONDARY, laggedOpTime);
+
+ // Both arbiters arae caught up, but neither should count towards the w:2.
+ heartbeatFromMember(HostAndPort("host3"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime);
+ heartbeatFromMember(HostAndPort("host4"), "rs0", MemberState::RS_ARBITER, caughtUpOpTime);
+
+ ASSERT_FALSE(getTopoCoord().haveNumNodesReachedOpTime(
+ caughtUpOpTime, 2 /* numNodes */, false /* durablyWritten */));
+}
+
TEST_F(HeartbeatResponseTestV1,
ScheduleACatchupTakeoverWhenElectableAndReceiveHeartbeatFromPrimaryInCatchup) {
updateConfig(BSON("_id"