author     Matthew Russotto <matthew.russotto@10gen.com>     2020-07-13 14:16:58 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-07-13 18:56:40 +0000
commit     75691902119c31adc7955da52afcff63cc0bf97b
tree       cfa2e88e85afa0b8d9c850f0c2ecb429ec28fb4f
parent     5893ced59bd4f54626d92b72a6a776a2f2ec2ccc
SERVER-48530 Relax invariant around timestamping for nodes in REMOVED
-rw-r--r--  jstests/replsets/initial_sync_succeeds_when_syncing_node_removed.js | 106
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                   |   8
-rw-r--r--  src/mongo/db/storage/durable_catalog_impl.cpp                        |   5
3 files changed, 115 insertions, 4 deletions
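
In short, the scenario under test: a node performing initial sync can transiently land in the REMOVED state, for example when a DNS failure leaves it unable to find itself in the replica set config, and initial sync should carry on regardless. A rough shell sketch of how the new test below provokes and detects that state (assuming a ReplSetTest `rst` whose member at index 1 is the syncing node, with `primary` and `syncingNode` connections to the respective members):

    // Force-reconfig member 1 to an unresolvable host so the node concludes
    // it has been removed ("always.invalid" is reserved and never resolves).
    let config = rst.getReplSetConfigFromNode();
    config.members[1].host = "always.invalid:27017";
    config.version++;
    assert.commandWorked(primary.adminCommand({replSetReconfig: config, force: 1}));

    // A node in REMOVED rejects replSetGetStatus with InvalidReplicaSetConfig.
    assert.soonNoExcept(function() {
        assert.commandFailedWithCode(syncingNode.adminCommand({replSetGetStatus: 1}),
                                     ErrorCodes.InvalidReplicaSetConfig);
        return true;
    });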
diff --git a/jstests/replsets/initial_sync_succeeds_when_syncing_node_removed.js b/jstests/replsets/initial_sync_succeeds_when_syncing_node_removed.js
new file mode 100644
index 00000000000..362c0e68a2a
--- /dev/null
+++ b/jstests/replsets/initial_sync_succeeds_when_syncing_node_removed.js
@@ -0,0 +1,106 @@
+/**
+ * Tests that initial sync will continue if the syncing node is removed during syncing.
+ * This behavior is desired because transient DNS failures can cause the node to falsely believe
+ * that it is removed.
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+const testName = TestData.testName;
+const rst = new ReplSetTest({name: testName, nodes: [{}]});
+const nodes = rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const primaryDb = primary.getDB("test");
+const initialSyncSource = primary;
+
+// Add some data to be cloned.
+assert.commandWorked(primaryDb.test.insert([{a: 1}, {b: 2}, {c: 3}]));
+rst.awaitReplication();
+
+jsTest.log("Adding the initial sync destination node to the replica set");
+const initialSyncNode = rst.add({
+ rsConfig: {priority: 0, votes: 0},
+ setParameter: {
+ 'failpoint.initialSyncHangBeforeCopyingDatabases': tojson({mode: 'alwaysOn'}),
+ 'numInitialSyncAttempts': 1,
+ 'logComponentVerbosity': tojson({replication: {verbosity: 1}}),
+ 'failpoint.forceSyncSourceCandidate':
+ tojson({mode: 'alwaysOn', data: {hostAndPort: initialSyncSource.host}})
+ }
+});
+rst.reInitiate();
+rst.waitForState(initialSyncNode, ReplSetTest.State.STARTUP_2);
+
+// Set up the node to hang before initial sync finishes so we can check its status.
+const beforeFinishFailPoint = configureFailPoint(initialSyncNode, "initialSyncHangBeforeFinish");
+
+jsTestLog("Waiting to reach cloning phase of initial sync");
+assert.commandWorked(initialSyncNode.adminCommand({
+ waitForFailPoint: "initialSyncHangBeforeCopyingDatabases",
+ timesEntered: 1,
+ maxTimeMS: kDefaultWaitForFailPointTimeout
+}));
+jsTestLog("Removing initial sync node");
+let config = rst.getReplSetConfigFromNode();
+const origHost = config.members[1].host;
+// This host will never resolve.
+config.members[1].host = "always.invalid:27017";
+config.version++;
+assert.commandWorked(primary.adminCommand({replSetReconfig: config, force: 1}));
+jsTestLog("Waiting for initial sync node to realize it is removed.");
+assert.soonNoExcept(function() {
+ assert.commandFailedWithCode(initialSyncNode.adminCommand({replSetGetStatus: 1}),
+ ErrorCodes.InvalidReplicaSetConfig);
+ return true;
+});
+
+// Release the initial failpoint.
+assert.commandWorked(initialSyncNode.adminCommand(
+ {configureFailPoint: "initialSyncHangBeforeCopyingDatabases", mode: "off"}));
+
+jsTestLog("Waiting for initial sync to complete.");
+beforeFinishFailPoint.wait();
+
+jsTestLog("Initial sync complete. Re-adding node to check initial sync status.");
+config.members[1].host = origHost;
+config.version++;
+assert.commandWorked(primary.adminCommand({replSetReconfig: config, force: 1}));
+rst.waitForState(initialSyncNode, ReplSetTest.State.STARTUP_2);
+const res = assert.commandWorked(initialSyncNode.adminCommand({replSetGetStatus: 1}));
+printjson(res.initialSyncStatus);
+assert.eq(res.initialSyncStatus.failedInitialSyncAttempts, 0);
+
+jsTestLog("Re-removing node.");
+config.members[1].host = "always.invalid:27017";
+config.version++;
+assert.commandWorked(primary.adminCommand({replSetReconfig: config, force: 1}));
+jsTestLog("Waiting for initial sync node to realize it is removed again.");
+assert.soonNoExcept(function() {
+ assert.commandFailedWithCode(initialSyncNode.adminCommand({replSetGetStatus: 1}),
+ ErrorCodes.InvalidReplicaSetConfig);
+ return true;
+});
+
+// Add some more data that must be applied during steady-state replication, not initial sync.
+assert.commandWorked(primaryDb.test.insert([{d: 4}, {e: 5}, {f: 6}]));
+beforeFinishFailPoint.off();
+
+// Wait until initial sync completion routine is finished.
+checkLog.containsJson(initialSyncNode, 4853000);
+
+// Make sure the node is still REMOVED.
+assert.commandFailedWithCode(initialSyncNode.adminCommand({replSetGetStatus: 1}),
+ ErrorCodes.InvalidReplicaSetConfig);
+
+jsTestLog("Re-adding initial sync node a final time");
+config.members[1].host = origHost;
+config.version++;
+assert.commandWorked(primary.adminCommand({replSetReconfig: config, force: 1}));
+rst.waitForState(initialSyncNode, ReplSetTest.State.SECONDARY);
+
+rst.stopSet();
+})();
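
The test above leans on the fail point helpers from jstests/libs/fail_point_util.js. A minimal sketch of that pattern in isolation, assuming `node` is a connection to a mongod (fail points are available in testing builds):

    load("jstests/libs/fail_point_util.js");

    // Enable a named fail point; the returned handle remembers how many times
    // the fail point had been entered when it was configured.
    const fp = configureFailPoint(node, "initialSyncHangBeforeFinish");

    // ... drive the node toward the fail point ...

    fp.wait();   // block until the node actually reaches the fail point
    // ... inspect state while the node is parked ...
    fp.off();    // disable the fail point and let the node continue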
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 5537f73eb1d..041c7cce065 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -797,12 +797,16 @@ void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
if (startCompleted) {
startCompleted();
}
- // Because initial sync completed, we can only be in STARTUP2, not REMOVED.
// Transition from STARTUP2 to RECOVERING and start the producer and the applier.
- invariant(getMemberState().startup2());
+ // If the member state is REMOVED, this will do nothing until we receive a config with
+ // ourself in it.
+ const auto memberState = getMemberState();
+ invariant(memberState.startup2() || memberState.removed());
invariant(setFollowerMode(MemberState::RS_RECOVERING));
auto opCtxHolder = cc().makeOperationContext();
_externalState->startSteadyStateReplication(opCtxHolder.get(), this);
+ // This log is used in tests to ensure we made it to this point.
+ LOGV2_DEBUG(4853000, 1, "Initial sync complete");
};
std::shared_ptr<InitialSyncer> initialSyncerCopy;
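
Tests can key off the new structured log line by its numeric id rather than its message text, which is what the new jstest does after releasing the final fail point. A one-line shell check, assuming `node` is a connection to the syncing member:

    // checkLog.containsJson waits until a structured (LOGV2) log line with the
    // given id appears in the node's log.
    checkLog.containsJson(node, 4853000);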
diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp
index b5f1c526cdf..9a0e6d54599 100644
--- a/src/mongo/db/storage/durable_catalog_impl.cpp
+++ b/src/mongo/db/storage/durable_catalog_impl.cpp
@@ -164,9 +164,10 @@ bool requiresTimestampForCatalogWrite(OperationContext* opCtx, const NamespaceSt
// Nodes in `startup` do not need to timestamp writes.
// Nodes in the oplog application phase of initial sync (`startup2`) must not timestamp writes
- // before the `initialDataTimestamp`.
+ // before the `initialDataTimestamp`. Nodes in initial sync may also be in the `removed` state
+ // due to DNS resolution errors; they may continue writing during that time.
const auto memberState = replCoord->getMemberState();
- if (memberState.startup() || memberState.startup2()) {
+ if (memberState.startup() || memberState.startup2() || memberState.removed()) {
return false;
}