summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbdul Qadeer <abdul.qadeer@mongodb.com>2022-07-18 11:36:54 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-18 12:08:05 +0000
commit0d5fd57f9e55915550dd7d13340e2944c169c6e2 (patch)
tree3155c6454d728fbc2640b149f6b3c0d8291e023e
parent6a8cc3ae1154cf24562b8f4d6b94a4c650f722ad (diff)
downloadmongo-0d5fd57f9e55915550dd7d13340e2944c169c6e2.tar.gz
SERVER-61985 SERVER-67193 Make reshardingPauseCoordinatorBeforeCompletion failpoint pause conditionally
-rw-r--r--jstests/multiVersion/genericSetFCVUsage/setfcv_aborts_reshard_collection.js (renamed from jstests/multiVersion/genericSetFCVUsage/setfcv_reshard_collection.js)33
-rw-r--r--jstests/sharding/libs/resharding_test_fixture.js6
-rw-r--r--jstests/sharding/resharding_abort_in_preparing_to_donate.js21
-rw-r--r--jstests/sharding/resharding_nonblocking_coordinator_rebuild.js14
-rw-r--r--jstests/sharding/resharding_prohibited_commands.js19
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.cpp11
6 files changed, 79 insertions, 25 deletions
diff --git a/jstests/multiVersion/genericSetFCVUsage/setfcv_reshard_collection.js b/jstests/multiVersion/genericSetFCVUsage/setfcv_aborts_reshard_collection.js
index 7fa818f7f99..82d7d62b8f7 100644
--- a/jstests/multiVersion/genericSetFCVUsage/setfcv_reshard_collection.js
+++ b/jstests/multiVersion/genericSetFCVUsage/setfcv_aborts_reshard_collection.js
@@ -1,6 +1,10 @@
+/**
+ * Tests that the setFeatureCompatibilityVersion command aborts an ongoing reshardCollection command.
+ */
(function() {
"use strict";
+load("jstests/libs/parallel_shell_helpers.js");
load("jstests/sharding/libs/resharding_test_fixture.js");
load('jstests/libs/discover_topology.js');
load('jstests/libs/fail_point_util.js');
@@ -21,6 +25,8 @@ function runTest(forcePooledConnectionsDropped) {
],
});
+ const sourceNamespace = inputCollection.getFullName();
+
let mongos = inputCollection.getMongo();
for (let x = 0; x < 1000; x++) {
@@ -37,7 +43,17 @@ function runTest(forcePooledConnectionsDropped) {
pauseBeforeCloseCxns = configureFailPoint(config, "pauseBeforeCloseCxns");
}
+ function checkCoordinatorDoc() {
+ assert.soon(() => {
+ const coordinatorDoc =
+ mongos.getCollection("config.reshardingOperations").findOne({ns: sourceNamespace});
+
+ return coordinatorDoc === null || coordinatorDoc.state === "aborting";
+ });
+ }
+
const recipientShardNames = reshardingTest.recipientShardNames;
+ let awaitShell;
reshardingTest.withReshardingInBackground(
{
newShardKeyPattern: {newKey: 1},
@@ -63,7 +79,7 @@ function runTest(forcePooledConnectionsDropped) {
assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
}`;
- let awaitShell = startParallelShell(codeToRunInParallelShell, mongos.port);
+ awaitShell = startParallelShell(codeToRunInParallelShell, mongos.port);
if (forcePooledConnectionsDropped) {
pauseBeforeCloseCxns.wait();
@@ -88,8 +104,7 @@ function runTest(forcePooledConnectionsDropped) {
jsTestLog("Turn off pause before pauseBeforeMarkKeepOpen failpoint");
pauseBeforeMarkKeepOpen.off();
}
-
- awaitShell();
+ checkCoordinatorDoc();
},
{
expectedErrorCode: [
@@ -98,6 +113,8 @@ function runTest(forcePooledConnectionsDropped) {
]
});
+ awaitShell();
+
reshardingTest.withReshardingInBackground(
{
newShardKeyPattern: {newKey: 1},
@@ -107,7 +124,14 @@ function runTest(forcePooledConnectionsDropped) {
],
},
() => {
- assert.commandWorked(mongos.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+ assert.soon(() => {
+ return mongos.getDB('config').reshardingOperations.findOne() != null;
+ }, "timed out waiting for coordinator doc to be written", 30 * 1000);
+ awaitShell = startParallelShell(funWithArgs(function(latestFCV) {
+ assert.commandWorked(db.adminCommand(
+ {setFeatureCompatibilityVersion: latestFCV}));
+ }, latestFCV), mongos.port);
+ checkCoordinatorDoc();
},
{
expectedErrorCode: [
@@ -117,6 +141,7 @@ function runTest(forcePooledConnectionsDropped) {
]
});
+ awaitShell();
reshardingTest.teardown();
}
diff --git a/jstests/sharding/libs/resharding_test_fixture.js b/jstests/sharding/libs/resharding_test_fixture.js
index 128ea61ebf1..30ed771ab98 100644
--- a/jstests/sharding/libs/resharding_test_fixture.js
+++ b/jstests/sharding/libs/resharding_test_fixture.js
@@ -321,8 +321,10 @@ var ReshardingTest = class {
configureFailPoint(configPrimary, "reshardingPauseCoordinatorBeforeBlockingWrites");
this._pauseCoordinatorBeforeDecisionPersistedFailpoint =
configureFailPoint(configPrimary, "reshardingPauseCoordinatorBeforeDecisionPersisted");
- this._pauseCoordinatorBeforeCompletionFailpoint = configureFailPoint(
- configPrimary, "reshardingPauseCoordinatorBeforeCompletion", {}, {times: 1});
+ this._pauseCoordinatorBeforeCompletionFailpoint =
+ configureFailPoint(configPrimary,
+ "reshardingPauseCoordinatorBeforeCompletion",
+ {"sourceNamespace": this._ns});
this._commandDoneSignal = new CountDownLatch(1);
diff --git a/jstests/sharding/resharding_abort_in_preparing_to_donate.js b/jstests/sharding/resharding_abort_in_preparing_to_donate.js
index 167dcd3c67a..711dbb71860 100644
--- a/jstests/sharding/resharding_abort_in_preparing_to_donate.js
+++ b/jstests/sharding/resharding_abort_in_preparing_to_donate.js
@@ -11,6 +11,7 @@
"use strict";
load("jstests/libs/discover_topology.js");
load("jstests/sharding/libs/resharding_test_fixture.js");
+load('jstests/libs/parallel_shell_helpers.js');
const originalCollectionNs = "reshardingDb.coll";
@@ -36,6 +37,7 @@ const configsvr = new Mongo(topology.configsvr.nodes[0]);
const pauseAfterPreparingToDonateFP =
configureFailPoint(configsvr, "reshardingPauseCoordinatorAfterPreparingToDonate");
+let awaitAbort;
reshardingTest.withReshardingInBackground(
{
@@ -47,13 +49,30 @@ reshardingTest.withReshardingInBackground(
},
() => {
pauseAfterPreparingToDonateFP.wait();
- assert.commandWorked(mongos.adminCommand({abortReshardCollection: originalCollectionNs}));
+ assert.neq(null, mongos.getCollection("config.reshardingOperations").findOne({
+ ns: originalCollectionNs
+ }));
// Signaling abort will cause the
// pauseAfterPreparingToDonateFP to throw, implicitly
// allowing the coordinator to make progress without
// explicitly turning off the failpoint.
+ awaitAbort =
+ startParallelShell(funWithArgs(function(sourceNamespace) {
+ db.adminCommand({abortReshardCollection: sourceNamespace});
+ }, originalCollectionNs), mongos.port);
+ // Wait for the abort to take effect: the coordinator document in
+ // config.reshardingOperations is either removed or moved to the "aborting" state.
+ assert.soon(() => {
+ const coordinatorDoc = mongos.getCollection("config.reshardingOperations").findOne({
+ ns: originalCollectionNs
+ });
+ return coordinatorDoc === null || coordinatorDoc.state === "aborting";
+ });
},
{expectedErrorCode: ErrorCodes.ReshardCollectionAborted});
+
+awaitAbort();
pauseAfterPreparingToDonateFP.off();
+
reshardingTest.teardown();
})();
diff --git a/jstests/sharding/resharding_nonblocking_coordinator_rebuild.js b/jstests/sharding/resharding_nonblocking_coordinator_rebuild.js
index dac1afc0014..2ee6c76aaf1 100644
--- a/jstests/sharding/resharding_nonblocking_coordinator_rebuild.js
+++ b/jstests/sharding/resharding_nonblocking_coordinator_rebuild.js
@@ -109,19 +109,7 @@ reshardingTest.withReshardingInBackground(
}
},
{
- // As a result of the elections intentionally triggered on the config server replica sets,
- // the primary shard of the database may retry the _configsvrReshardCollection command. It
- // is possible for the resharding operation from the first _configsvrReshardCollection
- // command to have entirely finished executing to the point of removing the coordinator
- // state document. A retry of the _configsvrReshardCollection command in this situation will
- // lead to a second resharding operation to run. The second resharding operation will have
- // the duplicate documents cloned by the ReshardingCollectionCloner rather than applied by
- // the ReshardingOplogApplier as intended. This results in the reshardCollection command
- // failing with a DuplicateKey error rather than the error code for the stash collections
- // being non-empty. The recipient must have been able to successfully update its state to
- // "applying" in the first resharding operation even when the ReshardingCoordinatorService
- // had yet to be rebuilt so we accept DuplicateKey as an error too.
- expectedErrorCode: [5356800, ErrorCodes.DuplicateKey],
+ expectedErrorCode: 5356800,
});
reshardingTest.teardown();
diff --git a/jstests/sharding/resharding_prohibited_commands.js b/jstests/sharding/resharding_prohibited_commands.js
index d06a9561d2a..9f7d73b57c3 100644
--- a/jstests/sharding/resharding_prohibited_commands.js
+++ b/jstests/sharding/resharding_prohibited_commands.js
@@ -118,6 +118,7 @@ const waitUntilReshardingInitializedOnDonor = () => {
* @param {Function} config.setup
* @param {AfterReshardingCallback} afterReshardingFn
*/
+
const withReshardingInBackground =
(duringReshardingFn,
{setup = () => {}, expectedErrorCode, afterReshardingFn = () => {}} = {}) => {
@@ -132,22 +133,34 @@ const withReshardingInBackground =
},
duringReshardingFn,
{expectedErrorCode: expectedErrorCode, afterReshardingFn: afterReshardingFn});
-
assertCommandsSucceedAfterReshardingOpFinishes(mongos.getDB(databaseName));
assert.commandWorked(sourceCollection.dropIndex(indexCreatedByTest));
};
// Tests that the prohibited commands work if the resharding operation is aborted.
+let awaitAbort;
withReshardingInBackground(() => {
waitUntilReshardingInitializedOnDonor();
+ assert.neq(null,
+ mongos.getCollection("config.reshardingOperations").findOne({ns: sourceNamespace}));
+ awaitAbort = startParallelShell(funWithArgs(function(sourceNamespace) {
+ db.adminCommand({abortReshardCollection: sourceNamespace});
+ }, sourceNamespace), mongos.port);
+ // Wait for the abort to take effect: the coordinator document in
+ // config.reshardingOperations is either removed or moved to the "aborting" state.
+ assert.soon(() => {
+ const coordinatorDoc =
+ mongos.getCollection("config.reshardingOperations").findOne({ns: sourceNamespace});
- assert.commandWorked(mongos.adminCommand({abortReshardCollection: sourceNamespace}));
+ return coordinatorDoc === null || coordinatorDoc.state === "aborting";
+ });
}, {
expectedErrorCode: ErrorCodes.ReshardCollectionAborted,
});
+awaitAbort();
// Tests that the prohibited commands succeed if the resharding operation succeeds. During the
-// operation it makes sures that the prohibited commands are rejected during the resharding
+// operation it makes sure that the prohibited commands are rejected during the resharding
// operation.
withReshardingInBackground(() => {
waitUntilReshardingInitializedOnDonor();
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index 80635da8379..6586961d4d6 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -1364,8 +1364,15 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
})
.onCompletion([this, executor](Status status) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- reshardingPauseCoordinatorBeforeCompletion.pauseWhileSetAndNotCanceled(
- opCtx.get(), _ctHolder->getStepdownToken());
+ reshardingPauseCoordinatorBeforeCompletion.executeIf(
+ [&](const BSONObj&) {
+ reshardingPauseCoordinatorBeforeCompletion.pauseWhileSetAndNotCanceled(
+ opCtx.get(), _ctHolder->getStepdownToken());
+ },
+ [&](const BSONObj& data) {
+ auto ns = data.getStringField("sourceNamespace");
+ return ns.empty() ? true : ns.toString() == _coordinatorDoc.getSourceNss().ns();
+ });
{
auto lg = stdx::lock_guard(_fulfillmentMutex);