summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTommaso Tocci <tommaso.tocci@mongodb.com>2022-02-04 09:04:29 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-04 09:36:48 +0000
commita22f3c599e1e588cdb5916785f27af7cd5493d18 (patch)
tree474dc548070e77fbcde983ff0e7ee389a6b0793a
parente221ce7f62fc9fa22c11544525f49759eb003c43 (diff)
downloadmongo-a22f3c599e1e588cdb5916785f27af7cd5493d18.tar.gz
SERVER-63207 Speedup very slow move_primary_with_writes.js test
-rw-r--r--jstests/sharding/move_primary_with_writes.js35
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.cpp20
2 files changed, 44 insertions, 11 deletions
diff --git a/jstests/sharding/move_primary_with_writes.js b/jstests/sharding/move_primary_with_writes.js
index b31647b5789..09783f8dc1d 100644
--- a/jstests/sharding/move_primary_with_writes.js
+++ b/jstests/sharding/move_primary_with_writes.js
@@ -4,7 +4,7 @@
(function() {
'use strict';
-load('jstests/sharding/libs/sharded_transactions_helpers.js');
+load('jstests/libs/fail_point_util.js');
let st = new ShardingTest({
mongos: 2,
@@ -177,15 +177,18 @@ function buildDDLCommands(collName) {
}
function testMovePrimary(failpoint, fromShard, toShard, db, shouldFail, sharded) {
+ jsTestLog("Testing move primary with FP: " + failpoint + " shouldFail: " + shouldFail +
+ " sharded: " + sharded);
+
let codeToRunInParallelShell = '{ db.getSiblingDB("admin").runCommand({movePrimary: "' +
dbName + '", to: "' + toShard.name + '"}); }';
- assert.commandWorked(fromShard.adminCommand({configureFailPoint: failpoint, mode: 'alwaysOn'}));
+ let fp = configureFailPoint(fromShard, failpoint);
let awaitShell = startParallelShell(codeToRunInParallelShell, st.s.port);
jsTestLog("Waiting for failpoint " + failpoint);
- waitForFailpoint("Hit " + failpoint, 1);
+ fp.wait();
clearRawMongoProgramOutput();
// Test DML
@@ -216,21 +219,24 @@ function testMovePrimary(failpoint, fromShard, toShard, db, shouldFail, sharded)
}
});
- assert.commandWorked(fromShard.adminCommand({configureFailPoint: failpoint, mode: 'off'}));
+ fp.off();
awaitShell();
}
function testMovePrimaryDDL(failpoint, fromShard, toShard, db, shouldFail, sharded) {
+ jsTest.log("Testing move primary DDL with FP: " + failpoint + " shouldFail: " + shouldFail +
+ " sharded: " + sharded);
+
let codeToRunInParallelShell = '{ db.getSiblingDB("admin").runCommand({movePrimary: "' +
dbName + '", to: "' + toShard.name + '"}); }';
- assert.commandWorked(fromShard.adminCommand({configureFailPoint: failpoint, mode: 'alwaysOn'}));
+ let fp = configureFailPoint(fromShard, failpoint);
let awaitShell = startParallelShell(codeToRunInParallelShell, st.s.port);
jsTestLog("Waiting for failpoint " + failpoint);
- waitForFailpoint("Hit " + failpoint, 1);
+ fp.wait();
clearRawMongoProgramOutput();
let collName;
@@ -253,11 +259,24 @@ function testMovePrimaryDDL(failpoint, fromShard, toShard, db, shouldFail, shard
}
});
- assert.commandWorked(fromShard.adminCommand({configureFailPoint: failpoint, mode: 'off'}));
+ fp.off();
awaitShell();
}
+// Reduce the DDL lock timeout to half a second to speed up testing commands that are expected to
+// fail with a LockBusy error
+let overrideDDLLockTimeoutFPs = [];
+st.forEachConnection(shard => {
+ try {
+ overrideDDLLockTimeoutFPs.push(
+ configureFailPoint(shard, "overrideDDLLockTimeout", {'timeoutMillisecs': 500}));
+ } catch (e) {
+ // The failpoint was added in 5.3, so multiversion suites will fail to set it
+ jsTestLog("Failed to override DDL lock timeout: " + e);
+ }
+});
+
createCollections();
let fromShard = st.getPrimaryShard(dbName);
let toShard = st.getOther(fromShard);
@@ -284,5 +303,7 @@ fromShard = st.getPrimaryShard(dbName);
toShard = st.getOther(fromShard);
testMovePrimary('hangInCleanStaleDataStage', fromShard, toShard, st.s.getDB(dbName), false, false);
+overrideDDLLockTimeoutFPs.forEach(fp => fp.off());
+
st.stop();
})();
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp
index 15e875918ac..e6da6be570a 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp
@@ -50,6 +50,7 @@
namespace mongo {
MONGO_FAIL_POINT_DEFINE(hangBeforeRunningCoordinatorInstance);
+MONGO_FAIL_POINT_DEFINE(overrideDDLLockTimeout);
namespace {
@@ -143,12 +144,23 @@ ExecutorFuture<void> ShardingDDLCoordinator::_acquireLockAsync(
const auto coorName = DDLCoordinatorType_serializer(_coordId.getOperationType());
- auto distLock = distLockManager->lockDirectLocally(
- opCtx, resource, DistLockManager::kDefaultLockTimeout);
+ const auto lockTimeOut = [&]() -> Milliseconds {
+ if (auto sfp = overrideDDLLockTimeout.scoped(); MONGO_unlikely(sfp.isActive())) {
+ if (auto timeoutElem = sfp.getData()["timeoutMillisecs"]; timeoutElem.ok()) {
+ const auto timeoutMillisecs = Milliseconds(timeoutElem.safeNumberLong());
+ LOGV2(6320700,
+ "Overriding DDL lock timeout",
+ "timeout"_attr = timeoutMillisecs);
+ return timeoutMillisecs;
+ }
+ }
+ return DistLockManager::kDefaultLockTimeout;
+ }();
+
+ auto distLock = distLockManager->lockDirectLocally(opCtx, resource, lockTimeOut);
_scopedLocks.emplace(std::move(distLock));
- uassertStatusOK(distLockManager->lockDirect(
- opCtx, resource, coorName, DistLockManager::kDefaultLockTimeout));
+ uassertStatusOK(distLockManager->lockDirect(opCtx, resource, coorName, lockTimeOut));
})
.until([this](Status status) { return (!_recoveredFromDisk) || status.isOK(); })
.withBackoffBetweenIterations(kExponentialBackoff)