diff options
author | Matt Broadstone <mbroadst@mongodb.com> | 2022-10-24 13:42:47 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-24 14:58:15 +0000 |
commit | 88e9f23bf6729409b098b6a9176fc9dd54dbd28d (patch) | |
tree | 813f5023ecd6c41fb34b81b9f46df0e0b46985d9 | |
parent | 77779ea639eae6988e85609c036f99d283a854a8 (diff) | |
download | mongo-88e9f23bf6729409b098b6a9176fc9dd54dbd28d.tar.gz |
SERVER-70571 Set reasonable value for shard split timeout
5 files changed, 16 insertions, 7 deletions
diff --git a/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml index 9cf6b5b61fd..bb6c5699e2f 100644 --- a/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml @@ -221,6 +221,8 @@ executor: blockTimeMS: 250 shardSplitGarbageCollectionDelayMS: 1 ttlMonitorSleepSecs: 1 + # Unclean shutdown may cause blockTimestamp catch up to be very long + shardSplitTimeoutMS: 60000 auth: '' keyFile: *keyFile num_nodes_per_replica_set: 3 diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js index bbd9138bb70..409b2ecdcb4 100644 --- a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js +++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js @@ -65,7 +65,9 @@ const tenantId = "tenantId"; const test = new ShardSplitTest({ recipientTagName: "recipientTag", recipientSetName: "recipientSet", - quickGarbageCollection: true + quickGarbageCollection: true, + // Increase timeout because blocking in the critical section contributes to operation latency. + nodeOptions: {setParameter: {shardSplitTimeoutMS: 100000}} }); test.addRecipientNodes(); diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js index 01de40772a1..b6e53662901 100644 --- a/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js +++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js @@ -29,7 +29,9 @@ const tenantMigrationTest = new ShardSplitTest({ recipientSetName, quickGarbageCollection: true, allowStaleReadsOnDonor: true, - initiateWithShortElectionTimeout: true + initiateWithShortElectionTimeout: true, + // Increase timeout because blocking in the critical section contributes to operation latency. + nodeOptions: {setParameter: {shardSplitTimeoutMS: 100000}} }); const donorPrimary = tenantMigrationTest.getDonorPrimary(); diff --git a/src/mongo/db/repl/repl_server_parameters.idl b/src/mongo/db/repl/repl_server_parameters.idl index 65b87404f7e..14637377c6c 100644 --- a/src/mongo/db/repl/repl_server_parameters.idl +++ b/src/mongo/db/repl/repl_server_parameters.idl @@ -533,14 +533,13 @@ server_parameters: validator: gte: 1 - shardSplitTimeoutMS: description: >- Period of time, in milliseconds, after which a shard split should be interrupted. set_at: [ startup, runtime ] cpp_vartype: AtomicWord<int> cpp_varname: shardSplitTimeoutMS - default: 3600000 + default: 10000 validator: gte: 1 @@ -720,4 +719,4 @@ feature_flags: allow kDowngradingFromLatestToLastLTS -> kUpgrading -> kUpgraded path. cpp_varname: feature_flags::gDowngradingToUpgrading default: false -
\ No newline at end of file + diff --git a/src/mongo/db/serverless/shard_split_donor_service.cpp b/src/mongo/db/serverless/shard_split_donor_service.cpp index cabaeb0d722..80697cbe0f4 100644 --- a/src/mongo/db/serverless/shard_split_donor_service.cpp +++ b/src/mongo/db/serverless/shard_split_donor_service.cpp @@ -1039,8 +1039,7 @@ ShardSplitDonorService::DonorStateMachine::_handleErrorOrEnterAbortedState( ON_BLOCK_EXIT([&] { stdx::lock_guard<Latch> lg(_mutex); if (_abortSource) { - // Cancel source to ensure all child threads (RSM monitor, etc) - // terminate. + // Cancel source to ensure all child threads (RSM monitor, etc) terminate. _abortSource->cancel(); } }); @@ -1049,6 +1048,11 @@ ShardSplitDonorService::DonorStateMachine::_handleErrorOrEnterAbortedState( stdx::lock_guard<Latch> lg(_mutex); if (isAbortedDocumentPersistent(lg, _stateDoc)) { // The document is already in aborted state. No need to write it. + LOGV2(8423376, + "Shard split already aborted.", + "id"_attr = _migrationId, + "abortReason"_attr = _abortReason.value()); + return ExecutorFuture(**executor, DurableState{ShardSplitDonorStateEnum::kAborted, _abortReason}); } |