summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Broadstone <mbroadst@mongodb.com> 2022-10-24 13:42:47 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-10-24 14:58:15 +0000
commit: 88e9f23bf6729409b098b6a9176fc9dd54dbd28d (patch)
tree: 813f5023ecd6c41fb34b81b9f46df0e0b46985d9
parent: 77779ea639eae6988e85609c036f99d283a854a8 (diff)
download: mongo-88e9f23bf6729409b098b6a9176fc9dd54dbd28d.tar.gz
SERVER-70571 Set reasonable value for shard split timeout
-rw-r--r-- buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml 2
-rw-r--r-- jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js 4
-rw-r--r-- jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js 4
-rw-r--r-- src/mongo/db/repl/repl_server_parameters.idl 5
-rw-r--r-- src/mongo/db/serverless/shard_split_donor_service.cpp 8
5 files changed, 16 insertions, 7 deletions
diff --git a/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml
index 9cf6b5b61fd..bb6c5699e2f 100644
--- a/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml
@@ -221,6 +221,8 @@ executor:
blockTimeMS: 250
shardSplitGarbageCollectionDelayMS: 1
ttlMonitorSleepSecs: 1
+ # Unclean shutdown may cause blockTimestamp catch up to be very long
+ shardSplitTimeoutMS: 60000
auth: ''
keyFile: *keyFile
num_nodes_per_replica_set: 3
diff --git a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js
index bbd9138bb70..409b2ecdcb4 100644
--- a/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js
+++ b/jstests/serverless/shard_split_concurrent_reads_on_donor_blocking.js
@@ -65,7 +65,9 @@ const tenantId = "tenantId";
const test = new ShardSplitTest({
recipientTagName: "recipientTag",
recipientSetName: "recipientSet",
- quickGarbageCollection: true
+ quickGarbageCollection: true,
+ // Increase timeout because blocking in the critical section contributes to operation latency.
+ nodeOptions: {setParameter: {shardSplitTimeoutMS: 100000}}
});
test.addRecipientNodes();
diff --git a/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js
index 01de40772a1..b6e53662901 100644
--- a/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js
+++ b/jstests/serverless/shard_split_concurrent_writes_on_donor_blocking.js
@@ -29,7 +29,9 @@ const tenantMigrationTest = new ShardSplitTest({
recipientSetName,
quickGarbageCollection: true,
allowStaleReadsOnDonor: true,
- initiateWithShortElectionTimeout: true
+ initiateWithShortElectionTimeout: true,
+ // Increase timeout because blocking in the critical section contributes to operation latency.
+ nodeOptions: {setParameter: {shardSplitTimeoutMS: 100000}}
});
const donorPrimary = tenantMigrationTest.getDonorPrimary();
diff --git a/src/mongo/db/repl/repl_server_parameters.idl b/src/mongo/db/repl/repl_server_parameters.idl
index 65b87404f7e..14637377c6c 100644
--- a/src/mongo/db/repl/repl_server_parameters.idl
+++ b/src/mongo/db/repl/repl_server_parameters.idl
@@ -533,14 +533,13 @@ server_parameters:
validator:
gte: 1
-
shardSplitTimeoutMS:
description: >-
Period of time, in milliseconds, after which a shard split should be interrupted.
set_at: [ startup, runtime ]
cpp_vartype: AtomicWord<int>
cpp_varname: shardSplitTimeoutMS
- default: 3600000
+ default: 10000
validator:
gte: 1
@@ -720,4 +719,4 @@ feature_flags:
allow kDowngradingFromLatestToLastLTS -> kUpgrading -> kUpgraded path.
cpp_varname: feature_flags::gDowngradingToUpgrading
default: false
- \ No newline at end of file
+
diff --git a/src/mongo/db/serverless/shard_split_donor_service.cpp b/src/mongo/db/serverless/shard_split_donor_service.cpp
index cabaeb0d722..80697cbe0f4 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service.cpp
@@ -1039,8 +1039,7 @@ ShardSplitDonorService::DonorStateMachine::_handleErrorOrEnterAbortedState(
ON_BLOCK_EXIT([&] {
stdx::lock_guard<Latch> lg(_mutex);
if (_abortSource) {
- // Cancel source to ensure all child threads (RSM monitor, etc)
- // terminate.
+ // Cancel source to ensure all child threads (RSM monitor, etc) terminate.
_abortSource->cancel();
}
});
@@ -1049,6 +1048,11 @@ ShardSplitDonorService::DonorStateMachine::_handleErrorOrEnterAbortedState(
stdx::lock_guard<Latch> lg(_mutex);
if (isAbortedDocumentPersistent(lg, _stateDoc)) {
// The document is already in aborted state. No need to write it.
+ LOGV2(8423376,
+ "Shard split already aborted.",
+ "id"_attr = _migrationId,
+ "abortReason"_attr = _abortReason.value());
+
return ExecutorFuture(**executor,
DurableState{ShardSplitDonorStateEnum::kAborted, _abortReason});
}