summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Polato <paolo.polato@mongodb.com>2021-05-14 10:15:47 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-21 10:35:12 +0000
commit3394ccd20ee2a77f3526c481799943c7418bff00 (patch)
tree7d5b7ecf42ac3e63a0007f42b7fc34bce3beab46
parent641bdc9e26743d4c792765e3020ba1cd161cf3c2 (diff)
downloadmongo-3394ccd20ee2a77f3526c481799943c7418bff00.tar.gz
SERVER-56307 Allow the donor to enter the critical section when the untransferred mods are within a convergence threshold.
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp53
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.h3
-rw-r--r--src/mongo/db/s/migration_destination_manager.cpp27
-rw-r--r--src/mongo/db/s/sharding_runtime_d_params.idl11
-rw-r--r--src/mongo/db/s/start_chunk_clone_request.h3
5 files changed, 90 insertions, 7 deletions
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index b17f14298d7..ab1ba26f7f2 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -39,12 +39,14 @@
#include "mongo/db/catalog_raii.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/exec/working_set_common.h"
+#include "mongo/db/index/index_access_method.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/migration_source_manager.h"
+#include "mongo/db/s/sharding_runtime_d_params_gen.h"
#include "mongo/db/s/sharding_statistics.h"
#include "mongo/db/s/start_chunk_clone_request.h"
#include "mongo/db/service_context.h"
@@ -339,6 +341,31 @@ Status MigrationChunkClonerSourceLegacy::awaitUntilCriticalSectionIsAppropriate(
return Status::OK();
}
+ bool supportsCriticalSectionDuringCatchUp = false;
+ if (auto featureSupportedField =
+ res[StartChunkCloneRequest::kSupportsCriticalSectionDuringCatchUp]) {
+ if (!featureSupportedField.booleanSafe()) {
+ return {ErrorCodes::Error(5576604),
+ str::stream()
+ << "Illegal value for "
+ << StartChunkCloneRequest::kSupportsCriticalSectionDuringCatchUp};
+ }
+ supportsCriticalSectionDuringCatchUp = true;
+ }
+
+ if (res["state"].String() == "catchup" && supportsCriticalSectionDuringCatchUp) {
+ int64_t estimatedUntransferredModsSize = _deleted.size() * _averageObjectIdSize +
+ _reload.size() * _averageObjectSizeForCloneLocs;
+ auto estimatedUntransferredChunkPercentage =
+ (std::min(_args.getMaxChunkSizeBytes(), estimatedUntransferredModsSize) * 100) /
+ _args.getMaxChunkSizeBytes();
+ if (estimatedUntransferredChunkPercentage < maxCatchUpPercentageBeforeBlockingWrites) {
+ // The recipient is sufficiently caught-up with the writes on the donor.
+ // Block writes, so that it can drain everything.
+ return Status::OK();
+ }
+ }
+
if (res["state"].String() == "fail") {
return {ErrorCodes::OperationFailed,
str::stream() << "Data transfer error: " << res["errmsg"].str()};
@@ -743,18 +770,18 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
// Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
// multi-key index prefixed by shard key cannot be multikey over the shard key fields.
- const IndexDescriptor* idx =
+ const IndexDescriptor* shardKeyIdx =
collection->getIndexCatalog()->findShardKeyPrefixedIndex(opCtx,
_shardKeyPattern.toBSON(),
false); // requireSingleKey
- if (!idx) {
+ if (!shardKeyIdx) {
return {ErrorCodes::IndexNotFound,
str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
<< " in storeCurrentLocs for " << _args.getNss().ns()};
}
// Assume both min and max non-empty, append MinKey's to make them fit chosen index
- const KeyPattern kp(idx->keyPattern());
+ const KeyPattern kp(shardKeyIdx->keyPattern());
BSONObj min = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMinKey(), false));
BSONObj max = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMaxKey(), false));
@@ -763,7 +790,7 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
// being queued and will migrate in the 'transferMods' stage.
auto exec = InternalPlanner::indexScan(opCtx,
collection,
- idx,
+ shardKeyIdx,
min,
max,
BoundInclusion::kIncludeStartKeyOnly,
@@ -818,6 +845,20 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
const uint64_t collectionAverageObjectSize = collection->averageObjectSize(opCtx);
+ uint64_t averageObjectIdSize = 0;
+ const uint64_t defaultObjectIdSize = OID::kOIDSize;
+ if (totalRecs > 0) {
+ const auto indexCatalog = collection->getIndexCatalog();
+ const auto idIdx = indexCatalog->findIdIndex(opCtx);
+ if (!idIdx) {
+ return {ErrorCodes::IndexNotFound,
+ str::stream() << "can't find index '_id' in storeCurrentLocs for "
+ << _args.getNss().ns()};
+ }
+ averageObjectIdSize =
+ indexCatalog->getEntry(idIdx)->accessMethod()->getSpaceUsedBytes(opCtx) / totalRecs;
+ }
+
if (isLargeChunk) {
return {
ErrorCodes::ChunkTooBig,
@@ -830,8 +871,8 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
}
stdx::lock_guard<Latch> lk(_mutex);
- _averageObjectSizeForCloneLocs = collectionAverageObjectSize + 12;
-
+ _averageObjectSizeForCloneLocs = collectionAverageObjectSize + defaultObjectIdSize;
+ _averageObjectIdSize = std::max(averageObjectIdSize, defaultObjectIdSize);
return Status::OK();
}
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
index e5263466c11..de22918f89e 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
@@ -347,6 +347,9 @@ private:
// Indicates whether new requests to track an operation are accepted.
bool _acceptingNewOperationTrackRequests{true};
+ // The estimated average object _id size during the clone phase.
+ uint64_t _averageObjectIdSize{0};
+
// List of _id of documents that were modified that must be re-cloned (xfer mods)
std::list<BSONObj> _reload;
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index bff99cf7029..f8307753044 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -70,6 +70,9 @@
#include "mongo/util/str.h"
namespace mongo {
+
+constexpr StringData StartChunkCloneRequest::kSupportsCriticalSectionDuringCatchUp;
+
namespace {
const auto getMigrationDestinationManager =
@@ -296,6 +299,7 @@ void MigrationDestinationManager::report(BSONObjBuilder& b,
b.append("min", _min);
b.append("max", _max);
b.append("shardKeyPattern", _shardKeyPattern);
+ b.append(StartChunkCloneRequest::kSupportsCriticalSectionDuringCatchUp, true);
b.append("state", stateToString(_state));
@@ -471,6 +475,24 @@ Status MigrationDestinationManager::startCommit(const MigrationSessionId& sessio
stdx::unique_lock<Latch> lock(_mutex);
+ const auto convergenceTimeout =
+ Shard::kDefaultConfigCommandTimeout + Shard::kDefaultConfigCommandTimeout / 4;
+
+ // The donor may have started the commit while the recipient is still busy processing
+ // the last batch of mods sent in the catch up phase. Allow some time for synching up.
+ auto deadline = Date_t::now() + convergenceTimeout;
+
+ while (_state == CATCHUP) {
+ if (stdx::cv_status::timeout ==
+ _stateChangedCV.wait_until(lock, deadline.toSystemTimePoint())) {
+ return {ErrorCodes::CommandFailed,
+ str::stream() << "startCommit timed out waiting for the catch up completion. "
+ << "Sender's session is " << sessionId.toString()
+ << ". Current session is "
+ << (_sessionId ? _sessionId->toString() : "none.")};
+ }
+ }
+
if (_state != STEADY) {
return {ErrorCodes::CommandFailed,
str::stream() << "Migration startCommit attempted when not in STEADY state."
@@ -496,7 +518,9 @@ Status MigrationDestinationManager::startCommit(const MigrationSessionId& sessio
_state = COMMIT_START;
_stateChangedCV.notify_all();
- auto const deadline = Date_t::now() + Seconds(30);
+ // Assigning a timeout slightly higher than the one used for network requests to the config
+ // server. Enough time to retry at least once in case of network failures (SERVER-51397).
+ deadline = Date_t::now() + convergenceTimeout;
while (_sessionId) {
if (stdx::cv_status::timeout ==
_isActiveCV.wait_until(lock, deadline.toSystemTimePoint())) {
@@ -901,6 +925,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* opCtx) {
const auto& mods = res.response;
if (mods["size"].number() == 0) {
+ // There are no more pending modifications to be applied. End the catchup phase.
break;
}
diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl
index ee943867ba9..e536fa5dd25 100644
--- a/src/mongo/db/s/sharding_runtime_d_params.idl
+++ b/src/mongo/db/s/sharding_runtime_d_params.idl
@@ -52,6 +52,17 @@ server_parameters:
gte: 0
default: 20
+ maxCatchUpPercentageBeforeBlockingWrites:
+ description: >-
+ The maximum percentage of untransferred chunk mods at the end of a catch up iteration
+ that may be deferred to the next phase of the migration protocol
+ (where new writes get blocked).
+ set_at: [startup]
+ cpp_vartype: int
+ cpp_varname: maxCatchUpPercentageBeforeBlockingWrites
+ validator: { gte: 0, lte: 100 }
+ default: 10
+
migrateCloneInsertionBatchSize:
description: >-
The maximum number of documents to insert in a single batch during the cloning step of
diff --git a/src/mongo/db/s/start_chunk_clone_request.h b/src/mongo/db/s/start_chunk_clone_request.h
index 40235a8dc0e..751e1781b3b 100644
--- a/src/mongo/db/s/start_chunk_clone_request.h
+++ b/src/mongo/db/s/start_chunk_clone_request.h
@@ -48,6 +48,9 @@ class StatusWith;
*/
class StartChunkCloneRequest {
public:
+ static constexpr auto kSupportsCriticalSectionDuringCatchUp =
+ "supportsCriticalSectionDuringCatchUp"_sd;
+
/**
* Parses the input command and produces a request corresponding to its arguments.
*/