diff options
author | Paolo Polato <paolo.polato@mongodb.com> | 2021-06-18 07:49:12 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-21 15:14:59 +0000 |
commit | c855011857e09e31498acf8a45fab630589aaf7c (patch) | |
tree | dfd4a902e2d94ad3d48bca203c1fb953c090dc6b | |
parent | a3caa74ad14fbe1bbbb68382548393a3db29fdb1 (diff) | |
download | mongo-c855011857e09e31498acf8a45fab630589aaf7c.tar.gz |
SERVER-56307 fix the convergence criteria to end the catchup phase
(cherry picked from commit 49209fc34e19bbb15405b0927e38ff3e7d9e9dc5)
4 files changed, 31 insertions, 8 deletions
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index b1ac46654ff..81e26da2d8a 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -535,6 +535,7 @@ void MigrationChunkClonerSourceLegacy::_addToTransferModsQueue( case 'd': { stdx::lock_guard<Latch> sl(_mutex); _deleted.push_back(idObj); + ++_untransferredDeletesCounter; _memoryUsed += idObj.firstElement().size() + 5; } break; @@ -542,6 +543,7 @@ void MigrationChunkClonerSourceLegacy::_addToTransferModsQueue( case 'u': { stdx::lock_guard<Latch> sl(_mutex); _reload.push_back(idObj); + ++_untransferredUpsertsCounter; _memoryUsed += idObj.firstElement().size() + 5; } break; @@ -746,7 +748,9 @@ Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx, // Put back remaining ids we didn't consume stdx::unique_lock<Latch> lk(_mutex); _deleted.splice(_deleted.cbegin(), deleteList); + _untransferredDeletesCounter = _deleted.size(); _reload.splice(_reload.cbegin(), updateList); + _untransferredUpsertsCounter = _reload.size(); return Status::OK(); } @@ -758,7 +762,9 @@ void MigrationChunkClonerSourceLegacy::_cleanup(OperationContext* opCtx) { _drainAllOutstandingOperationTrackRequests(lk); _reload.clear(); + _untransferredUpsertsCounter = 0; _deleted.clear(); + _untransferredDeletesCounter = 0; } StatusWith<BSONObj> MigrationChunkClonerSourceLegacy::_callRecipient(const BSONObj& cmdObj) { @@ -1050,14 +1056,24 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC } if (res["state"].String() == "catchup" && supportsCriticalSectionDuringCatchUp) { - int64_t estimatedUntransferredModsSize = _deleted.size() * _averageObjectIdSize + - _reload.size() * _averageObjectSizeForCloneLocs; + int64_t estimatedUntransferredModsSize = + _untransferredDeletesCounter * _averageObjectIdSize + + _untransferredUpsertsCounter * _averageObjectSizeForCloneLocs; auto estimatedUntransferredChunkPercentage = (std::min(_args.getMaxChunkSizeBytes(), estimatedUntransferredModsSize) * 100) / _args.getMaxChunkSizeBytes(); if (estimatedUntransferredChunkPercentage < maxCatchUpPercentageBeforeBlockingWrites) { // The recipient is sufficiently caught-up with the writes on the donor. // Block writes, so that it can drain everything. + LOGV2_DEBUG(5630700, + 1, + "moveChunk data transfer within threshold to allow write blocking", + "_untransferredUpsertsCounter"_attr = _untransferredUpsertsCounter, + "_untransferredDeletesCounter"_attr = _untransferredDeletesCounter, + "_averageObjectSizeForCloneLocs"_attr = _averageObjectSizeForCloneLocs, + "_averageObjectIdSize"_attr = _averageObjectIdSize, + "maxChunksSizeBytes"_attr = _args.getMaxChunkSizeBytes(), + "_sessionId"_attr = _sessionId.toString()); return Status::OK(); } } diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h index f41127afad1..7bc9b80df0b 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h @@ -373,10 +373,16 @@ private: // List of _id of documents that were modified that must be re-cloned (xfer mods) std::list<BSONObj> _reload; + // Amount of upsert xfer mods that have not yet reached the recipient. + size_t _untransferredUpsertsCounter{0}; + // List of _id of documents that were deleted during clone that should be deleted later (xfer // mods) std::list<BSONObj> _deleted; + // Amount of delete xfer mods that have not yet reached the recipient. + size_t _untransferredDeletesCounter{0}; + // Total bytes in _reload + _deleted (xfer mods) uint64_t _memoryUsed{0}; diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 877cc863702..9e788b4ec40 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -286,7 +286,7 @@ MigrationDestinationManager::State MigrationDestinationManager::getState() const return _state; } -void MigrationDestinationManager::setState(State newState) { +void MigrationDestinationManager::_setState(State newState) { stdx::lock_guard<Latch> sl(_mutex); _state = newState; _stateChangedCV.notify_all(); @@ -1065,7 +1065,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { repl::OpTime lastOpApplied; { // 3. Initial bulk clone - setState(CLONE); + _setState(CLONE); _sessionMigration->start(opCtx->getServiceContext()); @@ -1175,7 +1175,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { { // 4. Do bulk of mods - setState(CATCHUP); + _setState(CATCHUP); while (true) { auto res = uassertStatusOKWithContext( @@ -1272,7 +1272,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { { // 5. Wait for commit - setState(STEADY); + _setState(STEADY); bool transferAfterCommit = false; while (getState() == STEADY || getState() == COMMIT_START) { @@ -1341,7 +1341,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { return; } - setState(DONE); + _setState(DONE); timing.done(6); migrateThreadHangAtStep6.pauseWhileSet(); diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index b90a56f52f1..5559b75ace9 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -85,7 +85,6 @@ public: static MigrationDestinationManager* get(OperationContext* opCtx); State getState() const; - void setState(State newState); /** * Checks whether the MigrationDestinationManager is currently handling a migration. @@ -181,6 +180,8 @@ private: void _setStateFail(StringData msg); void _setStateFailWarn(StringData msg); + void _setState(State newState); + /** * Thread which drives the migration apply process on the recipient side. */ |