diff options
author | Paolo Polato <paolo.polato@mongodb.com> | 2021-06-18 07:49:12 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-21 12:32:14 +0000 |
commit | dc9ec40e350511d6f74b299cbf52b38609a5c947 (patch) | |
tree | 07e74ed9ecf21a4e596e3ce2de6bb037c8f1c511 | |
parent | d2540340cdd788dc8a54a0030166c0baaa3c26a9 (diff) | |
download | mongo-dc9ec40e350511d6f74b299cbf52b38609a5c947.tar.gz |
SERVER-56307 fix the convergence criteria to end the catchup phase
(cherry picked from commit 49209fc34e19bbb15405b0927e38ff3e7d9e9dc5)
4 files changed, 31 insertions, 8 deletions
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index 0d59d098615..042f6a4269a 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -619,6 +619,7 @@ void MigrationChunkClonerSourceLegacy::_addToTransferModsQueue( case 'd': { stdx::lock_guard<Latch> sl(_mutex); _deleted.push_back(idObj); + ++_untransferredDeletesCounter; _memoryUsed += idObj.firstElement().size() + 5; } break; @@ -626,6 +627,7 @@ void MigrationChunkClonerSourceLegacy::_addToTransferModsQueue( case 'u': { stdx::lock_guard<Latch> sl(_mutex); _reload.push_back(idObj); + ++_untransferredUpsertsCounter; _memoryUsed += idObj.firstElement().size() + 5; } break; @@ -827,7 +829,9 @@ Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx, // Put back remaining ids we didn't consume stdx::unique_lock<Latch> lk(_mutex); _deleted.splice(_deleted.cbegin(), deleteList); + _untransferredDeletesCounter = _deleted.size(); _reload.splice(_reload.cbegin(), updateList); + _untransferredUpsertsCounter = _reload.size(); return Status::OK(); } @@ -839,7 +843,9 @@ void MigrationChunkClonerSourceLegacy::_cleanup(OperationContext* opCtx) { _drainAllOutstandingOperationTrackRequests(lk); _reload.clear(); + _untransferredUpsertsCounter = 0; _deleted.clear(); + _untransferredDeletesCounter = 0; } StatusWith<BSONObj> MigrationChunkClonerSourceLegacy::_callRecipient(const BSONObj& cmdObj) { @@ -1136,14 +1142,24 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC } if (res["state"].String() == "catchup" && supportsCriticalSectionDuringCatchUp) { - int64_t estimatedUntransferredModsSize = _deleted.size() * _averageObjectIdSize + - _reload.size() * _averageObjectSizeForCloneLocs; + int64_t estimatedUntransferredModsSize = + _untransferredDeletesCounter * _averageObjectIdSize + + _untransferredUpsertsCounter * _averageObjectSizeForCloneLocs; auto estimatedUntransferredChunkPercentage = (std::min(_args.getMaxChunkSizeBytes(), estimatedUntransferredModsSize) * 100) / _args.getMaxChunkSizeBytes(); if (estimatedUntransferredChunkPercentage < maxCatchUpPercentageBeforeBlockingWrites) { // The recipient is sufficiently caught-up with the writes on the donor. // Block writes, so that it can drain everything. + LOGV2_DEBUG(5630700, + 1, + "moveChunk data transfer within threshold to allow write blocking", + "_untransferredUpsertsCounter"_attr = _untransferredUpsertsCounter, + "_untransferredDeletesCounter"_attr = _untransferredDeletesCounter, + "_averageObjectSizeForCloneLocs"_attr = _averageObjectSizeForCloneLocs, + "_averageObjectIdSize"_attr = _averageObjectIdSize, + "maxChunksSizeBytes"_attr = _args.getMaxChunkSizeBytes(), + "_sessionId"_attr = _sessionId.toString()); return Status::OK(); } } diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h index d95d360b0af..94cf4408d4f 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h @@ -375,10 +375,16 @@ private: // List of _id of documents that were modified that must be re-cloned (xfer mods) std::list<BSONObj> _reload; + // Amount of upsert xfer mods that have not yet reached the recipient. + size_t _untransferredUpsertsCounter{0}; + // List of _id of documents that were deleted during clone that should be deleted later (xfer // mods) std::list<BSONObj> _deleted; + // Amount of delete xfer mods that have not yet reached the recipient. + size_t _untransferredDeletesCounter{0}; + // Total bytes in _reload + _deleted (xfer mods) uint64_t _memoryUsed{0}; diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 3066f645590..c55e1e6fec9 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -291,7 +291,7 @@ MigrationDestinationManager::State MigrationDestinationManager::getState() const return _state; } -void MigrationDestinationManager::setState(State newState) { +void MigrationDestinationManager::_setState(State newState) { stdx::lock_guard<Latch> sl(_mutex); _state = newState; _stateChangedCV.notify_all(); @@ -1063,7 +1063,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { repl::OpTime lastOpApplied; { // 3. Initial bulk clone - setState(CLONE); + _setState(CLONE); _sessionMigration->start(opCtx->getServiceContext()); @@ -1177,7 +1177,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { { // 4. Do bulk of mods - setState(CATCHUP); + _setState(CATCHUP); while (true) { auto res = uassertStatusOKWithContext( @@ -1272,7 +1272,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { { // 5. Wait for commit - setState(STEADY); + _setState(STEADY); bool transferAfterCommit = false; while (getState() == STEADY || getState() == COMMIT_START) { @@ -1344,7 +1344,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { return; } - setState(DONE); + _setState(DONE); timing.done(6); migrateThreadHangAtStep6.pauseWhileSet(); diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index 22204e1d090..1fb683ecfa4 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -85,7 +85,6 @@ public: static MigrationDestinationManager* get(OperationContext* opCtx); State getState() const; - void setState(State newState); /** * Checks whether the MigrationDestinationManager is currently handling a migration. @@ -158,6 +157,8 @@ private: void _setStateFail(StringData msg); void _setStateFailWarn(StringData msg); + void _setState(State newState); + /** * Thread which drives the migration apply process on the recipient side. */ |