diff options
Diffstat (limited to 'src/mongo/db/repl/tenant_collection_cloner.cpp')
-rw-r--r-- | src/mongo/db/repl/tenant_collection_cloner.cpp | 46 |
1 file changed, 45 insertions, 1 deletion
diff --git a/src/mongo/db/repl/tenant_collection_cloner.cpp b/src/mongo/db/repl/tenant_collection_cloner.cpp index c36dbc7a7cf..9a724df617a 100644 --- a/src/mongo/db/repl/tenant_collection_cloner.cpp +++ b/src/mongo/db/repl/tenant_collection_cloner.cpp @@ -82,6 +82,10 @@ TenantCollectionCloner::TenantCollectionCloner(const NamespaceString& sourceNss, _sourceDbAndUuid(NamespaceString("UNINITIALIZED")), _collectionClonerBatchSize(collectionClonerBatchSize), _countStage("count", this, &TenantCollectionCloner::countStage), + _checkIfDonorCollectionIsEmptyStage( + "checkIfDonorCollectionIsEmpty", + this, + &TenantCollectionCloner::checkIfDonorCollectionIsEmptyStage), _listIndexesStage("listIndexes", this, &TenantCollectionCloner::listIndexesStage), _createCollectionStage( "createCollection", this, &TenantCollectionCloner::createCollectionStage), @@ -115,7 +119,11 @@ TenantCollectionCloner::TenantCollectionCloner(const NamespaceString& sourceNss, } BaseCloner::ClonerStages TenantCollectionCloner::getStages() { - return {&_countStage, &_listIndexesStage, &_createCollectionStage, &_queryStage}; + return {&_countStage, + &_checkIfDonorCollectionIsEmptyStage, /* Listed ahead of listIndexes on purpose: the emptiness probe has to be taken before indexes are listed. NOTE(review): this assumes stages run in the order listed here; confirm in BaseCloner. */ + &_listIndexesStage, + &_createCollectionStage, + &_queryStage}; } void TenantCollectionCloner::preStage() { @@ -175,6 +183,34 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::countStage() { return kContinueNormally; } +// This avoids a race where an index may be created and data inserted after we do listIndexes. +// That would result in doing a createIndexes on a non-empty collection during oplog application. +// Instead, if the collection is empty before listIndexes, we do not clone the data -- it will be +// added during oplog application. +// +// Note we cannot simply use the count() above, because that checks metadata which may not be 100% +// accurate.
+BaseCloner::AfterStageBehavior TenantCollectionCloner::checkIfDonorCollectionIsEmptyStage() { + /* Probes the donor with a limit-1 query at majority read concern. Only the existence of a document matters, so the projection asks for _id alone. */ auto fieldsToReturn = BSON("_id" << 1); + auto cursor = + getClient()->query(_sourceDbAndUuid, + {} /* Query */, + 1 /* limit */, + 0 /* skip */, + &fieldsToReturn, + QueryOption_SecondaryOk, + 0 /* batchSize */, + ReadConcernArgs(ReadConcernLevel::kMajorityReadConcern).toBSONInner()); + /* No document available means the collection is recorded as empty; queryStage returns early when this flag is set. NOTE(review): cursor is dereferenced without a null check; confirm query() cannot hand back a null cursor on this path. */ _donorCollectionWasEmptyBeforeListIndexes = !cursor->more(); + LOGV2_DEBUG(5368500, + 1, + "Checked if donor collection was empty", + "wasEmpty"_attr = _donorCollectionWasEmptyBeforeListIndexes, + "namespace"_attr = _sourceNss.ns(), + "tenantId"_attr = _tenantId); + return kContinueNormally; +} + BaseCloner::AfterStageBehavior TenantCollectionCloner::listIndexesStage() { // This will be set after a successful listCollections command. _operationTime = Timestamp(); @@ -341,6 +377,14 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::createCollectionStage() { } BaseCloner::AfterStageBehavior TenantCollectionCloner::queryStage() { + if (_donorCollectionWasEmptyBeforeListIndexes) { + LOGV2_WARNING(5368501, + "Collection was empty at clone time.", + "namespace"_attr = _sourceNss, + "tenantId"_attr = _tenantId); + return kContinueNormally; + } + // Sets up tracking the lastVisibleOpTime from response metadata. auto requestMetadataWriter = [this](OperationContext* opCtx, BSONObjBuilder* metadataBob) -> Status { |