diff options
author | Gregory Noma <gregory.noma@gmail.com> | 2020-09-30 10:26:06 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-30 14:58:26 +0000 |
commit | cadb990e1ac4229bcf419f47c82656b6458faf8a (patch) | |
tree | c0d6f1c5ddb2b9f0144c449c50d4fde047bf7b37 | |
parent | 28186581b3fddfc8d24d47a22e15aa05e0881e89 (diff) | |
download | mongo-cadb990e1ac4229bcf419f47c82656b6458faf8a.tar.gz |
SERVER-51008 Enable resuming index builds in the drain writes phase for rollback
16 files changed, 173 insertions, 195 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_ese.yml b/buildscripts/resmokeconfig/suites/replica_sets_ese.yml index 7142fd7d1cd..14cca4dd598 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_ese.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_ese.yml @@ -8,6 +8,9 @@ selector: roots: - jstests/replsets/*.js exclude_files: + # TODO (SERVER-50479): Enable resumable index build rollback tests once resumable index builds + # support ESE. + - jstests/replsets/rollback_resumable_index_build_*.js executor: config: diff --git a/buildscripts/resmokeconfig/suites/replica_sets_ese_gcm.yml b/buildscripts/resmokeconfig/suites/replica_sets_ese_gcm.yml index 012e539b648..d1d55b2d9f4 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_ese_gcm.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_ese_gcm.yml @@ -8,6 +8,9 @@ selector: roots: - jstests/replsets/*.js exclude_files: + # TODO (SERVER-50479): Enable resumable index build rollback tests once resumable index builds + # support ESE. + - jstests/replsets/rollback_resumable_index_build_*.js executor: config: diff --git a/jstests/noPassthrough/libs/index_build.js b/jstests/noPassthrough/libs/index_build.js index 20949053223..9310d4924d8 100644 --- a/jstests/noPassthrough/libs/index_build.js +++ b/jstests/noPassthrough/libs/index_build.js @@ -474,6 +474,43 @@ const ResumableIndexBuildTest = class { } } + static checkResume(conn, buildUUIDs, expectedResumePhases, resumeChecks) { + for (let i = 0; i < buildUUIDs.length; i++) { + // Ensure that the resume info contains the correct phase to resume from. + checkLog.containsJson(conn, 4841700, { + buildUUID: function(uuid) { + return uuid["uuid"]["$uuid"] === buildUUIDs[i]; + }, + phase: expectedResumePhases[expectedResumePhases.length === 1 ? 0 : i] + }); + + const resumeCheck = resumeChecks[resumeChecks.length === 1 ? 
0 : i]; + + if (resumeCheck.numScannedAferResume) { + // Ensure that the resumed index build resumed the collection scan from the correct + // location. + checkLog.containsJson(conn, 20391, { + buildUUID: function(uuid) { + return uuid["uuid"]["$uuid"] === buildUUIDs[i]; + }, + totalRecords: resumeCheck.numScannedAferResume + }); + } else if (resumeCheck.skippedPhaseLogID) { + // Ensure that the resumed index build does not perform a phase that it already + // completed before being interrupted for shutdown. + assert(!checkLog.checkContainsOnceJson(conn, resumeCheck.skippedPhaseLogID, { + buildUUID: function(uuid) { + return uuid["uuid"]["$uuid"] === buildUUIDs[i]; + } + }), + "Found log " + resumeCheck.skippedPhaseLogID + " for index build " + + buildUUIDs[i] + " when this phase should not have run after resuming"); + } else { + assert(false, "Must specify one of numScannedAferResume and skippedPhaseLogID"); + } + } + } + /** * Runs a resumable index build test on the provided replica set and namespace. * @@ -500,7 +537,8 @@ const ResumableIndexBuildTest = class { * 'skippedPhaseLogID'. The former is used to verify that the index build scanned the * expected number of documents in the collection scan after resuming. The latter is used for * phases which do not perform a collection scan after resuming, to verify that the index - * index build did not resume from an earlier phase than expected. + * index build did not resume from an earlier phase than expected. The log message must + * contain the buildUUID attribute. * * 'sideWries' is an array of documents inserted during the initialization phase so that they * are inserted into the side writes table and processed during the drain writes phase. @@ -554,40 +592,8 @@ const ResumableIndexBuildTest = class { awaitCreateIndex(); } - for (let i = 0; i < buildUUIDs.length; i++) { - // Ensure that the resume info contains the correct phase to resume from. 
- checkLog.containsJson(primary, 4841700, { - buildUUID: function(uuid) { - return uuid["uuid"]["$uuid"] === buildUUIDs[i]; - }, - phase: expectedResumePhases[expectedResumePhases.length === 1 ? 0 : i] - }); - - const resumeCheck = resumeChecks[resumeChecks.length === 1 ? 0 : i]; - - if (resumeCheck.numScannedAferResume) { - // Ensure that the resumed index build resumed the collection scan from the correct - // location. - checkLog.containsJson(primary, 20391, { - buildUUID: function(uuid) { - return uuid["uuid"]["$uuid"] === buildUUIDs[i]; - }, - totalRecords: resumeCheck.numScannedAferResume - }); - } else if (resumeCheck.skippedPhaseLogID) { - // Ensure that the resumed index build does not perform a phase that it already - // completed before being interrupted for shutdown. - assert(!checkLog.checkContainsOnceJson(primary, resumeCheck.skippedPhaseLogID, { - buildUUID: function(uuid) { - return uuid["uuid"]["$uuid"] === buildUUIDs[i]; - } - }), - "Found log " + resumeCheck.skippedPhaseLogID + " for index build " + - buildUUIDs[i] + " when this phase should not have run after resuming"); - } else { - assert(false, "Must specify one of numScannedAferResume and skippedPhaseLogID"); - } - } + ResumableIndexBuildTest.checkResume( + primary, buildUUIDs, expectedResumePhases, resumeChecks); ResumableIndexBuildTest.checkIndexes( rst, dbName, collName, indexNames, postIndexBuildInserts); diff --git a/jstests/replsets/libs/rollback_resumable_index_build.js b/jstests/replsets/libs/rollback_resumable_index_build.js index e0b201fb45e..92b94fbd3e9 100644 --- a/jstests/replsets/libs/rollback_resumable_index_build.js +++ b/jstests/replsets/libs/rollback_resumable_index_build.js @@ -3,14 +3,33 @@ load('jstests/replsets/libs/rollback_test.js'); const RollbackResumableIndexBuildTest = class { /** - * Runs a resumable index build rollback test. The phase that the index build will be in when - * rollback starts is specified by rollbackStartFailPointName. 
The phase that the index build - will resume from after rollback completes is specified by rollbackEndFailPointName. If - either of these points is in the drain writes phase, documents to insert into the side - writes table must be specified by sideWrites. locksYieldedFailPointName specifies a point - during the index build between rollbackEndFailPointName and rollbackStartFailPointName at - which its locks are yielded. Documents specified by insertsToBeRolledBack are inserted after - transitioning to rollback operations and will be rolled back. + * Runs a resumable index build rollback test. + * + * 'rollbackStartFailPointName' specifies the phase that the index build will be in when + * rollback starts. + * + * 'rollbackEndFailPointName' specifies the phase that the index build will resume from after + * rollback completes. + * + * 'locksYieldedFailPointName' specifies a point during the index build between + * 'rollbackEndFailPointName' and 'rollbackStartFailPointName' at which its locks are yielded. + * + * 'expectedResumePhase' is a string which specifies the name of the phase that the index build + * is expected to resume in. + * + * 'resumeCheck' is an object which contains exactly one of 'numScannedAferResume' and + * 'skippedPhaseLogID'. The former is used to verify that the index build scanned the + * expected number of documents in the collection scan after resuming. The latter is used for + * phases which do not perform a collection scan after resuming, to verify that the + * index build did not resume from an earlier phase than expected. The log message must + * contain the buildUUID attribute. + * + * 'insertsToBeRolledBack' is documents which are inserted after transitioning to rollback + * operations and will be rolled back. + * + * 'sideWrites' is documents to insert into the side writes table. If either + * 'rollbackStartFailPointName' or 'rollbackEndFailPointName' is in the drain + * writes phase, this is required. 
*/ static run(rollbackTest, dbName, @@ -21,6 +40,8 @@ const RollbackResumableIndexBuildTest = class { rollbackEndFailPointName, rollbackEndFailPointIteration, locksYieldedFailPointName, + expectedResumePhase, + resumeCheck, insertsToBeRolledBack, sideWrites = []) { const originalPrimary = rollbackTest.getPrimary(); @@ -32,6 +53,9 @@ const RollbackResumableIndexBuildTest = class { rollbackTest.awaitLastOpCommitted(); + assert.commandWorked( + originalPrimary.adminCommand({setParameter: 1, logComponentVerbosity: {index: 1}})); + // Set internalQueryExecYieldIterations to 0 and maxIndexBuildDrainBatchSize to 1 so that // the index build is guaranteed to yield its locks between the rollback end and start // failpoints. @@ -101,6 +125,10 @@ const RollbackResumableIndexBuildTest = class { rollbackStartFp.off(); locksYieldedFp.wait(); + // Clear the log so that we can verify that the index build resumes from the correct phase + // after rollback. + assert.commandWorked(originalPrimary.adminCommand({clearLog: "global"})); + const awaitDisableFailPoint = startParallelShell( // Wait for the index build to be aborted for rollback. funWithArgs( @@ -135,7 +163,10 @@ const RollbackResumableIndexBuildTest = class { // Ensure that the index build completed after rollback. ResumableIndexBuildTest.assertCompleted(originalPrimary, coll, [buildUUID], [indexName]); - assert.commandWorked( - rollbackTest.getPrimary().getDB(dbName).getCollection(collName).dropIndex(indexName)); + ResumableIndexBuildTest.checkResume( + originalPrimary, [buildUUID], [expectedResumePhase], [resumeCheck]); + + ResumableIndexBuildTest.checkIndexes( + rollbackTest.getTestFixture(), dbName, collName, [indexName], []); } };
\ No newline at end of file diff --git a/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js b/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js index a81267804a3..b0d2e33c8a6 100644 --- a/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js +++ b/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js @@ -30,6 +30,8 @@ const runTest = function(rollbackEndFailPointName, rollbackEndFailPointIteration rollbackEndFailPointName, rollbackEndFailPointIteration, "setYieldAllLocksHang", + "collection scan", + {numScannedAferResume: 1}, [{a: 6}, {a: 7}]); }; diff --git a/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js b/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js index 0e10c8bd81d..13fe0942b03 100644 --- a/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js +++ b/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js @@ -33,6 +33,8 @@ const runTest = function(rollbackStartFailPointIteration, rollbackEndFailPointName, rollbackEndFailPointIteration, "hangDuringIndexBuildDrainYield", + "drain writes", + {skippedPhaseLogID: 20392}, [{a: 18}, {a: 19}], sideWrites); }; @@ -44,7 +46,7 @@ runTest(1, "hangAfterSettingUpIndexBuild", {}, [{a: 4}, {a: 5}, {a: 6}]); runTest(1, "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion", 1, [{a: 7}, {a: 8}, {a: 9}]); // Rollback to the bulk load phase. -runTest(1, "hangIndexBuildDuringBulkLoadPhase", 1, [{a: 7}, {a: 8}, {a: 9}]); +runTest(1, "hangIndexBuildDuringBulkLoadPhase", 1, [{a: 10}, {a: 11}, {a: 12}]); // Rollback to earlier in the drain writes phase. 
runTest(3, diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp index 1edf3e07a87..10c4edf6e92 100644 --- a/src/mongo/db/catalog/index_builds_manager.cpp +++ b/src/mongo/db/catalog/index_builds_manager.cpp @@ -333,41 +333,19 @@ bool IndexBuildsManager::abortIndexBuild(OperationContext* opCtx, return true; } -bool IndexBuildsManager::abortIndexBuildWithoutCleanupForRollback(OperationContext* opCtx, - const CollectionPtr& collection, - const UUID& buildUUID, - bool isResumable) { +bool IndexBuildsManager::abortIndexBuildWithoutCleanup(OperationContext* opCtx, + const CollectionPtr& collection, + const UUID& buildUUID, + bool isResumable) { auto builder = _getBuilder(buildUUID); if (!builder.isOK()) { return false; } - LOGV2(20347, - "Index build aborted without cleanup for rollback: {uuid}", - "Index build aborted without cleanup for rollback", - "buildUUID"_attr = buildUUID); + LOGV2(20347, "Index build aborted without cleanup", "buildUUID"_attr = buildUUID); - if (auto resumeInfo = - builder.getValue()->abortWithoutCleanupForRollback(opCtx, collection, isResumable)) { - _resumeInfos.push_back(std::move(*resumeInfo)); - } - - return true; -} - -bool IndexBuildsManager::abortIndexBuildWithoutCleanupForShutdown(OperationContext* opCtx, - const CollectionPtr& collection, - const UUID& buildUUID, - bool isResumable) { - auto builder = _getBuilder(buildUUID); - if (!builder.isOK()) { - return false; - } + builder.getValue()->abortWithoutCleanup(opCtx, collection, isResumable); - LOGV2( - 4841500, "Index build aborted without cleanup for shutdown", "buildUUID"_attr = buildUUID); - - builder.getValue()->abortWithoutCleanupForShutdown(opCtx, collection, isResumable); return true; } @@ -472,12 +450,4 @@ StatusWith<int> IndexBuildsManager::_moveRecordToLostAndFound( }); } -std::vector<ResumeIndexInfo> IndexBuildsManager::getResumeInfos() const { - return std::move(_resumeInfos); -} - -void 
IndexBuildsManager::clearResumeInfos() { - _resumeInfos.clear(); -} - } // namespace mongo diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h index bf731dfeb15..2f2a4d6663a 100644 --- a/src/mongo/db/catalog/index_builds_manager.h +++ b/src/mongo/db/catalog/index_builds_manager.h @@ -168,23 +168,16 @@ public: * Signals the index build to be aborted without being cleaned up and returns without waiting * for it to stop. Does nothing if the index build has already been cleared away. * + * Writes the current state of the index build to disk if the specified index build is a + * two-phase hybrid index build and resumable index builds are supported. + * * Returns true if a build existed to be signaled, as opposed to having already finished and - * been cleared away, or not having yet started.. - */ - bool abortIndexBuildWithoutCleanupForRollback(OperationContext* opCtx, - const CollectionPtr& collection, - const UUID& buildUUID, - bool isResumable); - - /** - * The same as abortIndexBuildWithoutCleanupForRollback above, but additionally writes the - * current state of the index build to disk if the specified index build is a two-phase hybrid - * index build and resumable index builds are supported. + * been cleared away, or not having yet started. */ - bool abortIndexBuildWithoutCleanupForShutdown(OperationContext* opCtx, - const CollectionPtr& collection, - const UUID& buildUUID, - bool isResumable); + bool abortIndexBuildWithoutCleanup(OperationContext* opCtx, + const CollectionPtr& collection, + const UUID& buildUUID, + bool isResumable); /** * Returns true if the index build supports background writes while building an index. This is @@ -197,17 +190,6 @@ public: */ void verifyNoIndexBuilds_forTestOnly(); - /** - * Returns the information to resume each resumable index build that was aborted for rollback. 
- */ - std::vector<ResumeIndexInfo> getResumeInfos() const; - - /** - * Clears the vector that was used to store the information to resume each resumable index - * build after rollback. - */ - void clearResumeInfos(); - private: /** * Creates and registers a new builder in the _builders map, mapped by the provided buildUUID. @@ -226,9 +208,6 @@ private: // taken on and information passed to and from index builds. std::map<UUID, std::unique_ptr<MultiIndexBlock>> _builders; - // The information to resume each resumable index build that was aborted for rollback. - std::vector<ResumeIndexInfo> _resumeInfos; - /** * Deletes record containing duplicate keys and insert it into a local lost and found collection * titled "local.lost_and_found.<original collection UUID>". Returns the size of the diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index f7ec01cb624..f90cb4bef80 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -648,6 +648,8 @@ Status MultiIndexBlock::drainBackgroundWrites( IndexBuildPhase_serializer(_phase).toString()); _phase = IndexBuildPhaseEnum::kDrainWrites; + ReadSourceScope readSourceScope(opCtx, readSource); + const CollectionPtr& coll = CollectionCatalog::get(opCtx).lookupCollectionByUUID(opCtx, _collectionUUID.get()); @@ -708,17 +710,6 @@ Status MultiIndexBlock::checkConstraints(OperationContext* opCtx, const Collecti return Status::OK(); } -boost::optional<ResumeIndexInfo> MultiIndexBlock::abortWithoutCleanupForRollback( - OperationContext* opCtx, const CollectionPtr& collection, bool isResumable) { - return _abortWithoutCleanup(opCtx, collection, false /* shutdown */, isResumable); -} - -void MultiIndexBlock::abortWithoutCleanupForShutdown(OperationContext* opCtx, - const CollectionPtr& collection, - bool isResumable) { - _abortWithoutCleanup(opCtx, collection, true /* shutdown */, isResumable); -} - MultiIndexBlock::OnCreateEachFn 
MultiIndexBlock::kNoopOnCreateEachFn = [](const BSONObj& spec) {}; MultiIndexBlock::OnCommitFn MultiIndexBlock::kNoopOnCommitFn = []() {}; @@ -796,8 +787,9 @@ void MultiIndexBlock::setIndexBuildMethod(IndexBuildMethod indexBuildMethod) { _method = indexBuildMethod; } -boost::optional<ResumeIndexInfo> MultiIndexBlock::_abortWithoutCleanup( - OperationContext* opCtx, const CollectionPtr& collection, bool shutdown, bool isResumable) { +void MultiIndexBlock::abortWithoutCleanup(OperationContext* opCtx, + const CollectionPtr& collection, + bool isResumable) { invariant(!_buildIsCleanedUp); UninterruptibleLockGuard noInterrupt(opCtx->lockState()); // Lock if it's not already locked, to ensure storage engine cannot be destructed out from @@ -808,15 +800,14 @@ boost::optional<ResumeIndexInfo> MultiIndexBlock::_abortWithoutCleanup( } auto action = TemporaryRecordStore::FinalizationAction::kDelete; - boost::optional<ResumeIndexInfo> resumeInfo; - if (isResumable && (shutdown || IndexBuildPhaseEnum::kDrainWrites != _phase)) { + if (isResumable) { invariant(_buildUUID); invariant(_method == IndexBuildMethod::kHybrid); // Index builds do not yield locks during the bulk load phase so it is not possible for // rollback to interrupt an index build during this phase. 
- if (!shutdown) { + if (!ErrorCodes::isShutdownError(opCtx->checkForInterruptNoAssert())) { invariant(IndexBuildPhaseEnum::kBulkLoad != _phase, str::stream() << *_buildUUID); } @@ -829,8 +820,6 @@ boost::optional<ResumeIndexInfo> MultiIndexBlock::_abortWithoutCleanup( } _buildIsCleanedUp = true; - - return resumeInfo; } void MultiIndexBlock::_writeStateToDisk(OperationContext* opCtx, diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h index 7ce46547d04..54b12fb622a 100644 --- a/src/mongo/db/catalog/multi_index_block.h +++ b/src/mongo/db/catalog/multi_index_block.h @@ -271,31 +271,15 @@ public: /** * May be called at any time after construction but before a successful commit(). Suppresses - * the default behavior on destruction of removing all traces of uncommitted index builds. Does - * not perform any storage engine writes. May delete internal tables, but this is not - * transactional. + * the default behavior on destruction of removing all traces of uncommitted index builds. May + * delete internal tables, but this is not transactional. Writes the resumable index build + * state to disk if resumable index builds are supported. * - * If the indexes being built were resumable, returns the information to resume them. - * Otherwise, returns boost::none. - * - * This should only be used during rollback. + * This should only be used during shutdown or rollback. */ - boost::optional<ResumeIndexInfo> abortWithoutCleanupForRollback(OperationContext* opCtx, - const CollectionPtr& collection, - bool isResumable); - - /** - * May be called at any time after construction but before a successful commit(). Suppresses - * the default behavior on destruction of removing all traces of uncommitted index builds. If - * this is a two-phase hybrid index build and resumable index builds are supported, writes the - * current state of the index build to disk using the storage engine. 
May delete internal - * tables, but this is not transactional. - * - * This should only be used during shutdown. - */ - void abortWithoutCleanupForShutdown(OperationContext* opCtx, - const CollectionPtr& collection, - bool isResumable); + void abortWithoutCleanup(OperationContext* opCtx, + const CollectionPtr& collection, + bool isResumable); /** * Returns true if this build block supports background writes while building an index. This is @@ -316,17 +300,6 @@ private: InsertDeleteOptions options; }; - /** - * This function should be used for shutdown and rollback. When called for shutdown, writes the - * resumable index build state to disk if resumable index builds are supported. When called for - * rollback, returns the information to resume the index build if resumable index builds are - * supported. - */ - boost::optional<ResumeIndexInfo> _abortWithoutCleanup(OperationContext* opCtx, - const CollectionPtr& collection, - bool shutdown, - bool isResumable); - void _writeStateToDisk(OperationContext* opCtx, const CollectionPtr& collection) const; BSONObj _constructStateObject(OperationContext* opCtx, const CollectionPtr& collection) const; diff --git a/src/mongo/db/catalog/multi_index_block_test.cpp b/src/mongo/db/catalog/multi_index_block_test.cpp index a62ae34a574..6edbb173115 100644 --- a/src/mongo/db/catalog/multi_index_block_test.cpp +++ b/src/mongo/db/catalog/multi_index_block_test.cpp @@ -148,7 +148,7 @@ TEST_F(MultiIndexBlockTest, AbortWithoutCleanupAfterInsertingSingleDocument) { ASSERT_EQUALS(0U, specs.size()); ASSERT_OK(indexer->insertSingleDocumentForInitialSyncOrRecovery(operationContext(), {}, {})); auto isResumable = false; - indexer->abortWithoutCleanupForRollback(operationContext(), coll.get(), isResumable); + indexer->abortWithoutCleanup(operationContext(), coll.get(), isResumable); } TEST_F(MultiIndexBlockTest, InitWriteConflictException) { diff --git a/src/mongo/db/index/index_build_interceptor.cpp 
b/src/mongo/db/index/index_build_interceptor.cpp index e81ea389524..7554e7351aa 100644 --- a/src/mongo/db/index/index_build_interceptor.cpp +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -84,8 +84,7 @@ IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatal : _indexCatalogEntry(entry), _sideWritesTable( opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStore(opCtx)), - _skippedRecordTracker(opCtx, entry, boost::none), - _sideWritesCounter(std::make_shared<AtomicWord<long long>>()) { + _skippedRecordTracker(opCtx, entry, boost::none) { if (entry->descriptor()->unique()) { _duplicateKeyTracker = std::make_unique<DuplicateKeyTracker>(opCtx, entry); @@ -104,8 +103,7 @@ IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStoreFromExistingIdent( opCtx, sideWritesIdent)), _skippedRecordTracker(opCtx, entry, skippedRecordTrackerIdent), - _sideWritesCounter( - std::make_shared<AtomicWord<long long>>(_sideWritesTable->rs()->numRecords(opCtx))) { + _skipNumAppliedCheck(true) { auto finalizeTableOnFailure = makeGuard([&] { _sideWritesTable->finalizeTemporaryTable(opCtx, @@ -398,28 +396,30 @@ void IndexBuildInterceptor::_yield(OperationContext* opCtx) { bool IndexBuildInterceptor::areAllWritesApplied(OperationContext* opCtx) const { invariant(_sideWritesTable); - auto cursor = _sideWritesTable->rs()->getCursor(opCtx); - auto record = cursor->next(); // The table is empty only when all writes are applied. - if (!record) { - auto writesRecorded = _sideWritesCounter->load(); - if (writesRecorded != _numApplied) { - dassert(writesRecorded == _numApplied, - (str::stream() - << "The number of side writes recorded does not match the number " - "applied, despite the table appearing empty. 
Writes recorded: " - << writesRecorded << ", applied: " << _numApplied)); - LOGV2_WARNING(20692, - "The number of side writes recorded does not match the number applied, " - "despite the table appearing empty", - "writesRecorded"_attr = writesRecorded, - "applied"_attr = _numApplied); - } + if (_sideWritesTable->rs()->getCursor(opCtx)->next()) { + return false; + } + + if (_skipNumAppliedCheck) { return true; } - return false; + auto writesRecorded = _sideWritesCounter->load(); + if (writesRecorded != _numApplied) { + dassert(writesRecorded == _numApplied, + (str::stream() << "The number of side writes recorded does not match the number " + "applied, despite the table appearing empty. Writes recorded: " + << writesRecorded << ", applied: " << _numApplied)); + LOGV2_WARNING(20692, + "The number of side writes recorded does not match the number applied, " + "despite the table appearing empty", + "writesRecorded"_attr = writesRecorded, + "applied"_attr = _numApplied); + } + + return true; } boost::optional<MultikeyPaths> IndexBuildInterceptor::getMultikeyPaths() const { diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h index 2eb571650d9..83ab8541d95 100644 --- a/src/mongo/db/index/index_build_interceptor.h +++ b/src/mongo/db/index/index_build_interceptor.h @@ -214,7 +214,13 @@ private: // additional fields that have to be referenced in commit/rollback handlers, this counter should // be moved to a new IndexBuildsInterceptor::InternalState structure that will be managed as a // shared resource. - std::shared_ptr<AtomicWord<long long>> _sideWritesCounter; + std::shared_ptr<AtomicWord<long long>> _sideWritesCounter = + std::make_shared<AtomicWord<long long>>(0); + + // Whether to skip the check the the number of writes applied is equal to the number of writes + // recorded. Resumable index builds to not preserve these counts, so we skip this check for + // index builds that were resumed. 
+ const bool _skipNumAppliedCheck = false; mutable Mutex _multikeyPathMutex = MONGO_MAKE_LATCH("IndexBuildInterceptor::_multikeyPathMutex"); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 0dce52b94d6..adf4e62c4a9 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -463,6 +463,17 @@ bool isIndexBuildResumable(OperationContext* opCtx, return true; } +/** + * Returns the ReadSource to be used for a drain occurring before the commit quorum has been + * satisfied. + */ +RecoveryUnit::ReadSource getReadSourceForDrainBeforeCommitQuorum( + const ReplIndexBuildState& replState) { + return replState.lastOpTimeBeforeInterceptors.isNull() + ? RecoveryUnit::ReadSource::kNoTimestamp + : RecoveryUnit::ReadSource::kMajorityCommitted; +} + } // namespace const auto getIndexBuildsCoord = @@ -1404,7 +1415,7 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx, invariant(replState->protocol == IndexBuildProtocol::kTwoPhase); invariant(replCoord->getMemberState().rollback()); auto isResumable = !replState->lastOpTimeBeforeInterceptors.isNull(); - _indexBuildsManager.abortIndexBuildWithoutCleanupForRollback( + _indexBuildsManager.abortIndexBuildWithoutCleanup( opCtx, coll.get(), replState->buildUUID, isResumable); break; } @@ -1439,7 +1450,7 @@ void IndexBuildsCoordinator::_completeAbortForShutdown( const CollectionPtr& collection) { // Leave it as-if kill -9 happened. Startup recovery will restart the index build. 
auto isResumable = !replState->lastOpTimeBeforeInterceptors.isNull(); - _indexBuildsManager.abortIndexBuildWithoutCleanupForShutdown( + _indexBuildsManager.abortIndexBuildWithoutCleanup( opCtx, collection, replState->buildUUID, isResumable); { @@ -2677,7 +2688,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites( uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kNoTimestamp, + getReadSourceForDrainBeforeCommitQuorum(*replState), IndexBuildInterceptor::DrainYieldPolicy::kYield)); } @@ -2705,7 +2716,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites( uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kNoTimestamp, + getReadSourceForDrainBeforeCommitQuorum(*replState), IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 74de31defd4..f56ce964c89 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -1580,7 +1580,7 @@ std::unique_ptr<RecordStore> WiredTigerKVEngine::makeTemporaryRecordStore(Operat WT_SESSION* session = wtSession.getSession(); LOGV2_DEBUG(22337, 2, - "WiredTigerKVEngine::createTemporaryRecordStore uri: {uri} config: {config}", + "WiredTigerKVEngine::makeTemporaryRecordStore", "uri"_attr = uri, "config"_attr = config); uassertStatusOK(wtRCToStatus(session->create(session, uri.c_str(), config.c_str()))); @@ -1592,8 +1592,10 @@ std::unique_ptr<RecordStore> WiredTigerKVEngine::makeTemporaryRecordStore(Operat params.isCapped = false; params.isEphemeral = _ephemeral; params.cappedCallback = nullptr; - params.sizeStorer = _sizeStorer.get(); - params.tracksSizeAdjustments = true; + // Temporary collections do not need to persist size information to the size storer. 
+ params.sizeStorer = nullptr; + // Temporary collections do not need to reconcile collection size/counts. + params.tracksSizeAdjustments = false; params.isReadOnly = false; params.cappedMaxSize = -1; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 3439a78a406..83c7e0be7ed 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -859,8 +859,9 @@ WiredTigerRecordStore::WiredTigerRecordStore(WiredTigerKVEngine* kvEngine, } // If no SizeStorer is in use, start counting at zero. In practice, this will only ever be the - // the case in unit tests. Persistent size information is not required in this case. If a - // RecordStore needs persistent size information, we require it to use a SizeStorer. + // case for temporary RecordStores (those not associated with any collection) and in unit + // tests. Persistent size information is not required in either case. If a RecordStore needs + // persistent size information, we require it to use a SizeStorer. _sizeInfo = _sizeStorer ? _sizeStorer->load(_uri) : std::make_shared<WiredTigerSizeStorer::SizeInfo>(0, 0); } |