diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2021-05-10 18:11:58 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-28 19:08:48 +0000 |
commit | 7f79e08ee7a6184d6367730f93345c14c4cff1a3 (patch) | |
tree | 9865df35ea1db755e1bd11fb3914cc7f0f97a11d | |
parent | 3601e41f25233022c1ced66b091d66a4310bfe71 (diff) | |
download | mongo-7f79e08ee7a6184d6367730f93345c14c4cff1a3.tar.gz |
SERVER-56062 CappedPositionLost during an index build's collection scan phase restarts
(cherry picked from commit d7d1234dc646de99413d956b8815897270811b63)
-rw-r--r-- | jstests/noPassthrough/index_build_capped_position_lost.js | 49 | ||||
-rw-r--r-- | src/mongo/db/catalog/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/catalog/multi_index_block.cpp | 156 | ||||
-rw-r--r-- | src/mongo/db/catalog/multi_index_block.h | 9 |
4 files changed, 159 insertions, 56 deletions
diff --git a/jstests/noPassthrough/index_build_capped_position_lost.js b/jstests/noPassthrough/index_build_capped_position_lost.js new file mode 100644 index 00000000000..95b69b832ac --- /dev/null +++ b/jstests/noPassthrough/index_build_capped_position_lost.js @@ -0,0 +1,49 @@ +/** + * Capped cursors return CappedPositionLost when the document they were positioned on gets deleted. + * When this occurs during the collection scan phase of an index build, it will get restarted. + */ +(function() { +"use strict"; + +load("jstests/noPassthrough/libs/index_build.js"); + +const conn = MongoRunner.runMongod({}); + +const dbName = "test"; +const collName = "index_build_capped_position_lost"; + +const db = conn.getDB(dbName); +assert.commandWorked( + db.createCollection(collName, {capped: true, size: 1024 * 1024 * 1024, max: 5})); + +const coll = db.getCollection(collName); + +for (let i = 0; i < 5; i++) { + assert.commandWorked(coll.insert({i: i})); +} + +// Hang the collection scan phase of the index build when it's halfway finished. +assert.commandWorked(conn.adminCommand( + {configureFailPoint: "hangAfterIndexBuildOf", mode: "alwaysOn", data: {i: 2}})); + +const awaitCreateIndex = IndexBuildTest.startIndexBuild(conn, coll.getFullName(), {i: 1}); +checkLog.contains(conn, "Hanging after index build of i=2"); + +// Rollover the capped collection. +for (let i = 5; i < 10; i++) { + assert.commandWorked(coll.insert({i: i})); +} + +assert.commandWorked(conn.adminCommand({configureFailPoint: "hangAfterIndexBuildOf", mode: "off"})); + +checkLog.contains( + conn, + "index build: collection scan restarting due to 'CollectionScan died due to position in capped collection being deleted."); +checkLog.contains(conn, "index build: collection scan done. scanned 5 total records"); + +awaitCreateIndex(); + +IndexBuildTest.assertIndexes(coll, 2, ["_id_", "i_1"]); + +MongoRunner.stopMongod(conn); +}());
\ No newline at end of file diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index 75c9a173c79..30141f97b05 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -302,6 +302,7 @@ env.Library( '$BUILD_DIR/mongo/db/index/index_build_interceptor', '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/idl/server_parameter', + '$BUILD_DIR/mongo/util/log_and_backoff', ] ) diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 36eb9c08eb2..d01b1e92fb2 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -56,6 +56,7 @@ #include "mongo/util/assert_util.h" #include "mongo/util/fail_point.h" #include "mongo/util/log.h" +#include "mongo/util/log_and_backoff.h" #include "mongo/util/progress_meter.h" #include "mongo/util/quick_exit.h" #include "mongo/util/scopeguard.h" @@ -98,6 +99,19 @@ MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildOf); MONGO_FAIL_POINT_DEFINE(hangAndThenFailIndexBuild); MONGO_FAIL_POINT_DEFINE(leaveIndexBuildUnfinishedForShutdown); +namespace { + +size_t getEachIndexBuildMaxMemoryUsageBytes(size_t numIndexSpecs) { + if (numIndexSpecs == 0) { + return 0; + } + + return static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 / + numIndexSpecs; +} + +} // namespace + MultiIndexBlock::~MultiIndexBlock() { invariant(_buildIsCleanedUp); } @@ -340,12 +354,8 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx, std::vector<BSONObj> indexInfoObjs; indexInfoObjs.reserve(indexSpecs.size()); - std::size_t eachIndexBuildMaxMemoryUsageBytes = 0; - if (!indexSpecs.empty()) { - eachIndexBuildMaxMemoryUsageBytes = - static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 / - indexSpecs.size(); - } + std::size_t eachIndexBuildMaxMemoryUsageBytes = + getEachIndexBuildMaxMemoryUsageBytes(indexSpecs.size()); for (size_t i = 0; i 
< indexSpecs.size(); i++) { BSONObj info = indexSpecs[i]; @@ -498,10 +508,91 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx, "Failed index build because of failpoint 'hangAndThenFailIndexBuild'"}; } - Timer t; + // Hint to the storage engine that this collection scan should not keep data in the cache. + // Do not use read-once cursors for background builds because saveState/restoreState is called + // with every insert into the index, which resets the collection scan cursor between every call + // to getNextSnapshotted(). With read-once cursors enabled, this can evict data we may need to + // read again, incurring a significant performance penalty. + // Note: This does not apply to hybrid builds because they write keys to the external sorter. + bool readOnce = + _method != IndexBuildMethod::kBackground && useReadOnceCursorsForIndexBuilds.load(); + opCtx->recoveryUnit()->setReadOnce(readOnce); + + size_t numScanRestarts = 0; + bool restartCollectionScan = false; + do { + restartCollectionScan = false; + progress->reset(collection->numRecords(opCtx)); + Timer timer; + + Status status = _doCollectionScan(opCtx, collection, &progress); + if (status.isOK()) { + log() << "index build: collection scan done. scanned " << progress->hits() + << " total records in " << timer.seconds() << " seconds"; + break; + } + + if (status.code() == ErrorCodes::CappedPositionLost) { + // Capped cursors are invalidated when the document they were positioned on gets + // deleted. The collection scan will restart in this case. + restartCollectionScan = true; + logAndBackoff(::mongo::logger::LogComponent::kIndex, + ::mongo::logger::LogSeverity::Info(), + ++numScanRestarts, + str::stream() << "index build: collection scan restarting due to '" + << status.reason() << "'. 
Scanned " << progress->hits() + << " total records in " << timer.seconds() << " seconds"); + + for (auto& index : _indexes) { + index.bulk = + index.real->initiateBulk(getEachIndexBuildMaxMemoryUsageBytes(_indexes.size())); + } + } else { + return status; + } + } while (restartCollectionScan); + + if (MONGO_FAIL_POINT(leaveIndexBuildUnfinishedForShutdown)) { + log() << "Index build interrupted due to 'leaveIndexBuildUnfinishedForShutdown' failpoint. " + "Mimicing shutdown error code."; + return Status( + ErrorCodes::InterruptedAtShutdown, + "background index build interrupted due to failpoint. returning a shutdown error."); + } + + if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) { + // Unlock before hanging so replication recognizes we've completed. + Locker::LockSnapshot lockInfo; + invariant(opCtx->lockState()->saveLockStateAndUnlock(&lockInfo)); - unsigned long long n = 0; + log() << "Hanging index build with no locks due to " + "'hangAfterStartingIndexBuildUnlocked' failpoint"; + MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangAfterStartingIndexBuildUnlocked); + if (isBackgroundBuilding()) { + opCtx->lockState()->restoreLockState(opCtx, lockInfo); + opCtx->recoveryUnit()->abandonSnapshot(); + return Status(ErrorCodes::OperationFailed, + "background index build aborted due to failpoint"); + } else { + invariant(false, + "the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off for " + "foreground index builds"); + } + } + + progress.finished(); + + Status ret = dumpInsertsFromBulk(opCtx); + if (!ret.isOK()) + return ret; + + return Status::OK(); +} + +Status MultiIndexBlock::_doCollectionScan(OperationContext* opCtx, + Collection* collection, + ProgressMeterHolder* progress) { PlanExecutor::YieldPolicy yieldPolicy; if (isBackgroundBuilding()) { yieldPolicy = PlanExecutor::YIELD_AUTO; @@ -511,16 +602,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx, auto exec = collection->makePlanExecutor(opCtx, yieldPolicy, 
Collection::ScanDirection::kForward); - // Hint to the storage engine that this collection scan should not keep data in the cache. - // Do not use read-once cursors for background builds because saveState/restoreState is called - // with every insert into the index, which resets the collection scan cursor between every call - // to getNextSnapshotted(). With read-once cursors enabled, this can evict data we may need to - // read again, incurring a significant performance penalty. - // Note: This does not apply to hybrid builds because they write keys to the external sorter. - bool readOnce = - _method != IndexBuildMethod::kBackground && useReadOnceCursorsForIndexBuilds.load(); - opCtx->recoveryUnit()->setReadOnce(readOnce); - Snapshotted<BSONObj> objToIndex; RecordId loc; PlanExecutor::ExecState state; @@ -546,7 +627,7 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx, } // Done before insert so we can retry document if it WCEs. - progress->setTotalWhileRunning(collection->numRecords(opCtx)); + progress->get()->setTotalWhileRunning(collection->numRecords(opCtx)); failPointHangDuringBuild(&hangBeforeIndexBuildOf, "before", objToIndex.value()); @@ -577,7 +658,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx, // Go to the next document progress->hit(); - n++; retries = 0; } catch (const WriteConflictException&) { // Only background builds write inside transactions, and therefore should only ever @@ -604,42 +684,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx, return exec->getMemberObjectStatus(objToIndex.value()); } - if (MONGO_FAIL_POINT(leaveIndexBuildUnfinishedForShutdown)) { - log() << "Index build interrupted due to 'leaveIndexBuildUnfinishedForShutdown' failpoint. " - "Mimicing shutdown error code."; - return Status( - ErrorCodes::InterruptedAtShutdown, - "background index build interrupted due to failpoint. 
returning a shutdown error."); - } - - if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) { - // Unlock before hanging so replication recognizes we've completed. - Locker::LockSnapshot lockInfo; - invariant(opCtx->lockState()->saveLockStateAndUnlock(&lockInfo)); - - log() << "Hanging index build with no locks due to " - "'hangAfterStartingIndexBuildUnlocked' failpoint"; - MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangAfterStartingIndexBuildUnlocked); - - if (isBackgroundBuilding()) { - opCtx->lockState()->restoreLockState(opCtx, lockInfo); - opCtx->recoveryUnit()->abandonSnapshot(); - return Status(ErrorCodes::OperationFailed, - "background index build aborted due to failpoint"); - } else { - invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off for foreground index builds"); - } - } - - progress->finished(); - - log() << "index build: collection scan done. scanned " << n << " total records in " - << t.seconds() << " seconds"; - - Status ret = dumpInsertsFromBulk(opCtx); - if (!ret.isOK()) - return ret; - return Status::OK(); } diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h index f456162f2e9..aadf821c2f0 100644 --- a/src/mongo/db/catalog/multi_index_block.h +++ b/src/mongo/db/catalog/multi_index_block.h @@ -56,6 +56,7 @@ class Collection; class MatchExpression; class NamespaceString; class OperationContext; +class ProgressMeterHolder; /** * Builds one or more indexes. @@ -329,6 +330,14 @@ private: */ void _setStateToAbortedIfNotCommitted(StringData reason); + /** + * Performs a collection scan on the given collection and inserts the relevant index keys into + * the external sorter. + */ + Status _doCollectionScan(OperationContext* opCtx, + Collection* collection, + ProgressMeterHolder* progress); + // Is set during init() and ensures subsequent function calls act on the same Collection. boost::optional<UUID> _collectionUUID; |