diff options
author | Gregory Noma <gregory.noma@gmail.com> | 2020-06-09 10:05:41 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-06-09 14:32:47 +0000 |
commit | 514a8301fbe6919ced991942519459854c6cc570 (patch) | |
tree | 5c21a7ee2bfb9bc1af52f7e2147c08d2e62c3eda /src | |
parent | 9c270b00716f910d683cbdfd38b0ccec4db90af1 (diff) | |
download | mongo-514a8301fbe6919ced991942519459854c6cc570.tar.gz |
SERVER-48415 Write placeholder document to internal table on clean shutdown for resumable index builds
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/catalog/index_builds_manager.cpp | 30 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_builds_manager.h | 17 | ||||
-rw-r--r-- | src/mongo/db/catalog/multi_index_block.cpp | 96 | ||||
-rw-r--r-- | src/mongo/db/catalog/multi_index_block.h | 19 | ||||
-rw-r--r-- | src/mongo/db/catalog/multi_index_block_test.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/index/duplicate_key_tracker.h | 4 | ||||
-rw-r--r-- | src/mongo/db/index/index_build_interceptor.h | 9 | ||||
-rw-r--r-- | src/mongo/db/index/skipped_record_tracker.h | 5 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/storage/kv/temporary_kv_record_store.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/storage/kv/temporary_kv_record_store.h | 10 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_impl.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/storage/temporary_record_store.h | 7 |
13 files changed, 188 insertions, 42 deletions
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp index c4438db7cd8..203b9a82ffd 100644 --- a/src/mongo/db/catalog/index_builds_manager.cpp +++ b/src/mongo/db/catalog/index_builds_manager.cpp @@ -294,22 +294,36 @@ bool IndexBuildsManager::abortIndexBuild(OperationContext* opCtx, return true; } -bool IndexBuildsManager::abortIndexBuildWithoutCleanup(OperationContext* opCtx, - Collection* collection, - const UUID& buildUUID, - const std::string& reason) { +bool IndexBuildsManager::abortIndexBuildWithoutCleanupForRollback(OperationContext* opCtx, + Collection* collection, + const UUID& buildUUID, + const std::string& reason) { auto builder = _getBuilder(buildUUID); if (!builder.isOK()) { return false; } LOGV2(20347, - "Index build aborted without cleanup: {buildUUID}: {reason}", - "Index build aborted without cleanup", - "buildUUID"_attr = buildUUID, + "Index build aborted without cleanup for rollback: {uuid}: {reason}", + "Index build aborted without cleanup for rollback", + logAttrs(buildUUID), "reason"_attr = reason); - builder.getValue()->abortWithoutCleanup(opCtx); + builder.getValue()->abortWithoutCleanupForRollback(opCtx); + return true; +} + +bool IndexBuildsManager::abortIndexBuildWithoutCleanupForShutdown(OperationContext* opCtx, + Collection* collection, + const UUID& buildUUID) { + auto builder = _getBuilder(buildUUID); + if (!builder.isOK()) { + return false; + } + + LOGV2(4841500, "Index build aborted without cleanup for shutdown", logAttrs(buildUUID)); + + builder.getValue()->abortWithoutCleanupForShutdown(opCtx); return true; } diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h index 0686bd6af81..bfcd8d77c12 100644 --- a/src/mongo/db/catalog/index_builds_manager.h +++ b/src/mongo/db/catalog/index_builds_manager.h @@ -158,10 +158,19 @@ public: * Returns true if a build existed to be signaled, as opposed to having already finished and * been cleared away, or not having yet started.. */ - bool abortIndexBuildWithoutCleanup(OperationContext* opCtx, - Collection* collection, - const UUID& buildUUID, - const std::string& reason); + bool abortIndexBuildWithoutCleanupForRollback(OperationContext* opCtx, + Collection* collection, + const UUID& buildUUID, + const std::string& reason); + + /** + * The same as abortIndexBuildWithoutCleanupForRollback above, but additionally writes the + * current state of the index build to disk if the specified index build is a two-phase hybrid + * index build and resumable index builds are supported. + */ + bool abortIndexBuildWithoutCleanupForShutdown(OperationContext* opCtx, + Collection* collection, + const UUID& buildUUID); /** * Returns true if the index build supports background writes while building an index. This is diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 41eec009205..ffdee34d949 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -611,20 +611,12 @@ Status MultiIndexBlock::checkConstraints(OperationContext* opCtx) { return Status::OK(); } -void MultiIndexBlock::abortWithoutCleanup(OperationContext* opCtx) { - invariant(!_buildIsCleanedUp); - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - // Lock if it's not already locked, to ensure storage engine cannot be destructed out from - // underneath us. - boost::optional<Lock::GlobalLock> lk; - if (!opCtx->lockState()->isWriteLocked()) { - lk.emplace(opCtx, MODE_IS); - } +void MultiIndexBlock::abortWithoutCleanupForRollback(OperationContext* opCtx) { + _abortWithoutCleanup(opCtx, false /* shutdown */); +} - for (auto& index : _indexes) { - index.block->deleteTemporaryTables(opCtx); - } - _buildIsCleanedUp = true; +void MultiIndexBlock::abortWithoutCleanupForShutdown(OperationContext* opCtx) { + _abortWithoutCleanup(opCtx, true /* shutdown */); } MultiIndexBlock::OnCreateEachFn MultiIndexBlock::kNoopOnCreateEachFn = [](const BSONObj& spec) {}; @@ -697,4 +689,82 @@ void MultiIndexBlock::setIndexBuildMethod(IndexBuildMethod indexBuildMethod) { _method = indexBuildMethod; } +void MultiIndexBlock::_abortWithoutCleanup(OperationContext* opCtx, bool shutdown) { + invariant(!_buildIsCleanedUp); + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); + // Lock if it's not already locked, to ensure storage engine cannot be destructed out from + // underneath us. + boost::optional<Lock::GlobalLock> lk; + if (!opCtx->lockState()->isWriteLocked()) { + lk.emplace(opCtx, MODE_IX); + } + + if (shutdown && _buildUUID && _method == IndexBuildMethod::kHybrid && + opCtx->getServiceContext()->getStorageEngine()->supportsResumableIndexBuilds()) { + _writeStateToDisk(opCtx); + } + + for (auto& index : _indexes) { + index.block->deleteTemporaryTables(opCtx); + } + + _buildIsCleanedUp = true; +} + +void MultiIndexBlock::_writeStateToDisk(OperationContext* opCtx) const { + BSONObjBuilder builder; + _buildUUID->appendToBuilder(&builder, "_id"); + builder.append("phase", "unknown"); + builder.appendNumber("collectionScanPosition", 0LL); + + BSONArrayBuilder indexesArray(builder.subarrayStart("indexes")); + for (const auto& index : _indexes) { + BSONObjBuilder indexInfo(indexesArray.subobjStart()); + indexInfo.append("tempDir", ""); + indexInfo.append("fileName", ""); + + BSONArrayBuilder ranges(indexInfo.subarrayStart("ranges")); + ranges.done(); + + auto indexBuildInterceptor = index.block->getEntry()->indexBuildInterceptor(); + indexInfo.append("sideWritesTable", indexBuildInterceptor->getSideWritesTableIdent()); + + if (auto duplicateKeyTrackerTableIdent = + indexBuildInterceptor->getDuplicateKeyTrackerTableIdent()) + indexInfo.append("dupKeyTempTable", *duplicateKeyTrackerTableIdent); + + if (auto skippedRecordTrackerTableIdent = + indexBuildInterceptor->getSkippedRecordTracker()->getTableIdent()) + indexInfo.append("skippedRecordTrackerTable", *skippedRecordTrackerTableIdent); + + indexInfo.done(); + } + indexesArray.done(); + + auto obj = builder.done(); + auto rs = opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStore(opCtx); + + WriteUnitOfWork wuow(opCtx); + + auto status = rs->rs()->insertRecord(opCtx, obj.objdata(), obj.objsize(), Timestamp()); + if (!status.isOK()) { + LOGV2_ERROR(4841501, + "Failed to write resumable index build state to disk", + logAttrs(*_buildUUID), + "error"_attr = status.getStatus()); + dassert(status, + str::stream() << "Failed to write resumable index build state to disk. UUID: " + << *_buildUUID); + + rs->deleteTemporaryTable(opCtx); + return; + } + + wuow.commit(); + + LOGV2(4841502, "Wrote resumable index build state to disk", logAttrs(*_buildUUID)); + + rs->keepTemporaryTable(); +} + } // namespace mongo diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h index e6fa117711d..dd29e02f235 100644 --- a/src/mongo/db/catalog/multi_index_block.h +++ b/src/mongo/db/catalog/multi_index_block.h @@ -261,9 +261,20 @@ public: * not perform any storage engine writes. May delete internal tables, but this is not * transactional. * - * This should only be used during shutdown or rollback. + * This should only be used during rollback. */ - void abortWithoutCleanup(OperationContext* opCtx); + void abortWithoutCleanupForRollback(OperationContext* opCtx); + + /** + * May be called at any time after construction but before a successful commit(). Suppresses + * the default behavior on destruction of removing all traces of uncommitted index builds. If + * this is a two-phase hybrid index build and resumable index builds are supported, writes the + * current state of the index build to disk using the storage engine. May delete internal + * tables, but this is not transactional. + * + * This should only be used during shutdown. + */ + void abortWithoutCleanupForShutdown(OperationContext* opCtx); /** * Returns true if this build block supports background writes while building an index. This is @@ -284,6 +295,10 @@ private: InsertDeleteOptions options; }; + void _abortWithoutCleanup(OperationContext* opCtx, bool shutdown); + + void _writeStateToDisk(OperationContext* opCtx) const; + // Is set during init() and ensures subsequent function calls act on the same Collection. boost::optional<UUID> _collectionUUID; diff --git a/src/mongo/db/catalog/multi_index_block_test.cpp b/src/mongo/db/catalog/multi_index_block_test.cpp index 3f0615b14aa..a49464eef01 100644 --- a/src/mongo/db/catalog/multi_index_block_test.cpp +++ b/src/mongo/db/catalog/multi_index_block_test.cpp @@ -156,7 +156,7 @@ TEST_F(MultiIndexBlockTest, AbortWithoutCleanupAfterInsertingSingleDocument) { getOpCtx(), getCollection(), std::vector<BSONObj>(), MultiIndexBlock::kNoopOnInitFn)); ASSERT_EQUALS(0U, specs.size()); ASSERT_OK(indexer->insert(getOpCtx(), {}, {})); - indexer->abortWithoutCleanup(getOpCtx()); + indexer->abortWithoutCleanupForRollback(getOpCtx()); } } // namespace diff --git a/src/mongo/db/index/duplicate_key_tracker.h b/src/mongo/db/index/duplicate_key_tracker.h index 5034bca4c02..122a8530cea 100644 --- a/src/mongo/db/index/duplicate_key_tracker.h +++ b/src/mongo/db/index/duplicate_key_tracker.h @@ -76,6 +76,10 @@ public: */ Status checkConstraints(OperationContext* opCtx) const; + std::string getTableIdent() const { + return _keyConstraintsTable->rs()->getIdent(); + } + private: const IndexCatalogEntry* _indexCatalogEntry; diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h index 28c4c648a77..d4c8c15daee 100644 --- a/src/mongo/db/index/index_build_interceptor.h +++ b/src/mongo/db/index/index_build_interceptor.h @@ -147,6 +147,15 @@ public: */ boost::optional<MultikeyPaths> getMultikeyPaths() const; + std::string getSideWritesTableIdent() const { + return _sideWritesTable->rs()->getIdent(); + } + + boost::optional<std::string> getDuplicateKeyTrackerTableIdent() const { + return _duplicateKeyTracker ? boost::make_optional(_duplicateKeyTracker->getTableIdent()) + : boost::none; + } + private: using SideWriteRecord = std::pair<RecordId, BSONObj>; diff --git a/src/mongo/db/index/skipped_record_tracker.h b/src/mongo/db/index/skipped_record_tracker.h index 2858e3305e5..f37694d0572 100644 --- a/src/mongo/db/index/skipped_record_tracker.h +++ b/src/mongo/db/index/skipped_record_tracker.h @@ -73,6 +73,11 @@ public: */ Status retrySkippedRecords(OperationContext* opCtx, const Collection* collection); + boost::optional<std::string> getTableIdent() const { + return _skippedRecordsTable ? boost::make_optional(_skippedRecordsTable->rs()->getIdent()) + : boost::none; + } + private: IndexCatalogEntry* _indexCatalogEntry; diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 3d4869d512c..3969e1c6ce0 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -1169,7 +1169,7 @@ void IndexBuildsCoordinator::_completeAbort(OperationContext* opCtx, case IndexBuildAction::kRollbackAbort: { invariant(replState->protocol == IndexBuildProtocol::kTwoPhase); invariant(replCoord->getMemberState().rollback()); - _indexBuildsManager.abortIndexBuildWithoutCleanup( + _indexBuildsManager.abortIndexBuildWithoutCleanupForRollback( opCtx, coll, replState->buildUUID, reason.reason()); break; } @@ -1203,8 +1203,8 @@ void IndexBuildsCoordinator::_completeAbortForShutdown( std::shared_ptr<ReplIndexBuildState> replState, Collection* collection) { // Leave it as-if kill -9 happened. Startup recovery will restart the index build. - _indexBuildsManager.abortIndexBuildWithoutCleanup( - opCtx, collection, replState->buildUUID, "shutting down"); + _indexBuildsManager.abortIndexBuildWithoutCleanupForShutdown( + opCtx, collection, replState->buildUUID); { // Promise should be set at least once before it's getting destroyed. diff --git a/src/mongo/db/storage/kv/temporary_kv_record_store.cpp b/src/mongo/db/storage/kv/temporary_kv_record_store.cpp index e92f24bf7bc..e1932356163 100644 --- a/src/mongo/db/storage/kv/temporary_kv_record_store.cpp +++ b/src/mongo/db/storage/kv/temporary_kv_record_store.cpp @@ -35,25 +35,37 @@ #include "mongo/db/operation_context.h" #include "mongo/db/storage/kv/kv_engine.h" +#include "mongo/logv2/log.h" #include "mongo/util/assert_util.h" #include "mongo/util/str.h" namespace mongo { TemporaryKVRecordStore::~TemporaryKVRecordStore() { - invariant(_recordStoreHasBeenDeleted); + invariant(_recordStoreHasBeenDeletedOrKept); } void TemporaryKVRecordStore::deleteTemporaryTable(OperationContext* opCtx) { + invariant(!_recordStoreHasBeenDeletedOrKept); + // Need at least Global IS before calling into the storage engine, to protect against it being // destructed while we're using it. invariant(opCtx->lockState()->isReadLocked()); auto status = _kvEngine->dropIdent(opCtx, opCtx->recoveryUnit(), _rs->getIdent()); - fassert( - 51032, - status.withContext(str::stream() << "failed to drop temporary ident: " << _rs->getIdent())); - _recordStoreHasBeenDeleted = true; + + if (!status.isOK()) { + LOGV2_ERROR(4841503, "Failed to drop temporary table", "ident"_attr = _rs->getIdent()); + } + dassert(status, str::stream() << "Failed to drop temporary table. Ident: " << _rs->getIdent()); + + _recordStoreHasBeenDeletedOrKept = true; +} + +void TemporaryKVRecordStore::keepTemporaryTable() { + invariant(!_recordStoreHasBeenDeletedOrKept); + + _recordStoreHasBeenDeletedOrKept = true; } } // namespace mongo diff --git a/src/mongo/db/storage/kv/temporary_kv_record_store.h b/src/mongo/db/storage/kv/temporary_kv_record_store.h index f4b7c6033bd..27a2844b95d 100644 --- a/src/mongo/db/storage/kv/temporary_kv_record_store.h +++ b/src/mongo/db/storage/kv/temporary_kv_record_store.h @@ -39,8 +39,6 @@ class OperationContext; /** * Implementation of TemporaryRecordStore that manages a temporary RecordStore on a KVEngine. - * - * deleteTemporaryTable() must be called before destruction to delete the underlying RecordStore. */ class TemporaryKVRecordStore : public TemporaryRecordStore { public: @@ -62,9 +60,15 @@ public: */ void deleteTemporaryTable(OperationContext* opCtx); + /** + * Keeps the persisted record store. This should be used for temporary tables that need to be + * be kept across shutdown. + */ + void keepTemporaryTable(); + private: KVEngine* _kvEngine; - bool _recordStoreHasBeenDeleted = false; + bool _recordStoreHasBeenDeletedOrKept = false; }; } // namespace mongo diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index c353259901d..8a8a1219d8e 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -860,7 +860,8 @@ bool StorageEngineImpl::supportsResumableIndexBuilds() const { return enableResumableIndexBuilds && supportsReadConcernMajority() && !isEphemeral() && serverGlobalParams.featureCompatibility.isVersionInitialized() && serverGlobalParams.featureCompatibility.getVersion() == - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo46; + ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo46 && + !repl::ReplSettings::shouldRecoverFromOplogAsStandalone(); } bool StorageEngineImpl::supportsPendingDrops() const { diff --git a/src/mongo/db/storage/temporary_record_store.h b/src/mongo/db/storage/temporary_record_store.h index 21859790415..697148c45ab 100644 --- a/src/mongo/db/storage/temporary_record_store.h +++ b/src/mongo/db/storage/temporary_record_store.h @@ -36,8 +36,9 @@ namespace mongo { /** * Manages the lifetime of a temporary RecordStore. * - * Derived classes must implement and call deleteTemporaryTable() to delete the underlying - * RecordStore from the storage engine. + * Derived classes must implement at least one of deleteTemporaryTable(), to delete the underlying + * RecordStore from the storage engine, and keepTemporaryTable(), to keep the underlying + * RecordStore. Exactly one of these functions must be called before destruction. */ class TemporaryRecordStore { public: @@ -54,6 +55,8 @@ public: virtual void deleteTemporaryTable(OperationContext* opCtx) {} + virtual void keepTemporaryTable() {} + RecordStore* rs() { return _rs.get(); } |