diff options
author | Louis Williams <louis.williams@mongodb.com> | 2019-01-02 16:48:56 -0500 |
---|---|---|
committer | Louis Williams <louis.williams@mongodb.com> | 2019-01-17 11:35:32 -0500 |
commit | e12dcc7fdbdb44fb7806dfb42a49bd740f361d82 (patch) | |
tree | d23cdd47b52a10b5100598a8f6464febf8ec9b80 /src | |
parent | 7edc50cf214893688eb8432619e4a8bba18d107b (diff) | |
download | mongo-e12dcc7fdbdb44fb7806dfb42a49bd740f361d82.tar.gz |
SERVER-37270 Remove foreground index builds by default
Diffstat (limited to 'src')
36 files changed, 330 insertions, 278 deletions
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index aa207774bc4..1d1a5868aae 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -249,6 +249,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/index/index_build_interceptor', + '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/idl/server_parameter', ] ) diff --git a/src/mongo/db/catalog/collection_compact.cpp b/src/mongo/db/catalog/collection_compact.cpp index 6170ac334d1..cb5e854c11a 100644 --- a/src/mongo/db/catalog/collection_compact.cpp +++ b/src/mongo/db/catalog/collection_compact.cpp @@ -118,7 +118,6 @@ StatusWith<CompactStats> compactCollection(OperationContext* opCtx, CompactStats stats; MultiIndexBlock indexer(opCtx, collection); - indexer.allowInterruption(); indexer.ignoreUniqueConstraint(); // in compact we should be doing no checking Status status = indexer.init(indexSpecs).getStatus(); diff --git a/src/mongo/db/catalog/database_test.cpp b/src/mongo/db/catalog/database_test.cpp index 0e0205a0380..2237571c7e6 100644 --- a/src/mongo/db/catalog/database_test.cpp +++ b/src/mongo/db/catalog/database_test.cpp @@ -308,7 +308,8 @@ void _testDropCollectionThrowsExceptionIfThereAreIndexesInProgress(OperationCont << "ns" << nss.ns()); - auto indexBuildBlock = indexCatalog->createIndexBuildBlock(opCtx, indexInfoObj); + auto indexBuildBlock = + indexCatalog->createIndexBuildBlock(opCtx, indexInfoObj, IndexBuildMethod::kHybrid); { WriteUnitOfWork wuow(opCtx); ASSERT_OK(indexBuildBlock->init()); diff --git a/src/mongo/db/catalog/index_build_block.cpp b/src/mongo/db/catalog/index_build_block.cpp index 270b2f2d12b..51294456a36 100644 --- a/src/mongo/db/catalog/index_build_block.cpp +++ b/src/mongo/db/catalog/index_build_block.cpp @@ -49,11 +49,13 @@ namespace mongo { IndexCatalogImpl::IndexBuildBlock::IndexBuildBlock(OperationContext* opCtx, Collection* collection, IndexCatalogImpl* catalog, - const BSONObj& spec) + const BSONObj& spec, + IndexBuildMethod method) : _collection(collection), _catalog(catalog), _ns(_collection->ns().ns()), _spec(spec.getOwned()), + _method(method), _entry(nullptr), _opCtx(opCtx) { invariant(collection); @@ -71,7 +73,8 @@ Status IndexCatalogImpl::IndexBuildBlock::init() { _indexName = descriptor->indexName(); _indexNamespace = descriptor->indexNamespace(); - bool isBackgroundIndex = _spec["background"].trueValue(); + bool isBackgroundIndex = + _method == IndexBuildMethod::kHybrid || _method == IndexBuildMethod::kBackground; bool isBackgroundSecondaryBuild = false; if (auto replCoord = repl::ReplicationCoordinator::get(_opCtx)) { isBackgroundSecondaryBuild = @@ -91,15 +94,7 @@ Status IndexCatalogImpl::IndexBuildBlock::init() { _entry = _catalog->_setupInMemoryStructures( _opCtx, std::move(descriptor), initFromDisk, isReadyIndex); - // Hybrid indexes are only enabled for background indexes. - bool useHybrid = true; - // TODO: Remove when SERVER-37270 is complete. - useHybrid = useHybrid && isBackgroundIndex; - // TODO: Remove when SERVER-38550 is complete. The mobile storage engine does not suport - // dupsAllowed mode on bulk builders. - useHybrid = useHybrid && storageGlobalParams.engine != "mobile"; - - if (useHybrid) { + if (_method == IndexBuildMethod::kHybrid) { _indexBuildInterceptor = stdx::make_unique<IndexBuildInterceptor>(_opCtx, _entry); _entry->setIndexBuildInterceptor(_indexBuildInterceptor.get()); diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h index e759efef753..55d5c79446a 100644 --- a/src/mongo/db/catalog/index_catalog.h +++ b/src/mongo/db/catalog/index_catalog.h @@ -54,6 +54,28 @@ struct BsonRecord { const BSONObj* docPtr; }; +enum class IndexBuildMethod { + /** + * Use a collection scan to dump all keys into an external sorter. During this process, + * concurrent client writes are accepted, and their generated keys are written into an + * interceptor. On completion, this interceptor is drained and used to verify uniqueness + * constraints on the index. + * + * This is the default for all index builds. + */ + kHybrid, + /** + * Perform a collection scan by writing each document's generated key directly into the index. + * Accept writes in the background into the index as well. + */ + kBackground, + /** + * Perform a collection scan to dump all keys into the exteral sorter, then into the index. + * During this process, callers guarantee that no writes will be accepted on this collection. + */ + kForeground, +}; + /** * The IndexCatalog is owned by the Collection and is responsible for the lookup and lifetimes of * the indexes in a collection. Every collection has exactly one instance of this class. @@ -429,7 +451,7 @@ public: * spex and OperationContext. */ virtual std::unique_ptr<IndexBuildBlockInterface> createIndexBuildBlock( - OperationContext* opCtx, const BSONObj& spec) = 0; + OperationContext* opCtx, const BSONObj& spec, IndexBuildMethod method) = 0; // public helpers diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 1e52a6ac447..ef6628cfffc 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -75,7 +75,7 @@ public: virtual const IndexAccessMethod* accessMethod() const = 0; - virtual bool isBuilding() const = 0; + virtual bool isHybridBuilding() const = 0; virtual IndexBuildInterceptor* indexBuildInterceptor() = 0; diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.h b/src/mongo/db/catalog/index_catalog_entry_impl.h index a5a53f2dff2..7460b765b34 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/catalog/index_catalog_entry_impl.h @@ -90,7 +90,7 @@ public: return _accessMethod.get(); } - bool isBuilding() const final { + bool isHybridBuilding() const final { return _indexBuildInterceptor != nullptr; } diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index 02def7005d9..08498dd3d8b 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -310,7 +310,7 @@ StatusWith<BSONObj> IndexCatalogImpl::createIndexOnEmptyCollection(OperationCont spec = statusWithSpec.getValue(); // now going to touch disk - IndexBuildBlock indexBuildBlock(opCtx, _collection, this, spec); + IndexBuildBlock indexBuildBlock(opCtx, _collection, this, spec, IndexBuildMethod::kForeground); status = indexBuildBlock.init(); if (!status.isOK()) return status; @@ -1187,11 +1187,12 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx, } Status status = Status::OK(); - if (index->isBuilding()) { + if (index->isHybridBuilding()) { int64_t inserted; status = index->indexBuildInterceptor()->sideWrite(opCtx, index->accessMethod(), bsonRecord.docPtr, + options, bsonRecord.id, IndexBuildInterceptor::Op::kInsert, &inserted); @@ -1237,10 +1238,19 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, const RecordId& loc, bool logIfError, int64_t* keysDeletedOut) { - if (index->isBuilding()) { + InsertDeleteOptions options; + prepareInsertDeleteOptions(opCtx, index->descriptor(), &options); + options.logIfError = logIfError; + + if (index->isHybridBuilding()) { int64_t removed; - auto status = index->indexBuildInterceptor()->sideWrite( - opCtx, index->accessMethod(), &obj, loc, IndexBuildInterceptor::Op::kDelete, &removed); + auto status = index->indexBuildInterceptor()->sideWrite(opCtx, + index->accessMethod(), + &obj, + options, + loc, + IndexBuildInterceptor::Op::kDelete, + &removed); if (status.isOK() && keysDeletedOut) { *keysDeletedOut += removed; } @@ -1248,10 +1258,6 @@ Status IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, return status; } - InsertDeleteOptions options; - prepareInsertDeleteOptions(opCtx, index->descriptor(), &options); - options.logIfError = logIfError; - // On WiredTiger, we do blind unindexing of records for efficiency. However, when duplicates // are allowed in unique indexes, WiredTiger does not do blind unindexing, and instead confirms // that the recordid matches the element we are removing. @@ -1399,8 +1405,8 @@ Status IndexCatalogImpl::compactIndexes(OperationContext* opCtx) { } std::unique_ptr<IndexCatalog::IndexBuildBlockInterface> IndexCatalogImpl::createIndexBuildBlock( - OperationContext* opCtx, const BSONObj& spec) { - return std::make_unique<IndexBuildBlock>(opCtx, _collection, this, spec); + OperationContext* opCtx, const BSONObj& spec, IndexBuildMethod method) { + return std::make_unique<IndexBuildBlock>(opCtx, _collection, this, spec, method); } std::string::size_type IndexCatalogImpl::getLongestIndexNameLength(OperationContext* opCtx) const { diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index da755206ba8..36d9e8c41e4 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -249,7 +249,8 @@ public: IndexBuildBlock(OperationContext* opCtx, Collection* collection, IndexCatalogImpl* catalog, - const BSONObj& spec); + const BSONObj& spec, + IndexBuildMethod method); ~IndexBuildBlock(); @@ -288,6 +289,7 @@ public: const std::string _ns; BSONObj _spec; + IndexBuildMethod _method; std::string _indexName; std::string _indexNamespace; @@ -336,7 +338,7 @@ public: } std::unique_ptr<IndexCatalog::IndexBuildBlockInterface> createIndexBuildBlock( - OperationContext* opCtx, const BSONObj& spec) override; + OperationContext* opCtx, const BSONObj& spec, IndexBuildMethod method) override; std::string::size_type getLongestIndexNameLength(OperationContext* opCtx) const override; diff --git a/src/mongo/db/catalog/index_catalog_noop.h b/src/mongo/db/catalog/index_catalog_noop.h index 4331d8f4afc..0806f98773e 100644 --- a/src/mongo/db/catalog/index_catalog_noop.h +++ b/src/mongo/db/catalog/index_catalog_noop.h @@ -206,8 +206,8 @@ public: return ""; } - std::unique_ptr<IndexBuildBlockInterface> createIndexBuildBlock(OperationContext* opCtx, - const BSONObj& spec) override { + std::unique_ptr<IndexBuildBlockInterface> createIndexBuildBlock( + OperationContext* opCtx, const BSONObj& spec, IndexBuildMethod method) override { return {}; } diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index b4fb4a481be..2dcfe29b4e3 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -51,6 +51,7 @@ #include "mongo/db/repl/repl_set_config.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/server_parameters.h" +#include "mongo/db/storage/storage_options.h" #include "mongo/db/storage/write_unit_of_work.h" #include "mongo/logger/redaction.h" #include "mongo/util/assert_util.h" @@ -132,12 +133,16 @@ MultiIndexBlock::~MultiIndexBlock() { } } -void MultiIndexBlock::allowBackgroundBuilding() { - _buildInBackground = true; -} +bool MultiIndexBlock::areHybridIndexBuildsEnabled() { + // The mobile storage engine does not suport dupsAllowed mode on bulk builders, which means that + // it does not support hybrid builds. See SERVER-38550 + if (storageGlobalParams.engine == "mobile") { + return false; + } -void MultiIndexBlock::allowInterruption() { - _allowInterruption = true; + // TODO: SERVER-37272 Disable hybrid if in FCV 4.0. + + return enableHybridIndexBuilds.load(); } void MultiIndexBlock::ignoreUniqueConstraint() { @@ -183,11 +188,25 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(const std::vector<BSONObj if (!status.isOK()) return status; + const bool enableHybrid = areHybridIndexBuildsEnabled(); + + // Parse the specs if this builder is not building hybrid indexes, otherwise log a message. for (size_t i = 0; i < indexSpecs.size(); i++) { BSONObj info = indexSpecs[i]; + if (enableHybrid) { + if (info["background"].isBoolean() && !info["background"].Bool()) { + log() << "ignoring obselete { background: false } index build option because all " + "indexes are built in the background with the hybrid method"; + } + continue; + } - // Any foreground indexes make all indexes be built in the foreground. - _buildInBackground = (_buildInBackground && info["background"].trueValue()); + // A single foreground build makes the entire builder foreground. + if (info["background"].trueValue() && _method != IndexBuildMethod::kForeground) { + _method = IndexBuildMethod::kBackground; + } else { + _method = IndexBuildMethod::kForeground; + } } std::vector<BSONObj> indexInfoObjs; @@ -210,7 +229,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(const std::vector<BSONObj indexInfoObjs.push_back(info); IndexToBuild index; - index.block = _collection->getIndexCatalog()->createIndexBuildBlock(_opCtx, info); + index.block = _collection->getIndexCatalog()->createIndexBuildBlock(_opCtx, info, _method); status = index.block->init(); if (!status.isOK()) return status; @@ -220,9 +239,9 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(const std::vector<BSONObj if (!status.isOK()) return status; - // Foreground builds and background builds using an interceptor can use the bulk builder. + // Hybrid builds and non-hybrid foreground builds use the bulk builder. const bool useBulk = - !_buildInBackground || index.block->getEntry()->indexBuildInterceptor(); + _method == IndexBuildMethod::kHybrid || _method == IndexBuildMethod::kForeground; if (useBulk) { // Bulk build process requires foreground building as it assumes nothing is changing // under it. @@ -233,16 +252,18 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(const std::vector<BSONObj _collection->getIndexCatalog()->prepareInsertDeleteOptions( _opCtx, descriptor, &index.options); - // Allow duplicates when explicitly allowed or an interceptor is installed, which will - // perform duplicate checking itself. + + // Allow duplicates when explicitly allowed or when using hybrid builds, which will perform + // duplicate checking itself. index.options.dupsAllowed = index.options.dupsAllowed || _ignoreUnique || - index.block->getEntry()->indexBuildInterceptor(); + index.block->getEntry()->isHybridBuilding(); if (_ignoreUnique) { index.options.getKeysMode = IndexAccessMethod::GetKeysMode::kRelaxConstraints; } index.options.fromIndexBuilder = true; - log() << "index build: starting on " << ns << " properties: " << descriptor->toString(); + log() << "index build: starting on " << ns << " properties: " << descriptor->toString() + << " using method: " << _method; if (index.bulk) log() << "build may temporarily use up to " << eachIndexBuildMaxMemoryUsageBytes / 1024 / 1024 << " megabytes of RAM"; @@ -255,7 +276,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(const std::vector<BSONObj _indexes.push_back(std::move(index)); } - if (_buildInBackground) + if (isBackgroundBuilding()) _backgroundOperation.reset(new BackgroundOperation(ns)); auto replCoord = repl::ReplicationCoordinator::get(_opCtx); @@ -321,8 +342,7 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { unsigned long long n = 0; PlanExecutor::YieldPolicy yieldPolicy; - if (_buildInBackground) { - invariant(_allowInterruption); + if (isBackgroundBuilding()) { yieldPolicy = PlanExecutor::YIELD_AUTO; } else { yieldPolicy = PlanExecutor::WRITE_CONFLICT_RETRY_ONLY; @@ -335,8 +355,9 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { // with every insert into the index, which resets the collection scan cursor between every call // to getNextSnapshotted(). With read-once cursors enabled, this can evict data we may need to // read again, incurring a significant performance penalty. - // TODO: Enable this for all index builds when SERVER-37268 is complete. - bool readOnce = !_buildInBackground && useReadOnceCursorsForIndexBuilds.load(); + // Note: This does not apply to hybrid builds because they write keys to the external sorter. + bool readOnce = + _method != IndexBuildMethod::kBackground && useReadOnceCursorsForIndexBuilds.load(); _opCtx->recoveryUnit()->setReadOnce(readOnce); Snapshotted<BSONObj> objToIndex; @@ -347,7 +368,8 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { (PlanExecutor::ADVANCED == (state = exec->getNextSnapshotted(&objToIndex, &loc))) || MONGO_FAIL_POINT(hangAfterStartingIndexBuild)) { try { - if (_allowInterruption && !_opCtx->checkForInterruptNoAssert().isOK()) + auto interruptStatus = _opCtx->checkForInterruptNoAssert(); + if (!interruptStatus.isOK()) return _opCtx->checkForInterruptNoAssert(); if (!retries && PlanExecutor::ADVANCED != state) { @@ -369,14 +391,14 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { WriteUnitOfWork wunit(_opCtx); Status ret = insert(objToIndex.value(), loc); - if (_buildInBackground) + if (_method == IndexBuildMethod::kBackground) exec->saveState(); if (!ret.isOK()) { // Fail the index build hard. return ret; } wunit.commit(); - if (_buildInBackground) { + if (_method == IndexBuildMethod::kBackground) { try { exec->restoreState(); // Handles any WCEs internally. } catch (...) { @@ -391,6 +413,10 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { n++; retries = 0; } catch (const WriteConflictException&) { + // Only background builds write inside transactions, and therefore should only ever + // generate WCEs. + invariant(_method == IndexBuildMethod::kBackground); + CurOp::get(_opCtx)->debug().additiveMetrics.incrementWriteConflicts(1); retries++; // logAndBackoff expects this to be 1 on first call. WriteConflictException::logAndBackoff( @@ -421,7 +447,7 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { "'hangAfterStartingIndexBuildUnlocked' failpoint"; MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangAfterStartingIndexBuildUnlocked); - if (_buildInBackground) { + if (isBackgroundBuilding()) { _opCtx->lockState()->restoreLockState(_opCtx, lockInfo); _opCtx->recoveryUnit()->abandonSnapshot(); return Status(ErrorCodes::OperationFailed, @@ -435,7 +461,7 @@ Status MultiIndexBlock::insertAllDocumentsInCollection() { progress->finished(); log() << "index build: collection scan done. scanned " << n << " total records in " - << t.seconds() << " secs"; + << t.seconds() << " seconds"; Status ret = dumpInsertsFromBulk(); if (!ret.isOK()) @@ -500,13 +526,17 @@ Status MultiIndexBlock::dumpInsertsFromBulk(std::set<RecordId>* dupRecords) { // when 'dupRecords' is not used because these two vectors are mutually incompatible. std::vector<BSONObj> dupKeysInserted; + // When dupRecords is passed, 'dupsAllowed' should be passed to reflect whether or not the + // index is unique. + bool dupsAllowed = (dupRecords) ? !_indexes[i].block->getEntry()->descriptor()->unique() + : _indexes[i].options.dupsAllowed; + IndexCatalogEntry* entry = _indexes[i].block->getEntry(); LOG(1) << "index build: inserting from external sorter into index: " << entry->descriptor()->indexName(); Status status = _indexes[i].real->commitBulk(_opCtx, _indexes[i].bulk.get(), - _allowInterruption, - _indexes[i].options.dupsAllowed, + dupsAllowed, dupRecords, (dupRecords) ? nullptr : &dupKeysInserted); if (!status.isOK()) { @@ -532,7 +562,7 @@ Status MultiIndexBlock::dumpInsertsFromBulk(std::set<RecordId>* dupRecords) { return Status::OK(); } -Status MultiIndexBlock::drainBackgroundWritesIfNeeded() { +Status MultiIndexBlock::drainBackgroundWrites() { if (State::kAborted == _getState()) { return {ErrorCodes::IndexBuildAborted, str::stream() << "Index build aborted: " << _abortReason @@ -678,8 +708,8 @@ void MultiIndexBlock::abort(StringData reason) { } -bool MultiIndexBlock::getBuildInBackground() const { - return _buildInBackground; +bool MultiIndexBlock::isBackgroundBuilding() const { + return _method == IndexBuildMethod::kBackground || _method == IndexBuildMethod::kHybrid; } MultiIndexBlock::State MultiIndexBlock::getState_forTest() const { @@ -755,4 +785,20 @@ std::ostream& operator<<(std::ostream& os, const MultiIndexBlock::State& state) MONGO_UNREACHABLE; } +logger::LogstreamBuilder& operator<<(logger::LogstreamBuilder& out, + const IndexBuildMethod& method) { + switch (method) { + case IndexBuildMethod::kHybrid: + out.stream() << "Hybrid"; + break; + case IndexBuildMethod::kBackground: + out.stream() << "Background"; + break; + case IndexBuildMethod::kForeground: + out.stream() << "Foreground"; + break; + } + return out; +} + } // namespace mongo diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h index 7b5b818ee06..525a193bfd5 100644 --- a/src/mongo/db/catalog/multi_index_block.h +++ b/src/mongo/db/catalog/multi_index_block.h @@ -71,21 +71,7 @@ public: MultiIndexBlock(OperationContext* opCtx, Collection* collection); ~MultiIndexBlock(); - /** - * By default we ignore the 'background' flag in specs when building an index. If this is - * called before init(), we will build the indexes in the background as long as *all* specs - * call for background indexing. If any spec calls for foreground indexing all indexes will - * be built in the foreground, as there is no concurrency benefit to building a subset of - * indexes in the background, but there is a performance benefit to building all in the - * foreground. - */ - void allowBackgroundBuilding(); - - /** - * Call this before init() to allow the index build to be interrupted. - * This only affects builds using the insertAllDocumentsInCollection helper. - */ - void allowInterruption(); + static bool areHybridIndexBuildsEnabled(); /** * By default we enforce the 'unique' flag in specs when building an index by failing. @@ -157,7 +143,7 @@ public: * * Must not be in a WriteUnitOfWork. */ - Status drainBackgroundWritesIfNeeded(); + Status drainBackgroundWrites(); /** * Check any constraits that may have been temporarily violated during the index build for @@ -225,7 +211,11 @@ public: */ void abortWithoutCleanup(); - bool getBuildInBackground() const; + /** + * Returns true if this build block supports background writes while building an index. This is + * true for the kHybrid and kBackground methods. + */ + bool isBackgroundBuilding() const; /** * State transitions: @@ -289,8 +279,8 @@ private: Collection* _collection; OperationContext* _opCtx; - bool _buildInBackground = false; - bool _allowInterruption = false; + IndexBuildMethod _method = IndexBuildMethod::kHybrid; + bool _ignoreUnique = false; bool _needToCleanup = true; @@ -306,4 +296,5 @@ private: // The ASSERT_*() macros use this function to print the value of 'state' when the predicate fails. std::ostream& operator<<(std::ostream& os, const MultiIndexBlock::State& state); +logger::LogstreamBuilder& operator<<(logger::LogstreamBuilder& out, const IndexBuildMethod& method); } // namespace mongo diff --git a/src/mongo/db/catalog/multi_index_block.idl b/src/mongo/db/catalog/multi_index_block.idl index 3d1d4a145d7..08b183e3560 100644 --- a/src/mongo/db/catalog/multi_index_block.idl +++ b/src/mongo/db/catalog/multi_index_block.idl @@ -42,6 +42,15 @@ server_parameters: cpp_vartype: AtomicWord<bool> default: true + enableHybridIndexBuilds: + description: "Build all indexes by performing external sorting while accepting concurrent client writes" + set_at: + - runtime + - startup + cpp_varname: enableHybridIndexBuilds + cpp_vartype: AtomicWord<bool> + default: true + maxIndexBuildMemoryUsageMegabytes: description: "Limits the amount of memory that simultaneous foreground index builds on one collection may consume for the duration of the builds" set_at: diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp index d248aeae42d..340a08e4252 100644 --- a/src/mongo/db/cloner.cpp +++ b/src/mongo/db/cloner.cpp @@ -403,7 +403,6 @@ void Cloner::copyIndexes(OperationContext* opCtx, // matches. It also wouldn't work on non-empty collections so we would need both // implementations anyway as long as that is supported. MultiIndexBlock indexer(opCtx, collection); - indexer.allowInterruption(); auto indexCatalog = collection->getIndexCatalog(); auto prunedIndexesToBuild = indexCatalog->removeExistingIndexes(opCtx, indexesToBuild); diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp index ae73af62bb7..44f8bb228d3 100644 --- a/src/mongo/db/commands/create_indexes.cpp +++ b/src/mongo/db/commands/create_indexes.cpp @@ -287,8 +287,6 @@ bool runCreateIndexes(OperationContext* opCtx, MultiIndexBlock indexer(opCtx, collection); - indexer.allowBackgroundBuilding(); - indexer.allowInterruption(); const size_t origSpecsSize = specs.size(); specs = resolveDefaultsAndRemoveExistingIndexes(opCtx, collection, std::move(specs)); @@ -318,7 +316,7 @@ bool runCreateIndexes(OperationContext* opCtx, // If we're a background index, replace exclusive db lock with an intent lock, so that // other readers and writers can proceed during this phase. - if (indexer.getBuildInBackground()) { + if (indexer.isBackgroundBuilding()) { opCtx->recoveryUnit()->abandonSnapshot(); dbLock.relockWithMode(MODE_IX); } @@ -326,7 +324,7 @@ bool runCreateIndexes(OperationContext* opCtx, auto relockOnErrorGuard = makeGuard([&] { // Must have exclusive DB lock before we clean up the index build via the // destructor of 'indexer'. - if (indexer.getBuildInBackground()) { + if (indexer.isBackgroundBuilding()) { try { // This function cannot throw today, but we will preemptively prepare for // that day, to avoid data corruption due to lack of index cleanup. @@ -355,7 +353,7 @@ bool runCreateIndexes(OperationContext* opCtx, opCtx->recoveryUnit()->abandonSnapshot(); Lock::CollectionLock colLock(opCtx->lockState(), ns.ns(), MODE_IS); - uassertStatusOK(indexer.drainBackgroundWritesIfNeeded()); + uassertStatusOK(indexer.drainBackgroundWrites()); } if (MONGO_FAIL_POINT(hangAfterIndexBuildFirstDrain)) { @@ -368,7 +366,7 @@ bool runCreateIndexes(OperationContext* opCtx, opCtx->recoveryUnit()->abandonSnapshot(); Lock::CollectionLock colLock(opCtx->lockState(), ns.ns(), MODE_S); - uassertStatusOK(indexer.drainBackgroundWritesIfNeeded()); + uassertStatusOK(indexer.drainBackgroundWrites()); } if (MONGO_FAIL_POINT(hangAfterIndexBuildSecondDrain)) { @@ -379,7 +377,7 @@ bool runCreateIndexes(OperationContext* opCtx, relockOnErrorGuard.dismiss(); // Need to return db lock back to exclusive, to complete the index build. - if (indexer.getBuildInBackground()) { + if (indexer.isBackgroundBuilding()) { opCtx->recoveryUnit()->abandonSnapshot(); dbLock.relockWithMode(MODE_X); @@ -394,7 +392,7 @@ bool runCreateIndexes(OperationContext* opCtx, // Perform the third and final drain after releasing a shared lock and reacquiring an // exclusive lock on the database. - uassertStatusOK(indexer.drainBackgroundWritesIfNeeded()); + uassertStatusOK(indexer.drainBackgroundWrites()); // This is required before completion. uassertStatusOK(indexer.checkConstraints()); diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp index f078ccf4966..8555132b7ba 100644 --- a/src/mongo/db/db_raii.cpp +++ b/src/mongo/db/db_raii.cpp @@ -164,7 +164,7 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, // the PBWM lock and retry. if (lastAppliedTimestamp) { LOG(2) << "Tried reading at last-applied time: " << *lastAppliedTimestamp - << " on nss: " << nss.ns() << ", but future catalog changes are pending at time " + << " on ns: " << nss.ns() << ", but future catalog changes are pending at time " << *minSnapshot << ". Trying again without reading at last-applied time."; // Destructing the block sets _shouldConflictWithSecondaryBatchApplication back to the // previous value. If the previous value is false (because there is another @@ -172,6 +172,10 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, // does not take the PBWM lock. _shouldNotConflictWithSecondaryBatchApplicationBlock = boost::none; invariant(opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()); + + // Cannot change ReadSource while a RecoveryUnit is active, which may result from + // calling getPointInTimeReadTimestamp(). + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); } diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 081bb8a2ed2..952afd749db 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -183,7 +183,7 @@ Status AbstractIndexAccessMethod::insert(OperationContext* opCtx, const RecordId& loc, const InsertDeleteOptions& options, InsertResult* result) { - invariant(options.fromIndexBuilder || !_btreeState->isBuilding()); + invariant(options.fromIndexBuilder || !_btreeState->isHybridBuilding()); BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); @@ -283,7 +283,7 @@ Status AbstractIndexAccessMethod::remove(OperationContext* opCtx, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) { - invariant(!_btreeState->isBuilding()); + invariant(!_btreeState->isHybridBuilding()); invariant(numDeleted); *numDeleted = 0; @@ -468,7 +468,7 @@ Status AbstractIndexAccessMethod::update(OperationContext* opCtx, const UpdateTicket& ticket, int64_t* numInserted, int64_t* numDeleted) { - invariant(!_btreeState->isBuilding()); + invariant(!_btreeState->isHybridBuilding()); invariant(ticket.newKeys.size() == ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size()); invariant(numInserted); @@ -634,7 +634,6 @@ int64_t AbstractIndexAccessMethod::BulkBuilderImpl::getKeysInserted() const { Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx, BulkBuilder* bulk, - bool mayInterrupt, bool dupsAllowed, set<RecordId>* dupRecords, std::vector<BSONObj>* dupKeysInserted) { @@ -663,9 +662,7 @@ Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx, const Ordering ordering = Ordering::make(_descriptor->keyPattern()); while (it->more()) { - if (mayInterrupt) { - opCtx->checkForInterrupt(); - } + opCtx->checkForInterrupt(); WriteUnitOfWork wunit(opCtx); @@ -722,11 +719,11 @@ Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx, pm.finished(); - log() << "index build: inserted keys from external sorter into index in " << timer.seconds() - << " seconds"; + log() << "index build: inserted " << bulk->getKeysInserted() + << " keys from external sorter into index in " << timer.seconds() << " seconds"; WriteUnitOfWork wunit(opCtx); - SpecialFormatInserted specialFormatInserted = builder->commit(mayInterrupt); + SpecialFormatInserted specialFormatInserted = builder->commit(true /* mayInterrupt */); // It's ok to insert KeyStrings with long TypeBits but we need to mark the feature // tracker bit so that downgrade binary which cannot read the long TypeBits fails to // start up. diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 1edf7fdd209..8931ec64517 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -286,7 +286,6 @@ public: virtual Status commitBulk(OperationContext* opCtx, BulkBuilder* bulk, - bool mayInterrupt, bool dupsAllowed, std::set<RecordId>* dupRecords, std::vector<BSONObj>* dupKeys) = 0; @@ -520,7 +519,6 @@ public: Status commitBulk(OperationContext* opCtx, BulkBuilder* bulk, - bool mayInterrupt, bool dupsAllowed, std::set<RecordId>* dupRecords, std::vector<BSONObj>* dupKeys) final; diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp index c1a2d3d34a6..5769ada5698 100644 --- a/src/mongo/db/index/index_build_interceptor.cpp +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -271,6 +271,7 @@ boost::optional<MultikeyPaths> IndexBuildInterceptor::getMultikeyPaths() const { Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, IndexAccessMethod* indexAccessMethod, const BSONObj* obj, + const InsertDeleteOptions& options, RecordId loc, Op op, int64_t* const numKeysOut) { @@ -281,11 +282,14 @@ Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; - indexAccessMethod->getKeys(*obj, - IndexAccessMethod::GetKeysMode::kEnforceConstraints, - &keys, - &multikeyMetadataKeys, - &multikeyPaths); + // Override key constraints when generating keys for removal. This is the same behavior as + // IndexAccessMethod::remove and only applies to keys that do not apply to a partial filter + // expression. + const auto getKeysMode = op == Op::kInsert + ? options.getKeysMode + : IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered; + indexAccessMethod->getKeys(*obj, getKeysMode, &keys, &multikeyMetadataKeys, &multikeyPaths); + // Maintain parity with IndexAccessMethods handling of key counting. Only include // `multikeyMetadataKeys` when inserting. *numKeysOut = keys.size() + (op == Op::kInsert ? multikeyMetadataKeys.size() : 0); diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h index fbfb8c298d2..9842e999489 100644 --- a/src/mongo/db/index/index_build_interceptor.h +++ b/src/mongo/db/index/index_build_interceptor.h @@ -63,6 +63,7 @@ public: Status sideWrite(OperationContext* opCtx, IndexAccessMethod* indexAccessMethod, const BSONObj* obj, + const InsertDeleteOptions& options, RecordId loc, Op op, int64_t* const numKeysOut); diff --git a/src/mongo/db/index_builder.cpp b/src/mongo/db/index_builder.cpp index 791179e16fb..f2895bce0c4 100644 --- a/src/mongo/db/index_builder.cpp +++ b/src/mongo/db/index_builder.cpp @@ -132,6 +132,10 @@ IndexBuilder::IndexBuilder(const BSONObj& index, IndexBuilder::~IndexBuilder() {} +bool IndexBuilder::canBuildInBackground() { + return MultiIndexBlock::areHybridIndexBuildsEnabled(); +} + std::string IndexBuilder::name() const { return _name; } @@ -168,8 +172,8 @@ void IndexBuilder::run() { // OperationContext::checkForInterrupt() will see the kill status and respond accordingly // (checkForInterrupt() will throw an exception while checkForInterruptNoAssert() returns // an error Status). - Status status = - opCtx->runWithoutInterruption([&, this] { return _build(opCtx.get(), db, true, &dlk); }); + Status status = opCtx->runWithoutInterruption( + [&, this] { return _build(opCtx.get(), db, true /* buildInBackground */, &dlk); }); if (!status.isOK()) { error() << "IndexBuilder could not build index: " << redact(status); fassert(28555, ErrorCodes::isInterruption(status.code())); @@ -177,7 +181,7 @@ void IndexBuilder::run() { } Status IndexBuilder::buildInForeground(OperationContext* opCtx, Database* db) const { - return _build(opCtx, db, false, NULL); + return _build(opCtx, db, false /*buildInBackground */, nullptr); } void IndexBuilder::waitForBgIndexStarting() { @@ -193,11 +197,11 @@ namespace { Status _failIndexBuild(OperationContext* opCtx, MultiIndexBlock& indexer, Lock::DBLock* dbLock, - Status status, - bool allowBackgroundBuilding) { + Status status) { invariant(status.code() != ErrorCodes::WriteConflict); - if (!allowBackgroundBuilding) { + // Only background builds pass a dbLock. + if (!dbLock) { return status; } @@ -219,7 +223,7 @@ Status _failIndexBuild(OperationContext* opCtx, Status IndexBuilder::_build(OperationContext* opCtx, Database* db, - bool allowBackgroundBuilding, + bool buildInBackground, Lock::DBLock* dbLock) const try { const NamespaceString ns(_index["ns"].String()); @@ -246,9 +250,13 @@ Status IndexBuilder::_build(OperationContext* opCtx, } MultiIndexBlock indexer(opCtx, coll); - indexer.allowInterruption(); - if (allowBackgroundBuilding) - indexer.allowBackgroundBuilding(); + + // Ignore uniqueness constraint violations when relaxed (on secondaries). Secondaries can + // complete index builds in the middle of batches, which creates the potential for finding + // duplicate key violations where there otherwise would be none at consistent states. + if (_indexConstraints == IndexConstraints::kRelax) { + indexer.ignoreUniqueConstraint(); + } Status status = Status::OK(); { @@ -261,27 +269,23 @@ Status IndexBuilder::_build(OperationContext* opCtx, (status == ErrorCodes::IndexOptionsConflict && _indexConstraints == IndexConstraints::kRelax)) { LOG(1) << "Ignoring indexing error: " << redact(status); - if (allowBackgroundBuilding) { - // Must set this in case anyone is waiting for this build. + + // Must set this in case anyone is waiting for this build. + if (dbLock) { _setBgIndexStarting(); } return Status::OK(); } if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } - // Ignore uniqueness constraint violations when relaxed (on secondaries). Secondaries can - // complete index builds in the middle of batches, which creates the potential for finding - // duplicate key violations where there otherwise would be none at consistent states. - if (_indexConstraints == IndexConstraints::kRelax) { - indexer.ignoreUniqueConstraint(); - } + if (buildInBackground) { + invariant(dbLock); - if (allowBackgroundBuilding) { _setBgIndexStarting(); - invariant(dbLock); opCtx->recoveryUnit()->abandonSnapshot(); + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); dbLock->relockWithMode(MODE_IX); } @@ -294,45 +298,46 @@ Status IndexBuilder::_build(OperationContext* opCtx, // _failIndexBuild upgrades our database lock, so we must take care to release our Collection IX // lock first. if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } { // Perform the first drain while holding an intent lock. Lock::CollectionLock collLock(opCtx->lockState(), ns.ns(), MODE_IX); - status = indexer.drainBackgroundWritesIfNeeded(); + status = indexer.drainBackgroundWrites(); } if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } // Perform the second drain while stopping inserts into the collection. { Lock::CollectionLock colLock(opCtx->lockState(), ns.ns(), MODE_S); - status = indexer.drainBackgroundWritesIfNeeded(); + status = indexer.drainBackgroundWrites(); } if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } - if (allowBackgroundBuilding) { + if (buildInBackground) { opCtx->recoveryUnit()->abandonSnapshot(); + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); dbLock->relockWithMode(MODE_X); } // Perform the third and final drain after releasing a shared lock and reacquiring an // exclusive lock on the database. - status = indexer.drainBackgroundWritesIfNeeded(); + status = indexer.drainBackgroundWrites(); if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } // Only perform constraint checking when enforced (on primaries). if (_indexConstraints == IndexConstraints::kEnforce) { status = indexer.checkConstraints(); if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } } @@ -370,10 +375,10 @@ Status IndexBuilder::_build(OperationContext* opCtx, return Status::OK(); }); if (!status.isOK()) { - return _failIndexBuild(opCtx, indexer, dbLock, status, allowBackgroundBuilding); + return _failIndexBuild(opCtx, indexer, dbLock, status); } - if (allowBackgroundBuilding) { + if (buildInBackground) { invariant(opCtx->lockState()->isDbLockedForMode(ns.db(), MODE_X), str::stream() << "Database not locked in exclusive mode after committing " "background index: " diff --git a/src/mongo/db/index_builder.h b/src/mongo/db/index_builder.h index 0f41d2af579..478083d6174 100644 --- a/src/mongo/db/index_builder.h +++ b/src/mongo/db/index_builder.h @@ -92,6 +92,9 @@ public: */ virtual std::string name() const; + /** + * Instead of building the index in a background thread, build on the current thread. + */ Status buildInForeground(OperationContext* opCtx, Database* db) const; /** @@ -101,12 +104,13 @@ public: */ static void waitForBgIndexStarting(); + static bool canBuildInBackground(); + private: Status _build(OperationContext* opCtx, Database* db, - bool allowBackgroundBuilding, + bool buildInBackground, Lock::DBLock* dbLock) const; - const BSONObj _index; const IndexConstraints _indexConstraints; const ReplicatedWrites _replicatedWrites; diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp index b529bfc33be..bddf1243ec4 100644 --- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp +++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp @@ -196,7 +196,6 @@ Status CollectionBulkLoaderImpl::commit() { if (!status.isOK()) { return status; } - for (auto&& it : dups) { writeConflictRetry( _opCtx.get(), "CollectionBulkLoaderImpl::commit", _nss.ns(), [this, &it] { @@ -210,6 +209,14 @@ Status CollectionBulkLoaderImpl::commit() { wunit.commit(); }); } + status = _idIndexBlock->drainBackgroundWrites(); + if (!status.isOK()) { + return status; + } + status = _idIndexBlock->checkConstraints(); + if (!status.isOK()) { + return status; + } // Commit _id index, without dups. status = writeConflictRetry( diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 27a1f77282c..be070da1b4b 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -222,10 +222,6 @@ bool shouldBuildInForeground(OperationContext* opCtx, return true; } - if (!index["background"].trueValue()) { - return true; - } - // Primaries should build indexes in the foreground because failures cannot be handled // by the background thread. const bool isPrimary = @@ -235,6 +231,13 @@ bool shouldBuildInForeground(OperationContext* opCtx, << " in a background thread because this is a primary"; return true; } + + // Without hybrid builds enabled, indexes should build with the behavior of their specs. + bool hybrid = IndexBuilder::canBuildInBackground(); + if (!hybrid) { + return !index["background"].trueValue(); + } + return false; } @@ -273,12 +276,12 @@ void createIndexForApplyOps(OperationContext* opCtx, OpCounters* opCounters = opCtx->writesAreReplicated() ? &globalOpCounters : &replOpCounters; opCounters->gotInsert(); - const IndexBuilder::IndexConstraints constraints = + const auto constraints = ReplicationCoordinator::get(opCtx)->shouldRelaxIndexConstraints(opCtx, indexNss) ? IndexBuilder::IndexConstraints::kRelax : IndexBuilder::IndexConstraints::kEnforce; - const IndexBuilder::ReplicatedWrites replicatedWrites = opCtx->writesAreReplicated() + const auto replicatedWrites = opCtx->writesAreReplicated() ? IndexBuilder::ReplicatedWrites::kReplicated : IndexBuilder::ReplicatedWrites::kUnreplicated; diff --git a/src/mongo/db/service_context.cpp b/src/mongo/db/service_context.cpp index da1040ca6d8..f0b2dbbf44a 100644 --- a/src/mongo/db/service_context.cpp +++ b/src/mongo/db/service_context.cpp @@ -225,6 +225,9 @@ void ServiceContext::ClientDeleter::operator()(Client* client) const { { stdx::lock_guard<stdx::mutex> lk(service->_mutex); invariant(service->_clients.erase(client)); + if (service->_clients.empty()) { + service->_clientsEmptyCondVar.notify_all(); + } } onDestroy(client, service->_clientObservers); delete client; @@ -342,6 +345,14 @@ void ServiceContext::waitForStartupComplete() { _startupCompleteCondVar.wait(lk, [this] { return _startupComplete; }); } +void ServiceContext::waitForClientsToFinish() { + stdx::unique_lock<stdx::mutex> lk(_mutex); + for (const auto& client : _clients) { + log() << "Waiting for client " << client->desc() << " to exit"; + } + _clientsEmptyCondVar.wait(lk, [this] { return _clients.empty(); }); +} + void ServiceContext::notifyStartupComplete() { stdx::unique_lock<stdx::mutex> lk(_mutex); _startupComplete = true; diff --git a/src/mongo/db/service_context.h b/src/mongo/db/service_context.h index 32f2351444c..cae0c488712 100644 --- a/src/mongo/db/service_context.h +++ b/src/mongo/db/service_context.h @@ -427,6 +427,14 @@ public: */ void waitForStartupComplete(); + /** + * Wait for all clients to complete and unregister themselves. Shutting-down, unmanaged threads + * may potentially be unable to acquire the service context mutex before the service context + * itself does on destruction. + * Used for testing. + */ + void waitForClientsToFinish(); + /* * Marks initialization as complete and all transport layers as started. */ @@ -557,6 +565,7 @@ private: */ std::vector<ClientObserverHolder> _clientObservers; ClientSet _clients; + stdx::condition_variable _clientsEmptyCondVar; /** * The registered OpObserver. diff --git a/src/mongo/db/service_context_test_fixture.cpp b/src/mongo/db/service_context_test_fixture.cpp index 859c1b3a32c..9666cb770d9 100644 --- a/src/mongo/db/service_context_test_fixture.cpp +++ b/src/mongo/db/service_context_test_fixture.cpp @@ -48,6 +48,9 @@ ScopedGlobalServiceContextForTest::ScopedGlobalServiceContextForTest() { } ScopedGlobalServiceContextForTest::~ScopedGlobalServiceContextForTest() { + if (hasGlobalServiceContext()) { + getGlobalServiceContext()->waitForClientsToFinish(); + } setGlobalServiceContext({}); } diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp index 0b2e344e84e..1764b322df8 100644 --- a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp +++ b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp @@ -70,7 +70,7 @@ public: } const std::string& getIdent() const override { - MONGO_UNREACHABLE; + return _ident; } virtual void setCappedCallback(CappedCallback*) {} @@ -182,6 +182,7 @@ private: CollectionOptions _options; long long _numInserts; BSONObj _dummy; + std::string _ident; }; class DevNullSortedDataBuilderInterface : public SortedDataBuilderInterface { diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index 183b0ec857f..42f91ad0b0b 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -194,13 +194,15 @@ public: * engine's last applied timestamp. * - when using ReadSource::kAllCommittedSnapshot, the timestamp chosen using the storage * engine's all-committed timestamp. - * - when using ReadSource::kLastApplied, the last applied timestamp at which the current - * storage transaction was opened, if one is open. + * - when using ReadSource::kLastApplied, the timestamp chosen using the storage engine's last + * applied timestamp. Can return boost::none if no timestamp has been established. * - when using ReadSource::kMajorityCommitted, the majority committed timestamp chosen by the * storage engine after a transaction has been opened or after a call to * obtainMajorityCommittedSnapshot(). + * + * This may passively start a storage engine transaction to establish a read timestamp. */ - virtual boost::optional<Timestamp> getPointInTimeReadTimestamp() const { + virtual boost::optional<Timestamp> getPointInTimeReadTimestamp() { return boost::none; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index ddb9f38c2c2..e72af03ba2e 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -441,21 +441,43 @@ Status WiredTigerRecoveryUnit::obtainMajorityCommittedSnapshot() { return Status::OK(); } -boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() const { - if (_timestampReadSource == ReadSource::kProvided || - _timestampReadSource == ReadSource::kLastAppliedSnapshot || - _timestampReadSource == ReadSource::kAllCommittedSnapshot) { +boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() { + // After a ReadSource has been set on this RecoveryUnit, callers expect that this method returns + // the read timestamp that will be used for current or future transactions. Because callers use + // this timestamp to inform visiblity of operations, it is therefore necessary to open a + // transaction to establish a read timestamp, but only for ReadSources that are expected to have + // read timestamps. + if (_timestampReadSource == ReadSource::kUnset || + _timestampReadSource == ReadSource::kNoTimestamp) { + return boost::none; + } + + // This ReadSource depends on a previous call to obtainMajorityCommittedSnapshot() and does not + // require an open transaction to return a valid timestamp. + if (_timestampReadSource == ReadSource::kMajorityCommitted) { + invariant(!_majorityCommittedSnapshot.isNull()); + return _majorityCommittedSnapshot; + } + + // The read timestamp is set by the user and does not require a transaction to be open. + if (_timestampReadSource == ReadSource::kProvided) { invariant(!_readAtTimestamp.isNull()); return _readAtTimestamp; } - if (_timestampReadSource == ReadSource::kLastApplied && !_readAtTimestamp.isNull()) { + // The following ReadSources can only establish a read timestamp when a transaction is opened. + getSession(); + + if (_timestampReadSource == ReadSource::kLastAppliedSnapshot || + _timestampReadSource == ReadSource::kAllCommittedSnapshot) { + invariant(!_readAtTimestamp.isNull()); return _readAtTimestamp; } - if (_timestampReadSource == ReadSource::kMajorityCommitted) { - invariant(!_majorityCommittedSnapshot.isNull()); - return _majorityCommittedSnapshot; + // The lastApplied timestamp is not always available, so it is not possible to invariant that + // it exists as other ReadSources do. + if (_timestampReadSource == ReadSource::kLastApplied && !_readAtTimestamp.isNull()) { + return _readAtTimestamp; } return boost::none; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index 421552c47e9..729e708702b 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -116,7 +116,7 @@ public: Status obtainMajorityCommittedSnapshot() override; - boost::optional<Timestamp> getPointInTimeReadTimestamp() const override; + boost::optional<Timestamp> getPointInTimeReadTimestamp() override; SnapshotId getSnapshotId() const override; diff --git a/src/mongo/dbtests/dbtests.cpp b/src/mongo/dbtests/dbtests.cpp index 4b00ad43143..4f1d6fe71e1 100644 --- a/src/mongo/dbtests/dbtests.cpp +++ b/src/mongo/dbtests/dbtests.cpp @@ -116,6 +116,10 @@ Status createIndexFromSpec(OperationContext* opCtx, StringData ns, const BSONObj if (!status.isOK()) { return status; } + status = indexer.checkConstraints(); + if (!status.isOK()) { + return status; + } WriteUnitOfWork wunit(opCtx); ASSERT_OK(indexer.commit()); wunit.commit(); diff --git a/src/mongo/dbtests/indexupdatetests.cpp b/src/mongo/dbtests/indexupdatetests.cpp index 040eec76c91..78959428c8f 100644 --- a/src/mongo/dbtests/indexupdatetests.cpp +++ b/src/mongo/dbtests/indexupdatetests.cpp @@ -74,11 +74,9 @@ public: protected: Status createIndex(const std::string& dbname, const BSONObj& indexSpec); - bool buildIndexInterrupted(const BSONObj& key, bool allowInterruption) { + bool buildIndexInterrupted(const BSONObj& key) { try { MultiIndexBlock indexer(&_opCtx, collection()); - if (allowInterruption) - indexer.allowInterruption(); uassertStatusOK(indexer.init(key)); uassertStatusOK(indexer.insertAllDocumentsInCollection()); @@ -130,8 +128,6 @@ public: } MultiIndexBlock indexer(&_opCtx, coll); - indexer.allowBackgroundBuilding(); - indexer.allowInterruption(); indexer.ignoreUniqueConstraint(); const BSONObj spec = BSON("name" @@ -184,8 +180,6 @@ public: } MultiIndexBlock indexer(&_opCtx, coll); - indexer.allowBackgroundBuilding(); - indexer.allowInterruption(); // indexer.ignoreUniqueConstraint(); // not calling this const BSONObj spec = BSON("name" @@ -206,16 +200,10 @@ public: coll->getIndexCatalog()->findIndexByName(&_opCtx, "a", true /* includeUnfinished */); ASSERT(desc); - Status status = indexer.insertAllDocumentsInCollection(); - if (!coll->getIndexCatalog()->getEntry(desc)->isBuilding()) { - ASSERT_EQUALS(status.code(), ErrorCodes::DuplicateKey); - return; - } - - // Hybrid index builds, with an interceptor, check duplicates explicitly. - ASSERT_OK(status); + // Hybrid index builds check duplicates explicitly. + ASSERT_OK(indexer.insertAllDocumentsInCollection()); - status = indexer.checkConstraints(); + auto status = indexer.checkConstraints(); ASSERT_EQUALS(status.code(), ErrorCodes::DuplicateKey); } }; @@ -249,7 +237,7 @@ public: << "v" << static_cast<int>(kIndexVersion)); // The call is interrupted because mayInterrupt == true. - ASSERT_TRUE(buildIndexInterrupted(indexInfo, true)); + ASSERT_TRUE(buildIndexInterrupted(indexInfo)); // only want to interrupt the index build getGlobalServiceContext()->unsetKillAllOperations(); // The new index is not listed in the index catalog because the index build failed. @@ -257,42 +245,6 @@ public: } }; -/** Index creation is not killed if mayInterrupt is false. */ -class InsertBuildIndexInterruptDisallowed : public IndexBuildBase { -public: - void run() { - // Create a new collection. - Database* db = _ctx.db(); - Collection* coll; - { - WriteUnitOfWork wunit(&_opCtx); - db->dropCollection(&_opCtx, _ns).transitional_ignore(); - coll = db->createCollection(&_opCtx, _ns); - coll->getIndexCatalog()->dropAllIndexes(&_opCtx, true); - // Insert some documents. - int32_t nDocs = 1000; - OpDebug* const nullOpDebug = nullptr; - for (int32_t i = 0; i < nDocs; ++i) { - coll->insertDocument(&_opCtx, InsertStatement(BSON("a" << i)), nullOpDebug, true) - .transitional_ignore(); - } - wunit.commit(); - } - // Request an interrupt. - getGlobalServiceContext()->setKillAllOperations(); - BSONObj indexInfo = BSON("key" << BSON("a" << 1) << "ns" << _ns << "name" - << "a_1" - << "v" - << static_cast<int>(kIndexVersion)); - // The call is not interrupted because mayInterrupt == false. - ASSERT_FALSE(buildIndexInterrupted(indexInfo, false)); - // only want to interrupt the index build - getGlobalServiceContext()->unsetKillAllOperations(); - // The new index is listed in the index catalog because the index build completed. - ASSERT(coll->getIndexCatalog()->findIndexByName(&_opCtx, "a_1")); - } -}; - /** Index creation is killed when building the _id index. */ class InsertBuildIdIndexInterrupt : public IndexBuildBase { public: @@ -328,8 +280,7 @@ public: << "_id_" << "v" << static_cast<int>(kIndexVersion)); - // The call is interrupted because mayInterrupt == true. - ASSERT_TRUE(buildIndexInterrupted(indexInfo, true)); + ASSERT_TRUE(buildIndexInterrupted(indexInfo)); // only want to interrupt the index build getGlobalServiceContext()->unsetKillAllOperations(); // The new index is not listed in the index catalog because the index build failed. @@ -337,50 +288,6 @@ public: } }; -/** Index creation is not killed when building the _id index if mayInterrupt is false. */ -class InsertBuildIdIndexInterruptDisallowed : public IndexBuildBase { -public: - void run() { - // Skip the test if the storage engine doesn't support capped collections. - if (!getGlobalServiceContext()->getStorageEngine()->supportsCappedCollections()) { - return; - } - - // Recreate the collection as capped, without an _id index. - Database* db = _ctx.db(); - Collection* coll; - { - WriteUnitOfWork wunit(&_opCtx); - db->dropCollection(&_opCtx, _ns).transitional_ignore(); - CollectionOptions options; - options.capped = true; - options.cappedSize = 10 * 1024; - coll = db->createCollection(&_opCtx, _ns, options); - coll->getIndexCatalog()->dropAllIndexes(&_opCtx, true); - // Insert some documents. - int32_t nDocs = 1000; - OpDebug* const nullOpDebug = nullptr; - for (int32_t i = 0; i < nDocs; ++i) { - coll->insertDocument(&_opCtx, InsertStatement(BSON("_id" << i)), nullOpDebug, true) - .transitional_ignore(); - } - wunit.commit(); - } - // Request an interrupt. - getGlobalServiceContext()->setKillAllOperations(); - BSONObj indexInfo = BSON("key" << BSON("_id" << 1) << "ns" << _ns << "name" - << "_id_" - << "v" - << static_cast<int>(kIndexVersion)); - // The call is not interrupted because mayInterrupt == false. - ASSERT_FALSE(buildIndexInterrupted(indexInfo, false)); - // only want to interrupt the index build - getGlobalServiceContext()->unsetKillAllOperations(); - // The new index is listed in the index catalog because the index build succeeded. - ASSERT(coll->getIndexCatalog()->findIndexByName(&_opCtx, "_id_")); - } -}; - Status IndexBuildBase::createIndex(const std::string& dbname, const BSONObj& indexSpec) { MultiIndexBlock indexer(&_opCtx, collection()); Status status = indexer.init(indexSpec).getStatus(); @@ -777,13 +684,11 @@ public: // tests are disabled. add<InsertBuildIgnoreUnique<true>>(); add<InsertBuildIgnoreUnique<false>>(); + add<InsertBuildEnforceUnique<true>>(); + add<InsertBuildEnforceUnique<false>>(); } - add<InsertBuildEnforceUnique<true>>(); - add<InsertBuildEnforceUnique<false>>(); add<InsertBuildIndexInterrupt>(); - add<InsertBuildIndexInterruptDisallowed>(); add<InsertBuildIdIndexInterrupt>(); - add<InsertBuildIdIndexInterruptDisallowed>(); add<SameSpecDifferentOption>(); add<SameSpecSameOptions>(); add<DifferentSpecSameName>(); diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index a3b701d51e9..f9c38721776 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -2255,10 +2255,9 @@ public: * indexes. Specifically, when a primary builds an index from an oplog entry which can happen on * primary catch-up, drain, a secondary step-up or `applyOps`. * - * This test will exercise IndexBuilder code on primaries by performing a background index build - * via an `applyOps` command. + * This test will exercise IndexBuilder code on primaries by performing an index build via an + * `applyOps` command. */ -template <bool Foreground> class TimestampIndexBuilderOnPrimary : public StorageTimestampTest { public: void run() { @@ -2295,9 +2294,8 @@ public: } { - // Create a background index via `applyOps`. We will timestamp the beginning at - // `startBuildTs` and the end, due to manipulation of the logical clock, should be - // timestamped at `endBuildTs`. + // Create an index via `applyOps`. Because this is a primary, the index build is + // timestamped with `startBuildTs`. const auto beforeBuildTime = _clock->reserveTicks(2); const auto startBuildTs = beforeBuildTime.addTicks(1).asTimestamp(); @@ -2316,9 +2314,7 @@ public: << "key" << BSON("field" << 1) << "name" - << "field_1" - << "background" - << (Foreground ? false : true)); + << "field_1"); auto createIndexOp = BSON("ts" << startBuildTs << "t" << 1LL << "h" << 0xBEEFBEEFLL << "v" << 2 << "op" @@ -2339,7 +2335,8 @@ public: kvCatalog, "", indexIdent, beforeBuildTime.asTimestamp()); assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, startBuildTs); - // In all cases, the index build should start and finish at `startBuildTs`. + // On a primary, the index build should start and finish at `startBuildTs` because it is + // built in the foreground. ASSERT_TRUE( getIndexMetaData(getMetaDataAtTime(kvCatalog, nss, startBuildTs), "field_1").ready); } @@ -2705,9 +2702,7 @@ public: add<TimestampMultiIndexBuilds>(); add<TimestampMultiIndexBuildsDuringRename>(); add<TimestampIndexDrops>(); - // TimestampIndexBuilderOnPrimary<Background> - add<TimestampIndexBuilderOnPrimary<false>>(); - add<TimestampIndexBuilderOnPrimary<true>>(); + add<TimestampIndexBuilderOnPrimary>(); add<SecondaryReadsDuringBatchApplicationAreAllowed>(); add<ViewCreationSeparateTransaction>(); add<CreateCollectionWithSystemIndex>(); diff --git a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp index 3b0456a1224..6e1743a0263 100644 --- a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp +++ b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp @@ -195,8 +195,6 @@ protected: auto coll = autoColl.getCollection(); MultiIndexBlock indexer(opCtx(), coll); - indexer.allowBackgroundBuilding(); - indexer.allowInterruption(); // Initialize the index builder and add all documents currently in the collection. ASSERT_OK(indexer.init(indexSpec).getStatus()); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 6783114e252..8f1522b7618 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -1482,6 +1482,16 @@ var ReplSetTest = function(opts) { }, "awaiting replication", timeout); }; + // TODO: SERVER-38961 Remove when simultaneous index builds complete. + this.waitForAllIndexBuildsToFinish = function(dbName, collName) { + // Run a no-op command and wait for it to be applied on secondaries. Due to the asynchronous + // completion nature of indexes on secondaries, we can guarantee an index build is complete + // on all secondaries once all secondaries have applied this collMod command. + assert.commandWorked(this.getPrimary().getDB(dbName).runCommand( + {collMod: collName, usePowerOf2Sizes: true})); + this.awaitReplication(); + }; + this.getHashesUsingSessions = function(sessions, dbName, { filterCapped: filterCapped = true, filterMapReduce: filterMapReduce = true, |