/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#include "mongo/platform/basic.h"

#include "mongo/db/index/btree_access_method.h"

#include <utility>
#include <vector>

#include "mongo/base/error_codes.h"
#include "mongo/base/status.h"
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/client.h"
#include "mongo/db/commands/server_status.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/curop.h"
#include "mongo/db/index/bulk_builder_common.h"
#include "mongo/db/index/index_build_interceptor.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/keypattern.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/storage/execution_context.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/logv2/log.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/util/progress_meter.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/stacktrace.h"

#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand

namespace mongo {

using std::pair;

using IndexVersion = IndexDescriptor::IndexVersion;

MONGO_FAIL_POINT_DEFINE(hangIndexBuildDuringBulkLoadPhase);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildDuringBulkLoadPhaseSecond);
MONGO_FAIL_POINT_DEFINE(hangDuringIndexBuildBulkLoadYield);
MONGO_FAIL_POINT_DEFINE(hangDuringIndexBuildBulkLoadYieldSecond);

namespace {

/**
 * Metrics for index bulk builder operations. Intended to support index build diagnostics
 * during the following scenarios:
 * - createIndex commands;
 * - collection cloning during initial sync; and
 * - resuming index builds at startup.
 *
 * Also includes statistics for disk usage (by the external sorter) for index builds that
 * do not fit in memory.
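 *
 * The section reported under serverStatus has the shape (values vary):
 *   indexBulkBuilder: {count, resumed, filesOpenedForExternalSort, filesClosedForExternalSort,
 *                      spilledRanges, bytesSpilled}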
 */
class IndexBulkBuilderSSS : public ServerStatusSection {
public:
    IndexBulkBuilderSSS() : ServerStatusSection("indexBulkBuilder") {}

    bool includeByDefault() const final {
        return true;
    }

    void addRequiredPrivileges(std::vector<Privilege>* out) final {}

    BSONObj generateSection(OperationContext* opCtx, const BSONElement& configElement) const final {
        BSONObjBuilder builder;
        builder.append("count", count.loadRelaxed());
        builder.append("resumed", resumed.loadRelaxed());
        builder.append("filesOpenedForExternalSort", sorterFileStats.opened.loadRelaxed());
        builder.append("filesClosedForExternalSort", sorterFileStats.closed.loadRelaxed());
        builder.append("spilledRanges", sorterTracker.spilledRanges.loadRelaxed());
        builder.append("bytesSpilled", sorterTracker.bytesSpilled.loadRelaxed());
        return builder.obj();
    }

    // Number of instances of the bulk builder created.
    AtomicWord<long long> count;

    // Number of times the bulk builder was created for a resumable index build.
    // This value should not exceed 'count'.
    AtomicWord<long long> resumed;

    // Sorter statistics that are aggregate of all sorters.
    SorterTracker sorterTracker;

    // Number of times the external sorter opened/closed a file handle to spill data to disk.
    // This pair of counters in aggregate indicate the number of open file handles used by
    // the external sorter and may be useful in diagnosing situations where the process is
    // close to exhausting this finite resource.
    SorterFileStats sorterFileStats = {&sorterTracker};
} indexBulkBuilderSSS;

/**
 * Returns true if at least one prefix of any of the indexed fields causes the index to be
 * multikey, and returns false otherwise. This function returns false if the 'multikeyPaths'
 * vector is empty.
 */
bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) {
    return std::any_of(multikeyPaths.cbegin(),
                       multikeyPaths.cend(),
                       [](const MultikeyComponents& components) { return !components.empty(); });
}

SortOptions makeSortOptions(size_t maxMemoryUsageBytes, StringData dbName, SorterFileStats* stats) {
    return SortOptions()
        .TempDir(storageGlobalParams.dbpath + "/_tmp")
        .ExtSortAllowed()
        .MaxMemoryUsageBytes(maxMemoryUsageBytes)
        .FileStats(stats)
        .Tracker(&indexBulkBuilderSSS.sorterTracker)
        .DBName(dbName.toString());
}

MultikeyPaths createMultikeyPaths(const std::vector<MultikeyPath>& multikeyPathsVec) {
    MultikeyPaths multikeyPaths;
    for (const auto& multikeyPath : multikeyPathsVec) {
        multikeyPaths.emplace_back(boost::container::ordered_unique_range_t(),
                                   multikeyPath.getMultikeyComponents().begin(),
                                   multikeyPath.getMultikeyComponents().end());
    }

    return multikeyPaths;
}

}  // namespace

struct BtreeExternalSortComparison {
    typedef std::pair<KeyString::Value, mongo::NullValue> Data;
    int operator()(const Data& l, const Data& r) const {
        return l.first.compare(r.first);
    }
};

SortedDataIndexAccessMethod::SortedDataIndexAccessMethod(const IndexCatalogEntry* btreeState,
                                                         std::unique_ptr<SortedDataInterface> btree)
    : _indexCatalogEntry(btreeState),
      _descriptor(btreeState->descriptor()),
      _newInterface(std::move(btree)) {
    verify(IndexDescriptor::isIndexVersionSupported(_descriptor->version()));
}

Status SortedDataIndexAccessMethod::insert(OperationContext* opCtx,
                                           SharedBufferFragmentBuilder& pooledBuilder,
                                           const CollectionPtr& coll,
                                           const std::vector<BsonRecord>& bsonRecords,
                                           const InsertDeleteOptions& options,
                                           int64_t* numInserted) {
    for (const auto& bsonRecord : bsonRecords) {
        invariant(bsonRecord.id != RecordId());

        if (!bsonRecord.ts.isNull()) {
            Status status = opCtx->recoveryUnit()->setTimestamp(bsonRecord.ts);
            if (!status.isOK())
                return status;
        }

        auto& executionCtx = StorageExecutionContext::get(opCtx);
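        // For example, with an index spec {a: 1} and a document {a: [1, 2]}, getKeys() below
        // produces two index keys and records path component 0 of field 'a' as multikey.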
        auto keys = executionCtx.keys();
        auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
        auto multikeyPaths = executionCtx.multikeyPaths();

        getKeys(opCtx,
                coll,
                pooledBuilder,
                *bsonRecord.docPtr,
                options.getKeysMode,
                GetKeysContext::kAddingKeys,
                keys.get(),
                multikeyMetadataKeys.get(),
                multikeyPaths.get(),
                bsonRecord.id);

        Status status = _indexKeysOrWriteToSideTable(opCtx,
                                                     coll,
                                                     *keys,
                                                     *multikeyMetadataKeys,
                                                     *multikeyPaths,
                                                     *bsonRecord.docPtr,
                                                     options,
                                                     numInserted);
        if (!status.isOK()) {
            return status;
        }
    }

    return Status::OK();
}

void SortedDataIndexAccessMethod::remove(OperationContext* opCtx,
                                         SharedBufferFragmentBuilder& pooledBuilder,
                                         const CollectionPtr& coll,
                                         const BSONObj& obj,
                                         const RecordId& loc,
                                         bool logIfError,
                                         const InsertDeleteOptions& options,
                                         int64_t* numDeleted,
                                         CheckRecordId checkRecordId) {
    auto& executionCtx = StorageExecutionContext::get(opCtx);

    // There's no need to compute the prefixes of the indexed fields that cause the index to be
    // multikey when removing a document since the index metadata isn't updated when keys are
    // deleted.
    auto keys = executionCtx.keys();
    getKeys(opCtx,
            coll,
            pooledBuilder,
            obj,
            InsertDeleteOptions::ConstraintEnforcementMode::kRelaxConstraintsUnfiltered,
            GetKeysContext::kRemovingKeys,
            keys.get(),
            nullptr,
            nullptr,
            loc);

    _unindexKeysOrWriteToSideTable(
        opCtx, coll->ns(), *keys, obj, logIfError, numDeleted, options, checkRecordId);
}

Status SortedDataIndexAccessMethod::update(OperationContext* opCtx,
                                           SharedBufferFragmentBuilder& pooledBufferBuilder,
                                           const BSONObj& oldDoc,
                                           const BSONObj& newDoc,
                                           const RecordId& loc,
                                           const CollectionPtr& coll,
                                           const InsertDeleteOptions& options,
                                           int64_t* numInserted,
                                           int64_t* numDeleted) {
    UpdateTicket updateTicket;
    prepareUpdate(opCtx, coll, oldDoc, newDoc, loc, options, &updateTicket);

    auto status = Status::OK();
    if (_indexCatalogEntry->isHybridBuilding() || !_indexCatalogEntry->isReady(opCtx)) {
        bool logIfError = false;
        _unindexKeysOrWriteToSideTable(opCtx,
                                       coll->ns(),
                                       updateTicket.removed,
                                       oldDoc,
                                       logIfError,
                                       numDeleted,
                                       options,
                                       CheckRecordId::Off);
        return _indexKeysOrWriteToSideTable(opCtx,
                                            coll,
                                            updateTicket.added,
                                            updateTicket.newMultikeyMetadataKeys,
                                            updateTicket.newMultikeyPaths,
                                            newDoc,
                                            options,
                                            numInserted);
    } else {
        return doUpdate(opCtx, coll, updateTicket, numInserted, numDeleted);
    }
}

Status SortedDataIndexAccessMethod::insertKeysAndUpdateMultikeyPaths(
    OperationContext* opCtx,
    const CollectionPtr& coll,
    const KeyStringSet& keys,
    const KeyStringSet& multikeyMetadataKeys,
    const MultikeyPaths& multikeyPaths,
    const InsertDeleteOptions& options,
    KeyHandlerFn&& onDuplicateKey,
    int64_t* numInserted) {
    // Insert the specified data keys into the index.
    auto status = insertKeys(opCtx, coll, keys, options, std::move(onDuplicateKey), numInserted);
    if (!status.isOK()) {
        return status;
    }

    // If these keys should cause the index to become multikey, pass them into the catalog.
    if (shouldMarkIndexAsMultikey(keys.size(), multikeyMetadataKeys, multikeyPaths)) {
        _indexCatalogEntry->setMultikey(opCtx, coll, multikeyMetadataKeys, multikeyPaths);
    }
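
    // Multikey metadata keys are currently produced only by wildcard index key generation,
    // which persists the index's multikey path set as special entries in the index itself.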
    // If we have some multikey metadata keys, they should have been added while marking the index
    // as multikey in the catalog. Add them to the count of keys inserted for completeness.
    if (numInserted && !multikeyMetadataKeys.empty()) {
        *numInserted += multikeyMetadataKeys.size();
    }

    return Status::OK();
}

Status SortedDataIndexAccessMethod::insertKeys(OperationContext* opCtx,
                                               const CollectionPtr& coll,
                                               const KeyStringSet& keys,
                                               const InsertDeleteOptions& options,
                                               KeyHandlerFn&& onDuplicateKey,
                                               int64_t* numInserted) {
    // Initialize the 'numInserted' out-parameter to zero in case the caller did not already do so.
    if (numInserted) {
        *numInserted = 0;
    }
    bool unique = _descriptor->unique();
    bool prepareUnique = _descriptor->prepareUnique();
    bool dupsAllowed;
    if (!_descriptor->isIdIndex() && !opCtx->isEnforcingConstraints() &&
        coll->isIndexReady(_descriptor->indexName())) {
        // Oplog application should avoid checking for duplicates on unique indexes except when:
        // 1. Building an index. We have to use the duplicate key error to record possible
        //    conflicts.
        // 2. Inserting into the '_id' index. We never allow duplicates in the '_id' index.
        //
        // Additionally, unique indexes conflict checking can cause out-of-order updates in
        // wiredtiger. See SERVER-59831.
        dupsAllowed = true;
    } else if (prepareUnique) {
        // Before the index build commits, duplicate keys are allowed to exist with the
        // 'prepareUnique' option. After that, duplicates are not allowed.
        dupsAllowed = !coll->isIndexReady(_descriptor->indexName());
    } else {
        dupsAllowed = !unique;
    }

    // Add all new keys into the index. The RecordId for each is already encoded in the KeyString.
    for (const auto& keyString : keys) {
        auto status = _newInterface->insert(opCtx, keyString, dupsAllowed);

        // When duplicates are encountered and allowed, retry with dupsAllowed. Call
        // onDuplicateKey() with the inserted duplicate key.
        if (ErrorCodes::DuplicateKey == status.code() && options.dupsAllowed && !prepareUnique) {
            invariant(unique);

            status = _newInterface->insert(opCtx, keyString, true /* dupsAllowed */);
            if (status.isOK() && onDuplicateKey) {
                status = onDuplicateKey(keyString);
            }
        }
        if (!status.isOK()) {
            return status;
        }
    }
    if (numInserted) {
        *numInserted = keys.size();
    }
    return Status::OK();
}

void SortedDataIndexAccessMethod::removeOneKey(OperationContext* opCtx,
                                               const KeyString::Value& keyString,
                                               bool dupsAllowed) {
    try {
        _newInterface->unindex(opCtx, keyString, dupsAllowed);
    } catch (AssertionException& e) {
        NamespaceString ns = _indexCatalogEntry->getNSSFromCatalog(opCtx);
        LOGV2(20683,
              "Assertion failure: _unindex failed on: {namespace} for index: {indexName}. "
              "{error} KeyString:{keyString}",
              "Assertion failure: _unindex failed",
              "error"_attr = redact(e),
              "keyString"_attr = keyString,
              "namespace"_attr = ns,
              "indexName"_attr = _descriptor->indexName());
        printStackTrace();
    }
}

std::unique_ptr<SortedDataInterface::Cursor> SortedDataIndexAccessMethod::newCursor(
    OperationContext* opCtx, bool isForward) const {
    return _newInterface->newCursor(opCtx, isForward);
}

Status SortedDataIndexAccessMethod::removeKeys(OperationContext* opCtx,
                                               const KeyStringSet& keys,
                                               const InsertDeleteOptions& options,
                                               int64_t* numDeleted) {
    for (const auto& key : keys) {
        removeOneKey(opCtx, key, options.dupsAllowed);
    }

    *numDeleted = keys.size();
    return Status::OK();
}

Status SortedDataIndexAccessMethod::initializeAsEmpty(OperationContext* opCtx) {
    return _newInterface->initAsEmpty(opCtx);
}

RecordId SortedDataIndexAccessMethod::findSingle(OperationContext* opCtx,
                                                 const CollectionPtr& collection,
                                                 const BSONObj& requestedKey) const {
    // Generate the key for this index.
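    // If the index has a non-simple collation the key must be produced by the full getKeys()
    // pipeline; otherwise it can be built directly from 'requestedKey' with its field names
    // stripped (e.g. {a: 5} becomes the key {"": 5}).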
    KeyString::Value actualKey = [&]() {
        if (_indexCatalogEntry->getCollator()) {
            // For performance, call get keys only if there is a non-simple collation.
            SharedBufferFragmentBuilder pooledBuilder(
                KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
            auto& executionCtx = StorageExecutionContext::get(opCtx);
            auto keys = executionCtx.keys();
            KeyStringSet* multikeyMetadataKeys = nullptr;
            MultikeyPaths* multikeyPaths = nullptr;

            getKeys(opCtx,
                    collection,
                    pooledBuilder,
                    requestedKey,
                    InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints,
                    GetKeysContext::kAddingKeys,
                    keys.get(),
                    multikeyMetadataKeys,
                    multikeyPaths,
                    boost::none /* loc */);
            invariant(keys->size() == 1);
            return *keys->begin();
        } else {
            KeyString::HeapBuilder requestedKeyString(
                getSortedDataInterface()->getKeyStringVersion(),
                BSONObj::stripFieldNames(requestedKey),
                getSortedDataInterface()->getOrdering());
            return requestedKeyString.release();
        }
    }();

    if (auto loc = _newInterface->findLoc(opCtx, actualKey)) {
        dassert(!loc->isNull());
        return std::move(*loc);
    }

    return RecordId();
}

void SortedDataIndexAccessMethod::validate(OperationContext* opCtx,
                                           int64_t* numKeys,
                                           IndexValidateResults* fullResults) const {
    long long keys = 0;
    _newInterface->fullValidate(opCtx, &keys, fullResults);
    *numKeys = keys;
}

bool SortedDataIndexAccessMethod::appendCustomStats(OperationContext* opCtx,
                                                    BSONObjBuilder* output,
                                                    double scale) const {
    return _newInterface->appendCustomStats(opCtx, output, scale);
}

long long SortedDataIndexAccessMethod::getSpaceUsedBytes(OperationContext* opCtx) const {
    return _newInterface->getSpaceUsedBytes(opCtx);
}

long long SortedDataIndexAccessMethod::getFreeStorageBytes(OperationContext* opCtx) const {
    return _newInterface->getFreeStorageBytes(opCtx);
}

pair<KeyStringSet, KeyStringSet> SortedDataIndexAccessMethod::setDifference(
    const KeyStringSet& left, const KeyStringSet& right) {
    // Two iterators to traverse the two sets in sorted order.
    auto leftIt = left.begin();
    auto rightIt = right.begin();
    KeyStringSet::sequence_type onlyLeft;
    KeyStringSet::sequence_type onlyRight;

    while (leftIt != left.end() && rightIt != right.end()) {
        // Use compareWithTypeBits instead of the regular compare as we want just a difference in
        // typeinfo to also result in an index change.
        const int cmp = leftIt->compareWithTypeBits(*rightIt);
        if (cmp == 0) {
            ++leftIt;
            ++rightIt;
        } else if (cmp > 0) {
            onlyRight.push_back(*rightIt);
            ++rightIt;
        } else {
            onlyLeft.push_back(*leftIt);
            ++leftIt;
        }
    }

    // Add the rest of 'left' to 'onlyLeft', and the rest of 'right' to 'onlyRight', if any.
    onlyLeft.insert(onlyLeft.end(), leftIt, left.end());
    onlyRight.insert(onlyRight.end(), rightIt, right.end());

    KeyStringSet outLeft;
    KeyStringSet outRight;

    // The above algorithm guarantees that the elements are sorted and unique, so we can let the
    // container know so we get O(1) complexity adopting it.
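    // For example, left = {a, b, c} and right = {b, d} yields onlyLeft = {a, c} and
    // onlyRight = {d}: the keys to remove and to add, respectively, when applying an update.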
    outLeft.adopt_sequence(boost::container::ordered_unique_range_t(), std::move(onlyLeft));
    outRight.adopt_sequence(boost::container::ordered_unique_range_t(), std::move(onlyRight));

    return {{std::move(outLeft)}, {std::move(outRight)}};
}

void SortedDataIndexAccessMethod::prepareUpdate(OperationContext* opCtx,
                                                const CollectionPtr& collection,
                                                const BSONObj& from,
                                                const BSONObj& to,
                                                const RecordId& record,
                                                const InsertDeleteOptions& options,
                                                UpdateTicket* ticket) const {
    SharedBufferFragmentBuilder pooledBuilder(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
    const MatchExpression* indexFilter = _indexCatalogEntry->getFilterExpression();
    if (!indexFilter || indexFilter->matchesBSON(from)) {
        // Override key constraints when generating keys for removal. This only applies to keys
        // that do not apply to a partial filter expression.
        const auto getKeysMode = _indexCatalogEntry->isHybridBuilding()
            ? InsertDeleteOptions::ConstraintEnforcementMode::kRelaxConstraintsUnfiltered
            : options.getKeysMode;

        // There's no need to compute the prefixes of the indexed fields that possibly caused the
        // index to be multikey when the old version of the document was written since the index
        // metadata isn't updated when keys are deleted.
        getKeys(opCtx,
                collection,
                pooledBuilder,
                from,
                getKeysMode,
                GetKeysContext::kRemovingKeys,
                &ticket->oldKeys,
                nullptr,
                nullptr,
                record);
    }

    if (!indexFilter || indexFilter->matchesBSON(to)) {
        getKeys(opCtx,
                collection,
                pooledBuilder,
                to,
                options.getKeysMode,
                GetKeysContext::kAddingKeys,
                &ticket->newKeys,
                &ticket->newMultikeyMetadataKeys,
                &ticket->newMultikeyPaths,
                record);
    }

    ticket->loc = record;
    ticket->dupsAllowed = options.dupsAllowed;

    std::tie(ticket->removed, ticket->added) = setDifference(ticket->oldKeys, ticket->newKeys);

    ticket->_isValid = true;
}
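
// Applies the key delta computed by prepareUpdate(): unindexes 'ticket.removed', inserts
// 'ticket.added', and updates the index's multikey state in the catalog if needed.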
Status SortedDataIndexAccessMethod::doUpdate(OperationContext* opCtx,
                                             const CollectionPtr& coll,
                                             const UpdateTicket& ticket,
                                             int64_t* numInserted,
                                             int64_t* numDeleted) {
    invariant(!_indexCatalogEntry->isHybridBuilding());
    invariant(ticket.newKeys.size() ==
              ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size());
    invariant(numInserted);
    invariant(numDeleted);

    *numInserted = 0;
    *numDeleted = 0;

    if (!ticket._isValid) {
        return Status(ErrorCodes::InternalError, "Invalid UpdateTicket in update");
    }

    for (const auto& remKey : ticket.removed) {
        _newInterface->unindex(opCtx, remKey, ticket.dupsAllowed);
    }

    // Add all new data keys into the index.
    for (const auto& keyString : ticket.added) {
        bool dupsAllowed = !_descriptor->prepareUnique() && ticket.dupsAllowed;
        auto status = _newInterface->insert(opCtx, keyString, dupsAllowed);
        if (!status.isOK())
            return status;
    }

    // If these keys should cause the index to become multikey, pass them into the catalog.
    if (shouldMarkIndexAsMultikey(
            ticket.newKeys.size(), ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths)) {
        _indexCatalogEntry->setMultikey(
            opCtx, coll, ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths);
    }

    // If we have some multikey metadata keys, they should have been added while marking the index
    // as multikey in the catalog. Add them to the count of keys inserted for completeness.
    *numInserted = ticket.added.size() + ticket.newMultikeyMetadataKeys.size();
    *numDeleted = ticket.removed.size();

    return Status::OK();
}

Status SortedDataIndexAccessMethod::compact(OperationContext* opCtx) {
    return this->_newInterface->compact(opCtx);
}

std::shared_ptr<Ident> SortedDataIndexAccessMethod::getSharedIdent() const {
    return this->_newInterface->getSharedIdent();
}

void SortedDataIndexAccessMethod::setIdent(std::shared_ptr<Ident> newIdent) {
    this->_newInterface->setIdent(std::move(newIdent));
}

void IndexAccessMethod::BulkBuilder::countNewBuildInStats() {
    indexBulkBuilderSSS.count.addAndFetch(1);
}

void IndexAccessMethod::BulkBuilder::countResumedBuildInStats() {
    indexBulkBuilderSSS.count.addAndFetch(1);
    indexBulkBuilderSSS.resumed.addAndFetch(1);
}

SorterFileStats* IndexAccessMethod::BulkBuilder::bulkBuilderFileStats() {
    return &indexBulkBuilderSSS.sorterFileStats;
}

SorterTracker* IndexAccessMethod::BulkBuilder::bulkBuilderTracker() {
    return &indexBulkBuilderSSS.sorterTracker;
}

void IndexAccessMethod::BulkBuilder::yield(OperationContext* opCtx,
                                           const Yieldable* yieldable,
                                           const NamespaceString& ns) {
    // Releasing locks means a new snapshot should be acquired when restored.
    opCtx->recoveryUnit()->abandonSnapshot();
    yieldable->yield();

    auto locker = opCtx->lockState();
    Locker::LockSnapshot snapshot;
    if (locker->saveLockStateAndUnlock(&snapshot)) {
        // Track the number of yields in CurOp.
        CurOp::get(opCtx)->yielded();

        auto failPointHang = [opCtx, &ns](FailPoint* fp) {
            fp->executeIf(
                [fp](auto&&) {
                    LOGV2(5180600, "Hanging index build during bulk load yield");
                    fp->pauseWhileSet();
                },
                [opCtx, &ns](auto&& config) {
                    return config.getStringField("namespace") == ns.ns();
                });
        };
        failPointHang(&hangDuringIndexBuildBulkLoadYield);
        failPointHang(&hangDuringIndexBuildBulkLoadYieldSecond);

        locker->restoreLockState(opCtx, snapshot);
    }
    yieldable->restore();
}
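
// Bulk builder for SortedDataIndexAccessMethod. Keys are accumulated in an external Sorter
// during the collection scan phase and then streamed, in sorted order, into the index through
// a SortedDataBuilderInterface once the scan completes.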
class SortedDataIndexAccessMethod::BulkBuilderImpl final
    : public BulkBuilderCommon<SortedDataIndexAccessMethod::BulkBuilderImpl> {
public:
    using Sorter = mongo::Sorter<KeyString::Value, mongo::NullValue>;

    BulkBuilderImpl(SortedDataIndexAccessMethod* iam,
                    size_t maxMemoryUsageBytes,
                    StringData dbName);

    BulkBuilderImpl(SortedDataIndexAccessMethod* iam,
                    size_t maxMemoryUsageBytes,
                    const IndexStateInfo& stateInfo,
                    StringData dbName);

    Status insert(OperationContext* opCtx,
                  const CollectionPtr& collection,
                  SharedBufferFragmentBuilder& pooledBuilder,
                  const BSONObj& obj,
                  const RecordId& loc,
                  const InsertDeleteOptions& options,
                  const std::function<void()>& saveCursorBeforeWrite,
                  const std::function<void()>& restoreCursorAfterWrite) final;

    const MultikeyPaths& getMultikeyPaths() const final;

    bool isMultikey() const final;

    IndexStateInfo persistDataForShutdown() final;

    std::unique_ptr<Sorter::Iterator> finalizeSort();

    std::unique_ptr<SortedDataBuilderInterface> setUpBulkInserter(OperationContext* opCtx,
                                                                  bool dupsAllowed);

    void debugEnsureSorted(const Sorter::Data& data);

    bool duplicateCheck(OperationContext* opCtx,
                        const Sorter::Data& data,
                        bool dupsAllowed,
                        const RecordIdHandlerFn& onDuplicateRecord);

    void insertKey(std::unique_ptr<SortedDataBuilderInterface>& inserter,
                   const Sorter::Data& data);

    Status keyCommitted(const KeyHandlerFn& onDuplicateKeyInserted,
                        const Sorter::Data& data,
                        bool isDup);

private:
    void _insertMultikeyMetadataKeysIntoSorter();

    Sorter* _makeSorter(
        size_t maxMemoryUsageBytes,
        StringData dbName,
        boost::optional<StringData> fileName = boost::none,
        const boost::optional<std::vector<SorterRange>>& ranges = boost::none) const;

    Sorter::Settings _makeSorterSettings() const;

    SortedDataIndexAccessMethod* _iam;
    std::unique_ptr<Sorter> _sorter;

    KeyString::Value _previousKey;

    // Set to true if any document added to the BulkBuilder causes the index to become multikey.
    bool _isMultiKey = false;

    // Holds the path components that cause this index to be multikey. The '_indexMultikeyPaths'
    // vector remains empty if this index doesn't support path-level multikey tracking.
    MultikeyPaths _indexMultikeyPaths;

    // Caches the set of all multikey metadata keys generated during the bulk build process.
    // These are inserted into the sorter after all normal data keys have been added, just
    // before the bulk build is committed.
    KeyStringSet _multikeyMetadataKeys;
};

std::unique_ptr<IndexAccessMethod::BulkBuilder> SortedDataIndexAccessMethod::initiateBulk(
    size_t maxMemoryUsageBytes,
    const boost::optional<IndexStateInfo>& stateInfo,
    StringData dbName) {
    return stateInfo
        ? std::make_unique<BulkBuilderImpl>(this, maxMemoryUsageBytes, *stateInfo, dbName)
        : std::make_unique<BulkBuilderImpl>(this, maxMemoryUsageBytes, dbName);
}

SortedDataIndexAccessMethod::BulkBuilderImpl::BulkBuilderImpl(SortedDataIndexAccessMethod* iam,
                                                              size_t maxMemoryUsageBytes,
                                                              StringData dbName)
    : BulkBuilderCommon(0,
                        "Index Build: inserting keys from external sorter into index",
                        iam->_descriptor->indexName()),
      _iam(iam),
      _sorter(_makeSorter(maxMemoryUsageBytes, dbName)) {
    countNewBuildInStats();
}

SortedDataIndexAccessMethod::BulkBuilderImpl::BulkBuilderImpl(SortedDataIndexAccessMethod* iam,
                                                              size_t maxMemoryUsageBytes,
                                                              const IndexStateInfo& stateInfo,
                                                              StringData dbName)
    : BulkBuilderCommon(stateInfo.getNumKeys().value_or(0),
                        "Index Build: inserting keys from external sorter into index",
                        iam->_descriptor->indexName()),
      _iam(iam),
      _sorter(_makeSorter(
          maxMemoryUsageBytes, dbName, stateInfo.getFileName(), stateInfo.getRanges())),
      _isMultiKey(stateInfo.getIsMultikey()),
      _indexMultikeyPaths(createMultikeyPaths(stateInfo.getMultikeyPaths())) {
    countResumedBuildInStats();
}

Status SortedDataIndexAccessMethod::BulkBuilderImpl::insert(
    OperationContext* opCtx,
    const CollectionPtr& collection,
    SharedBufferFragmentBuilder& pooledBuilder,
    const BSONObj& obj,
    const RecordId& loc,
    const InsertDeleteOptions& options,
    const std::function<void()>& saveCursorBeforeWrite,
    const std::function<void()>& restoreCursorAfterWrite) {
    auto& executionCtx = StorageExecutionContext::get(opCtx);

    auto keys = executionCtx.keys();
    auto multikeyPaths = executionCtx.multikeyPaths();

    try {
        _iam->getKeys(opCtx,
                      collection,
                      pooledBuilder,
                      obj,
                      options.getKeysMode,
                      GetKeysContext::kAddingKeys,
                      keys.get(),
                      &_multikeyMetadataKeys,
                      multikeyPaths.get(),
                      loc,
                      [&](Status status, const BSONObj&, const boost::optional<RecordId>&) {
                          // If a key generation error was suppressed, record the document as
                          // "skipped" so the index builder can retry at a point when data is
                          // consistent.
                          auto interceptor = _iam->_indexCatalogEntry->indexBuildInterceptor();
                          if (interceptor && interceptor->getSkippedRecordTracker()) {
                              LOGV2_DEBUG(20684,
                                          1,
                                          "Recording suppressed key generation error to retry "
                                          "later: {error} on {loc}: {obj}",
                                          "error"_attr = status,
                                          "loc"_attr = loc,
                                          "obj"_attr = redact(obj));

                              // Save and restore the cursor around the write in case it throws a
                              // WCE internally and causes the cursor to be unpositioned.
                              saveCursorBeforeWrite();
                              interceptor->getSkippedRecordTracker()->record(opCtx, loc);
                              restoreCursorAfterWrite();
                          }
                      });
    } catch (...) {
        return exceptionToStatus();
    }
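
    // Merge any newly discovered multikey path components into the accumulated set. For
    // example, for an index on {'a.b': 1, 'c': 1}, merging {{0}, {}} with {{1}, {}} yields
    // {{0, 1}, {}}.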
    if (!multikeyPaths->empty()) {
        if (_indexMultikeyPaths.empty()) {
            _indexMultikeyPaths = *multikeyPaths;
        } else {
            invariant(_indexMultikeyPaths.size() == multikeyPaths->size());
            for (size_t i = 0; i < multikeyPaths->size(); ++i) {
                _indexMultikeyPaths[i].insert(boost::container::ordered_unique_range_t(),
                                              (*multikeyPaths)[i].begin(),
                                              (*multikeyPaths)[i].end());
            }
        }
    }

    for (const auto& keyString : *keys) {
        _sorter->add(keyString, mongo::NullValue());
        ++_keysInserted;
    }

    _isMultiKey = _isMultiKey ||
        _iam->shouldMarkIndexAsMultikey(keys->size(), _multikeyMetadataKeys, *multikeyPaths);

    return Status::OK();
}

const MultikeyPaths& SortedDataIndexAccessMethod::BulkBuilderImpl::getMultikeyPaths() const {
    return _indexMultikeyPaths;
}

bool SortedDataIndexAccessMethod::BulkBuilderImpl::isMultikey() const {
    return _isMultiKey;
}

IndexStateInfo SortedDataIndexAccessMethod::BulkBuilderImpl::persistDataForShutdown() {
    _insertMultikeyMetadataKeysIntoSorter();
    auto state = _sorter->persistDataForShutdown();

    IndexStateInfo stateInfo;
    stateInfo.setFileName(StringData(state.fileName));
    stateInfo.setNumKeys(_keysInserted);
    stateInfo.setRanges(std::move(state.ranges));

    return stateInfo;
}

void SortedDataIndexAccessMethod::BulkBuilderImpl::_insertMultikeyMetadataKeysIntoSorter() {
    for (const auto& keyString : _multikeyMetadataKeys) {
        _sorter->add(keyString, mongo::NullValue());
        ++_keysInserted;
    }

    // We clear the multikey metadata keys to prevent them from being inserted into the Sorter
    // twice in the case that done() is called and then persistDataForShutdown() is later called.
    _multikeyMetadataKeys.clear();
}

SortedDataIndexAccessMethod::BulkBuilderImpl::Sorter::Settings
SortedDataIndexAccessMethod::BulkBuilderImpl::_makeSorterSettings() const {
    return std::pair<KeyString::Value::SorterDeserializeSettings,
                     mongo::NullValue::SorterDeserializeSettings>(
        {_iam->getSortedDataInterface()->getKeyStringVersion()}, {});
}

SortedDataIndexAccessMethod::BulkBuilderImpl::Sorter*
SortedDataIndexAccessMethod::BulkBuilderImpl::_makeSorter(
    size_t maxMemoryUsageBytes,
    StringData dbName,
    boost::optional<StringData> fileName,
    const boost::optional<std::vector<SorterRange>>& ranges) const {
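    // A resumed index build supplies the spill file and ranges persisted at shutdown, so the
    // sorter can pick up where it left off instead of re-sorting from scratch.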
    return fileName
        ? Sorter::makeFromExistingRanges(
              fileName->toString(),
              *ranges,
              makeSortOptions(maxMemoryUsageBytes, dbName, bulkBuilderFileStats()),
              BtreeExternalSortComparison(),
              _makeSorterSettings())
        : Sorter::make(makeSortOptions(maxMemoryUsageBytes, dbName, bulkBuilderFileStats()),
                       BtreeExternalSortComparison(),
                       _makeSorterSettings());
}

std::unique_ptr<mongo::Sorter<KeyString::Value, mongo::NullValue>::Iterator>
SortedDataIndexAccessMethod::BulkBuilderImpl::finalizeSort() {
    _insertMultikeyMetadataKeysIntoSorter();
    return std::unique_ptr<Sorter::Iterator>(_sorter->done());
}

std::unique_ptr<SortedDataBuilderInterface>
SortedDataIndexAccessMethod::BulkBuilderImpl::setUpBulkInserter(OperationContext* opCtx,
                                                                bool dupsAllowed) {
    _ns = _iam->_indexCatalogEntry->getNSSFromCatalog(opCtx);
    return _iam->getSortedDataInterface()->makeBulkBuilder(opCtx, dupsAllowed);
}

void SortedDataIndexAccessMethod::BulkBuilderImpl::debugEnsureSorted(const Sorter::Data& data) {
    if (data.first.compare(_previousKey) < 0) {
        LOGV2_FATAL_NOTRACE(
            31171,
            "Expected the next key to be greater than or equal to the previous key",
            "nextKey"_attr = data.first.toString(),
            "previousKey"_attr = _previousKey.toString(),
            "index"_attr = _indexName);
    }
}

bool SortedDataIndexAccessMethod::BulkBuilderImpl::duplicateCheck(
    OperationContext* opCtx,
    const Sorter::Data& data,
    bool dupsAllowed,
    const RecordIdHandlerFn& onDuplicateRecord) {
    auto descriptor = _iam->_descriptor;

    bool isDup = false;
    if (descriptor->unique()) {
        int cmpData = (_iam->getSortedDataInterface()->rsKeyFormat() == KeyFormat::Long)
            ? data.first.compareWithoutRecordIdLong(_previousKey)
            : data.first.compareWithoutRecordIdStr(_previousKey);
        isDup = (cmpData == 0);
    }

    // Before attempting to insert, perform a duplicate key check.
    if (isDup && !dupsAllowed) {
        uassertStatusOK(_iam->_handleDuplicateKey(opCtx, data.first, onDuplicateRecord));
    }
    return isDup;
}

void SortedDataIndexAccessMethod::BulkBuilderImpl::insertKey(
    std::unique_ptr<SortedDataBuilderInterface>& inserter, const Sorter::Data& data) {
    uassertStatusOK(inserter->addKey(data.first));
}

Status SortedDataIndexAccessMethod::BulkBuilderImpl::keyCommitted(
    const KeyHandlerFn& onDuplicateKeyInserted, const Sorter::Data& data, bool isDup) {
    _previousKey = data.first;

    if (isDup) {
        return onDuplicateKeyInserted(data.first);
    }
    return Status::OK();
}

void SortedDataIndexAccessMethod::getKeys(OperationContext* opCtx,
                                          const CollectionPtr& collection,
                                          SharedBufferFragmentBuilder& pooledBufferBuilder,
                                          const BSONObj& obj,
                                          InsertDeleteOptions::ConstraintEnforcementMode mode,
                                          GetKeysContext context,
                                          KeyStringSet* keys,
                                          KeyStringSet* multikeyMetadataKeys,
                                          MultikeyPaths* multikeyPaths,
                                          const boost::optional<RecordId>& id,
                                          OnSuppressedErrorFn&& onSuppressedError) const {
    invariant(!id || _newInterface->rsKeyFormat() != KeyFormat::String || id->isStr(),
              fmt::format("RecordId is not in the same string format as its RecordStore; id: {}",
                          id->toString()));
    invariant(!id || _newInterface->rsKeyFormat() != KeyFormat::Long || id->isLong(),
              fmt::format("RecordId is not in the same long format as its RecordStore; id: {}",
                          id->toString()));

    try {
        if (_indexCatalogEntry->shouldValidateDocument()) {
            validateDocument(collection, obj, _descriptor->keyPattern());
        }
        doGetKeys(opCtx,
                  collection,
                  pooledBufferBuilder,
                  obj,
                  context,
                  keys,
                  multikeyMetadataKeys,
                  multikeyPaths,
                  id);
    } catch (const AssertionException& ex) {
        // Suppress all indexing errors when mode is kRelaxConstraints.
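        // Any partially generated keys are cleared below, so a document whose error is
        // suppressed is skipped entirely rather than partially indexed.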
        if (mode == InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints) {
            throw;
        }

        keys->clear();

        if (multikeyPaths) {
            multikeyPaths->clear();
        }

        if (opCtx->isKillPending()) {
            throw;
        }

        // If the document applies to the filter (which means that it should have never been
        // indexed), do not suppress the error.
        const MatchExpression* filter = _indexCatalogEntry->getFilterExpression();
        if (mode == InsertDeleteOptions::ConstraintEnforcementMode::kRelaxConstraintsUnfiltered &&
            filter && filter->matchesBSON(obj)) {
            throw;
        }

        if (onSuppressedError) {
            onSuppressedError(ex.toStatus(), obj, id);
        } else {
            LOGV2_DEBUG(20686,
                        1,
                        "Suppressed key generation error",
                        "error"_attr = redact(ex.toStatus()),
                        "loc"_attr = id,
                        "obj"_attr = redact(obj));
        }
    }
}

bool SortedDataIndexAccessMethod::shouldMarkIndexAsMultikey(
    size_t numberOfKeys,
    const KeyStringSet& multikeyMetadataKeys,
    const MultikeyPaths& multikeyPaths) const {
    return numberOfKeys > 1 || isMultikeyFromPaths(multikeyPaths);
}

void SortedDataIndexAccessMethod::validateDocument(const CollectionPtr& collection,
                                                   const BSONObj& obj,
                                                   const BSONObj& keyPattern) const {}

/**
 * Generates a new file name on each call using a static, atomic and monotonically increasing
 * number. Each name is suffixed with a random number generated at startup, to prevent name
 * collisions when the index build external sort files are preserved across restarts.
 *
 * Each user of the Sorter must implement this function to ensure that all temporary files that the
 * Sorter instances produce are uniquely identified using a unique file name extension with separate
 * atomic variable. This is necessary because the sorter.cpp code is separately included in multiple
 * places, rather than compiled in one place and linked, and so cannot provide a globally unique ID.
 */
std::string nextFileName() {
    static AtomicWord<unsigned> indexAccessMethodFileCounter;
    static const int64_t randomSuffix = SecureRandom().nextInt64();
    return str::stream() << "extsort-index." << indexAccessMethodFileCounter.fetchAndAdd(1) << '-'
                         << randomSuffix;
}

Status SortedDataIndexAccessMethod::_handleDuplicateKey(
    OperationContext* opCtx,
    const KeyString::Value& dataKey,
    const RecordIdHandlerFn& onDuplicateRecord) {
    RecordId recordId = (KeyFormat::Long == _newInterface->rsKeyFormat())
        ? KeyString::decodeRecordIdLongAtEnd(dataKey.getBuffer(), dataKey.getSize())
        : KeyString::decodeRecordIdStrAtEnd(dataKey.getBuffer(), dataKey.getSize());
    if (onDuplicateRecord) {
        return onDuplicateRecord(recordId);
    }

    BSONObj dupKey = KeyString::toBson(dataKey, getSortedDataInterface()->getOrdering());
    return buildDupKeyErrorStatus(dupKey.getOwned(),
                                  _indexCatalogEntry->getNSSFromCatalog(opCtx),
                                  _descriptor->indexName(),
                                  _descriptor->keyPattern(),
                                  _descriptor->collation());
}
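
// Routes key writes for a hybrid index build to the build interceptor's side table, to be
// applied when the build commits; for a ready index, writes the keys directly and updates
// the index's multikey metadata.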
Status SortedDataIndexAccessMethod::_indexKeysOrWriteToSideTable(
    OperationContext* opCtx,
    const CollectionPtr& coll,
    const KeyStringSet& keys,
    const KeyStringSet& multikeyMetadataKeys,
    const MultikeyPaths& multikeyPaths,
    const BSONObj& obj,
    const InsertDeleteOptions& options,
    int64_t* keysInsertedOut) {
    Status status = Status::OK();
    if (_indexCatalogEntry->isHybridBuilding()) {
        // The side table interface accepts only records that meet the criteria for this partial
        // index.
        // See SERVER-28975 and SERVER-39705 for details.
        if (auto filter = _indexCatalogEntry->getFilterExpression()) {
            if (!filter->matchesBSON(obj)) {
                return Status::OK();
            }
        }

        int64_t inserted = 0;
        status = _indexCatalogEntry->indexBuildInterceptor()->sideWrite(
            opCtx,
            keys,
            multikeyMetadataKeys,
            multikeyPaths,
            IndexBuildInterceptor::Op::kInsert,
            &inserted);
        if (keysInsertedOut) {
            *keysInsertedOut += inserted;
        }
    } else {
        // Ensure that our snapshot is compatible with the index's minimum visible snapshot.
        const auto minVisibleTimestamp = _indexCatalogEntry->getMinimumVisibleSnapshot();
        const auto readTimestamp =
            opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx).value_or(
                opCtx->recoveryUnit()->getCatalogConflictingTimestamp());
        if (minVisibleTimestamp && !readTimestamp.isNull() &&
            readTimestamp < *minVisibleTimestamp) {
            throwWriteConflictException(
                "Unable to read from a snapshot due to pending catalog changes.");
        }

        int64_t numInserted = 0;
        status = insertKeysAndUpdateMultikeyPaths(
            opCtx,
            coll,
            keys,
            {multikeyMetadataKeys.begin(), multikeyMetadataKeys.end()},
            multikeyPaths,
            options,
            nullptr,
            &numInserted);
        if (keysInsertedOut) {
            *keysInsertedOut += numInserted;
        }
    }

    return status;
}

void SortedDataIndexAccessMethod::_unindexKeysOrWriteToSideTable(
    OperationContext* opCtx,
    const NamespaceString& ns,
    const KeyStringSet& keys,
    const BSONObj& obj,
    bool logIfError,
    int64_t* const keysDeletedOut,
    InsertDeleteOptions options,  // copy!
    CheckRecordId checkRecordId) {
    options.logIfError = logIfError;

    if (_indexCatalogEntry->isHybridBuilding()) {
        // The side table interface accepts only records that meet the criteria for this partial
        // index.
        // See SERVER-28975 and SERVER-39705 for details.
        if (auto filter = _indexCatalogEntry->getFilterExpression()) {
            if (!filter->matchesBSON(obj)) {
                return;
            }
        }

        int64_t removed = 0;
        fassert(31155,
                _indexCatalogEntry->indexBuildInterceptor()->sideWrite(
                    opCtx, keys, {}, {}, IndexBuildInterceptor::Op::kDelete, &removed));
        if (keysDeletedOut) {
            *keysDeletedOut += removed;
        }
        return;
    }

    // On WiredTiger, we do blind unindexing of records for efficiency. However, when duplicates
    // are allowed in unique indexes, WiredTiger does not do blind unindexing, and instead confirms
    // that the recordid matches the element we are removing.
    //
    // We need to disable blind-deletes if 'checkRecordId' is explicitly set 'On', or for
    // in-progress indexes, in order to force recordid-matching for unindex operations, since
    // initial sync can build an index over a collection with duplicates. See SERVER-17487 for more
    // details.
    options.dupsAllowed = options.dupsAllowed || !_indexCatalogEntry->isReady(opCtx) ||
        (checkRecordId == CheckRecordId::On);

    // Ensure that our snapshot is compatible with the index's minimum visible snapshot.
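    // Throwing a WriteConflictException below lets the caller's write-conflict retry loop rerun
    // the operation with a newer snapshot in which the catalog change is visible.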
    const auto minVisibleTimestamp = _indexCatalogEntry->getMinimumVisibleSnapshot();
    const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx).value_or(
        opCtx->recoveryUnit()->getCatalogConflictingTimestamp());
    if (minVisibleTimestamp && !readTimestamp.isNull() && readTimestamp < *minVisibleTimestamp) {
        throwWriteConflictException(
            "Unable to read from a snapshot due to pending catalog changes.");
    }

    int64_t removed = 0;
    Status status = removeKeys(opCtx, keys, options, &removed);

    if (!status.isOK()) {
        LOGV2(20362,
              "Couldn't unindex record {obj} from collection {namespace}: {error}",
              "Couldn't unindex record",
              "record"_attr = redact(obj),
              "namespace"_attr = ns,
              "error"_attr = redact(status));
    }

    if (keysDeletedOut) {
        *keysDeletedOut += removed;
    }
}

}  // namespace mongo

#undef MONGO_LOGV2_DEFAULT_COMPONENT
#include "mongo/db/sorter/sorter.cpp"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand
MONGO_CREATE_SORTER(mongo::KeyString::Value, mongo::NullValue, mongo::BtreeExternalSortComparison);