/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
*/ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage #include "mongo/platform/basic.h" #include "mongo/db/catalog/index_builds_manager.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/collection_catalog.h" #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/catalog/index_timestamp_helper.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/operation_context.h" #include "mongo/db/storage/write_unit_of_work.h" #include "mongo/util/assert_util.h" #include "mongo/util/log.h" #include "mongo/util/str.h" namespace mongo { namespace { /** * Returns basic info on index builders. */ std::string toSummary(const std::map>& builders) { str::stream ss; ss << "Number of builders: " << builders.size() << ": ["; bool first = true; for (const auto& pair : builders) { if (!first) { ss << ", "; } ss << pair.first; first = false; } ss << "]"; return ss; } } // namespace IndexBuildsManager::SetupOptions::SetupOptions() = default; IndexBuildsManager::~IndexBuildsManager() { invariant(_builders.empty(), str::stream() << "Index builds still active: " << toSummary(_builders)); } Status IndexBuildsManager::setUpIndexBuild(OperationContext* opCtx, Collection* collection, const std::vector& specs, const UUID& buildUUID, OnInitFn onInit, SetupOptions options) { _registerIndexBuild(buildUUID); const auto& nss = collection->ns(); invariant(opCtx->lockState()->isCollectionLockedForMode(nss, MODE_X), str::stream() << "Unable to set up index build " << buildUUID << ": collection " << nss.ns() << " is not locked in exclusive mode."); auto builder = _getBuilder(buildUUID); // Ignore uniqueness constraint violations when relaxed (on secondaries). Secondaries can // complete index builds in the middle of batches, which creates the potential for finding // duplicate key violations where there otherwise would be none at consistent states. 
if (options.indexConstraints == IndexConstraints::kRelax) { builder->ignoreUniqueConstraint(); } auto initResult = writeConflictRetry(opCtx, "IndexBuildsManager::setUpIndexBuild", nss.ns(), [opCtx, collection, builder, &onInit, &specs] { return builder->init(opCtx, collection, specs, onInit); }); if (!initResult.isOK()) { return initResult.getStatus(); } if (options.forRecovery) { log() << "Index build initialized: " << buildUUID << ": " << nss << ": indexes: " << initResult.getValue().size(); } else { log() << "Index build initialized: " << buildUUID << ": " << nss << " (" << *collection->uuid() << " ): indexes: " << initResult.getValue().size(); } return Status::OK(); } StatusWith IndexBuildsManager::recoverIndexBuild( const NamespaceString& nss, const UUID& buildUUID, std::vector indexNames) { // TODO: Not yet implemented. return IndexBuildRecoveryState::Building; } Status IndexBuildsManager::startBuildingIndex(OperationContext* opCtx, Collection* collection, const UUID& buildUUID) { auto builder = _getBuilder(buildUUID); return builder->insertAllDocumentsInCollection(opCtx, collection); } StatusWith> IndexBuildsManager::startBuildingIndexForRecovery( OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID, RepairData repair) { auto builder = _getBuilder(buildUUID); auto coll = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(ns); auto rs = coll ? coll->getRecordStore() : nullptr; // Iterate all records in the collection. Validate the records and index them // if they are valid. Delete them (if in repair mode), or crash, if they are not valid. 
long long numRecords = 0; long long dataSize = 0; auto cursor = rs->getCursor(opCtx); auto record = cursor->next(); while (record) { opCtx->checkForInterrupt(); // Cursor is left one past the end of the batch inside writeConflictRetry auto beginBatchId = record->id; Status status = writeConflictRetry(opCtx, "repairDatabase", ns.ns(), [&] { // In the case of WCE in a partial batch, we need to go back to the beginning if (!record || (beginBatchId != record->id)) { record = cursor->seekExact(beginBatchId); } WriteUnitOfWork wunit(opCtx); for (int i = 0; record && i < internalInsertMaxBatchSize.load(); i++) { RecordId id = record->id; RecordData& data = record->data; // Use the latest BSON validation version. We retain decimal data when repairing // database even if decimal is disabled. auto validStatus = validateBSON(data.data(), data.size(), BSONVersion::kLatest); if (!validStatus.isOK()) { if (repair == RepairData::kNo) { severe() << "Invalid BSON detected at " << id << ": " << redact(validStatus); fassertFailed(31396); } warning() << "Invalid BSON detected at " << id << ": " << redact(validStatus) << ". Deleting."; rs->deleteRecord(opCtx, id); } else { numRecords++; dataSize += data.size(); auto insertStatus = builder->insert(opCtx, data.releaseToBson(), id); if (!insertStatus.isOK()) { return insertStatus; } } record = cursor->next(); } // Time to yield; make a safe copy of the current record before releasing our cursor. 
if (record) record->data.makeOwned(); cursor->save(); // Can't fail per API definition // When this exits via success or WCE, we need to restore the cursor ON_BLOCK_EXIT([opCtx, ns, &cursor]() { // restore CAN throw WCE per API writeConflictRetry( opCtx, "retryRestoreCursor", ns.ns(), [&cursor] { cursor->restore(); }); }); wunit.commit(); return Status::OK(); }); if (!status.isOK()) { return status; } } Status status = builder->dumpInsertsFromBulk(opCtx); if (!status.isOK()) { return status; } return std::make_pair(numRecords, dataSize); } Status IndexBuildsManager::drainBackgroundWrites(OperationContext* opCtx, const UUID& buildUUID, RecoveryUnit::ReadSource readSource) { auto builder = _getBuilder(buildUUID); return builder->drainBackgroundWrites(opCtx, readSource); } Status IndexBuildsManager::finishBuildingPhase(const UUID& buildUUID) { auto multiIndexBlockPtr = _getBuilder(buildUUID); // TODO: verify that the index builder is in the expected state. // TODO: Not yet implemented. return Status::OK(); } Status IndexBuildsManager::checkIndexConstraintViolations(OperationContext* opCtx, const UUID& buildUUID) { auto builder = _getBuilder(buildUUID); return builder->checkConstraints(opCtx); } Status IndexBuildsManager::commitIndexBuild(OperationContext* opCtx, Collection* collection, const NamespaceString& nss, const UUID& buildUUID, MultiIndexBlock::OnCreateEachFn onCreateEachFn, MultiIndexBlock::OnCommitFn onCommitFn) { auto builder = _getBuilder(buildUUID); return writeConflictRetry( opCtx, "IndexBuildsManager::commitIndexBuild", nss.ns(), [builder, opCtx, collection, nss, &onCreateEachFn, &onCommitFn] { WriteUnitOfWork wunit(opCtx); auto status = builder->commit(opCtx, collection, onCreateEachFn, onCommitFn); if (!status.isOK()) { return status; } // Eventually, we will obtain the timestamp for completing the index build from the // commitIndexBuild oplog entry. // The current logic for timestamping index completion is consistent with the // IndexBuilder. 
See SERVER-38986 and SERVER-34896. IndexTimestampHelper::setGhostCommitTimestampForCatalogWrite(opCtx, nss); wunit.commit(); return Status::OK(); }); } bool IndexBuildsManager::abortIndexBuild(const UUID& buildUUID, const std::string& reason) { stdx::unique_lock lk(_mutex); auto builderIt = _builders.find(buildUUID); if (builderIt == _builders.end()) { return false; } std::shared_ptr builder = builderIt->second; lk.unlock(); builder->abort(reason); return true; } bool IndexBuildsManager::interruptIndexBuild(OperationContext* opCtx, const UUID& buildUUID, const std::string& reason) { stdx::unique_lock lk(_mutex); auto builderIt = _builders.find(buildUUID); if (builderIt == _builders.end()) { return false; } log() << "Index build interrupted: " << buildUUID << ": " << reason; std::shared_ptr builder = builderIt->second; lk.unlock(); builder->abortWithoutCleanup(opCtx); return true; } void IndexBuildsManager::tearDownIndexBuild(OperationContext* opCtx, Collection* collection, const UUID& buildUUID) { // TODO verify that the index builder is in a finished state before allowing its destruction. 
auto builder = _getBuilder(buildUUID); builder->cleanUpAfterBuild(opCtx, collection); _unregisterIndexBuild(buildUUID); } bool IndexBuildsManager::isBackgroundBuilding(const UUID& buildUUID) { auto builder = _getBuilder(buildUUID); return builder->isBackgroundBuilding(); } void IndexBuildsManager::verifyNoIndexBuilds_forTestOnly() { invariant(_builders.empty()); } void IndexBuildsManager::_registerIndexBuild(UUID buildUUID) { stdx::unique_lock lk(_mutex); std::shared_ptr mib = std::make_shared(); invariant(_builders.insert(std::make_pair(buildUUID, mib)).second); } void IndexBuildsManager::_unregisterIndexBuild(const UUID& buildUUID) { stdx::unique_lock lk(_mutex); auto builderIt = _builders.find(buildUUID); invariant(builderIt != _builders.end()); _builders.erase(builderIt); } std::shared_ptr IndexBuildsManager::_getBuilder(const UUID& buildUUID) { stdx::unique_lock lk(_mutex); auto builderIt = _builders.find(buildUUID); invariant(builderIt != _builders.end()); return builderIt->second; } } // namespace mongo