/**
 *    Copyright (C) 2023-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#include "mongo/db/s/metadata_consistency_util.h"

#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/cursor_manager.h"
#include "mongo/db/exec/queued_data_stage.h"
#include "mongo/db/exec/working_set.h"
#include "mongo/db/metadata_consistency_types_gen.h"
#include "mongo/db/query/cursor_response.h"
#include "mongo/db/query/find_common.h"
#include "mongo/db/query/plan_executor_factory.h"
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/shard_key_index_util.h"
#include "mongo/logv2/log.h"

#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding

namespace mongo {
namespace metadata_consistency_util {

namespace {

MONGO_FAIL_POINT_DEFINE(insertFakeInconsistencies);

/*
 * Emit a warning log containing information about the given inconsistency
 */
void logMetadataInconsistency(const MetadataInconsistencyItem& inconsistencyItem) {
    // Please do not change the error code of this log message unless strictly necessary.
    // Automated log ingestion systems rely on this specific log message to monitor cluster
    // inconsistencies.
    LOGV2_WARNING(7514800,
                  "Detected sharding metadata inconsistency",
                  "inconsistency"_attr = inconsistencyItem);
}

void _checkShardKeyIndexInconsistencies(OperationContext* opCtx,
                                        const NamespaceString& nss,
                                        const ShardId& shardId,
                                        const BSONObj& shardKey,
                                        const CollectionPtr& localColl,
                                        std::vector<MetadataInconsistencyItem>& inconsistencies) {
    const auto performChecks = [&](const CollectionPtr& localColl,
                                   std::vector<MetadataInconsistencyItem>& inconsistencies) {
        // Check that the collection has an index that supports the shard key. If so, check that
        // there exists an index that supports the shard key and is not multikey.
        if (!findShardKeyPrefixedIndex(opCtx, localColl, shardKey, false /*requireSingleKey*/)) {
            inconsistencies.emplace_back(metadata_consistency_util::makeInconsistency(
                MetadataInconsistencyTypeEnum::kMissingShardKeyIndex,
                MissingShardKeyIndexDetails{localColl->ns(), shardId, shardKey}));
        }
    };

    std::vector<MetadataInconsistencyItem> tmpInconsistencies;

    // Shards that do not own any chunks do not participate in the creation of new indexes, so they
    // could potentially miss any indexes created after they no longer own chunks. Thus, we first
    // perform the checks optimistically without taking the collection lock. If missing indexes are
    // found, we check under the collection lock whether this shard currently owns any chunk and
    // re-execute the checks under the lock to ensure stability of the ShardVersion.
    performChecks(localColl, tmpInconsistencies);

    if (!tmpInconsistencies.size()) {
        // No index inconsistencies found
        return;
    }

    // Pessimistic check under collection lock to serialize with chunk migration commit.
    AutoGetCollection ac(opCtx, nss, MODE_IS);
    tassert(7531700,
            str::stream() << "Collection unexpectedly disappeared while holding database DDL lock: "
                          << nss.toStringForErrorMsg(),
            ac);

    const auto scopedCsr =
        CollectionShardingRuntime::assertCollectionLockedAndAcquireShared(opCtx, nss);
    auto optCollDescr = scopedCsr->getCurrentMetadataIfKnown();
    if (!optCollDescr) {
        LOGV2_DEBUG(7531701,
                    1,
                    "Ignoring index inconsistencies because collection metadata is unknown",
                    logAttrs(nss),
                    "inconsistencies"_attr = tmpInconsistencies);
        return;
    }

    if (!optCollDescr->isSharded()) {
        // The collection is registered as SHARDED in the sharding catalog. This shard has the
        // collection locally but it is marked as UNSHARDED.
        inconsistencies.emplace_back(metadata_consistency_util::makeInconsistency(
            MetadataInconsistencyTypeEnum::kShardThinksCollectionIsUnsharded,
            ShardThinksCollectionIsUnshardedDetails{localColl->ns(), localColl->uuid(), shardId}));
        return;
    }

    if (!optCollDescr->currentShardHasAnyChunks()) {
        LOGV2_DEBUG(7531703,
                    1,
                    "Ignoring index inconsistencies because shard does not own any chunk for "
                    "this collection",
                    logAttrs(nss),
                    "inconsistencies"_attr = tmpInconsistencies);
        return;
    }

    tmpInconsistencies.clear();
    performChecks(*ac, inconsistencies);
}
}  // namespace


MetadataConsistencyCommandLevelEnum getCommandLevel(const NamespaceString& nss) {
    if (nss.isAdminDB()) {
        return MetadataConsistencyCommandLevelEnum::kClusterLevel;
    } else if (nss.isCollectionlessCursorNamespace()) {
        return MetadataConsistencyCommandLevelEnum::kDatabaseLevel;
    } else {
        return MetadataConsistencyCommandLevelEnum::kCollectionLevel;
    }
}

std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> makeQueuedPlanExecutor(
    OperationContext* opCtx,
    std::vector<MetadataInconsistencyItem>&& inconsistencies,
    const NamespaceString& nss) {

    auto expCtx =
        make_intrusive<ExpressionContext>(opCtx, std::unique_ptr<CollatorInterface>(nullptr), nss);
    auto ws = std::make_unique<WorkingSet>();
    auto root = std::make_unique<QueuedDataStage>(expCtx.get(), ws.get());

    insertFakeInconsistencies.execute([&](const BSONObj& data) {
        const auto numInconsistencies = data["numInconsistencies"].safeNumberLong();
        for (int i = 0; i < numInconsistencies; i++) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kCollectionUUIDMismatch,
                CollectionUUIDMismatchDetails{nss, ShardId{"shard"}, UUID::gen(), UUID::gen()}));
        }
    });

    for (auto&& inconsistency : inconsistencies) {
        // Every inconsistency encountered needs to be logged with the same format to allow log
        // ingestion systems to correctly detect them.
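        // In addition to being logged, each inconsistency is staged below as an owned BSON
        // document in the working set, so that the queued plan executor can return it to the
        // client through the cursor batches.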
        logMetadataInconsistency(inconsistency);
        WorkingSetID id = ws->allocate();
        WorkingSetMember* member = ws->get(id);
        member->keyData.clear();
        member->recordId = RecordId();
        member->resetDocument(SnapshotId(), inconsistency.toBSON().getOwned());
        member->transitionToOwnedObj();
        root->pushBack(id);
    }

    return uassertStatusOK(
        plan_executor_factory::make(expCtx,
                                    std::move(ws),
                                    std::move(root),
                                    &CollectionPtr::null,
                                    PlanYieldPolicy::YieldPolicy::NO_YIELD,
                                    false, /* whether returned BSON must be owned */
                                    nss));
}

CursorInitialReply createInitialCursorReplyMongod(OperationContext* opCtx,
                                                  ClientCursorParams&& cursorParams,
                                                  long long batchSize) {
    auto& exec = cursorParams.exec;
    auto& nss = cursorParams.nss;

    std::vector<BSONObj> firstBatch;
    FindCommon::BSONArrayResponseSizeTracker responseSizeTracker;
    for (long long objCount = 0; objCount < batchSize; objCount++) {
        BSONObj nextDoc;
        PlanExecutor::ExecState state = exec->getNext(&nextDoc, nullptr);
        if (state == PlanExecutor::IS_EOF) {
            break;
        }
        invariant(state == PlanExecutor::ADVANCED);

        // If we can't fit this result inside the current batch, then we stash it for later.
        if (!responseSizeTracker.haveSpaceForNext(nextDoc)) {
            exec->stashResult(nextDoc);
            break;
        }

        responseSizeTracker.add(nextDoc);
        firstBatch.push_back(std::move(nextDoc));
    }

    auto&& opDebug = CurOp::get(opCtx)->debug();
    opDebug.additiveMetrics.nBatches = 1;
    opDebug.additiveMetrics.nreturned = firstBatch.size();

    if (exec->isEOF()) {
        opDebug.cursorExhausted = true;

        CursorInitialReply resp;
        InitialResponseCursor initRespCursor{std::move(firstBatch)};
        initRespCursor.setResponseCursorBase({0LL /* cursorId */, nss});
        resp.setCursor(std::move(initRespCursor));
        return resp;
    }

    exec->saveState();
    exec->detachFromOperationContext();

    auto pinnedCursor = CursorManager::get(opCtx)->registerCursor(opCtx, std::move(cursorParams));
    pinnedCursor->incNBatches();
    pinnedCursor->incNReturnedSoFar(firstBatch.size());

    CursorInitialReply resp;
    InitialResponseCursor initRespCursor{std::move(firstBatch)};
    const auto cursorId = pinnedCursor.getCursor()->cursorid();
    initRespCursor.setResponseCursorBase({cursorId, nss});
    resp.setCursor(std::move(initRespCursor));

    // Record the cursorID in CurOp.
    opDebug.cursorid = cursorId;

    return resp;
}

std::vector<MetadataInconsistencyItem> checkCollectionMetadataInconsistencies(
    OperationContext* opCtx,
    const ShardId& shardId,
    const ShardId& primaryShardId,
    const std::vector<CollectionType>& catalogClientCollections,
    const std::vector<CollectionPtr>& localCollections) {
    std::vector<MetadataInconsistencyItem> inconsistencies;
    auto itLocalCollections = localCollections.begin();
    auto itCatalogCollections = catalogClientCollections.begin();
    while (itLocalCollections != localCollections.end() &&
           itCatalogCollections != catalogClientCollections.end()) {
        const auto& localColl = *itLocalCollections;
        const auto& localUUID = localColl->uuid();
        const auto& localNss = localColl->ns();
        const auto& remoteNss = itCatalogCollections->getNss();

        const auto cmp = remoteNss.coll().compare(localNss.coll());
        if (cmp < 0) {
            // Case where we have found a collection in the catalog client that is not in the
            // local catalog.
            itCatalogCollections++;
        } else if (cmp == 0) {
            const auto& nss = remoteNss;

            // Case where we have found the same collection in both the catalog client and the
            // local catalog.

            // Check that the local collection has the same UUID as the one in the catalog client.
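            // A differing UUID means that the collection existing locally under this namespace is
            // not the same collection instance that the sharding catalog is tracking.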
            const auto& UUID = itCatalogCollections->getUuid();
            if (UUID != localUUID) {
                inconsistencies.emplace_back(makeInconsistency(
                    MetadataInconsistencyTypeEnum::kCollectionUUIDMismatch,
                    CollectionUUIDMismatchDetails{localNss, shardId, localUUID, UUID}));
            } else {
                _checkShardKeyIndexInconsistencies(opCtx,
                                                   nss,
                                                   shardId,
                                                   itCatalogCollections->getKeyPattern().toBSON(),
                                                   localColl,
                                                   inconsistencies);
            }

            itLocalCollections++;
            itCatalogCollections++;
        } else {
            // Case where we have found a local collection that is not in the catalog client.
            const auto& nss = localNss;

            // TODO SERVER-59957 use function introduced in this ticket to decide if a namespace
            // should be ignored and stop using isNamespaceAlwaysUnsharded().
            if (!nss.isNamespaceAlwaysUnsharded() && shardId != primaryShardId) {
                inconsistencies.emplace_back(
                    makeInconsistency(MetadataInconsistencyTypeEnum::kMisplacedCollection,
                                      MisplacedCollectionDetails{localNss, shardId, localUUID}));
            }
            itLocalCollections++;
        }
    }

    if (shardId != primaryShardId) {
        // Case where we have found more local collections than in the catalog client. It is a
        // hidden unsharded collection inconsistency if we are not the db primary shard.
        while (itLocalCollections != localCollections.end()) {
            const auto localColl = itLocalCollections->get();
            // TODO SERVER-59957 use function introduced in this ticket to decide if a namespace
            // should be ignored and stop using isNamespaceAlwaysUnsharded().
            if (!localColl->ns().isNamespaceAlwaysUnsharded()) {
                inconsistencies.emplace_back(makeInconsistency(
                    MetadataInconsistencyTypeEnum::kMisplacedCollection,
                    MisplacedCollectionDetails{localColl->ns(), shardId, localColl->uuid()}));
            }
            itLocalCollections++;
        }
    }

    return inconsistencies;
}

std::vector<MetadataInconsistencyItem> checkChunksInconsistencies(
    OperationContext* opCtx,
    const CollectionType& collection,
    const std::vector<ChunkType>& chunks) {
    const auto& uuid = collection.getUuid();
    const auto& nss = collection.getNss();
    const auto shardKeyPattern = ShardKeyPattern{collection.getKeyPattern()};

    std::vector<MetadataInconsistencyItem> inconsistencies;
    auto previousChunk = chunks.begin();
    for (auto it = chunks.begin(); it != chunks.end(); it++) {
        const auto& chunk = *it;

        // Skip the first iteration as we need to compare the current chunk with the previous one.
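        // Note: the chunks are assumed to be provided sorted by min key, so comparing each
        // chunk's min bound with the previous chunk's max bound is sufficient to detect routing
        // table gaps and overlaps below.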
        if (it == chunks.begin()) {
            continue;
        }

        if (!shardKeyPattern.isShardKey(chunk.getMin()) ||
            !shardKeyPattern.isShardKey(chunk.getMax())) {
            inconsistencies.emplace_back(
                makeInconsistency(MetadataInconsistencyTypeEnum::kCorruptedChunkShardKey,
                                  CorruptedChunkShardKeyDetails{
                                      nss, uuid, chunk.toConfigBSON(), shardKeyPattern.toBSON()}));
        }

        auto cmp = previousChunk->getMax().woCompare(chunk.getMin());
        if (cmp < 0) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kRoutingTableRangeGap,
                RoutingTableRangeGapDetails{
                    nss, uuid, previousChunk->toConfigBSON(), chunk.toConfigBSON()}));
        } else if (cmp > 0) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kRoutingTableRangeOverlap,
                RoutingTableRangeOverlapDetails{
                    nss, uuid, previousChunk->toConfigBSON(), chunk.toConfigBSON()}));
        }

        previousChunk = it;
    }

    // Check if the first and last chunk have MinKey and MaxKey respectively
    if (chunks.empty()) {
        inconsistencies.emplace_back(
            makeInconsistency(MetadataInconsistencyTypeEnum::kMissingRoutingTable,
                              MissingRoutingTableDetails{nss, uuid}));
    } else {
        const BSONObj& minKeyObj = chunks.front().getMin();
        const auto globalMin = shardKeyPattern.getKeyPattern().globalMin();
        if (minKeyObj.woCompare(globalMin) != 0) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kRoutingTableMissingMinKey,
                RoutingTableMissingMinKeyDetails{nss, uuid, minKeyObj, globalMin}));
        }

        const BSONObj& maxKeyObj = chunks.back().getMax();
        const auto globalMax = shardKeyPattern.getKeyPattern().globalMax();
        if (maxKeyObj.woCompare(globalMax) != 0) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kRoutingTableMissingMaxKey,
                RoutingTableMissingMaxKeyDetails{nss, uuid, maxKeyObj, globalMax}));
        }
    }

    return inconsistencies;
}

std::vector<MetadataInconsistencyItem> checkZonesInconsistencies(
    OperationContext* opCtx, const CollectionType& collection, const std::vector<TagsType>& zones) {
    const auto& uuid = collection.getUuid();
    const auto& nss = collection.getNss();
    const auto shardKeyPattern = ShardKeyPattern{collection.getKeyPattern()};

    std::vector<MetadataInconsistencyItem> inconsistencies;
    auto previousZone = zones.begin();
    for (auto it = zones.begin(); it != zones.end(); it++) {
        const auto& zone = *it;

        // Skip the first iteration as we need to compare the current zone with the previous one.
        if (it == zones.begin()) {
            continue;
        }

        if (!shardKeyPattern.isShardKey(zone.getMinKey()) ||
            !shardKeyPattern.isShardKey(zone.getMaxKey())) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kCorruptedZoneShardKey,
                CorruptedZoneShardKeyDetails{nss, uuid, zone.toBSON(), shardKeyPattern.toBSON()}));
        }

        // As the zones are sorted by minKey, we can check if the previous zone maxKey is less
        // than the current zone minKey.
        const auto& minKey = zone.getMinKey();
        auto cmp = previousZone->getMaxKey().woCompare(minKey);
        if (cmp > 0) {
            inconsistencies.emplace_back(makeInconsistency(
                MetadataInconsistencyTypeEnum::kZonesRangeOverlap,
                ZonesRangeOverlapDetails{nss, uuid, previousZone->toBSON(), zone.toBSON()}));
        }

        previousZone = it;
    }

    return inconsistencies;
}
}  // namespace metadata_consistency_util
}  // namespace mongo