/** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * <http://www.mongodb.com/licensing/server-side-public-license>. * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. 
*/ #include "mongo/platform/basic.h" #include #include #include #include "mongo/db/auth/authorization_checks.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/client.h" #include "mongo/db/clientcursor.h" #include "mongo/db/commands.h" #include "mongo/db/commands/run_aggregate.h" #include "mongo/db/db_raii.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/jsobj.h" #include "mongo/db/matcher/extensions_callback_real.h" #include "mongo/db/namespace_string.h" #include "mongo/db/pipeline/aggregation_request_helper.h" #include "mongo/db/query/collection_query_info.h" #include "mongo/db/query/cursor_response.h" #include "mongo/db/query/explain.h" #include "mongo/db/query/find_common.h" #include "mongo/db/query/get_executor.h" #include "mongo/db/query/parsed_distinct.h" #include "mongo/db/query/plan_summary_stats.h" #include "mongo/db/query/query_planner_common.h" #include "mongo/db/query/view_response_formatter.h" #include "mongo/db/s/collection_sharding_state.h" #include "mongo/db/s/query_analysis_writer.h" #include "mongo/db/views/resolved_view.h" #include "mongo/logv2/log.h" #include "mongo/util/database_name_util.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery namespace mongo { namespace { namespace dps = dotted_path_support; class DistinctCommand : public BasicCommand { public: DistinctCommand() : BasicCommand("distinct") {} std::string help() const override { return "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { return AllowedOnSecondary::kOptIn; } bool maintenanceOk() const override { return false; } bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } bool collectsResourceConsumptionMetrics() const override { return true; } bool canIgnorePrepareConflicts() const override { return true; } ReadConcernSupportResult supportsReadConcern(const 
BSONObj& cmdObj, repl::ReadConcernLevel level, bool isImplicitDefault) const override { return ReadConcernSupportResult::allSupportedAndDefaultPermitted(); } bool shouldAffectReadConcernCounter() const override { return true; } bool supportsReadMirroring(const BSONObj&) const override { return true; } ReadWriteType getReadWriteType() const override { return ReadWriteType::kRead; } std::size_t reserveBytesForReply() const override { return FindCommon::kInitReplyBufferSize; } Status checkAuthForOperation(OperationContext* opCtx, const DatabaseName& dbname, const BSONObj& cmdObj) const override { AuthorizationSession* authSession = AuthorizationSession::get(opCtx->getClient()); if (!authSession->isAuthorizedToParseNamespaceElement(cmdObj.firstElement())) { return Status(ErrorCodes::Unauthorized, "Unauthorized"); } const auto hasTerm = false; return auth::checkAuthForFind(authSession, CollectionCatalog::get(opCtx)->resolveNamespaceStringOrUUID( opCtx, CommandHelpers::parseNsOrUUID(dbname, cmdObj)), hasTerm); } bool allowedInTransactions() const final { return true; } bool allowedWithSecurityToken() const final { return true; } Status explain(OperationContext* opCtx, const OpMsgRequest& request, ExplainOptions::Verbosity verbosity, rpc::ReplyBuilderInterface* result) const override { const DatabaseName dbName = DatabaseNameUtil::deserialize(request.getValidatedTenantId(), request.getDatabase()); const BSONObj& cmdObj = request.body; // Acquire locks. The RAII object is optional, because in the case of a view, the locks // need to be released. boost::optional ctx; ctx.emplace( opCtx, CommandHelpers::parseNsCollectionRequired(dbName, cmdObj), AutoGetCollection::Options{}.viewMode(auto_get_collection::ViewMode::kViewsPermitted)); const auto nss = ctx->getNss(); const ExtensionsCallbackReal extensionsCallback(opCtx, &nss); auto defaultCollator = ctx->getCollection() ? 
ctx->getCollection()->getDefaultCollator() : nullptr; auto parsedDistinct = uassertStatusOK( ParsedDistinct::parse(opCtx, nss, cmdObj, extensionsCallback, true, defaultCollator)); if (ctx->getView()) { // Relinquish locks. The aggregation command will re-acquire them. ctx.reset(); auto viewAggregation = parsedDistinct.asAggregationCommand(); if (!viewAggregation.isOK()) { return viewAggregation.getStatus(); } auto viewAggCmd = OpMsgRequest::fromDBAndBody(nss.db(), viewAggregation.getValue()).body; auto viewAggRequest = aggregation_request_helper::parseFromBSON( opCtx, nss, viewAggCmd, verbosity, APIParameters::get(opCtx).getAPIStrict().value_or(false)); // An empty PrivilegeVector is acceptable because these privileges are only checked on // getMore and explain will not open a cursor. return runAggregate( opCtx, nss, viewAggRequest, viewAggregation.getValue(), PrivilegeVector(), result); } const auto& collection = ctx->getCollection(); auto executor = uassertStatusOK( getExecutorDistinct(&collection, QueryPlannerParams::DEFAULT, &parsedDistinct)); auto bodyBuilder = result->getBodyBuilder(); Explain::explainStages( executor.get(), collection, verbosity, BSONObj(), cmdObj, &bodyBuilder); return Status::OK(); } bool run(OperationContext* opCtx, const DatabaseName& dbName, const BSONObj& cmdObj, BSONObjBuilder& result) override { CommandHelpers::handleMarkKillOnClientDisconnect(opCtx); // Acquire locks and resolve possible UUID. The RAII object is optional, because in the case // of a view, the locks need to be released. boost::optional ctx; ctx.emplace( opCtx, CommandHelpers::parseNsOrUUID(dbName, cmdObj), AutoGetCollection::Options{}.viewMode(auto_get_collection::ViewMode::kViewsPermitted)); const auto& nss = ctx->getNss(); if (!ctx->getView()) { // Distinct doesn't filter orphan documents so it is not allowed to run on sharded // collections in multi-document transactions. 
uassert( ErrorCodes::OperationNotSupportedInTransaction, "Cannot run 'distinct' on a sharded collection in a multi-document transaction. " "Please see http://dochub.mongodb.org/core/transaction-distinct for a recommended " "alternative.", !opCtx->inMultiDocumentTransaction() || !ctx->getCollection().isSharded()); // Similarly, we ban readConcern level snapshot for sharded collections. uassert( ErrorCodes::InvalidOptions, "Cannot run 'distinct' on a sharded collection with readConcern level 'snapshot'", repl::ReadConcernArgs::get(opCtx).getLevel() != repl::ReadConcernLevel::kSnapshotReadConcern || !ctx->getCollection().isSharded()); } const ExtensionsCallbackReal extensionsCallback(opCtx, &nss); auto defaultCollation = ctx->getCollection() ? ctx->getCollection()->getDefaultCollator() : nullptr; auto parsedDistinct = uassertStatusOK( ParsedDistinct::parse(opCtx, nss, cmdObj, extensionsCallback, false, defaultCollation)); if (parsedDistinct.isMirrored()) { const auto& invocation = CommandInvocation::get(opCtx); invocation->markMirrored(); } if (analyze_shard_key::supportsPersistingSampledQueries() && parsedDistinct.getSampleId()) { auto cq = parsedDistinct.getQuery(); analyze_shard_key::QueryAnalysisWriter::get(opCtx) .addDistinctQuery(*parsedDistinct.getSampleId(), nss, cq->getQueryObj(), cq->getFindCommandRequest().getCollation()) .getAsync([](auto) {}); } if (ctx->getView()) { // Relinquish locks. The aggregation command will re-acquire them. ctx.reset(); auto viewAggregation = parsedDistinct.asAggregationCommand(); uassertStatusOK(viewAggregation.getStatus()); BSONObj aggResult = CommandHelpers::runCommandDirectly( opCtx, OpMsgRequest::fromDBAndBody(dbName.db(), std::move(viewAggregation.getValue()))); uassertStatusOK(ViewResponseFormatter(aggResult).appendAsDistinctResponse(&result)); return true; } // Check whether we are allowed to read from this node after acquiring our locks. 
auto replCoord = repl::ReplicationCoordinator::get(opCtx); uassertStatusOK(replCoord->checkCanServeReadsFor( opCtx, nss, ReadPreferenceSetting::get(opCtx).canRunOnSecondary())); const auto& collection = ctx->getCollection(); auto executor = getExecutorDistinct(&collection, QueryPlannerParams::DEFAULT, &parsedDistinct); uassertStatusOK(executor.getStatus()); { stdx::lock_guard lk(*opCtx->getClient()); CurOp::get(opCtx)->setPlanSummary_inlock( executor.getValue()->getPlanExplainer().getPlanSummary()); } const auto key = cmdObj.getStringField(ParsedDistinct::kKeyField); std::vector distinctValueHolder; BSONElementSet values(executor.getValue()->getCanonicalQuery()->getCollator()); const int kMaxResponseSize = BSONObjMaxUserSize - 4096; try { size_t listApproxBytes = 0; BSONObj obj; while (PlanExecutor::ADVANCED == executor.getValue()->getNext(&obj, nullptr)) { // Distinct expands arrays. // // If our query is covered, each value of the key should be in the index key and // available to us without this. If a collection scan is providing the data, we may // have to expand an array. BSONElementSet elts; dps::extractAllElementsAlongPath(obj, key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } // This is an approximate size check which safeguards against use of unbounded // memory by the distinct command. We perform a more precise check at the end of // this method to confirm that the response size is less than 16MB. 
listApproxBytes += elt.size(); uassert( 17217, "distinct too big, 16mb cap", listApproxBytes < kMaxResponseSize); auto distinctObj = elt.wrap(); values.insert(distinctObj.firstElement()); distinctValueHolder.push_back(std::move(distinctObj)); } } } catch (DBException& exception) { auto&& explainer = executor.getValue()->getPlanExplainer(); auto&& [stats, _] = explainer.getWinningPlanStats(ExplainOptions::Verbosity::kExecStats); LOGV2_WARNING(23797, "Plan executor error during distinct command: {error}, " "stats: {stats}, cmd: {cmd}", "Plan executor error during distinct command", "error"_attr = exception.toStatus(), "stats"_attr = redact(stats), "cmd"_attr = cmdObj); exception.addContext("Executor error during distinct command"); throw; } auto curOp = CurOp::get(opCtx); // Get summary information about the plan. PlanSummaryStats stats; auto&& explainer = executor.getValue()->getPlanExplainer(); explainer.getSummaryStats(&stats); if (collection) { CollectionQueryInfo::get(collection).notifyOfQuery(opCtx, collection, stats); } curOp->debug().setPlanSummaryMetrics(stats); if (curOp->shouldDBProfile(opCtx)) { auto&& [stats, _] = explainer.getWinningPlanStats(ExplainOptions::Verbosity::kExecStats); curOp->debug().execStats = std::move(stats); } BSONArrayBuilder valueListBuilder(result.subarrayStart("values")); for (const auto& value : values) { valueListBuilder.append(value); } valueListBuilder.doneFast(); if (!opCtx->inMultiDocumentTransaction() && repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime()) { result.append("atClusterTime"_sd, repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime()->asTimestamp()); } uassert(31299, "distinct too big, 16mb cap", result.len() < kMaxResponseSize); return true; } void appendMirrorableRequest(BSONObjBuilder* bob, const BSONObj& cmdObj) const override { static const auto kMirrorableKeys = [] { BSONObjBuilder keyBob; keyBob.append("distinct", 1); keyBob.append("key", 1); keyBob.append("query", 1); keyBob.append("collation", 1); 
keyBob.append("shardVersion", 1); return keyBob.obj(); }(); // Filter the keys that can be mirrored cmdObj.filterFieldsUndotted(bob, kMirrorableKeys, true); } } distinctCmd; } // namespace } // namespace mongo