/** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #pragma once #include "mongo/db/pipeline/pipeline.h" #include "mongo/s/async_requests_sender.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/query/owned_remote_cursor.h" #include "mongo/s/stale_shard_version_helpers.h" #include "mongo/stdx/variant.h" namespace mongo { namespace sharded_agg_helpers { /** * Represents the two halves of a pipeline that will execute in a sharded cluster. 'shardsPipeline' * will execute in parallel on each shard, and 'mergePipeline' will execute on the merge host - * either one of the shards or a mongos. */ struct SplitPipeline { SplitPipeline(std::unique_ptr shardsPipeline, std::unique_ptr mergePipeline, boost::optional shardCursorsSortSpec) : shardsPipeline(std::move(shardsPipeline)), mergePipeline(std::move(mergePipeline)), shardCursorsSortSpec(std::move(shardCursorsSortSpec)) {} std::unique_ptr shardsPipeline; std::unique_ptr mergePipeline; // If set, the cursors from the shards are expected to be sorted according to this spec, and to // have populated a "$sortKey" metadata field which can be used to compare the results. boost::optional shardCursorsSortSpec; }; struct ShardedExchangePolicy { // The exchange specification that will be sent to shards as part of the aggregate command. // It will be used by producers to determine how to distribute documents to consumers. ExchangeSpec exchangeSpec; // Shards that will run the consumer part of the exchange. std::vector consumerShards; }; struct DispatchShardPipelineResults { // True if this pipeline was split, and the second half of the pipeline needs to be run on // the primary shard for the database. bool needsPrimaryShardMerge; // Populated if this *is not* an explain, this vector represents the cursors on the remote // shards. std::vector remoteCursors; // Populated if this *is* an explain, this vector represents the results from each shard. std::vector remoteExplainOutput; // The split version of the pipeline if more than one shard was targeted, otherwise // boost::none. boost::optional splitPipeline; // If the pipeline targeted a single shard, this is the pipeline to run on that shard. std::unique_ptr pipelineForSingleShard; // The command object to send to the targeted shards. BSONObj commandForTargetedShards; // How many exchange producers are running the shard part of splitPipeline. size_t numProducers; // The exchange specification if the query can run with the exchange otherwise boost::none. boost::optional exchangeSpec; }; /** * If the merging pipeline is eligible for an $exchange merge optimization, returns the information * required to set that up. */ boost::optional checkIfEligibleForExchange(OperationContext* opCtx, const Pipeline* mergePipeline); /** * Split the current Pipeline into a Pipeline for each shard, and a Pipeline that combines the * results within a merging process. This call also performs optimizations with the aim of reducing * computing time and network traffic when a pipeline has been split into two pieces. * * The 'mergePipeline' returned as part of the SplitPipeline here is not ready to execute until the * 'shardsPipeline' has been sent to the shards and cursors have been established. Once cursors have * been established, the merge pipeline can be made executable by calling 'addMergeCursorsSource()' */ SplitPipeline splitPipeline(std::unique_ptr pipeline); /** * Targets shards for the pipeline and returns a struct with the remote cursors or results, and * the pipeline that will need to be executed to merge the results from the remotes. If a stale * shard version is encountered, refreshes the routing table and tries again. */ DispatchShardPipelineResults dispatchShardPipeline( Document serializedCommand, bool hasChangeStream, std::unique_ptr pipeline); BSONObj createPassthroughCommandForShard( const boost::intrusive_ptr& expCtx, Document serializedCommand, boost::optional explainVerbosity, Pipeline* pipeline, BSONObj collationObj); BSONObj createCommandForTargetedShards(const boost::intrusive_ptr& expCtx, Document serializedCommand, const SplitPipeline& splitPipeline, const boost::optional exchangeSpec, bool needsMerge); /** * Creates a new DocumentSourceMergeCursors from the provided 'remoteCursors' and adds it to the * front of 'mergePipeline'. */ void addMergeCursorsSource(Pipeline* mergePipeline, BSONObj cmdSentToShards, std::vector ownedCursors, const std::vector& targetedShards, boost::optional shardCursorsSortSpec, bool hasChangeStream); /** * Targets the shards with an aggregation command built from `ownedPipeline` and explain set to * true. Returns a BSONObj of the form {"pipeline": {}}. */ BSONObj targetShardsForExplain(Pipeline* ownedPipeline); /** * Appends the explain output of `dispatchResults` to `result`. */ Status appendExplainResults(DispatchShardPipelineResults&& dispatchResults, const boost::intrusive_ptr& mergeCtx, BSONObjBuilder* result); /** * Returns the proper routing table to use for targeting shards: either a historical routing table * based on the global read timestamp if there is an active transaction with snapshot level read * concern or the latest routing table otherwise. * * Returns 'ShardNotFound' or 'NamespaceNotFound' if there are no shards in the cluster or if * collection 'execNss' does not exist, respectively. */ StatusWith getExecutionNsRoutingInfo(OperationContext* opCtx, const NamespaceString& execNss); /** * Returns true if an aggregation over 'nss' must run on all shards. */ bool mustRunOnAllShards(const NamespaceString& nss, bool hasChangeStream); /** * Retrieves the desired retry policy based on whether the default writeConcern is set on 'opCtx'. */ Shard::RetryPolicy getDesiredRetryPolicy(OperationContext* opCtx); /** * Uses sharded_agg_helpers to split the pipeline and dispatch half to the shards, leaving the * merging half executing in this process after attaching a $mergeCursors. Will retry on network * errors and also on StaleConfig errors to avoid restarting the entire operation. */ std::unique_ptr attachCursorToPipeline(Pipeline* ownedPipeline, bool allowTargetingShards); /** * For a sharded collection, establishes remote cursors on each shard that may have results, and * creates a DocumentSourceMergeCursors stage to merge the remote cursors. Returns a pipeline * beginning with that DocumentSourceMergeCursors stage. Note that one of the 'remote' cursors might * be this node itself. * * Use the AggregateCommand alternative for 'targetRequest' to explicitly specify command options * (e.g. read concern) to the shards when establishing remote cursors. Note that doing so incurs the * cost of parsing the pipeline. */ std::unique_ptr targetShardsAndAddMergeCursors( const boost::intrusive_ptr& expCtx, stdx::variant, AggregateCommand> targetRequest, boost::optional shardCursorsSortSpec = boost::none); /** * For a sharded or unsharded collection, establishes a remote cursor on only the specified shard, * and creates a DocumentSourceMergeCursors stage to consume the remote cursor. Returns a pipeline * beginning with that DocumentSourceMergeCursors stage. * * This function bypasses normal shard targeting for sharded and unsharded collections. It is * especially useful for reading from unsharded collections such as config.transactions and * local.oplog.rs that cannot be targeted by targetShardsAndAddMergeCursors(). * * Note that the specified AggregateCommand must not be for an explain command. */ std::unique_ptr runPipelineDirectlyOnSingleShard( const boost::intrusive_ptr& expCtx, AggregateCommand request, ShardId shardId); } // namespace sharded_agg_helpers } // namespace mongo