summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline/mongo_process_interface.h
blob: c4d5267f452bea63cc005439e5b85dc8409c4d1f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <boost/intrusive_ptr.hpp>
#include <boost/optional.hpp>
#include <list>
#include <memory>
#include <string>
#include <vector>

#include "mongo/client/dbclient_base.h"
#include "mongo/db/collection_index_usage_tracker.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/document_value/value.h"
#include "mongo/db/generic_cursor.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/ops/write_ops_exec.h"
#include "mongo/db/ops/write_ops_parsers.h"
#include "mongo/db/pipeline/field_path.h"
#include "mongo/db/pipeline/lite_parsed_document_source.h"
#include "mongo/db/query/explain_options.h"
#include "mongo/db/repl/oplog_entry.h"
#include "mongo/db/resource_yielder.h"
#include "mongo/db/storage/backup_cursor_hooks.h"
#include "mongo/db/storage/backup_cursor_state.h"
#include "mongo/s/chunk_version.h"

namespace mongo {

// Forward declarations (kept in alphabetical order). This header only names these types through
// pointers, smart pointers and references, so their full definitions are not needed here.
class ExpressionContext;
class JsExecution;
class Pipeline;
class PipelineDeleter;
class ShardFilterer;
class TransactionHistoryIteratorBase;

/**
 * Any functionality needed by an aggregation stage that is either context specific to a mongod or
 * mongos process, or is only compiled in to one of those two binaries must be accessed via this
 * interface. This allows all DocumentSources to be parsed on either mongos or mongod, but only
 * executable where it makes sense.
 */
class MongoProcessInterface {
public:
    /**
     * Storage for a batch of BSON Objects to be updated in the write namespace. For each element
     * in the batch we store a tuple of the folliwng elements:
     *   1. BSONObj - specifies the query that identifies a document in the to collection to be
     *      updated.
     *   2. write_ops::UpdateModification - either the new document we want to upsert or insert into
     *      the collection (i.e. a 'classic' replacement update), or the pipeline to run to compute
     *      the new document.
     *   3. boost::optional<BSONObj> - for pipeline-style updated, specifies variables that can be
     *      referred to in the pipeline performing the custom update.
     */
    using BatchObject =
        std::tuple<BSONObj, write_ops::UpdateModification, boost::optional<BSONObj>>;
    using BatchedObjects = std::vector<BatchObject>;

    enum class UpsertType {
        kNone,              // This operation is not an upsert.
        kGenerateNewDoc,    // If no documents match, generate a new document using the update spec.
        kInsertSuppliedDoc  // If no documents match, insert the document supplied in 'c.new' as-is.
    };

    enum class CurrentOpConnectionsMode { kIncludeIdle, kExcludeIdle };
    enum class CurrentOpUserMode { kIncludeAll, kExcludeOthers };
    enum class CurrentOpTruncateMode { kNoTruncation, kTruncateOps };
    enum class CurrentOpLocalOpsMode { kLocalMongosOps, kRemoteShardOps };
    enum class CurrentOpSessionsMode { kIncludeIdle, kExcludeIdle };
    enum class CurrentOpCursorMode { kIncludeCursors, kExcludeCursors };
    enum class CurrentOpBacktraceMode { kIncludeBacktrace, kExcludeBacktrace };

    /**
     * Factory function to create MongoProcessInterface of the right type. The implementation will
     * be installed by a lib higher up in the link graph depending on the application type.
     */
    static std::shared_ptr<MongoProcessInterface> create(OperationContext* opCtx);

    struct MakePipelineOptions {
        MakePipelineOptions(){};

        bool optimize = true;
        bool attachCursorSource = true;
        bool allowTargetingShards = true;
    };

    /**
     * This structure holds the result of a batched update operation, such as the number of
     * documents that matched the query predicate, and the number of documents modified by the
     * update operation.
     */
    struct UpdateResult {
        int64_t nMatched{0};
        int64_t nModified{0};
    };

    virtual ~MongoProcessInterface(){};

    /**
     * Creates a new TransactionHistoryIterator object. Only applicable in processes which support
     * locally traversing the oplog.
     */
    virtual std::unique_ptr<TransactionHistoryIteratorBase> createTransactionHistoryIterator(
        repl::OpTime time) const = 0;

    /**
     * Note that in some rare cases this could return a false negative but will never return a false
     * positive. This method will be fixed in the future once it becomes possible to avoid false
     * negatives.
     */
    virtual bool isSharded(OperationContext* opCtx, const NamespaceString& ns) = 0;

    /**
     * Inserts 'objs' into 'ns' and returns an error Status if the insert fails. If 'targetEpoch' is
     * set, throws ErrorCodes::StaleEpoch if the targeted collection does not have the same epoch or
     * the epoch changes during the course of the insert.
     */
    virtual Status insert(const boost::intrusive_ptr<ExpressionContext>& expCtx,
                          const NamespaceString& ns,
                          std::vector<BSONObj>&& objs,
                          const WriteConcernOptions& wc,
                          boost::optional<OID> targetEpoch) = 0;

    /**
     * Updates the documents matching 'queries' with the objects 'updates'. Returns an error Status
     * if any of the updates fail, otherwise returns an 'UpdateResult' objects with the details of
     * the update operation.  If 'targetEpoch' is set, throws ErrorCodes::StaleEpoch if the targeted
     * collection does not have the same epoch, or if the epoch changes during the update.
     */
    virtual StatusWith<UpdateResult> update(const boost::intrusive_ptr<ExpressionContext>& expCtx,
                                            const NamespaceString& ns,
                                            BatchedObjects&& batch,
                                            const WriteConcernOptions& wc,
                                            UpsertType upsert,
                                            bool multi,
                                            boost::optional<OID> targetEpoch) = 0;

    /**
     * Returns index usage statistics for each index on collection 'ns' along with additional
     * information including the index specification and whether the index is currently being built.
     *
     * By passing true for 'addShardName', the caller can request that each document in the
     * resulting vector includes a 'shard' field which denotes this node's shard name. It is illegal
     * to set this option unless this node is a shardsvr.
     */
    virtual std::vector<Document> getIndexStats(OperationContext* opCtx,
                                                const NamespaceString& ns,
                                                StringData host,
                                                bool addShardName) = 0;

    virtual std::list<BSONObj> getIndexSpecs(OperationContext* opCtx,
                                             const NamespaceString& ns,
                                             bool includeBuildUUIDs) = 0;

    /**
     * Appends operation latency statistics for collection "nss" to "builder"
     */
    virtual void appendLatencyStats(OperationContext* opCtx,
                                    const NamespaceString& nss,
                                    bool includeHistograms,
                                    BSONObjBuilder* builder) const = 0;

    /**
     * Appends storage statistics for collection "nss" to "builder"
     */
    virtual Status appendStorageStats(OperationContext* opCtx,
                                      const NamespaceString& nss,
                                      const BSONObj& param,
                                      BSONObjBuilder* builder) const = 0;

    /**
     * Appends the record count for collection "nss" to "builder".
     */
    virtual Status appendRecordCount(OperationContext* opCtx,
                                     const NamespaceString& nss,
                                     BSONObjBuilder* builder) const = 0;
    /**
     * Appends the exec stats for the collection 'nss' to 'builder'.
     */
    virtual Status appendQueryExecStats(OperationContext* opCtx,
                                        const NamespaceString& nss,
                                        BSONObjBuilder* builder) const = 0;

    /**
     * Gets the collection options for the collection given by 'nss'. Throws
     * ErrorCodes::CommandNotSupportedOnView if 'nss' describes a view. Future callers may want to
     * parameterize this behavior.
     */
    virtual BSONObj getCollectionOptions(OperationContext* opCtx, const NamespaceString& nss) = 0;

    /**
     * Performs the given rename command if the collection given by 'targetNs' has the same options
     * as specified in 'originalCollectionOptions', and has the same indexes as 'originalIndexes'.
     *
     * Throws an exception if the collection options and/or indexes are different.
     */
    virtual void renameIfOptionsAndIndexesHaveNotChanged(
        OperationContext* opCtx,
        const BSONObj& renameCommandObj,
        const NamespaceString& targetNs,
        const BSONObj& originalCollectionOptions,
        const std::list<BSONObj>& originalIndexes) = 0;

    /**
     * Creates a collection on the given database by running the given command. On shardsvr targets
     * the primary shard of 'dbName'.
     */
    virtual void createCollection(OperationContext* opCtx,
                                  const std::string& dbName,
                                  const BSONObj& cmdObj) = 0;

    /**
     * Runs createIndexes on the given database for the given index specs. If running on a shardsvr
     * this targets the primary shard of the database part of 'ns'.
     */
    virtual void createIndexesOnEmptyCollection(OperationContext* opCtx,
                                                const NamespaceString& ns,
                                                const std::vector<BSONObj>& indexSpecs) = 0;

    virtual void dropCollection(OperationContext* opCtx, const NamespaceString& collection) = 0;

    /**
     * Parses a Pipeline from a vector of BSONObjs representing DocumentSources. The state of the
     * returned pipeline will depend upon the supplied MakePipelineOptions:
     * - The boolean opts.optimize determines whether the pipeline will be optimized.
     * - If opts.attachCursorSource is false, the pipeline will be returned without attempting to
     *   add an initial cursor source.
     *
     * This function throws if parsing the pipeline failed.
     */
    virtual std::unique_ptr<Pipeline, PipelineDeleter> makePipeline(
        const std::vector<BSONObj>& rawPipeline,
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        const MakePipelineOptions opts = MakePipelineOptions{}) = 0;

    /**
     * Accepts a pipeline and returns a new one which will draw input from the underlying
     * collection. Performs no further optimization of the pipeline. NamespaceNotFound will be
     * thrown if ExpressionContext has a UUID and that UUID doesn't exist anymore. That should be
     * the only case where NamespaceNotFound is returned.
     *
     * This function takes ownership of the 'pipeline' argument as if it were a unique_ptr.
     * Changing it to a unique_ptr introduces a circular dependency on certain platforms where the
     * compiler expects to find an implementation of PipelineDeleter.
     *
     * If `allowTargetingShards` is true, the cursor will only be for local reads regardless of
     * whether or not this function is called in a sharded environment.
     */
    virtual std::unique_ptr<Pipeline, PipelineDeleter> attachCursorSourceToPipeline(
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        Pipeline* pipeline,
        bool allowTargetingShards = true) = 0;

    /**
     * Accepts a pipeline and returns a new one which will draw input from the underlying
     * collection _locally_. Trying to run this method on mongos is a programming error. Running
     * this method on a shard server will only return results which match the pipeline on that
     * shard.

     * Performs no further optimization of the pipeline. NamespaceNotFound will be
     * thrown if ExpressionContext has a UUID and that UUID doesn't exist anymore. That should be
     * the only case where NamespaceNotFound is returned.
     *
     * This function takes ownership of the 'pipeline' argument as if it were a unique_ptr.
     * Changing it to a unique_ptr introduces a circular dependency on certain platforms where the
     * compiler expects to find an implementation of PipelineDeleter.
     */
    virtual std::unique_ptr<Pipeline, PipelineDeleter> attachCursorSourceToPipelineForLocalRead(
        const boost::intrusive_ptr<ExpressionContext>& expCtx, Pipeline* pipeline) = 0;

    /**
     * Produces a ShardFilterer. May return null.
     */
    virtual std::unique_ptr<ShardFilterer> getShardFilterer(
        const boost::intrusive_ptr<ExpressionContext>& expCtx) const = 0;

    /**
     * Returns a vector of owned BSONObjs, each of which contains details of an in-progress
     * operation or, optionally, an idle connection. If userMode is kIncludeAllUsers, report
     * operations for all authenticated users; otherwise, report only the current user's operations.
     */
    virtual std::vector<BSONObj> getCurrentOps(
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        CurrentOpConnectionsMode connMode,
        CurrentOpSessionsMode sessionMode,
        CurrentOpUserMode userMode,
        CurrentOpTruncateMode,
        CurrentOpCursorMode,
        CurrentOpBacktraceMode) const = 0;

    /**
     * Returns the name of the local shard if sharding is enabled, or an empty string.
     */
    virtual std::string getShardName(OperationContext* opCtx) const = 0;

    /**
     * Returns the "host:port" string for this node.
     */
    virtual std::string getHostAndPort(OperationContext* opCtx) const = 0;

    /**
     * Returns the fields of the document key (in order) for the collection corresponding to 'uuid',
     * including the shard key and _id. If _id is not in the shard key, it is added last. If the
     * collection is not sharded or no longer exists, returns only _id. Also returns a boolean that
     * indicates whether the returned fields of the document key are final and will never change for
     * the given collection, either because the collection was dropped or has become sharded.
     *
     * This method is meant to be called from a mongod which owns at least one chunk for this
     * collection. It will inspect the CollectionShardingState, not the CatalogCache. If asked about
     * a collection not hosted on this shard, the answer will be incorrect.
     */
    virtual std::pair<std::vector<FieldPath>, bool> collectDocumentKeyFieldsForHostedCollection(
        OperationContext* opCtx, const NamespaceString&, UUID) const = 0;

    /**
     * Returns the fields of the document key (in order) for the collection 'nss', according to the
     * CatalogCache. The document key fields are the shard key (if sharded) and the _id (if not
     * already in the shard key). If _id is not in the shard key, it is added last. If the
     * collection is not sharded or is not known to exist, returns only _id. Does not refresh the
     * CatalogCache.
     */
    virtual std::vector<FieldPath> collectDocumentKeyFieldsActingAsRouter(
        OperationContext* opCtx, const NamespaceString&) const = 0;

    /**
     * Returns zero or one documents with the document key 'documentKey'. 'documentKey' is treated
     * as a unique identifier of a document, and may include an _id or all fields from the shard key
     * and an _id. Throws if more than one match was found. Returns boost::none if no matching
     * documents were found, including cases where the given namespace does not exist.
     */
    virtual boost::optional<Document> lookupSingleDocument(
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        const NamespaceString& nss,
        UUID,
        const Document& documentKey,
        boost::optional<BSONObj> readConcern,
        bool allowSpeculativeMajorityRead = false) = 0;

    /**
     * Returns a vector of all idle (non-pinned) local cursors.
     */
    virtual std::vector<GenericCursor> getIdleCursors(
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        CurrentOpUserMode userMode) const = 0;

    /**
     * The following methods forward to the BackupCursorHooks decorating the ServiceContext.
     */
    virtual BackupCursorState openBackupCursor(OperationContext* opCtx,
                                               const StorageEngine::BackupOptions& options) = 0;

    virtual void closeBackupCursor(OperationContext* opCtx, const UUID& backupId) = 0;

    virtual BackupCursorExtendState extendBackupCursor(OperationContext* opCtx,
                                                       const UUID& backupId,
                                                       const Timestamp& extendTo) = 0;

    /**
     * Returns a vector of BSON objects, where each entry in the vector describes a plan cache entry
     * inside the cache for the given namespace. Only those entries which match the supplied
     * MatchExpression are returned.
     */
    virtual std::vector<BSONObj> getMatchingPlanCacheEntryStats(OperationContext*,
                                                                const NamespaceString&,
                                                                const MatchExpression*) const = 0;

    /**
     * Returns true if there is an index on 'nss' with properties that will guarantee that a
     * document with non-array values for each of 'fieldPaths' will have at most one matching
     * document in 'nss'.
     *
     * Specifically, such an index must include all the fields, be unique, not be a partial index,
     * and match the operation's collation as given by 'expCtx'.
     */
    virtual bool fieldsHaveSupportingUniqueIndex(
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        const NamespaceString& nss,
        const std::set<FieldPath>& fieldPaths) const = 0;

    /**
     * Refreshes the CatalogCache entry for the namespace 'nss', and returns the epoch associated
     * with that namespace, if any. Note that this refresh will not necessarily force a new
     * request to be sent to the config servers. If another thread has already requested a refresh,
     * it will instead wait for that response.
     */
    virtual boost::optional<ChunkVersion> refreshAndGetCollectionVersion(
        const boost::intrusive_ptr<ExpressionContext>& expCtx,
        const NamespaceString& nss) const = 0;

    /**
     * Consults the CatalogCache to determine if this node has routing information for the
     * collection given by 'nss' which reports the same epoch as given by 'targetCollectionVersion'.
     * Major and minor versions in 'targetCollectionVersion' are ignored.
     */
    virtual void checkRoutingInfoEpochOrThrow(const boost::intrusive_ptr<ExpressionContext>& expCtx,
                                              const NamespaceString& nss,
                                              ChunkVersion targetCollectionVersion) const = 0;

    virtual std::unique_ptr<ResourceYielder> getResourceYielder() const = 0;

    /**
     * If the user supplied the 'fields' array, ensures that it can be used to uniquely identify a
     * document. Otherwise, picks a default unique key, which can be either the "_id" field, or
     * or a shard key, depending on the 'outputNs' collection type and the server type (mongod or
     * mongos). Also returns an optional ChunkVersion, populated with the version stored in the
     * sharding catalog when we asked for the shard key (on mongos only). On mongod, this is the
     * value of the 'targetCollectionVersion' parameter, which is the target shard version of the
     * collection, as sent by mongos.
     */
    virtual std::pair<std::set<FieldPath>, boost::optional<ChunkVersion>>
    ensureFieldsUniqueOrResolveDocumentKey(const boost::intrusive_ptr<ExpressionContext>& expCtx,
                                           boost::optional<std::vector<std::string>> fields,
                                           boost::optional<ChunkVersion> targetCollectionVersion,
                                           const NamespaceString& outputNs) const = 0;
};

}  // namespace mongo