summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/sorted_data_interface.h
blob: b2719faa3bc5df01e7697977885795fadd71dd0e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#include <boost/optional/optional.hpp>
#include <boost/optional/optional_io.hpp>
#include <memory>

#include "mongo/db/jsobj.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/record_id.h"
#include "mongo/db/storage/ident.h"
#include "mongo/db/storage/index_entry_comparison.h"
#include "mongo/db/storage/key_format.h"
#include "mongo/db/storage/key_string.h"

#pragma once

namespace mongo {

class BSONObjBuilder;
class BucketDeletionNotification;
class SortedDataBuilderInterface;
struct IndexValidateResults;

/**
 * This is the uniform interface for storing indexes and supporting point queries as well as range
 * queries. The actual implementation is up to the storage engine. All the storage engines must
 * support an index key size up to the maximum document size.
 */
class SortedDataInterface : public Ident {
public:
    /**
     * Constructs a SortedDataInterface. The rsKeyFormat is the RecordId key format of the related
     * RecordStore.
     */
    SortedDataInterface(StringData ident,
                        KeyString::Version keyStringVersion,
                        Ordering ordering,
                        KeyFormat rsKeyFormat)
        : Ident(ident.toString()),
          _keyStringVersion(keyStringVersion),
          _ordering(ordering),
          _rsKeyFormat(rsKeyFormat) {}

    virtual ~SortedDataInterface() {}

    //
    // Data changes
    //

    /**
     * Return a bulk builder for 'this' index.
     *
     * Implementations can assume that 'this' index outlives its bulk
     * builder.
     *
     * @param opCtx the transaction under which keys are added to 'this' index
     * @param dupsAllowed true if duplicate keys are allowed, and false
     *        otherwise
     */
    virtual std::unique_ptr<SortedDataBuilderInterface> makeBulkBuilder(OperationContext* opCtx,
                                                                        bool dupsAllowed) = 0;

    /**
     * Insert an entry into the index with the specified KeyString, which must have a RecordId
     * appended to the end.
     *
     * @param opCtx the transaction under which the insert takes place
     * @param dupsAllowed true if duplicate keys are allowed, and false
     *        otherwise
     *
     * @return true if the key was inserted
     *
     *         false if 'dupsAllowed' is true the exact key (including RecordId) already existed in
     *         the index
     *
     *         ErrorCodes::DuplicateKey if 'dupsAllowed' is false and the prefix key (ignoring
     *         RecordId) already existed in the index
     */
    virtual StatusWith<bool> insert(OperationContext* opCtx,
                                    const KeyString::Value& keyString,
                                    bool dupsAllowed) = 0;

    /**
     * Remove the entry from the index with the specified KeyString, which must have a RecordId
     * appended to the end.
     *
     * @param opCtx the transaction under which the remove takes place
     * @param dupsAllowed true to enforce strict checks to ensure we only delete a key with an exact
     *        match, false otherwise
     */
    virtual void unindex(OperationContext* opCtx,
                         const KeyString::Value& keyString,
                         bool dupsAllowed) = 0;

    /**
     * Retuns the RecordId of the first key whose prefix matches this KeyString.
     *
     * This will not accept a KeyString with a Discriminator other than kInclusive.
     */
    virtual boost::optional<RecordId> findLoc(OperationContext* opCtx,
                                              const KeyString::Value& keyString) const = 0;

    /**
     * Return ErrorCodes::DuplicateKey if there is more than one occurence of 'KeyString' in this
     * index, and Status::OK() otherwise. This call is only allowed on a unique index, and will
     * invariant otherwise.
     *
     * @param opCtx the transaction under which this operation takes place
     */
    virtual Status dupKeyCheck(OperationContext* opCtx, const KeyString::Value& keyString) = 0;

    /**
     * Attempt to reduce the storage space used by this index via compaction. Only called if the
     * indexed record store supports compaction-in-place.
     */
    virtual Status compact(OperationContext* opCtx) {
        return Status::OK();
    }

    //
    // Information about the tree
    //

    /**
     * TODO: expose full set of args for testing?
     */
    virtual void fullValidate(OperationContext* opCtx,
                              long long* numKeysOut,
                              IndexValidateResults* fullResults) const = 0;

    virtual bool appendCustomStats(OperationContext* opCtx,
                                   BSONObjBuilder* output,
                                   double scale) const = 0;


    /**
     * Return the number of bytes consumed by 'this' index.
     *
     * @param opCtx the transaction under which this operation takes place
     *
     * @see IndexAccessMethod::getSpaceUsedBytes
     */
    virtual long long getSpaceUsedBytes(OperationContext* opCtx) const = 0;

    /**
     * The number of unused free bytes consumed by this index on disk.
     */
    virtual long long getFreeStorageBytes(OperationContext* opCtx) const = 0;

    /**
     * Return true if 'this' index is empty, and false otherwise.
     */
    virtual bool isEmpty(OperationContext* opCtx) = 0;

    /**
     * Return the number of entries in 'this' index.
     *
     * The default implementation should be overridden with a more
     * efficient one if at all possible.
     */
    virtual long long numEntries(OperationContext* opCtx) const {
        long long x = -1;
        fullValidate(opCtx, &x, nullptr);
        return x;
    }

    /*
     * Return the KeyString version for 'this' index.
     */
    KeyString::Version getKeyStringVersion() const {
        return _keyStringVersion;
    }

    /*
     * Return the ordering for 'this' index.
     */
    Ordering getOrdering() const {
        return _ordering;
    }

    /**
     * Returns the format of the associated RecordStore's RecordId keys.
     */
    KeyFormat rsKeyFormat() const {
        return _rsKeyFormat;
    }

    /**
     * Navigates over the sorted data.
     *
     * A cursor is constructed with a direction flag with the following effects:
     *      - The direction that next() moves.
     *      - If a seek method hits an exact match on key, forward cursors will be positioned on
     *        the first value for that key, reverse cursors on the last.
     *      - If a seek method or restore does not hit an exact match, cursors will be
     *        positioned on the closest position *after* the query in the direction of the
     *        search.
     *      - The end position is on the "far" side of the query. In a forward cursor that means
     *        that it is the lowest value for the key if the end is exclusive or the first entry
     *        past the key if the end is inclusive or there are no exact matches.
     *
     * A cursor is tied to a transaction, such as the OperationContext or a WriteUnitOfWork
     * inside that context. Any cursor acquired inside a transaction is invalid outside
     * of that transaction, instead use the save and restore methods to reestablish the cursor.
     *
     * Any method other than the save methods may throw WriteConflict exception. If that
     * happens, the cursor may not be used again until it has been saved and successfully
     * restored. If next() or restore() throw a WCE the cursor's position will be the same as
     * before the call (strong exception guarantee). All other methods leave the cursor in a
     * valid state but with an unspecified position (basic exception guarantee). All methods
     * only provide the basic guarantee for exceptions other than WCE.
     *
     * Any returned unowned BSON is only valid until the next call to any method on this
     * interface. The implementations must assume that passed-in unowned BSON is only valid for
     * the duration of the call.
     *
     * Implementations may override any default implementation if they can provide a more
     * efficient implementation.
     */
    class Cursor {
    public:
        /**
         * Tells methods that return an IndexKeyEntry what part of the data the caller is
         * interested in.
         *
         * Methods returning an engaged optional<T> will only return null RecordIds or empty
         * BSONObjs if they have been explicitly left out of the request.
         *
         * Implementations are allowed to return more data than requested, but not less.
         */
        enum RequestedInfo {
            // Only usable part of the return is whether it is engaged or not.
            kJustExistance = 0,
            // Key must be filled in.
            kWantKey = 1,
            // Loc must be fulled in.
            kWantLoc = 2,
            // Both must be returned.
            kKeyAndLoc = kWantKey | kWantLoc,
        };

        virtual ~Cursor() = default;


        /**
         * Sets the position to stop scanning. An empty key unsets the end position.
         *
         * If next() hits this position, or a seek method attempts to seek past it they
         * unposition the cursor and return boost::none.
         *
         * Setting the end position should be done before seeking since the current position, if
         * any, isn't checked.
         */
        virtual void setEndPosition(const BSONObj& key, bool inclusive) = 0;

        /**
         * Moves forward and returns the new data or boost::none if there is no more data.
         * If not positioned, returns boost::none.
         */
        virtual boost::optional<IndexKeyEntry> next(RequestedInfo parts = kKeyAndLoc) = 0;
        virtual boost::optional<KeyStringEntry> nextKeyString() = 0;

        //
        // Seeking
        //

        /**
         * Seeks to the provided keyString and returns the KeyStringEntry.
         * The provided keyString has discriminator information encoded.
         */
        virtual boost::optional<KeyStringEntry> seekForKeyString(
            const KeyString::Value& keyString) = 0;

        /**
         * Seeks to the provided keyString and returns the IndexKeyEntry.
         * The provided keyString has discriminator information encoded.
         */
        virtual boost::optional<IndexKeyEntry> seek(const KeyString::Value& keyString,
                                                    RequestedInfo parts = kKeyAndLoc) = 0;

        //
        // Saving and restoring state
        //

        /**
         * Prepares for state changes in underlying data in a way that allows the cursor's
         * current position to be restored.
         *
         * It is safe to call save multiple times in a row.
         * No other method (excluding destructor) may be called until successfully restored.
         */
        virtual void save() = 0;

        /**
         * Prepares for state changes in underlying data without necessarily saving the current
         * state.
         *
         * The cursor's position when restored is unspecified. Caller is expected to seek
         * following the restore.
         *
         * It is safe to call saveUnpositioned multiple times in a row.
         * No other method (excluding destructor) may be called until successfully restored.
         */
        virtual void saveUnpositioned() {
            save();
        }

        /**
         * Recovers from potential state changes in underlying data.
         *
         * If the former position no longer exists, a following call to next() will return the
         * next closest position in the direction of the scan, if any.
         *
         * This handles restoring after either save() or saveUnpositioned().
         */
        virtual void restore() = 0;

        /**
         * Detaches from the OperationContext. Releases storage-engine resources, unless
         * setSaveStorageCursorOnDetachFromOperationContext() has been set to true.
         */
        virtual void detachFromOperationContext() = 0;

        /**
         * Reattaches to the OperationContext and reacquires any storage-engine state if necessary.
         *
         * It is only legal to call this in the "detached" state. On return, the cursor may still
         * be a "saved" state if there was a prior call to save(). In this case, callers must still
         * call restore() to use this object.
         */
        virtual void reattachToOperationContext(OperationContext* opCtx) = 0;

        /**
         * Toggles behavior on whether to give up the underlying storage cursor (and any record
         * pointed to by it) on detachFromOperationContext(). This supports the query layer
         * retaining valid and positioned cursors across commands.
         */
        virtual void setSaveStorageCursorOnDetachFromOperationContext(bool) = 0;

        /**
         * Returns true if the record id can be extracted from the unique index key string.
         *
         * Unique indexes created prior to 4.2 may contain key strings that do not have
         * an embedded record id. We will have to look up the record for this key in the index
         * to obtain the record id.
         *
         * This is used primarily during validation to identify unique indexes with keys in the
         * old format due to rolling upgrades.
         */
        virtual bool isRecordIdAtEndOfKeyString() const = 0;
    };

    /**
     * Returns an unpositioned cursor over 'this' index.
     *
     * Implementations can assume that 'this' index outlives all cursors it produces.
     */
    virtual std::unique_ptr<Cursor> newCursor(OperationContext* opCtx,
                                              bool isForward = true) const = 0;

    //
    // Index creation
    //

    virtual Status initAsEmpty(OperationContext* opCtx) = 0;

    /**
     * Insert an entry into the index with the specified KeyString, without a RecordId
     * appended to the end.
     *
     * This generates index entries consistent with index keys created in the server prior to 4.2.
     *
     * For testing only.
     */
    virtual void insertWithRecordIdInValue_forTest(OperationContext* opCtx,
                                                   const KeyString::Value& keyString,
                                                   RecordId rid) = 0;

protected:
    const KeyString::Version _keyStringVersion;
    const Ordering _ordering;
    const KeyFormat _rsKeyFormat;
};

/**
 * A version-hiding wrapper around the bulk builder for the Btree.
 */
class SortedDataBuilderInterface {
public:
    virtual ~SortedDataBuilderInterface() {}

    /**
     * Adds 'keyString' to intermediate storage.
     *
     * 'keyString' must be > or >= the last key passed to this function (depends on _dupsAllowed).
     * If this is violated an error Status (ErrorCodes::InternalError) will be returned.
     *
     * Some storage engines require callers to manage a WriteUnitOfWork to perform these inserts
     * transactionally. Other storage engines do not perform inserts transactionally and will ignore
     * any parent WriteUnitOfWork.
     */
    virtual Status addKey(const KeyString::Value& keyString) = 0;
};

}  // namespace mongo