summaryrefslogtreecommitdiff
path: root/src/mongo/db/timeseries/bucket_catalog/flat_bson.h
blob: 658ba003c0f190de872a46a2735ba7554a2ef931 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/**
 *    Copyright (C) 2021-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/util/string_map.h"

#include <memory>
#include <string>
#include <vector>

namespace mongo::timeseries::bucket_catalog {

/**
 * Stores a BSON hierarchy in a flat contigous memory structure. Optimized for fast traversal
 * in lock-step of a BSONObj with the same internal field order. It does this at the expense of
 * insert performance which should be a rare operation when adding measurements to a timeseries
 * bucket. Usually we need to traverse the FlatBSONStore structure to check if we need to update any
 * values, or to check for compatibility with an incoming measurement.
 *
 * Provides search capability when field order is not what was expected and contains a fallback to
 * map lookup when linear search does not find elements within a constant search threshold.
 */
template <class Element, class Value>
class FlatBSONStore {
public:
    /**
     * Element data type
     */
    enum class Type : uint8_t {
        kValue,
        kObject,
        kArray,
        kUnset,
    };

    /**
     * Stored data for an Element
     */
    class Data {
    public:
        friend class FlatBSONStore;

        /**
         * DataType stored by this Data
         */
        Type type() const;

        /**
         * Flag to indicate if this FlatBSON::Data was updated since last clear.
         */
        bool updated() const;

        /**
         * Clear update flag.
         */
        void clearUpdated();

        /**
         * Value stored by this Data. Only valid to call if type is kValue.
         */
        const Value& value() const;

        /**
         * Sets the type of this Data.
         */
        void setObject();
        void setArray();
        void setUnset();
        void setValue(const BSONElement& elem);

    private:
        Value _value;
        Type _type = Type::kUnset;
        bool _updated = false;
    };

    class Obj;

    /**
     * Internal storage type to manage iterator offsets needed to implement a tree structure in a
     * flat buffer.
     */
    struct Entry {
    public:
        // Iterator offset to the entry after the last subelement
        uint32_t _offsetEnd;
        // Iterator offset to the parent entry
        uint32_t _offsetParent;
        // Data bearing element, exposed through public interfaces
        Element _element;
        // Map for faster searches. Contain mapping from field name to iterator offset to
        // subelement. Only instantiated when we've depleted our allowed linear search depth.
        std::unique_ptr<StringMap<uint32_t>> _fieldNameToIndex;
    };
    using Entries = std::vector<Entry>;

    /**
     * Forward iterator over subelements in an Obj.
     */
    class Iterator {
    public:
        friend class FlatBSONStore::Obj;

        using iterator_category = std::forward_iterator_tag;
        using difference_type = ptrdiff_t;
        using value_type = Element;
        using pointer = Element*;
        using reference = Element&;

        pointer operator->();
        reference operator*();

        Iterator& operator++();

        bool operator==(const Iterator& rhs) const;
        bool operator!=(const Iterator& rhs) const;

    private:
        Iterator(typename Entries::iterator pos);

        typename Entries::iterator _pos;
    };

    /**
     * Forward iterator over subelements in an ObjView.
     */
    class ConstIterator {
    public:
        friend class FlatBSONStore::Obj;

        using iterator_category = std::forward_iterator_tag;
        using difference_type = ptrdiff_t;
        using value_type = const Element;
        using pointer = const Element*;
        using reference = const Element&;

        pointer operator->() const;
        reference operator*() const;

        ConstIterator& operator++();

        bool operator==(const ConstIterator& rhs) const;
        bool operator!=(const ConstIterator& rhs) const;

    private:
        ConstIterator(typename std::vector<Entry>::const_iterator pos);

        typename Entries::const_iterator _pos;
    };


    /**
     * Represents an 'Object' within the FlatBSONStore. Analogous BSONObj for BSON. Provides
     * iteration, insertion and search capability for subelements.
     */
    class Obj {
    public:
        friend class FlatBSONStore;

        /**
         * Copying an Obj copies the internal position. It is not allowed to copy to an Obj owned by
         * a different FlatBSONStore.
         */
        Obj& operator=(const Obj& rhs);

        /**
         * Access to the element containing user-data such as field names and stored values.
         */
        Element& element();
        const Element& element() const;

        /**
         * Creates an Obj to the parent of this Obj.
         */
        Obj parent() const;

        /**
         * Creates an Obj for a subelement of this Obj.
         */
        Obj object(Iterator pos) const;

        /**
         * Returns the iterator position of this Obj.
         */
        Iterator iterator() const;

        /**
         * Searches this Obj for the subelement with the provided fieldName. Performs linear search
         * from start to end with a maximum search threshold. When the maximum search threshold is
         * reached this Object will perform all subsequent searches using a map lookup.
         *
         * The returned Iterator may be outside of the range [start, last).
         * If the element is not found 'last' is returned.
         */
        Iterator search(Iterator first, Iterator last, StringData fieldName);

        /**
         * As above but last = end()
         */
        Iterator search(Iterator first, StringData fieldName);

        /**
         * Insert a new element before the position 'pos'. Invalidates all previously returned
         * Iterators and Obj. This Obj remain valid.
         *
         * Returns an iterator to the newly inserted element together with the end iterator for this
         * Obj.
         */
        std::pair<Iterator, Iterator> insert(Iterator pos, std::string fieldName);

        /**
         * Iteration for subelements of this Obj.
         */
        Iterator begin();
        Iterator end();
        ConstIterator begin() const;
        ConstIterator end() const;

    private:
        Obj(std::vector<Entry>& entries, typename std::vector<Entry>::iterator pos);

        Entries& _entries;
        typename Entries::iterator _pos;
    };

    FlatBSONStore();

    /**
     * Access to the root Obj for this store.
     */
    Obj root() {
        return {entries, entries.begin()};
    }

private:
    Entries entries;
};

/**
 * Manages updating and extracting values in a FlatBSONStore.
 */
template <class Derived, class Element, class Value>
class FlatBSON {
public:
    enum class UpdateStatus { Updated, Failed, NoChange };
    static std::string updateStatusString(UpdateStatus updateStatus);

    /**
     * Updates the stored fields provided by 'doc', ignoring the 'metaField' field.
     */
    UpdateStatus update(const BSONObj& doc,
                        boost::optional<StringData> metaField,
                        const StringData::ComparatorInterface* stringComparator);

protected:
    // Helper for update() above to provide recursion of FlatBSONStore element together with a
    // BSONElement
    std::tuple<UpdateStatus,
               typename FlatBSONStore<Element, Value>::Iterator,
               typename FlatBSONStore<Element, Value>::Iterator>
    _update(typename FlatBSONStore<Element, Value>::Obj obj,
            BSONElement elem,
            typename Element::UpdateContext updateContext,
            const StringData::ComparatorInterface* stringComparator);

    // Helper to search and compare field names between FlatBSONStore::Obj and BSONObj.
    UpdateStatus _updateObj(typename FlatBSONStore<Element, Value>::Obj& obj,
                            const BSONObj& doc,
                            typename Element::UpdateContext updateContext,
                            const StringData::ComparatorInterface* stringComparator,
                            std::function<bool(StringData)> skipFieldFn);

    /**
     * Appends the BSONObj represented by the FlatBSONStore to the builder.
     */
    template <typename GetDataFn>
    void _append(typename FlatBSONStore<Element, Value>::Obj obj,
                 BSONObjBuilder* builder,
                 GetDataFn getData);
    template <typename GetDataFn>
    void _append(typename FlatBSONStore<Element, Value>::Obj obj,
                 BSONArrayBuilder* builder,
                 GetDataFn getData);

    /**
     * Appends updates, if any, to the builder. Returns whether any updates were appended.
     */
    template <typename GetDataFn>
    bool _appendUpdates(typename FlatBSONStore<Element, Value>::Obj obj,
                        BSONObjBuilder* builder,
                        GetDataFn getData);

    /**
     * Clears the '_updated' flag on this Iterator and all its subelements.
     */
    template <typename GetDataFn>
    void _clearUpdated(typename FlatBSONStore<Element, Value>::Iterator elem, GetDataFn getData);

    template <typename GetDataFn>
    static void _setTypeObject(typename FlatBSONStore<Element, Value>::Obj& obj, GetDataFn getData);

    template <typename GetDataFn>
    static void _setTypeArray(typename FlatBSONStore<Element, Value>::Obj& obj, GetDataFn getData);

    FlatBSONStore<Element, Value> _store;
};

/**
 * Buffer value for a Data of type kValue, storing a full BSONElement value.
 */
struct BSONElementValueBuffer {
    BSONElement get() const;
    void set(const BSONElement&);
    BSONType type() const;

private:
    std::unique_ptr<char[]> _buffer;
    int _size = 0;
};

/**
 * Base class for an element stored in a FlatBSONStore.
 */
class Element {
public:
    /**
     * Field name component
     */
    StringData fieldName() const;

    void setFieldName(std::string&& fieldName);

    /**
     * Returns true if this Element is only used for Array storage. Array field names are not
     * used and may be claimed by an Object.
     */
    bool isArrayFieldName() const;
    void claimArrayFieldNameForObject(std::string name);

private:
    std::string _fieldName;
};


class MinMaxElement;
typedef FlatBSONStore<MinMaxElement, BSONElementValueBuffer> MinMaxStore;

/**
 * Element representing both the min and max values for a given field path across all measurements
 * in a bucket.
 */
class MinMaxElement : public Element {
public:
    struct UpdateContext {
        bool min = true;
        bool max = true;
    };

    void initializeRoot();

    /**
     * Min data component access
     */
    MinMaxStore::Data& min();
    const MinMaxStore::Data& min() const;

    /**
     * Max data component access
     */
    MinMaxStore::Data& max();
    const MinMaxStore::Data& max() const;

private:
    MinMaxStore::Data _min;
    MinMaxStore::Data _max;
};

/**
 * Manages Min and Max values for timeseries measurements within a bucket.
 */
class MinMax : public FlatBSON<MinMax, MinMaxElement, BSONElementValueBuffer> {
    friend class FlatBSON<MinMax, MinMaxElement, BSONElementValueBuffer>;

public:
    /**
     * Returns the full min/max object.
     */
    BSONObj min();
    BSONObj max();

    /**
     * Returns the updates since the previous time this function or the min/max functions were
     * called in the format for an update op.
     */
    BSONObj minUpdates();
    BSONObj maxUpdates();

    /**
     * Generates and returns a MinMax object from the passed in min and max documents.
     */
    static MinMax parseFromBSON(const BSONObj& min,
                                const BSONObj& max,
                                const StringData::ComparatorInterface* stringComparator);

protected:
    static std::pair<UpdateStatus, MinMaxElement::UpdateContext> _shouldUpdateObj(
        MinMaxStore::Obj& obj, const BSONElement& elem, MinMaxElement::UpdateContext updateContext);

    static std::pair<UpdateStatus, MinMaxElement::UpdateContext> _shouldUpdateArr(
        MinMaxStore::Obj& obj, const BSONElement& elem, MinMaxElement::UpdateContext updateContext);

    static UpdateStatus _maybeUpdateValue(MinMaxStore::Obj& obj,
                                          const BSONElement& elem,
                                          MinMaxElement::UpdateContext updateContext,
                                          const StringData::ComparatorInterface* stringComparator);

private:
    /**
     * Helper for the recursive internal functions to access the min data component.
     */
    struct GetMin {
        MinMaxStore::Data& operator()(MinMaxElement& element) const {
            return element.min();
        }
        const MinMaxStore::Data& operator()(const MinMaxElement& element) const {
            return element.min();
        }
    };

    /**
     * Helper for the recursive internal functions to access the max data component.
     */
    struct GetMax {
        MinMaxStore::Data& operator()(MinMaxElement& element) const {
            return element.max();
        }
        const MinMaxStore::Data& operator()(const MinMaxElement& element) const {
            return element.max();
        }
    };
};

/**
 * Buffer value for a Data of type kValue, storing just the BSONElement type.
 */
struct BSONTypeValue {
    BSONElement get() const;
    void set(const BSONElement&);
    BSONType type() const;

private:
    BSONType _type;
};

class SchemaElement;
typedef FlatBSONStore<SchemaElement, BSONTypeValue> SchemaStore;

/**
 * Element representing schema type for a given field path for all measurements in a bucket.
 */
class SchemaElement : public Element {
public:
    struct UpdateContext {};

    void initializeRoot();

    /**
     * Min data component access
     */
    SchemaStore::Data& data();
    const SchemaStore::Data& data() const;

private:
    SchemaStore::Data _data;
};

/**
 * Manages schema data for timeseries measurements within a bucket.
 */
class Schema : public FlatBSON<Schema, SchemaElement, BSONTypeValue> {
    friend class FlatBSON<Schema, SchemaElement, BSONTypeValue>;

public:
    /**
     * Generates and returns a Schema object from the passed in min and max documents.
     */
    static Schema parseFromBSON(const BSONObj& min,
                                const BSONObj& max,
                                const StringData::ComparatorInterface* stringComparator);

protected:
    static std::pair<UpdateStatus, typename SchemaElement::UpdateContext> _shouldUpdateObj(
        SchemaStore::Obj& obj, const BSONElement& elem, SchemaElement::UpdateContext updateContext);

    static std::pair<UpdateStatus, typename SchemaElement::UpdateContext> _shouldUpdateArr(
        SchemaStore::Obj& obj, const BSONElement& elem, SchemaElement::UpdateContext updateContext);

    static UpdateStatus _maybeUpdateValue(SchemaStore::Obj& obj,
                                          const BSONElement& elem,
                                          SchemaElement::UpdateContext updateContext,
                                          const StringData::ComparatorInterface* stringComparator);

private:
    /**
     * Helper for the recursive internal functions to access the type data component.
     */
    struct GetData {
        SchemaStore::Data& operator()(SchemaElement& element) const {
            return element.data();
        }
        const SchemaStore::Data& operator()(const SchemaElement& element) const {
            return element.data();
        }
    };
};

}  // namespace mongo::timeseries::bucket_catalog