summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/key_string.h
blob: 04b341521bffbe6fe8f834237e4bbf5e223ebd4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
// key_string.h

/**
 *    Copyright (C) 2014 MongoDB Inc.
 *
 *    This program is free software: you can redistribute it and/or  modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/timestamp.h"
#include "mongo/bson/ordering.h"
#include "mongo/db/record_id.h"

namespace mongo {

    class KeyString {
    public:

        /**
         * Encodes info needed to restore the original BSONTypes from a KeyString. They cannot be
         * stored in place since we don't want them to affect the ordering (1 and 1.0 compare as
         * equal).
         */
        class TypeBits {
        public:
            // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
            // The encoding format will need to change if we raise this limit.
            static const uint8_t kMaxBytesNeeded = 127;

            TypeBits() { reset(); }

            /**
             * If there are no bytes remaining, assumes AllZeros. Otherwise, reads bytes out of the
             * BufReader in the format described on the getBuffer() method.
             */
            void resetFromBuffer(BufReader* reader);
            static TypeBits fromBuffer(BufReader* reader) {
                TypeBits out;
                out.resetFromBuffer(reader);
                return out;
            }

            /**
             * If true, no bits have been set to one. This is true if no bits have been set at all.
             */
            bool isAllZeros() const { return _isAllZeros; }
            
            /**
             * These methods return a buffer and size which encodes all of the type bits in this
             * instance.
             *
             * Encoded format:
             * Case 1 (first byte has high bit set to 1):
             *     Remaining bits of first byte encode number of follow-up bytes that are data
             *     bytes. Note that _buf is always maintained in this format but these methods may
             *     return one of the other formats, if possible, by skipping over the first byte.
             *
             * Case 2 (first byte is 0x0):
             *     This encodes the "AllZeros" state which represents an infinite stream of bits set
             *     to 0. Callers may optionally encode this case as an empty buffer if they have
             *     another way to mark the end of the buffer. There are no follow-up bytes.
             *
             * Case 3 (first byte isn't 0x0 but has high bit set to 0):
             *     The first byte is the only data byte. This can represent any 7-bit sequence or an
             *     8-bit sequence if the 8th bit is 0, since the 8th bit is the same as the bit that
             *     is 1 if the first byte is the size byte. There are no follow-up bytes.
             *
             * Within data bytes (ie everything excluding the size byte if there is one), bits are
             * packed in from low to high.
             */
            const uint8_t* getBuffer() const { return getSize() == 1 ? _buf + 1 : _buf; }
            size_t getSize() const {
                if (_isAllZeros) { // Case 2
                    dassert(_buf[1] == 0);
                    return 1;
                }

                uint8_t rawSize = getSizeByte();
                dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
                if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
                    return 1;
                }

                return rawSize + 1; // Case 1
            }
            
            //
            // Everything below is only for use by KeyString.
            //

            // Note: No space is used if all bits are 0 so the most common cases should be 0x0.
            static const uint8_t kString = 0x0;
            static const uint8_t kSymbol = 0x1;

            static const uint8_t kInt = 0x0;
            static const uint8_t kDouble = 0x1;
            static const uint8_t kLong = 0x2;
            static const uint8_t kNegativeZero = 0x3; // decodes as a double

            void reset() {
                _curBit = 0;
                _isAllZeros = true;
                setSizeByte(0);
                _buf[1] = 0;
            }

            void appendString() { appendBit(kString); }
            void appendSymbol() { appendBit(kSymbol); }

            void appendNumberDouble() { appendBit(kDouble & 1); appendBit(kDouble >> 1); }
            void appendNumberInt() { appendBit(kInt & 1); appendBit(kInt >> 1); }
            void appendNumberLong() { appendBit(kLong & 1); appendBit(kLong >> 1); }
            void appendNegativeZero() {
                appendBit(kNegativeZero & 1);
                appendBit(kNegativeZero >> 1);
            }

            class Reader {
            public:
                /**
                 * Passed in TypeBits must outlive this Reader instance.
                 */
                explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {}

                uint8_t readStringLike() { return readBit(); }
                uint8_t readNumeric() {
                    uint8_t lowBit = readBit();
                    return lowBit | (readBit() << 1);
                }

            private:
                uint8_t readBit();

                size_t _curBit;
                const TypeBits& _typeBits;
            };
            
        private:
            /**
             * size only includes data bytes, not the size byte itself.
             */
            uint8_t getSizeByte() const { return _buf[0] & 0x3f; }
            void setSizeByte(uint8_t size) {
                dassert(size < kMaxBytesNeeded);
                _buf[0] = 0x80 | size;
            }

            void appendBit(uint8_t oneOrZero);

            size_t _curBit;
            bool _isAllZeros;

            // See getBuffer()/getSize() documentation for a description of how data is encoded.
            // Currently whole buffer is copied in default copy methods. If they ever show up as hot
            // in profiling, we should add copy operations that only copy the parts of _buf that are
            // in use.
            uint8_t _buf[1/*size*/ + kMaxBytesNeeded];
        };

        enum Discriminator {
            kInclusive, // Anything to be stored in an index must use this.
            kExclusiveBefore,
            kExclusiveAfter,
        };

        KeyString() {}

        KeyString(const BSONObj& obj, Ordering ord, RecordId recordId) {
            resetToKey(obj, ord, recordId);
        }

        KeyString(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive) {
            resetToKey(obj, ord, discriminator);
        }

        explicit KeyString(RecordId rid) {
            appendRecordId(rid);
        }

        static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
        static BSONObj toBson(const char* buffer, size_t len, Ordering ord,
                              const TypeBits& types);

        /**
         * Decodes a RecordId from the end of a buffer.
         */
        static RecordId decodeRecordIdAtEnd(const void* buf, size_t size);

        /**
         * Decodes a RecordId, consuming all bytes needed from reader.
         */
        static RecordId decodeRecordId(BufReader* reader);

        void appendRecordId(RecordId loc);
        void appendTypeBits(const TypeBits& bits);

        /**
         * Resets to an empty state.
         * Equivalent to but faster than *this = KeyString()
         */
        void resetToEmpty() {
            _buffer.reset();
            _typeBits.reset();
        }

        void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId);
        void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive);
        void resetFromBuffer(const void* buffer, size_t size) {
            _buffer.reset();
            memcpy(_buffer.skip(size), buffer, size);
        }

        const char* getBuffer() const { return _buffer.buf(); }
        size_t getSize() const { return _buffer.len(); }
        bool isEmpty() const { return _buffer.len() == 0; }

        const TypeBits& getTypeBits() const { return _typeBits; }

        int compare(const KeyString& other) const;

        /**
         * @return a hex encoding of this key
         */
        std::string toString() const;

    private:

        void _appendAllElementsForIndexing(const BSONObj& obj, Ordering ord,
                                           Discriminator discriminator);

        void _appendBool(bool val, bool invert);
        void _appendDate(Date_t val, bool invert);
        void _appendTimestamp(Timestamp val, bool invert);
        void _appendOID(OID val, bool invert);
        void _appendString(StringData val, bool invert);
        void _appendSymbol(StringData val, bool invert);
        void _appendCode(StringData val, bool invert);
        void _appendCodeWString(const BSONCodeWScope& val, bool invert);
        void _appendBinData(const BSONBinData& val, bool invert);
        void _appendRegex(const BSONRegEx& val, bool invert);
        void _appendDBRef(const BSONDBRef& val, bool invert);
        void _appendArray(const BSONArray& val, bool invert);
        void _appendObject(const BSONObj& val, bool invert);
        void _appendNumberDouble(const double num, bool invert);
        void _appendNumberLong(const long long num, bool invert);
        void _appendNumberInt(const int num, bool invert);

        /**
         * @param name - optional, can be NULL
         *              if NULL, not included in encoding
         *              if not NULL, put in after type, before value
         */
        void _appendBsonValue(const BSONElement& elem,
                              bool invert,
                              const StringData* name);

        void _appendStringLike(StringData str, bool invert);
        void _appendBson(const BSONObj& obj, bool invert);
        void _appendSmallDouble(double value, bool invert);
        void _appendLargeDouble(double value, bool invert);
        void _appendInteger(const long long num, bool invert);
        void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert);

        template <typename T> void _append(const T& thing, bool invert);
        void _appendBytes(const void* source, size_t bytes, bool invert);

        TypeBits _typeBits;
        StackBufBuilder _buffer;
    };

    inline bool operator<(const KeyString& lhs, const KeyString& rhs) {
        return lhs.compare(rhs) < 0;
    }

    inline bool operator<=(const KeyString& lhs, const KeyString& rhs) {
        return lhs.compare(rhs) <= 0;
    }

    inline bool operator==(const KeyString& lhs, const KeyString& rhs) {
        return lhs.compare(rhs) == 0;
    }

    inline bool operator>(const KeyString& lhs, const KeyString& rhs) {
        return lhs.compare(rhs) > 0;
    }

    inline bool operator>=(const KeyString& lhs, const KeyString& rhs) {
        return lhs.compare(rhs) >= 0;
    }

    inline bool operator!=(const KeyString& lhs, const KeyString& rhs) {
        return !(lhs == rhs);
    }

    inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) {
        return stream << value.toString();
    }

} // namespace mongo