diff options
-rw-r--r-- | src/mongo/bson/ordering.h | 15 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/sbe_key_string_test.cpp | 209 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/slot.h | 15 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.cpp | 43 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.h | 23 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value_builder.h | 262 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.h | 16 |
9 files changed, 595 insertions, 15 deletions
diff --git a/src/mongo/bson/ordering.h b/src/mongo/bson/ordering.h index 1521044d468..d1178623f91 100644 --- a/src/mongo/bson/ordering.h +++ b/src/mongo/bson/ordering.h @@ -39,11 +39,12 @@ namespace mongo { * Over time we should push this up higher and higher. */ class Ordering { - unsigned bits; - Ordering(unsigned b) : bits(b) {} + uint32_t bits; + Ordering(uint32_t b) : bits(b) {} public: static constexpr size_t kMaxCompoundIndexKeys = size_t{32}; + static_assert(kMaxCompoundIndexKeys == 8 * sizeof(bits)); static Ordering allAscending() { return {0}; @@ -62,24 +63,24 @@ public: uassert(ErrorCodes::Overflow, str::stream() << "Ordering offset is out of bounds: " << i, i >= 0 && static_cast<size_t>(i) < kMaxCompoundIndexKeys); - return ((1 << i) & bits) ? -1 : 1; + return ((1u << i) & bits) ? -1 : 1; } - unsigned descending(unsigned mask) const { + uint32_t descending(uint32_t mask) const { return bits & mask; } static Ordering make(const BSONObj& obj) { - unsigned b = 0; + uint32_t b = 0; BSONObjIterator k(obj); - unsigned n = 0; + uint32_t n = 0; while (1) { BSONElement e = k.next(); if (e.eoo()) break; uassert(13103, "too many compound keys", n < kMaxCompoundIndexKeys); if (e.number() < 0) - b |= (1 << n); + b |= (1u << n); n++; } return Ordering(b); diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index 60b09f35a12..d0de3caf933 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -80,6 +80,7 @@ env.CppUnitTest( target='db_sbe_test', source=[ 'sbe_test.cpp', + 'sbe_key_string_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/db/concurrency/lock_manager', diff --git a/src/mongo/db/exec/sbe/sbe_key_string_test.cpp b/src/mongo/db/exec/sbe/sbe_key_string_test.cpp new file mode 100644 index 00000000000..b69ab6ad3e3 --- /dev/null +++ b/src/mongo/db/exec/sbe/sbe_key_string_test.cpp @@ -0,0 +1,209 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <queue> + +#include "mongo/db/exec/sbe/parser/parser.h" +#include "mongo/db/exec/sbe/stages/co_scan.h" +#include "mongo/db/exec/sbe/stages/project.h" +#include "mongo/db/exec/sbe/vm/vm.h" +#include "mongo/db/storage/key_string.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::sbe { + +namespace { +std::string valueDebugString(std::pair<value::TypeTags, value::Value> value) { + std::stringstream stream; + value::printValue(stream, value.first, value.second); + return stream.str(); +}; +} // namespace + +#define APPEND_TWICE(BOB, NAME, VALUE) \ + do { \ + BOB.append(NAME "-ascending", VALUE); \ + BOB.append(NAME "-descending", VALUE); \ + } while (false); + +TEST(SBEKeyStringTest, Basic) { + // Add interesting values to a BSON object. Note that we add each value twice: one will have an + // "ascending" ordering, and the other will have a "descending" ordering. + BSONObjBuilder bob; + APPEND_TWICE(bob, "zeroInt", 0); + APPEND_TWICE(bob, "oneByteInt", 0x10); + APPEND_TWICE(bob, "twoByteInt", 0x1010); + APPEND_TWICE(bob, "threeByteInt", 0x101010); + APPEND_TWICE(bob, "fourByteInt", 0x10101010); + APPEND_TWICE(bob, "fiveByteInt", int64_t{0x1010101010}); + APPEND_TWICE(bob, "sixByteInt", int64_t{0x101010101010}); + APPEND_TWICE(bob, "sevenByteInt", int64_t{0x10101010101010}); + APPEND_TWICE(bob, "eightByteInt", int64_t{0x1010101010101010}); + APPEND_TWICE(bob, "negativeZeroInt", 0); + APPEND_TWICE(bob, "negativeOneByteInt", -0x10); + APPEND_TWICE(bob, "negativeTwoByteInt", -0x1010); + APPEND_TWICE(bob, "negativeThreeByteInt", -0x101010); + APPEND_TWICE(bob, "negativeFourByteInt", -0x10101010); + APPEND_TWICE(bob, "negativeFiveByteInt", int64_t{-0x1010101010}); + APPEND_TWICE(bob, "negativeSixByteInt", int64_t{-0x101010101010}); + APPEND_TWICE(bob, "negativeSevenByteInt", int64_t{-0x10101010101010}); + APPEND_TWICE(bob, "negativeEightByteInt", int64_t{-0x1010101010101010}); + APPEND_TWICE(bob, "boolFalse", false); + APPEND_TWICE(bob, "boolTrue", true); + APPEND_TWICE(bob, "doubleVal", 123.45); + APPEND_TWICE(bob, "doubleInfinity", std::numeric_limits<double>::infinity()); + APPEND_TWICE(bob, "negativeDoubleInfinity", -std::numeric_limits<double>::infinity()); + APPEND_TWICE(bob, "decimalValue", Decimal128("01189998819991197253")); + APPEND_TWICE(bob, "decimalInfinity", Decimal128::kPositiveInfinity); + APPEND_TWICE(bob, "decimalNegativeInfinity", Decimal128::kNegativeInfinity); + APPEND_TWICE(bob, "shortString", "str"); + APPEND_TWICE(bob, "longString", "I am the very model of a modern major general."); + APPEND_TWICE(bob, "date", Date_t::fromMillisSinceEpoch(123)); + APPEND_TWICE(bob, "timestamp", Timestamp(123)); + + bob.appendNull("null-ascending"); + bob.appendNull("null-descending"); + auto testValues = bob.done(); + + // Copy each element from 'testValues' into a KeyString::Value. Each KeyString::Value has a + // maximum number of components, so we have to break the elements up into groups. + std::queue<std::tuple<KeyString::Value, Ordering, size_t>> keyStringQueue; + std::vector<BSONElement> elements; + testValues.elems(elements); + + for (size_t i = 0; i < elements.size(); i += Ordering::kMaxCompoundIndexKeys) { + auto endBound = std::min(i + Ordering::kMaxCompoundIndexKeys, elements.size()); + + BSONObjBuilder patternBob; + for (auto j = i; j < endBound; ++j) { + patternBob.append(elements[j].fieldNameStringData(), (j % 2 == 0) ? 1 : -1); + } + auto ordering = Ordering::make(patternBob.done()); + + KeyString::Builder keyStringBuilder(KeyString::Version::V1, ordering); + for (auto j = i; j < endBound; ++j) { + keyStringBuilder.appendBSONElement(elements[j]); + } + keyStringQueue.emplace(keyStringBuilder.getValueCopy(), ordering, endBound - i); + } + + // Set up an SBE expression that will compare one element in the 'testValues' BSON object with + // one of the KeyString components. + CompileCtx ctx; + CoScanStage emptyStage; + ctx.root = &emptyStage; + + // The expression takes three inputs: + // 1) the BSON object, + value::SlotId bsonObjSlot = 1; + value::ViewOfValueAccessor bsonObjAccessor; + ctx.pushCorrelated(bsonObjSlot, &bsonObjAccessor); + + // 2) the field name corresponding to the BSON element, + value::SlotId fieldNameSlot = 2; + value::OwnedValueAccessor fieldNameAccessor; + ctx.pushCorrelated(fieldNameSlot, &fieldNameAccessor); + + // 3) and the KeyString component. + value::SlotId keyStringComponentSlot = 3; + value::ViewOfValueAccessor keyStringComponentAccessor; + ctx.pushCorrelated(keyStringComponentSlot, &keyStringComponentAccessor); + + auto comparisonExpr = makeE<EPrimBinary>( + EPrimBinary::eq, + makeE<EVariable>(keyStringComponentSlot), + makeE<EFunction>("getField", + makeEs(makeE<EVariable>(bsonObjSlot), makeE<EVariable>(fieldNameSlot)))); + auto compiledComparison = comparisonExpr->compile(ctx); + + + bsonObjAccessor.reset(value::TypeTags::bsonObject, value::bitcastFrom(testValues.objdata())); + std::vector<sbe::value::ViewOfValueAccessor> keyStringValues; + BufBuilder builder; + for (auto&& element : testValues) { + while (keyStringValues.empty()) { + ASSERT(!keyStringQueue.empty()); + + auto [keyString, ordering, size] = keyStringQueue.front(); + keyStringQueue.pop(); + + builder.reset(); + keyStringValues.resize(size); + readKeyStringValueIntoAccessors(keyString, ordering, &builder, &keyStringValues); + } + + auto [componentTag, componentVal] = keyStringValues.front().getViewOfValue(); + keyStringComponentAccessor.reset(componentTag, componentVal); + keyStringValues.erase(keyStringValues.begin()); + + auto [fieldNameTag, fieldNameVal] = value::makeNewString(element.fieldName()); + fieldNameAccessor.reset(fieldNameTag, fieldNameVal); + + vm::ByteCode vm; + auto result = vm.runPredicate(compiledComparison.get()); + ASSERT(result) << "BSONElement (" << element << ") failed to match KeyString component (" + << valueDebugString(std::make_pair(componentTag, componentVal)) << ")"; + } + + ASSERT(keyStringValues.empty()); + ASSERT(keyStringQueue.empty()); +} + +TEST(SBEKeyStringTest, KeyComponentInclusion) { + KeyString::Builder keyStringBuilder(KeyString::Version::V1, KeyString::ALL_ASCENDING); + keyStringBuilder.appendNumberLong(12345); // Included + keyStringBuilder.appendString("I've information vegetable, animal, and mineral"_sd); + keyStringBuilder.appendString( + "I know the kings of England, and I quote the fights historical"_sd); // Included + keyStringBuilder.appendString("From Marathon to Waterloo, in order categorical"); + auto keyString = keyStringBuilder.getValueCopy(); + + IndexKeysInclusionSet indexKeysToInclude; + indexKeysToInclude.set(0); + indexKeysToInclude.set(2); + + std::vector<value::ViewOfValueAccessor> accessors; + accessors.resize(2); + + BufBuilder builder; + readKeyStringValueIntoAccessors( + keyString, KeyString::ALL_ASCENDING, &builder, &accessors, indexKeysToInclude); + + ASSERT(std::make_pair(value::TypeTags::NumberInt64, value::bitcastFrom(12345)) == + accessors[0].getViewOfValue()) + << "Incorrect value from accessor: " << valueDebugString(accessors[0].getViewOfValue()); + + auto value = accessors[1].getViewOfValue(); + ASSERT(value::isString(value.first) && + ("I know the kings of England, and I quote the fights historical" == + value::getStringView(value.first, value.second))) + << "Incorrect value from accessor: " << valueDebugString(value); +} + +} // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/values/slot.h b/src/mongo/db/exec/sbe/values/slot.h index 7eaeba98e34..f2dccba7245 100644 --- a/src/mongo/db/exec/sbe/values/slot.h +++ b/src/mongo/db/exec/sbe/values/slot.h @@ -380,6 +380,21 @@ struct MaterializedRowHasher { }; /** + * Read the components of the 'keyString' value and populate 'accessors' with those components. Some + * components are appended into the 'valueBufferBuilder' object's internal buffer, and the accessors + * populated with those values will hold pointers into the buffer. The 'valueBufferBuilder' is + * caller owned, and it can be reset and reused once it is safe to invalidate any accessors that + * might reference it. + */ +void readKeyStringValueIntoAccessors( + const KeyString::Value& keyString, + const Ordering& ordering, + BufBuilder* valueBufferBuilder, + std::vector<ViewOfValueAccessor>* accessors, + boost::optional<IndexKeysInclusionSet> indexKeysToInclude = boost::none); + + +/** * Commonly used containers. */ template <typename T> diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index 4ed55ffb75c..aa9efdeb687 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -34,6 +34,7 @@ #include <pcrecpp.h> #include "mongo/db/exec/sbe/values/bson.h" +#include "mongo/db/exec/sbe/values/value_builder.h" #include "mongo/db/storage/key_string.h" namespace mongo { @@ -613,6 +614,48 @@ std::string_view ObjectEnumerator::getFieldName() const { } } +void readKeyStringValueIntoAccessors(const KeyString::Value& keyString, + const Ordering& ordering, + BufBuilder* valueBufferBuilder, + std::vector<ViewOfValueAccessor>* accessors, + boost::optional<IndexKeysInclusionSet> indexKeysToInclude) { + ValueBuilder valBuilder(valueBufferBuilder); + invariant(!indexKeysToInclude || indexKeysToInclude->count() == accessors->size()); + + BufReader reader(keyString.getBuffer(), keyString.getSize()); + KeyString::TypeBits typeBits(keyString.getTypeBits()); + KeyString::TypeBits::Reader typeBitsReader(typeBits); + + bool keepReading = true; + size_t componentIndex = 0; + do { + // In the edge case that 'componentIndex' indicates that we have already read + // 'kMaxCompoundIndexKeys' components, we expect that the next 'readSBEValue()' will return + // false (to indicate EOF), so the value of 'inverted' does not matter. + bool inverted = (componentIndex < Ordering::kMaxCompoundIndexKeys) + ? (ordering.get(componentIndex) == -1) + : false; + + keepReading = KeyString::readSBEValue( + &reader, &typeBitsReader, inverted, typeBits.version, &valBuilder); + + invariant(componentIndex < Ordering::kMaxCompoundIndexKeys || !keepReading); + + // If 'indexKeysToInclude' indicates that this index key component is not part of the + // projection, remove it from the list of values that will be fed to the 'accessors' list. + // Note that, even when we are excluding a key component, we can't skip the call to + // 'KeyString::readSBEValue()' because it is needed to advance the 'reader' and + // 'typeBitsReader' stream. + if (indexKeysToInclude && (componentIndex < Ordering::kMaxCompoundIndexKeys) && + !(*indexKeysToInclude)[componentIndex]) { + valBuilder.popValue(); + } + ++componentIndex; + } while (keepReading && valBuilder.numValues() < accessors->size()); + + valBuilder.readValues(accessors); +} + } // namespace value } // namespace sbe } // namespace mongo diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index e783c7f4641..e4fdb27d510 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -32,6 +32,7 @@ #include <absl/container/flat_hash_map.h> #include <absl/container/flat_hash_set.h> #include <array> +#include <bitset> #include <cstdint> #include <ostream> #include <string> @@ -40,6 +41,7 @@ #include "mongo/base/data_type_endian.h" #include "mongo/base/data_view.h" +#include "mongo/bson/ordering.h" #include "mongo/platform/decimal128.h" #include "mongo/util/assert_util.h" @@ -58,6 +60,8 @@ namespace sbe { using FrameId = int64_t; using SpoolId = int64_t; +using IndexKeysInclusionSet = std::bitset<Ordering::kMaxCompoundIndexKeys>; + namespace value { /** @@ -462,15 +466,22 @@ inline std::string_view getStringView(TypeTags tag, Value& val) noexcept { MONGO_UNREACHABLE; } +inline std::pair<TypeTags, Value> makeSmallString(std::string_view input) { + size_t len = input.size(); + invariant(len < kSmallStringThreshold - 1); + + Value smallString; + // This is OK - we are aliasing to char*. + auto stringAlias = getSmallStringView(smallString); + memcpy(stringAlias, input.data(), len); + stringAlias[len] = 0; + return {TypeTags::StringSmall, smallString}; +} + inline std::pair<TypeTags, Value> makeNewString(std::string_view input) { size_t len = input.size(); if (len < kSmallStringThreshold - 1) { - Value smallString; - // This is OK - we are aliasing to char*. - auto stringAlias = getSmallStringView(smallString); - memcpy(stringAlias, input.data(), len); - stringAlias[len] = 0; - return {TypeTags::StringSmall, smallString}; + return makeSmallString(input); } else { auto str = new char[len + 1]; memcpy(str, input.data(), len); diff --git a/src/mongo/db/exec/sbe/values/value_builder.h b/src/mongo/db/exec/sbe/values/value_builder.h new file mode 100644 index 00000000000..31e87617e75 --- /dev/null +++ b/src/mongo/db/exec/sbe/values/value_builder.h @@ -0,0 +1,262 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <vector> + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/exec/sbe/values/slot.h" + +namespace mongo::sbe::value { + +/** + * A ValueBuilder can be used as a stream input (with a << operator), like a BSONObjBuilder. Instead + * of converting its inputs to BSON, it converts them to pairs of sbe::value::TypeTags and + * sbe::value::Value. During construction, these pairs are stored in the parallel '_tagList' and + * '_valList' arrays, as a "structure of arrays." + * + * After constructing the array, use the 'readValues()' method to populate a ViewOfValueAccessor + * vector. Some "views" (values that are pointers into other memory) are constructed by appending + * them to the 'valueBufferBuilder' provided to the constructor, and the internal buffer in that + * 'valueBufferBuilder' must be kept alive for as long as the accessors are to remain valid. + * + * Note that, in addition to destroying the 'valueBufferBuilder' or calling its 'reset()' or + * 'release()' function, appending more values to the buffer (either directly or via this + * ValueBuilder) can invalidate the underlying buffer memory. + * + * The 'valueBufferBuilder' is _not_ owned by the ValueBuilder class, so that the caller can reuse + * it without freeing and then reallocating its memory. + * + * NB: The ValueBuilder is specifically intended to adapt KeyString::Value conversion, which + * operates by appending results to a BSONObjBuilder, to instead convert to SBE values. It is not + * intended as a general-purpose tool for populating SBE accessors, and no new code should construct + * or use a ValueBuilder. + * + * Also note that some data types are not yet supported by SBE and appending them will throw a + * query-fatal error. + */ +class ValueBuilder { +public: + ValueBuilder(BufBuilder* valueBufferBuilder) : _valueBufferBuilder(valueBufferBuilder) {} + ValueBuilder(ValueBuilder& other) = delete; + + void append(const MinKeyLabeler& id) { + unsupportedType("minKey"); + } + + void append(const MaxKeyLabeler& id) { + unsupportedType("maxKey"); + } + + void append(const NullLabeler& id) { + appendValue(TypeTags::Null, 0); + } + + void append(const UndefinedLabeler& id) { + unsupportedType("undefined"); + } + + void append(const bool in) { + appendValue(TypeTags::Boolean, value::bitcastFrom(in)); + } + + void append(const Date_t& in) { + appendValue(TypeTags::Date, value::bitcastFrom(in.toMillisSinceEpoch())); + } + + void append(const Timestamp& in) { + appendValue(TypeTags::Timestamp, value::bitcastFrom(in.asLL())); + } + + void append(const OID& in) { + appendValueBufferOffset(TypeTags::ObjectId); + _valueBufferBuilder->appendBuf(in.view().view(), OID::kOIDSize); + } + + void append(const std::string& in) { + append(StringData{in}); + } + + void append(StringData in) { + if (in.size() < kSmallStringThreshold - 1) { + appendValue(makeSmallString(std::string_view(in.rawData(), in.size()))); + } else { + appendValueBufferOffset(TypeTags::StringBig); + + // Note: This _will_ write a NULL-terminated string, even if the input StringData does + // not have a NULL terminator. + _valueBufferBuilder->appendStr(in); + } + } + + void append(const BSONSymbol& in) { + unsupportedType("symbol"); + } + + void append(const BSONCode& in) { + unsupportedType("javascript"); + } + + void append(const BSONCodeWScope& in) { + unsupportedType("javascriptWithScope"); + } + + void append(const BSONBinData& in) { + unsupportedType("binData"); + } + + void append(const BSONRegEx& in) { + unsupportedType("regex"); + } + + void append(const BSONDBRef& in) { + unsupportedType("dbPointer"); + } + + void append(double in) { + appendValue(TypeTags::NumberDouble, value::bitcastFrom(in)); + } + + void append(const Decimal128& in) { + appendValueBufferOffset(TypeTags::NumberDecimal); + _valueBufferBuilder->appendStruct(in); + } + + void append(long long in) { + appendValue(TypeTags::NumberInt64, value::bitcastFrom(in)); + } + + void append(int32_t in) { + appendValue(TypeTags::NumberInt32, value::bitcastFrom(in)); + } + + BufBuilder& subobjStart() { + appendValueBufferOffset(TypeTags::bsonObject); + return *_valueBufferBuilder; + } + + BufBuilder& subarrayStart() { + appendValueBufferOffset(TypeTags::bsonArray); + return *_valueBufferBuilder; + } + + /** + * Remove the last value that was streamed to this ValueBuilder. + */ + void popValue() { + // If the removed value was a view of a string, object or array in the '_valueBufferBuilder' + // buffer, this value will remain in that buffer, even though we've removed it from the + // list. It will still get deallocated along with everything else when that buffer gets + // cleared or deleted, though, so there is no leak. + --_numValues; + } + + size_t numValues() const { + return _numValues; + } + + /** + * Populate the given list of accessors with TypeTags and Values. Some Values may be "views" + * into the memory constructed by the '_valueBufferBuilder' object, which is a caller-owned + * object that must remain valid for as long as these accessors are to remain valid. + */ + void readValues(std::vector<ViewOfValueAccessor>* accessors) { + auto bufferLen = _valueBufferBuilder->len(); + for (size_t i = 0; i < _numValues; ++i) { + auto tag = _tagList[i]; + auto val = _valList[i]; + + switch (tag) { + // As noted in the comments for the 'appendValueBufferOffset' function, some values + // are stored as offsets into the buffer during construction. This is where we + // convert those offsets into pointers. + case TypeTags::ObjectId: + case TypeTags::StringBig: + case TypeTags::NumberDecimal: + case TypeTags::bsonObject: + case TypeTags::bsonArray: { + auto offset = bitcastTo<decltype(bufferLen)>(val); + invariant(offset < bufferLen); + val = bitcastFrom(_valueBufferBuilder->buf() + offset); + break; + } + default: + // 'val' is already set correctly. + break; + } + + invariant(i < accessors->size()); + (*accessors)[i].reset(tag, val); + } + } + +private: + void unsupportedType(const char* typeDescription) { + uasserted(4935100, + str::stream() << "SBE does not support type present in index entry: " + << typeDescription); + } + + void appendValue(TypeTags tag, Value val) noexcept { + _tagList[_numValues] = tag; + _valList[_numValues] = val; + ++_numValues; + } + + void appendValue(std::pair<TypeTags, Value> in) noexcept { + appendValue(in.first, in.second); + } + + // For some TypeTags (e.g., StringBig), the corresponding Value is actually a pointer to the + // value's location in memory. In the case of the ValueBuilder, that memory will be within the + // buffer created by the '_valueBufferBuilder' object. + // + // During the building process, pointers into that memory can become invalidated, so instead of + // storing a pointer, we store an _offset_ into the under-construction buffer. Translation from + // offset to pointer occurs as part of the 'releaseValues()' function. + void appendValueBufferOffset(TypeTags tag) { + _tagList[_numValues] = tag; + _valList[_numValues] = value::bitcastFrom(_valueBufferBuilder->len()); + ++_numValues; + } + + std::array<TypeTags, Ordering::kMaxCompoundIndexKeys> _tagList; + std::array<Value, Ordering::kMaxCompoundIndexKeys> _valList; + size_t _numValues = 0; + + BufBuilder* _valueBufferBuilder; +}; + +template <typename T> +void operator<<(ValueBuilder& valBuilder, T operand) { + valBuilder.append(operand); +} + +} // namespace mongo::sbe::value
\ No newline at end of file diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index 796995754ff..ec2dae09662 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -39,6 +39,7 @@ #include "mongo/base/data_cursor.h" #include "mongo/base/data_view.h" #include "mongo/bson/bson_depth.h" +#include "mongo/db/exec/sbe/values/value_builder.h" #include "mongo/platform/bits.h" #include "mongo/platform/strnlen.h" #include "mongo/util/decimal_counter.h" @@ -1212,12 +1213,13 @@ void BuilderBase<BufferT>::_appendBytes(const void* source, size_t bytes, bool i // ---------------------------------------------------------------------- namespace { +template <class Stream> void toBsonValue(uint8_t ctype, BufReader* reader, TypeBits::Reader* typeBits, bool inverted, Version version, - BSONObjBuilderValueStream* stream, + Stream* stream, uint32_t depth); void toBson(BufReader* reader, @@ -1276,12 +1278,13 @@ Decimal128 readDecimalContinuation(BufReader* reader, bool inverted, Decimal128 return num; } +template <class Stream> void toBsonValue(uint8_t ctype, BufReader* reader, TypeBits::Reader* typeBits, bool inverted, Version version, - BSONObjBuilderValueStream* stream, + Stream* stream, uint32_t depth) { keyStringAssert(ErrorCodes::Overflow, "KeyString encoding exceeded maximum allowable BSON nesting depth", @@ -2568,6 +2571,25 @@ int Value::compareWithTypeBits(const Value& other) const { return KeyString::compare(getBuffer(), other.getBuffer(), _buffer.size(), other._buffer.size()); } +bool readSBEValue(BufReader* reader, + TypeBits::Reader* typeBits, + bool inverted, + Version version, + sbe::value::ValueBuilder* valueBuilder) { + uint8_t ctype; + if (!reader->remaining() || (ctype = readType<uint8_t>(reader, inverted)) == kEnd) { + return false; + } + + // This function is only intended to read stored index entries. The 'kLess' and 'kGreater' + // "discriminator" types are used for querying and are never stored in an index. + invariant(ctype > kLess && ctype < kGreater); + + const uint32_t depth = 1; // This function only gets called for a top-level KeyString::Value. + toBsonValue(ctype, reader, typeBits, inverted, version, valueBuilder, depth); + return true; +} + template class BuilderBase<Builder>; template class BuilderBase<HeapBuilder>; template class BuilderBase<PooledBuilder>; diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 8582e9d2e30..a87eea8a6c1 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -48,6 +48,10 @@ namespace mongo { +namespace sbe::value { +class ValueBuilder; +} + namespace KeyString { enum class Version : uint8_t { V0 = 0, V1 = 1, kLatestVersion = V1 }; @@ -951,6 +955,18 @@ RecordId decodeRecordId(BufReader* reader); int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t rightSize); +/** + * Read one KeyString component from the given 'reader' and 'typeBits' inputs and stream it to the + * 'valueBuilder' object, which converts it to a "Slot-Based Execution" (SBE) representation. When + * no components remain in the KeyString, this function returns false and leaves 'valueBuilder' + * unmodified. + */ +bool readSBEValue(BufReader* reader, + TypeBits::Reader* typeBits, + bool inverted, + Version version, + sbe::value::ValueBuilder* valueBuilder); + template <class BufferT> template <class T> int BuilderBase<BufferT>::compare(const T& other) const { |