diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2022-04-14 20:38:01 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-04-16 00:09:55 +0000 |
commit | c42aa3255d25b39d5ce3747434b58110b45475d3 (patch) | |
tree | e2fff770c0a7957b797a9aa3add9d636e2e8652b /src | |
parent | 7725a1e2d179947e8e00b9cba2d82ddfc1960f9a (diff) | |
download | mongo-c42aa3255d25b39d5ce3747434b58110b45475d3.tar.gz |
SERVER-63439 Add ColumnStore encoder for SBE values
Note: unit tests are in a follow-on commit
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/sbe/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/column_store_encoder.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/column_store_encoder.h | 161 | ||||
-rw-r--r-- | src/mongo/db/storage/column_store.h | 21 | ||||
-rw-r--r-- | src/mongo/platform/decimal128.cpp | 27 | ||||
-rw-r--r-- | src/mongo/platform/decimal128.h | 39 |
6 files changed, 274 insertions, 10 deletions
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index 755497e7b9c..3daa31cc237 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -62,6 +62,7 @@ sbeEnv.Library( 'util/debug_print.cpp', 'util/spilling.cpp', 'util/stage_results_printer.cpp', + 'values/column_store_encoder.cpp', 'values/sbe_pattern_value_cmp.cpp', 'values/slot.cpp', 'values/slot_printer.cpp', diff --git a/src/mongo/db/exec/sbe/values/column_store_encoder.cpp b/src/mongo/db/exec/sbe/values/column_store_encoder.cpp new file mode 100644 index 00000000000..fc33e459cc2 --- /dev/null +++ b/src/mongo/db/exec/sbe/values/column_store_encoder.cpp @@ -0,0 +1,35 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/exec/sbe/values/column_store_encoder.h" + +namespace mongo::sbe::value { +const Object ColumnStoreEncoder::emptyObject{}; +const Array ColumnStoreEncoder::emptyArray{}; +} // namespace mongo::sbe::value diff --git a/src/mongo/db/exec/sbe/values/column_store_encoder.h b/src/mongo/db/exec/sbe/values/column_store_encoder.h new file mode 100644 index 00000000000..a816bf6704c --- /dev/null +++ b/src/mongo/db/exec/sbe/values/column_store_encoder.h @@ -0,0 +1,161 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <vector> + +#include "mongo/db/exec/sbe/values/bson.h" +#include "mongo/db/exec/sbe/values/slot.h" +#include "mongo/db/storage/column_store.h" + +namespace mongo::sbe::value { +/** + * This encoder provides a mechanism to represent BSON values as 'sbe::value' pairs, suitable for + * use in the SBE VM. + * + * Values returned by 'operator()' are _unowned_ and are invalidated by the next 'operator()' call + * or when the 'ColumnStoreEncoder' is destroyed. Additionally, modifying or destroying any string + * or BSONElement passed to 'operator()' can also invalidate the resulting value. + * + * Callers that need long-lived values should make "owned" copies. + */ +struct ColumnStoreEncoder { + using Out = boost::optional<std::pair<TypeTags, Value>>; + + std::pair<TypeTags, Value> operator()(BSONElement value) { + return bson::convertFrom<true>(value); + } + + std::pair<TypeTags, Value> operator()(NullLabeler) { + return {TypeTags::Null, 0}; + } + + std::pair<TypeTags, Value> operator()(MinKeyLabeler) { + return {TypeTags::MinKey, 0}; + } + + std::pair<TypeTags, Value> operator()(MaxKeyLabeler) { + return {TypeTags::MaxKey, 0}; + } + + std::pair<TypeTags, Value> operator()(BSONObj value) { + tassert(6343901, "Unexpected non-trivial object in columnar value", value.isEmpty()); + return {TypeTags::Object, reinterpret_cast<Value>(&emptyObject)}; + } + + std::pair<TypeTags, Value> operator()(BSONArray value) { + tassert(6343902, "Unexpected non-trivial array in columnar value", value.isEmpty()); + return {TypeTags::Array, reinterpret_cast<Value>(&emptyArray)}; + } + + std::pair<TypeTags, Value> operator()(bool value) { + return {TypeTags::Boolean, bitcastFrom<bool>(value)}; + } + + std::pair<TypeTags, Value> operator()(int32_t value) { + return {TypeTags::NumberInt32, bitcastFrom<int32_t>(value)}; + } + + std::pair<TypeTags, Value> operator()(int64_t value) { + return {TypeTags::NumberInt64, bitcastFrom<int64_t>(value)}; + } + + std::pair<TypeTags, Value> operator()(double value) { + return {TypeTags::NumberDouble, bitcastFrom<double>(value)}; + } + + std::pair<TypeTags, Value> operator()(StringData value) { + if (canUseSmallString(value)) { + auto [tag, newValue] = makeSmallString(value); + return {tag, newValue}; + } else { + uassert(6343903, + "Expected large strings to be encoded as a BSONElement", + value.size() + 5 <= temporaryStorage.size()); + + auto outputPtr = temporaryStorage.data(); + DataView(std::exchange(outputPtr, outputPtr + sizeof(int32_t))) + .write<LittleEndian<int32_t>>(value.size() + 1); + memcpy( + std::exchange(outputPtr, outputPtr + value.size()), value.rawData(), value.size()); + DataView(outputPtr).write<LittleEndian<char>>('\0'); + + return {TypeTags::StringBig, bitcastFrom<const char*>(temporaryStorage.data())}; + } + } + + std::pair<TypeTags, Value> operator()(Decimal128 value) { + DataView(temporaryStorage.data()).write(value); + return {TypeTags::NumberDecimal, reinterpret_cast<Value>(temporaryStorage.data())}; + } + + std::pair<TypeTags, Value> operator()(const OID& value) { + auto oidBytes = value.view().view(); + std::copy(oidBytes, oidBytes + OID::kOIDSize, temporaryStorage.begin()); + + return {TypeTags::ObjectId, reinterpret_cast<Value>(temporaryStorage.data())}; + } + + std::pair<TypeTags, Value> operator()(const UUID& value) { + // The 'ColumnStoreEncoder' returns a UUID by formatting it in temporary storage as a + // BSONBinData object. + + // Write the payload length. + DataView binDataView(temporaryStorage.data()); + std::ptrdiff_t offset = 0; + binDataView.write<LittleEndian<uint32_t>>(UUID::kNumBytes, + std::exchange(offset, offset + sizeof(uint32_t))); + + // Write the BinDataType value. + binDataView.write<char>(newUUID, std::exchange(offset, offset + 1)); + + // Write the UUID payload. + static_assert(sizeof(value.data()) == UUID::kNumBytes); + binDataView.write(value.data(), offset); // No need to update 'offset' for the last write. + + return {TypeTags::ObjectId, reinterpret_cast<Value>(temporaryStorage.data())}; + } + +private: + static const Object emptyObject; + static const Array emptyArray; + + static constexpr std::size_t kSizeOfDecimal = 2 * sizeof(long long); + static constexpr std::size_t kSizeOfUUIDBinData = sizeof(uint32_t) // Length field + + 1 // BinData type field + + UUID::kNumBytes; // UUID payload + static constexpr std::size_t kSizeOfStringBuffer = sizeof(uint32_t) // Length field + + ColumnStore::Bytes::TinySize::kStringMax // String + + 1; // Null terminator + static constexpr std::size_t kSizeOfTemporary = std::max<std::size_t>( + {kSizeOfDecimal, OID::kOIDSize, kSizeOfUUIDBinData, kSizeOfStringBuffer}); + std::array<char, kSizeOfTemporary> temporaryStorage; +}; +} // namespace mongo::sbe::value diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h index aab7a719634..a0cb38189a4 100644 --- a/src/mongo/db/storage/column_store.h +++ b/src/mongo/db/storage/column_store.h @@ -347,11 +347,12 @@ struct SplitCellView { template <class ValueEncoder> auto subcellValuesGenerator(ValueEncoder&& valEncoder) const { struct Cursor { - typename ValueEncoder::Out nextValue() { + using Out = typename std::remove_reference_t<ValueEncoder>::Out; + Out nextValue() { if (!elemPtr) - return ValueEncoder::Out(); + return Out(); if (elemPtr == end) - return ValueEncoder::Out(); + return Out(); invariant(elemPtr < end); return decodeAndAdvance(elemPtr, encoder); @@ -361,7 +362,7 @@ struct SplitCellView { const char* end; ValueEncoder encoder; }; - return Cursor{firstElementPtr, arrInfo.rawData(), std::forward(valEncoder)}; + return Cursor{firstElementPtr, arrInfo.rawData(), std::forward<ValueEncoder>(valEncoder)}; } static SplitCellView parse(CellView cell) { @@ -438,11 +439,11 @@ struct SplitCellView { } // TODO SERVER-63284: This would be more concisely expressed using the case range syntax. - if (Bytes::kTinyIntMin >= byte && byte <= Bytes::kTinyIntMax) { + if (byte >= Bytes::kTinyIntMin && byte <= Bytes::kTinyIntMax) { return encoder(int32_t(int8_t(byte - TinyNum::kTinyIntZero))); - } else if (Bytes::kTinyLongMin >= byte && byte <= Bytes::kTinyLongMax) { + } else if (byte >= Bytes::kTinyLongMin && byte <= Bytes::kTinyLongMax) { return encoder(int64_t(int8_t(byte - TinyNum::kTinyLongZero))); - } else if (Bytes::kStringSizeMin >= byte && byte <= Bytes::kStringSizeMax) { + } else if (byte >= Bytes::kStringSizeMin && byte <= Bytes::kStringSizeMax) { auto size = size_t(byte - Bytes::kStringSizeMin); return encoder(StringData(std::exchange(ptr, ptr + size), size)); } else { @@ -464,9 +465,9 @@ struct SplitCellView { return encoder(true); // Size and type encoded in byte, value follows. case Bytes::kDecimal128: { - auto val = ConstDataView(ptr).read<Decimal128>(); - ptr += 16; - return encoder(val); + auto val = encoder(ConstDataView(ptr).read<Decimal128>()); + ptr += DataType::Handler<Decimal128>::kSizeOfDecimal; + return val; } case Bytes::kDouble: { auto val = ConstDataView(ptr).read<LittleEndian<double>>(); diff --git a/src/mongo/platform/decimal128.cpp b/src/mongo/platform/decimal128.cpp index 0d12479f70a..630d2037b50 100644 --- a/src/mongo/platform/decimal128.cpp +++ b/src/mongo/platform/decimal128.cpp @@ -1044,4 +1044,31 @@ std::ostream& operator<<(std::ostream& stream, const Decimal128& value) { return stream << value.toString(); } +void DataType::Handler<Decimal128>::unsafeLoad(Decimal128* valueOut, + const char* ptr, + size_t* advanced) { + if (valueOut) { + ConstDataView decimalView(ptr); + uint64_t low = decimalView.read<LittleEndian<uint64_t>>(); + uint64_t high = decimalView.read<LittleEndian<uint64_t>>(sizeof(uint64_t)); + *valueOut = Decimal128(Decimal128::Value{low, high}); + } + + if (advanced) { + *advanced = kSizeOfDecimal; + } +} + +void DataType::Handler<Decimal128>::unsafeStore(const Decimal128& valueIn, + char* ptr, + size_t* advanced) { + DataView decimalView(ptr); + decimalView.write<LittleEndian<uint64_t>>(valueIn.getValue().low64, 0); + decimalView.write<LittleEndian<uint64_t>>(valueIn.getValue().high64, sizeof(uint64_t)); + + if (advanced) { + *advanced = kSizeOfDecimal; + } +} + } // namespace mongo diff --git a/src/mongo/platform/decimal128.h b/src/mongo/platform/decimal128.h index 64306df0e0c..f14e6b6d73b 100644 --- a/src/mongo/platform/decimal128.h +++ b/src/mongo/platform/decimal128.h @@ -39,6 +39,7 @@ #include "mongo/config.h" +#include "mongo/base/data_type.h" #include "mongo/util/assert_util.h" namespace mongo { @@ -635,4 +636,42 @@ inline bool operator!=(const Decimal128& lhs, const Decimal128& rhs) { } // namespace literals +template <> +struct DataType::Handler<Decimal128> { + static void unsafeLoad(Decimal128* valueOut, const char* ptr, size_t* advanced); + static void unsafeStore(const Decimal128& valueIn, char* ptr, size_t* advanced); + + static Status load(Decimal128* valueOut, + const char* ptr, + size_t length, + size_t* advanced, + std::ptrdiff_t debug_offset) { + if (length < kSizeOfDecimal) { + return Status(ErrorCodes::Overflow, "Buffer too small to hold Decimal128 value"); + } + + unsafeLoad(valueOut, ptr, advanced); + return Status::OK(); + } + + static Status store(const Decimal128& valueIn, + char* ptr, + size_t length, + size_t* advanced, + std::ptrdiff_t debug_offset) { + if (length < kSizeOfDecimal) { + return Status(ErrorCodes::Overflow, "Buffer too small to write Decimal128 value"); + } + + unsafeStore(valueIn, ptr, advanced); + return Status::OK(); + } + + static Decimal128 defaultConstruct() { + return Decimal128(); + } + + static constexpr size_t kSizeOfDecimal = 2 * sizeof(uint64_t); +}; + } // namespace mongo |