summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Justin Seyster <justin.seyster@mongodb.com> 2022-04-14 20:38:01 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-04-16 00:09:55 +0000
commit c42aa3255d25b39d5ce3747434b58110b45475d3 (patch)
tree e2fff770c0a7957b797a9aa3add9d636e2e8652b /src
parent 7725a1e2d179947e8e00b9cba2d82ddfc1960f9a (diff)
download mongo-c42aa3255d25b39d5ce3747434b58110b45475d3.tar.gz
SERVER-63439 Add ColumnStore encoder for SBE values
Note: unit tests are in a follow-on commit
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/exec/sbe/SConscript 1
-rw-r--r-- src/mongo/db/exec/sbe/values/column_store_encoder.cpp 35
-rw-r--r-- src/mongo/db/exec/sbe/values/column_store_encoder.h 161
-rw-r--r-- src/mongo/db/storage/column_store.h 21
-rw-r--r-- src/mongo/platform/decimal128.cpp 27
-rw-r--r-- src/mongo/platform/decimal128.h 39
6 files changed, 274 insertions, 10 deletions
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 755497e7b9c..3daa31cc237 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -62,6 +62,7 @@ sbeEnv.Library(
'util/debug_print.cpp',
'util/spilling.cpp',
'util/stage_results_printer.cpp',
+ 'values/column_store_encoder.cpp',
'values/sbe_pattern_value_cmp.cpp',
'values/slot.cpp',
'values/slot_printer.cpp',
diff --git a/src/mongo/db/exec/sbe/values/column_store_encoder.cpp b/src/mongo/db/exec/sbe/values/column_store_encoder.cpp
new file mode 100644
index 00000000000..fc33e459cc2
--- /dev/null
+++ b/src/mongo/db/exec/sbe/values/column_store_encoder.cpp
@@ -0,0 +1,35 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/exec/sbe/values/column_store_encoder.h"
+
+namespace mongo::sbe::value {
+// Out-of-line definitions of the static members declared in 'ColumnStoreEncoder'. The encoder
+// returns the addresses of these shared, value-initialized instances for empty object and empty
+// array inputs instead of building a new value on each call.
+const Object ColumnStoreEncoder::emptyObject{};
+const Array ColumnStoreEncoder::emptyArray{};
+} // namespace mongo::sbe::value
diff --git a/src/mongo/db/exec/sbe/values/column_store_encoder.h b/src/mongo/db/exec/sbe/values/column_store_encoder.h
new file mode 100644
index 00000000000..a816bf6704c
--- /dev/null
+++ b/src/mongo/db/exec/sbe/values/column_store_encoder.h
@@ -0,0 +1,161 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include "mongo/db/exec/sbe/values/bson.h"
+#include "mongo/db/exec/sbe/values/slot.h"
+#include "mongo/db/storage/column_store.h"
+
+namespace mongo::sbe::value {
+/**
+ * Value encoder for the column store cell decoder (see 'SplitCellView::subcellValuesGenerator' in
+ * column_store.h). Each 'operator()' overload accepts one decoded value and returns it as an
+ * 'sbe::value' (TypeTags, Value) pair, suitable for use in the SBE VM.
+ *
+ * Values returned by 'operator()' are _unowned_ and are invalidated by the next 'operator()' call
+ * or when the 'ColumnStoreEncoder' is destroyed: several overloads format their result into the
+ * encoder's single 'temporaryStorage' buffer and return a pointer into it. Additionally, modifying
+ * or destroying any string or BSONElement passed to 'operator()' can also invalidate the resulting
+ * value.
+ *
+ * Callers that need long-lived values should make "owned" copies.
+ */
+struct ColumnStoreEncoder {
+    // Result type that the decoder's cursor uses for "value or end of input".
+    using Out = boost::optional<std::pair<TypeTags, Value>>;
+
+    /**
+     * Converts a BSONElement through 'bson::convertFrom<true>'.
+     */
+    std::pair<TypeTags, Value> operator()(BSONElement value) {
+        return bson::convertFrom<true>(value);
+    }
+
+    /**
+     * The label overloads below carry no payload, so the Value half of the pair is always 0.
+     */
+    std::pair<TypeTags, Value> operator()(NullLabeler) {
+        return {TypeTags::Null, 0};
+    }
+
+    std::pair<TypeTags, Value> operator()(MinKeyLabeler) {
+        return {TypeTags::MinKey, 0};
+    }
+
+    std::pair<TypeTags, Value> operator()(MaxKeyLabeler) {
+        return {TypeTags::MaxKey, 0};
+    }
+
+    /**
+     * Only the empty object is accepted (tasserts otherwise). The result points at the shared
+     * static 'emptyObject'.
+     */
+    std::pair<TypeTags, Value> operator()(BSONObj value) {
+        tassert(6343901, "Unexpected non-trivial object in columnar value", value.isEmpty());
+        return {TypeTags::Object, reinterpret_cast<Value>(&emptyObject)};
+    }
+
+    /**
+     * Only the empty array is accepted (tasserts otherwise). The result points at the shared
+     * static 'emptyArray'.
+     */
+    std::pair<TypeTags, Value> operator()(BSONArray value) {
+        tassert(6343902, "Unexpected non-trivial array in columnar value", value.isEmpty());
+        return {TypeTags::Array, reinterpret_cast<Value>(&emptyArray)};
+    }
+
+    /**
+     * The scalar overloads below store the input directly in the Value via 'bitcastFrom' and do
+     * not touch 'temporaryStorage'.
+     */
+    std::pair<TypeTags, Value> operator()(bool value) {
+        return {TypeTags::Boolean, bitcastFrom<bool>(value)};
+    }
+
+    std::pair<TypeTags, Value> operator()(int32_t value) {
+        return {TypeTags::NumberInt32, bitcastFrom<int32_t>(value)};
+    }
+
+    std::pair<TypeTags, Value> operator()(int64_t value) {
+        return {TypeTags::NumberInt64, bitcastFrom<int64_t>(value)};
+    }
+
+    std::pair<TypeTags, Value> operator()(double value) {
+        return {TypeTags::NumberDouble, bitcastFrom<double>(value)};
+    }
+
+    /**
+     * Strings that pass 'canUseSmallString' are returned as a small string. Any other string is
+     * copied into 'temporaryStorage' as: little-endian int32 length (including the terminator),
+     * the string bytes, then a null terminator, and returned as a 'StringBig' pointing at that
+     * buffer. Strings too large for the buffer trigger the uassert.
+     */
+    std::pair<TypeTags, Value> operator()(StringData value) {
+        if (canUseSmallString(value)) {
+            auto [tag, newValue] = makeSmallString(value);
+            return {tag, newValue};
+        } else {
+            // 5 == 4-byte length prefix + 1-byte null terminator.
+            uassert(6343903,
+                    "Expected large strings to be encoded as a BSONElement",
+                    value.size() + 5 <= temporaryStorage.size());
+
+            auto outputPtr = temporaryStorage.data();
+            DataView(std::exchange(outputPtr, outputPtr + sizeof(int32_t)))
+                .write<LittleEndian<int32_t>>(value.size() + 1);
+            memcpy(
+                std::exchange(outputPtr, outputPtr + value.size()), value.rawData(), value.size());
+            DataView(outputPtr).write<LittleEndian<char>>('\0');
+
+            return {TypeTags::StringBig, bitcastFrom<const char*>(temporaryStorage.data())};
+        }
+    }
+
+    /**
+     * Serializes the decimal into 'temporaryStorage' and returns a 'NumberDecimal' pointing at it.
+     */
+    std::pair<TypeTags, Value> operator()(Decimal128 value) {
+        DataView(temporaryStorage.data()).write(value);
+        return {TypeTags::NumberDecimal, reinterpret_cast<Value>(temporaryStorage.data())};
+    }
+
+    /**
+     * Copies the 'OID::kOIDSize' bytes of the OID into 'temporaryStorage' and returns an
+     * 'ObjectId' pointing at them.
+     */
+    std::pair<TypeTags, Value> operator()(const OID& value) {
+        auto oidBytes = value.view().view();
+        std::copy(oidBytes, oidBytes + OID::kOIDSize, temporaryStorage.begin());
+
+        return {TypeTags::ObjectId, reinterpret_cast<Value>(temporaryStorage.data())};
+    }
+
+    /**
+     * Formats the UUID in 'temporaryStorage' as BinData (length, subtype, payload) and returns a
+     * 'bsonBinData' value pointing at that buffer.
+     */
+    std::pair<TypeTags, Value> operator()(const UUID& value) {
+        // The 'ColumnStoreEncoder' returns a UUID by formatting it in temporary storage as a
+        // BSONBinData object.
+
+        // Write the payload length.
+        DataView binDataView(temporaryStorage.data());
+        std::ptrdiff_t offset = 0;
+        binDataView.write<LittleEndian<uint32_t>>(UUID::kNumBytes,
+                                                  std::exchange(offset, offset + sizeof(uint32_t)));
+
+        // Write the BinDataType value.
+        binDataView.write<char>(newUUID, std::exchange(offset, offset + 1));
+
+        // Write the UUID payload.
+        static_assert(sizeof(value.data()) == UUID::kNumBytes);
+        binDataView.write(value.data(), offset);  // No need to update 'offset' for the last write.
+
+        // The tag must describe the BinData layout written above. Tagging this buffer as an
+        // ObjectId would make consumers read the length and subtype bytes as ObjectId contents.
+        return {TypeTags::bsonBinData, reinterpret_cast<Value>(temporaryStorage.data())};
+    }
+
+private:
+    // Shared instances whose addresses are returned for empty object / empty array inputs.
+    static const Object emptyObject;
+    static const Array emptyArray;
+
+    // 'temporaryStorage' is sized to fit the largest value that any overload formats into it.
+    static constexpr std::size_t kSizeOfDecimal = 2 * sizeof(long long);
+    static constexpr std::size_t kSizeOfUUIDBinData = sizeof(uint32_t)  // Length field
+        + 1                                                             // BinData type field
+        + UUID::kNumBytes;                                              // UUID payload
+    static constexpr std::size_t kSizeOfStringBuffer = sizeof(uint32_t)  // Length field
+        + ColumnStore::Bytes::TinySize::kStringMax                       // String
+        + 1;                                                             // Null terminator
+    static constexpr std::size_t kSizeOfTemporary = std::max<std::size_t>(
+        {kSizeOfDecimal, OID::kOIDSize, kSizeOfUUIDBinData, kSizeOfStringBuffer});
+
+    // Scratch buffer backing the pointer-typed results; overwritten by each such call.
+    std::array<char, kSizeOfTemporary> temporaryStorage;
+};
+} // namespace mongo::sbe::value
diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h
index aab7a719634..a0cb38189a4 100644
--- a/src/mongo/db/storage/column_store.h
+++ b/src/mongo/db/storage/column_store.h
@@ -347,11 +347,12 @@ struct SplitCellView {
template <class ValueEncoder>
auto subcellValuesGenerator(ValueEncoder&& valEncoder) const {
struct Cursor {
- typename ValueEncoder::Out nextValue() {
+ using Out = typename std::remove_reference_t<ValueEncoder>::Out;
+ Out nextValue() {
if (!elemPtr)
- return ValueEncoder::Out();
+ return Out();
if (elemPtr == end)
- return ValueEncoder::Out();
+ return Out();
invariant(elemPtr < end);
return decodeAndAdvance(elemPtr, encoder);
@@ -361,7 +362,7 @@ struct SplitCellView {
const char* end;
ValueEncoder encoder;
};
- return Cursor{firstElementPtr, arrInfo.rawData(), std::forward(valEncoder)};
+ return Cursor{firstElementPtr, arrInfo.rawData(), std::forward<ValueEncoder>(valEncoder)};
}
static SplitCellView parse(CellView cell) {
@@ -438,11 +439,11 @@ struct SplitCellView {
}
// TODO SERVER-63284: This would be more concisely expressed using the case range syntax.
- if (Bytes::kTinyIntMin >= byte && byte <= Bytes::kTinyIntMax) {
+ if (byte >= Bytes::kTinyIntMin && byte <= Bytes::kTinyIntMax) {
return encoder(int32_t(int8_t(byte - TinyNum::kTinyIntZero)));
- } else if (Bytes::kTinyLongMin >= byte && byte <= Bytes::kTinyLongMax) {
+ } else if (byte >= Bytes::kTinyLongMin && byte <= Bytes::kTinyLongMax) {
return encoder(int64_t(int8_t(byte - TinyNum::kTinyLongZero)));
- } else if (Bytes::kStringSizeMin >= byte && byte <= Bytes::kStringSizeMax) {
+ } else if (byte >= Bytes::kStringSizeMin && byte <= Bytes::kStringSizeMax) {
auto size = size_t(byte - Bytes::kStringSizeMin);
return encoder(StringData(std::exchange(ptr, ptr + size), size));
} else {
@@ -464,9 +465,9 @@ struct SplitCellView {
return encoder(true);
// Size and type encoded in byte, value follows.
case Bytes::kDecimal128: {
- auto val = ConstDataView(ptr).read<Decimal128>();
- ptr += 16;
- return encoder(val);
+ auto val = encoder(ConstDataView(ptr).read<Decimal128>());
+ ptr += DataType::Handler<Decimal128>::kSizeOfDecimal;
+ return val;
}
case Bytes::kDouble: {
auto val = ConstDataView(ptr).read<LittleEndian<double>>();
diff --git a/src/mongo/platform/decimal128.cpp b/src/mongo/platform/decimal128.cpp
index 0d12479f70a..630d2037b50 100644
--- a/src/mongo/platform/decimal128.cpp
+++ b/src/mongo/platform/decimal128.cpp
@@ -1044,4 +1044,31 @@ std::ostream& operator<<(std::ostream& stream, const Decimal128& value) {
return stream << value.toString();
}
+/**
+ * Reads a Decimal128 from 'ptr' without any bounds checking: a little-endian 64-bit low word
+ * followed by a little-endian 64-bit high word. 'valueOut' and 'advanced' are both optional; when
+ * 'valueOut' is null nothing is read, and when 'advanced' is non-null it receives
+ * 'kSizeOfDecimal'.
+ */
+void DataType::Handler<Decimal128>::unsafeLoad(Decimal128* valueOut,
+                                               const char* ptr,
+                                               size_t* advanced) {
+    if (valueOut != nullptr) {
+        ConstDataView source(ptr);
+        // Low word sits at offset 0, high word immediately after it.
+        const uint64_t lowBits = source.read<LittleEndian<uint64_t>>();
+        const uint64_t highBits = source.read<LittleEndian<uint64_t>>(sizeof(uint64_t));
+        *valueOut = Decimal128(Decimal128::Value{lowBits, highBits});
+    }
+
+    if (advanced != nullptr) {
+        *advanced = kSizeOfDecimal;
+    }
+}
+
+/**
+ * Writes 'valueIn' to 'ptr' without any bounds checking: the low 64 bits first, then the high 64
+ * bits, each in little-endian order. When 'advanced' is non-null it receives 'kSizeOfDecimal'.
+ */
+void DataType::Handler<Decimal128>::unsafeStore(const Decimal128& valueIn,
+                                                char* ptr,
+                                                size_t* advanced) {
+    const auto bits = valueIn.getValue();
+
+    DataView destination(ptr);
+    destination.write<LittleEndian<uint64_t>>(bits.low64, 0);
+    destination.write<LittleEndian<uint64_t>>(bits.high64, sizeof(uint64_t));
+
+    if (advanced != nullptr) {
+        *advanced = kSizeOfDecimal;
+    }
+}
+
} // namespace mongo
diff --git a/src/mongo/platform/decimal128.h b/src/mongo/platform/decimal128.h
index 64306df0e0c..f14e6b6d73b 100644
--- a/src/mongo/platform/decimal128.h
+++ b/src/mongo/platform/decimal128.h
@@ -39,6 +39,7 @@
#include "mongo/config.h"
+#include "mongo/base/data_type.h"
#include "mongo/util/assert_util.h"
namespace mongo {
@@ -635,4 +636,42 @@ inline bool operator!=(const Decimal128& lhs, const Decimal128& rhs) {
} // namespace literals
+/**
+ * DataType handler specialization for Decimal128. The serialized form occupies 'kSizeOfDecimal'
+ * bytes. 'load' and 'store' check the buffer length and then delegate to the unchecked
+ * 'unsafeLoad' / 'unsafeStore', which are defined out of line.
+ */
+template <>
+struct DataType::Handler<Decimal128> {
+    // Serialized size: two 64-bit words.
+    static constexpr size_t kSizeOfDecimal = 2 * sizeof(uint64_t);
+
+    static void unsafeLoad(Decimal128* valueOut, const char* ptr, size_t* advanced);
+    static void unsafeStore(const Decimal128& valueIn, char* ptr, size_t* advanced);
+
+    /**
+     * Bounds-checked read. Returns an Overflow status when fewer than 'kSizeOfDecimal' bytes are
+     * available, otherwise performs 'unsafeLoad' and returns OK.
+     */
+    static Status load(Decimal128* valueOut,
+                       const char* ptr,
+                       size_t length,
+                       size_t* advanced,
+                       std::ptrdiff_t debug_offset) {
+        if (length >= kSizeOfDecimal) {
+            unsafeLoad(valueOut, ptr, advanced);
+            return Status::OK();
+        }
+
+        return Status(ErrorCodes::Overflow, "Buffer too small to hold Decimal128 value");
+    }
+
+    /**
+     * Bounds-checked write. Returns an Overflow status when fewer than 'kSizeOfDecimal' bytes are
+     * available, otherwise performs 'unsafeStore' and returns OK.
+     */
+    static Status store(const Decimal128& valueIn,
+                        char* ptr,
+                        size_t length,
+                        size_t* advanced,
+                        std::ptrdiff_t debug_offset) {
+        if (length >= kSizeOfDecimal) {
+            unsafeStore(valueIn, ptr, advanced);
+            return Status::OK();
+        }
+
+        return Status(ErrorCodes::Overflow, "Buffer too small to write Decimal128 value");
+    }
+
+    /**
+     * Returns a default-constructed Decimal128.
+     */
+    static Decimal128 defaultConstruct() {
+        return Decimal128();
+    }
+};
+
} // namespace mongo