diff options
-rw-r--r-- | src/mongo/bson/util/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.cpp | 30 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.h | 30 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn_test.cpp | 326 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumnbuilder.cpp | 169 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumnbuilder.h | 103 |
6 files changed, 663 insertions, 2 deletions
diff --git a/src/mongo/bson/util/SConscript b/src/mongo/bson/util/SConscript index 3f6e2af1018..8cb379fc723 100644 --- a/src/mongo/bson/util/SConscript +++ b/src/mongo/bson/util/SConscript @@ -15,8 +15,10 @@ env.Library( ) env.Library( - target='type_compressor', + target='bson_column', source=[ + 'bsoncolumn.cpp', + 'bsoncolumnbuilder.cpp', 'simple8b.cpp', 'simple8b_type_util.cpp', ], @@ -30,13 +32,14 @@ env.CppUnitTest( source=[ 'bson_check_test.cpp', 'bson_extract_test.cpp', + 'bsoncolumn_test.cpp', 'builder_test.cpp', 'simple8b_test.cpp', 'simple8b_type_util_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + 'bson_column', 'bson_extract', - 'type_compressor', ], ) diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp new file mode 100644 index 00000000000..a0e48c1fbfa --- /dev/null +++ b/src/mongo/bson/util/bsoncolumn.cpp @@ -0,0 +1,30 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. 
If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/bson/util/bsoncolumn.h" diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h new file mode 100644 index 00000000000..d3f38aaf98d --- /dev/null +++ b/src/mongo/bson/util/bsoncolumn.h @@ -0,0 +1,30 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp new file mode 100644 index 00000000000..ae39bf82d99 --- /dev/null +++ b/src/mongo/bson/util/bsoncolumn_test.cpp @@ -0,0 +1,326 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */
+
+#include "mongo/bson/util/bsoncolumnbuilder.h"
+#include "mongo/bson/util/simple8b_type_util.h"
+
+#include "mongo/unittest/unittest.h"
+
+#include <boost/optional/optional_io.hpp>
+
+#include <cstring>
+#include <forward_list>
+#include <vector>
+
+namespace mongo {
+namespace {
+
+/**
+ * Fixture for BSONColumnBuilder tests.
+ *
+ * Each test feeds elements into a BSONColumnBuilder and, in parallel, assembles the expected
+ * binary by hand with the append* helpers below. verifyBinary() then compares the two byte for
+ * byte.
+ */
+class BSONColumnTest : public unittest::Test {
+public:
+    /**
+     * Creates an int32 element with field name "0". The owning BSONObj is kept in
+     * '_elementMemory' so the returned element stays valid for the lifetime of the fixture.
+     */
+    BSONElement createElementInt32(int32_t val) {
+        BSONObjBuilder ob;
+        ob.append("0"_sd, val);
+        _elementMemory.emplace_front(ob.obj());
+        return _elementMemory.front().firstElement();
+    }
+
+    /**
+     * Creates an int64 element with field name "0". The owning BSONObj is kept in
+     * '_elementMemory' so the returned element stays valid for the lifetime of the fixture.
+     */
+    BSONElement createElementInt64(int64_t val) {
+        BSONObjBuilder ob;
+        ob.append("0"_sd, val);
+        _elementMemory.emplace_front(ob.obj());
+        return _elementMemory.front().firstElement();
+    }
+
+    /**
+     * Returns the Simple8bTypeUtil-encoded difference between two int32 elements.
+     */
+    static uint64_t deltaInt32(BSONElement val, BSONElement prev) {
+        return Simple8bTypeUtil::encodeInt64(val.Int() - prev.Int());
+    }
+
+    /**
+     * Returns the Simple8bTypeUtil-encoded difference between two int64 elements.
+     */
+    static uint64_t deltaInt64(BSONElement val, BSONElement prev) {
+        return Simple8bTypeUtil::encodeInt64(val.Long() - prev.Long());
+    }
+
+    /**
+     * Returns the encoded deltas for the range [begin, end), where the first delta is taken
+     * against 'prev' and every following delta against the element before it.
+     */
+    template <typename It>
+    static std::vector<uint64_t> deltaInt64(It begin, It end, BSONElement prev) {
+        std::vector<uint64_t> deltas;
+        for (; begin != end; ++begin) {
+            deltas.push_back(deltaInt64(*begin, prev));
+            prev = *begin;
+        }
+        return deltas;
+    }
+
+    /**
+     * Appends 'elem' as an uncompressed literal: type byte, empty field name, value bytes.
+     */
+    static void appendLiteral(BufBuilder& builder, BSONElement elem) {
+        // BSON Type byte
+        builder.appendChar(elem.type());
+
+        // Null terminator for field name
+        builder.appendChar('\0');
+
+        // Element value
+        builder.appendBuf(elem.value(), elem.valuesize());
+    }
+
+    /**
+     * Appends a Simple-8b control byte: 'control' in the upper four bits, 'count' in the lower
+     * four. The tests in this file pass the number of following blocks minus one as 'count'.
+     */
+    static void appendSimple8bControl(BufBuilder& builder, uint8_t control, uint8_t count) {
+        builder.appendChar(control << 4 | count);
+    }
+
+    /**
+     * Appends one Simple-8b block holding either the single value 'val' or, for boost::none, a
+     * single skip. Asserts that exactly one 64-bit block was written.
+     */
+    static void appendSimple8bBlock(BufBuilder& builder, boost::optional<uint64_t> val) {
+        auto prev = builder.len();
+        Simple8bBuilder<uint64_t> s8bBuilder([&builder](uint64_t block) {
+            builder.appendNum(block);
+            return true;
+        });
+        if (val) {
+            s8bBuilder.append(*val);
+        } else {
+            s8bBuilder.skip();
+        }
+
+        s8bBuilder.flush();
+        ASSERT_EQ(builder.len() - prev, sizeof(uint64_t));
+    }
+
+    /**
+     * Appends all of 'vals' through a single Simple-8b builder and asserts that this produced
+     * exactly 'expectedNum' 64-bit blocks.
+     */
+    static void appendSimple8bBlocks(BufBuilder& builder,
+                                     const std::vector<uint64_t>& vals,
+                                     uint32_t expectedNum) {
+        auto prev = builder.len();
+        Simple8bBuilder<uint64_t> s8bBuilder([&builder](uint64_t block) {
+            builder.appendNum(block);
+            return true;
+        });
+        for (auto val : vals) {
+            s8bBuilder.append(val);
+        }
+        s8bBuilder.flush();
+        ASSERT_EQ((builder.len() - prev) / sizeof(uint64_t), expectedNum);
+    }
+
+    /**
+     * Appends the EOO byte that terminates the column binary.
+     */
+    static void appendEOO(BufBuilder& builder) {
+        builder.appendChar(EOO);
+    }
+
+    /**
+     * Asserts that 'columnBinary' has the Column subtype and the same length and bytes as
+     * 'expected'.
+     */
+    static void verifyBinary(BSONBinData columnBinary, const BufBuilder& expected) {
+        ASSERT_EQ(columnBinary.type, BinDataType::Column);
+        ASSERT_EQ(columnBinary.length, expected.len());
+        ASSERT_EQ(memcmp(columnBinary.data, expected.buf(), columnBinary.length), 0);
+    }
+
+private:
+    // Owns the objects backing every element handed out by createElementInt32/Int64.
+    std::forward_list<BSONObj> _elementMemory;
+};
+
+
+// Appending the same value twice yields a literal followed by one block holding a zero delta.
+TEST_F(BSONColumnTest, BasicValue) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto elem = createElementInt32(1);
+    cb.append(elem);
+    cb.append(elem);
+
+    BufBuilder expected;
+    appendLiteral(expected, elem);
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, 0);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// A skip after a value yields a literal followed by one block holding a skip.
+TEST_F(BSONColumnTest, BasicSkip) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto elem = createElementInt32(1);
+    cb.append(elem);
+    cb.skip();
+
+    BufBuilder expected;
+    appendLiteral(expected, elem);
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, boost::none);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// A column consisting of a single skip starts directly with a control byte.
+TEST_F(BSONColumnTest, OnlySkip) {
+    BSONColumnBuilder cb("test"_sd);
+
+    cb.skip();
+
+    BufBuilder expected;
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, boost::none);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// A leading skip is written before the literal of the first real value.
+TEST_F(BSONColumnTest, ValueAfterSkip) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto elem = createElementInt32(1);
+    cb.skip();
+    cb.append(elem);
+
+    BufBuilder expected;
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, boost::none);
+    appendLiteral(expected, elem);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+
+// The second value is expected to be written as a literal rather than as a delta.
+TEST_F(BSONColumnTest, LargeDeltaIsLiteral) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto first = createElementInt64(1);
+    cb.append(first);
+
+    auto second = createElementInt64(0x7FFFFFFFFFFFFFFF);
+    cb.append(second);
+
+    BufBuilder expected;
+    appendLiteral(expected, first);
+    appendLiteral(expected, second);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// The pending block is written before the literal for the large value; delta encoding then
+// continues relative to that literal.
+TEST_F(BSONColumnTest, LargeDeltaIsLiteralAfterSimple8b) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto zero = createElementInt64(0);
+    cb.append(zero);
+    cb.append(zero);
+
+    auto large = createElementInt64(0x7FFFFFFFFFFFFFFF);
+    cb.append(large);
+    cb.append(large);
+
+    BufBuilder expected;
+    appendLiteral(expected, zero);
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, deltaInt64(zero, zero));
+    appendLiteral(expected, large);
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, deltaInt64(large, large));
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// Twenty alternating values give nineteen deltas. The expected layout is sixteen blocks under
+// the first control byte and the remaining three under a second control byte.
+TEST_F(BSONColumnTest, OverBlockCount) {
+    BSONColumnBuilder cb("test"_sd);
+
+    std::vector<BSONElement> elems;
+    int64_t val = 0xFFFFFFFFFFFF;
+
+    for (int i = 0; i < 20; ++i) {
+        elems.push_back(createElementInt64(val));
+        val = -val;
+    }
+
+    for (const auto& elem : elems) {
+        cb.append(elem);
+    }
+
+    BufBuilder expected;
+    appendLiteral(expected, elems.front());
+    appendSimple8bControl(expected, 0b1000, 0b1111);
+
+    auto chunk1Begin = elems.begin() + 1;
+    auto chunk1End = chunk1Begin + 16;
+    appendSimple8bBlocks(expected, deltaInt64(chunk1Begin, chunk1End, elems.front()), 16);
+
+    appendSimple8bControl(expected, 0b1000, 0b0010);
+    appendSimple8bBlocks(expected, deltaInt64(chunk1End, elems.end(), *(chunk1End - 1)), 3);
+
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// A value of a different type directly after a literal is written as another literal.
+TEST_F(BSONColumnTest, TypeChangeAfterLiteral) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto elemInt32 = createElementInt32(0);
+    auto elemInt64 = createElementInt64(0);
+
+    cb.append(elemInt32);
+    cb.append(elemInt64);
+
+    BufBuilder expected;
+    appendLiteral(expected, elemInt32);
+    appendLiteral(expected, elemInt64);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// A type change flushes the pending block before the new literal is written.
+TEST_F(BSONColumnTest, TypeChangeAfterSimple8b) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto elemInt32 = createElementInt32(0);
+    auto elemInt64 = createElementInt64(0);
+
+    cb.append(elemInt32);
+    cb.append(elemInt32);
+    cb.append(elemInt64);
+
+    BufBuilder expected;
+    appendLiteral(expected, elemInt32);
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, 0);
+    appendLiteral(expected, elemInt64);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+// Delta encoding resumes after a type change, relative to the new literal.
+TEST_F(BSONColumnTest, Simple8bAfterTypeChange) {
+    BSONColumnBuilder cb("test"_sd);
+
+    auto elemInt32 = createElementInt32(0);
+    auto elemInt64 = createElementInt64(0);
+
+    cb.append(elemInt32);
+    cb.append(elemInt64);
+    cb.append(elemInt64);
+
+    BufBuilder expected;
+    appendLiteral(expected, elemInt32);
+    appendLiteral(expected, elemInt64);
+    appendSimple8bControl(expected, 0b1000, 0b0000);
+    appendSimple8bBlock(expected, 0);
+    appendEOO(expected);
+
+    verifyBinary(cb.finalize(), expected);
+}
+
+}  // namespace
+}  // namespace mongo
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.cpp b/src/mongo/bson/util/bsoncolumnbuilder.cpp
new file mode 100644
index 00000000000..854c592cd99
--- /dev/null
+++ b/src/mongo/bson/util/bsoncolumnbuilder.cpp
@@ -0,0 +1,169 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/bson/util/bsoncolumnbuilder.h"
+
+#include "mongo/bson/util/simple8b_type_util.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <memory>
+
+namespace mongo {
+// Number of Simple-8b blocks that may follow a single control byte.
+static constexpr uint8_t kMaxCount = 16;
+// Lower four bits of the control byte hold the block count minus one.
+static constexpr uint8_t kCountMask = 0x0F;
+// Value placed in the upper four bits of every control byte written by this builder.
+static constexpr uint8_t kNoScaleControl = 0x80;
+// Marks "no control byte is currently open". It must not be 0, because 0 is a valid buffer
+// offset: a column that begins with skip() places its control byte at offset 0.
+static constexpr std::ptrdiff_t kNoSimple8bControl = -1;
+
+/**
+ * Returns 'val - prev' with two's-complement wrap-around.
+ *
+ * The subtraction is done on unsigned operands because signed overflow is undefined behavior,
+ * whereas unsigned arithmetic wraps.
+ */
+static int64_t calcDelta(int64_t val, int64_t prev) {
+    return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev));
+}
+
+/**
+ * Constructs an empty column builder. 'fieldName' is only stored and handed back by
+ * fieldName(); the code in this file never writes it into the binary.
+ */
+BSONColumnBuilder::BSONColumnBuilder(StringData fieldName)
+    : _simple8bBuilder(_createSimple8bBuilder()), _fieldName(fieldName) {
+    // No control byte has been written yet.
+    _controlByteOffset = kNoSimple8bControl;
+
+    // Store EOO type with empty field name as previous.
+    _storePrevious(BSONElement());
+}
+
+/**
+ * Returns a BSONElement view over the stored copy of the previously appended element. The view
+ * points into '_prev' and is invalidated by the next call to _storePrevious().
+ */
+BSONElement BSONColumnBuilder::_previous() const {
+    return {_prev.get(), 1, _prevSize, BSONElement::CachedSizeTag{}};
+}
+
+/**
+ * Appends 'elem' to the column.
+ *
+ * The element is written as an uncompressed literal when its type differs from the previous
+ * element (which includes the very first element) or when its delta cannot be stored in
+ * Simple-8b. Otherwise only the delta against the previous element is stored.
+ *
+ * Deltas are only implemented for NumberInt and NumberLong. Appending a different value of any
+ * other type after an element of that same type hits an invariant.
+ */
+BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
+    auto type = elem.type();
+    auto previous = _previous();
+
+    // If we detect a type change (or this is first value). Flush all pending values in Simple-8b
+    // and write uncompressed literal.
+    if (previous.type() != type) {
+        _storePrevious(elem);
+        _simple8bBuilder.flush();
+        _writeLiteralFromPrevious();
+        return *this;
+    }
+
+    // Store delta in Simple-8b if types match. Binary-equal values always have a delta of zero.
+    int64_t delta = 0;
+    if (!elem.binaryEqualValues(previous)) {
+        switch (type) {
+            case NumberInt:
+                // Operands are widened to 64 bits, so the difference of two 32-bit values is
+                // always exact.
+                delta = calcDelta(elem._numberInt(), previous._numberInt());
+                break;
+            case NumberLong:
+                delta = calcDelta(elem._numberLong(), previous._numberLong());
+                break;
+            default:
+                // Nothing else is implemented yet
+                invariant(false);
+        }
+    }
+
+    // 'previous' must not be used past this point, _storePrevious() may reallocate its storage.
+    bool result = _simple8bBuilder.append(Simple8bTypeUtil::encodeInt64(delta));
+    _storePrevious(elem);
+
+    // Store uncompressed literal if value is outside of range of encodable values.
+    if (!result) {
+        _simple8bBuilder.flush();
+        _writeLiteralFromPrevious();
+    }
+
+    return *this;
+}
+
+/**
+ * Records a skip in the Simple-8b stream. The stored previous element is left untouched.
+ */
+BSONColumnBuilder& BSONColumnBuilder::skip() {
+    _simple8bBuilder.skip();
+    return *this;
+}
+
+/**
+ * Flushes pending Simple-8b data, appends the terminating EOO byte and returns a view of the
+ * internal buffer.
+ *
+ * Every call appends another EOO byte, so this is meant to be called once.
+ */
+BSONBinData BSONColumnBuilder::finalize() {
+    _simple8bBuilder.flush();
+
+    // Write EOO at the end
+    _bufBuilder.appendChar(EOO);
+
+    return {_bufBuilder.buf(), _bufBuilder.len(), BinDataType::Column};
+}
+
+/**
+ * Copies the type and value of 'elem' into '_prev', laid out as: type byte, a single null byte
+ * standing in for an empty field name, value bytes. The buffer only ever grows.
+ */
+void BSONColumnBuilder::_storePrevious(BSONElement elem) {
+    auto valuesize = elem.valuesize();
+
+    // Add space for type byte and field name null terminator
+    auto size = valuesize + 2;
+
+    // Re-allocate buffer if not large enough
+    if (size > _prevCapacity) {
+        _prevCapacity = size;
+        _prev = std::make_unique<char[]>(_prevCapacity);
+
+        // Store null terminator, this byte will never change
+        _prev[1] = '\0';
+    }
+
+    // Copy element into buffer for previous. Omit field name.
+    _prev[0] = elem.type();
+    memcpy(_prev.get() + 2, elem.value(), valuesize);
+    _prevSize = size;
+}
+
+/**
+ * Appends the stored previous element to the output as an uncompressed literal.
+ */
+void BSONColumnBuilder::_writeLiteralFromPrevious() {
+    // Write literal without field name and reset control byte to force new one to be written when
+    // appending next value
+    _bufBuilder.appendBuf(_prev.get(), _prevSize);
+    _controlByteOffset = kNoSimple8bControl;
+}
+
+/**
+ * Accounts for one more Simple-8b block in the output. Either opens a new control byte or bumps
+ * the count stored in the currently open one. Called right before the block itself is written.
+ */
+void BSONColumnBuilder::_incrementSimple8bCount() {
+    char* byte;
+    uint8_t count;
+
+    if (_controlByteOffset == kNoSimple8bControl) {
+        // Allocate new control byte if we don't already have one. Record its offset so we can find
+        // it even if the underlying buffer reallocates.
+        byte = _bufBuilder.skip(1);
+        _controlByteOffset = std::distance(_bufBuilder.buf(), byte);
+        count = 0;
+    } else {
+        // Read current count from previous control byte
+        byte = _bufBuilder.buf() + _controlByteOffset;
+        count = (*byte & kCountMask) + 1;
+    }
+
+    // Write back new count and clear offset if we have reached max count
+    *byte = kNoScaleControl | (count & kCountMask);
+    if (count + 1 == kMaxCount) {
+        _controlByteOffset = kNoSimple8bControl;
+    }
+}
+
+/**
+ * Creates the Simple-8b builder whose completed blocks are written into '_bufBuilder'.
+ *
+ * The callback captures 'this', so it is only valid for as long as this object stays at the
+ * same address.
+ */
+Simple8bBuilder<uint64_t> BSONColumnBuilder::_createSimple8bBuilder() {
+    return Simple8bBuilder<uint64_t>([this](uint64_t block) {
+        // Write/update block count
+        _incrementSimple8bCount();
+
+        // Write Simple-8b block
+        _bufBuilder.appendNum(block);
+        return true;
+    });
+}
+
+}  // namespace mongo
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.h b/src/mongo/bson/util/bsoncolumnbuilder.h
new file mode 100644
index 00000000000..0e6efeb06a5
--- /dev/null
+++ b/src/mongo/bson/util/bsoncolumnbuilder.h
@@ -0,0 +1,103 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/bson/bsonelement.h"
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/util/builder.h"
+#include "mongo/bson/util/simple8b.h"
+
+#include <cstddef>
+#include <memory>
+#include <string>
+
+namespace mongo {
+
+/**
+ * Class to build BSON Subtype 7 (Column) binaries.
+ *
+ * Instances can be neither copied nor moved: the Simple-8b builder member holds a callback that
+ * refers back to this object, so the object has to stay at a fixed address.
+ */
+class BSONColumnBuilder {
+public:
+    explicit BSONColumnBuilder(StringData fieldName);
+
+    BSONColumnBuilder(const BSONColumnBuilder&) = delete;
+    BSONColumnBuilder& operator=(const BSONColumnBuilder&) = delete;
+    BSONColumnBuilder(BSONColumnBuilder&&) = delete;
+    BSONColumnBuilder& operator=(BSONColumnBuilder&&) = delete;
+
+    /**
+     * Appends a BSONElement to this BSONColumnBuilder.
+     *
+     * Value will be stored delta compressed if possible and uncompressed otherwise.
+     *
+     * The field name will be ignored.
+     */
+    BSONColumnBuilder& append(BSONElement elem);
+
+    /**
+     * Appends an index skip to this BSONColumnBuilder.
+     */
+    BSONColumnBuilder& skip();
+
+    /**
+     * Returns the field name this BSONColumnBuilder was created with.
+     */
+    StringData fieldName() const {
+        return _fieldName;
+    }
+
+    /**
+     * Finalizes the BSON Column and returns the BinData binary.
+     *
+     * The BSONColumnBuilder must remain in scope for the pointer to be valid.
+     */
+    BSONBinData finalize();
+
+private:
+    // Returns a view of the element stored by the last call to _storePrevious().
+    BSONElement _previous() const;
+
+    // Copies the type and value of 'elem' into '_prev'.
+    void _storePrevious(BSONElement elem);
+
+    // Writes the stored previous element to '_bufBuilder' as an uncompressed literal.
+    void _writeLiteralFromPrevious();
+
+    // Updates the Simple-8b control byte for one more block about to be written.
+    void _incrementSimple8bCount();
+
+    // Creates the Simple-8b builder that writes its blocks into '_bufBuilder'.
+    Simple8bBuilder<uint64_t> _createSimple8bBuilder();
+
+    // Storage for the previously appended BSONElement, with '_prevSize' bytes in use out of
+    // '_prevCapacity' allocated.
+    std::unique_ptr<char[]> _prev;
+    int _prevSize = 0;
+    int _prevCapacity = 0;
+
+    // Simple-8b builder for storing compressed deltas
+    Simple8bBuilder<uint64_t> _simple8bBuilder;
+
+    // Offset into '_bufBuilder' of the last Simple-8b control byte. Maintained by
+    // _incrementSimple8bCount() and _writeLiteralFromPrevious().
+    std::ptrdiff_t _controlByteOffset = 0;
+
+    // Buffer for the BSON Column binary
+    BufBuilder _bufBuilder;
+
+    // Field name
+    std::string _fieldName;
+};
+
+}  // namespace mongo
|