diff options
author | Jackson Xie <jackson.xie@mongodb.com> | 2021-06-15 20:16:31 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-22 17:47:24 +0000 |
commit | d7158b8a3c46da624025bac6c6edd8e4eb969733 (patch) | |
tree | da3e38695362f1a6c89c6054a964fad690676ad1 | |
parent | 08fb6c3d1130dbf12325798104cc81e1e93dfabb (diff) | |
download | mongo-d7158b8a3c46da624025bac6c6edd8e4eb969733.tar.gz |
SERVER-57643: Create Basic Decoder and Encoder Functions
-rw-r--r-- | src/mongo/bson/util/SConscript | 12 | ||||
-rw-r--r-- | src/mongo/bson/util/simple8b.cpp | 115 | ||||
-rw-r--r-- | src/mongo/bson/util/simple8b.h | 65 | ||||
-rw-r--r-- | src/mongo/bson/util/simple8b_test.cpp | 119 |
4 files changed, 311 insertions, 0 deletions
diff --git a/src/mongo/bson/util/SConscript b/src/mongo/bson/util/SConscript index 4ffcf2a6bbc..81150f5e28a 100644 --- a/src/mongo/bson/util/SConscript +++ b/src/mongo/bson/util/SConscript @@ -24,6 +24,16 @@ env.Library( ], ) +env.Library( + target='simple8b', + source=[ + 'simple8b.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + ], +) + env.CppUnitTest( target='bson_util_test', source=[ @@ -31,10 +41,12 @@ env.CppUnitTest( 'bson_extract_test.cpp', 'builder_test.cpp', 'bitstream_builder_test.cpp', + 'simple8b_test.cpp' ], LIBDEPS=[ '$BUILD_DIR/mongo/base', 'bitstream_builder', 'bson_extract', + 'simple8b', ], ) diff --git a/src/mongo/bson/util/simple8b.cpp b/src/mongo/bson/util/simple8b.cpp new file mode 100644 index 00000000000..006fc22a3d2 --- /dev/null +++ b/src/mongo/bson/util/simple8b.cpp @@ -0,0 +1,115 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. 
If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/bson/util/simple8b.h" + +namespace mongo { + +namespace { +static constexpr uint8_t _maxSelector = 15; +static constexpr uint8_t _minSelector = 2; +static constexpr uint64_t _selectorMask = 0xF000000000000000; +static constexpr uint8_t _selectorSize = 4; +static constexpr uint8_t _dataSize = 60; + +// Pass the selector value as the index to get the number of bits per integer in the Simple8b block. +const uint8_t _selectorForBitsPerInteger[16] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60}; + +// Pass the selector value as the index to get the number of integers coded in the Simple8b block. +const uint8_t _selectorForIntegersCoded[16] = { + 240, 120, 60, 30, 20, 15, 12, 10, 8, 7, 6, 5, 4, 3, 2, 1}; + +// Pass the selector as the index to get the corresponding mask. +// Get the maskSize by getting the number of bits for the selector. Then 2^maskSize - 1. 
+const uint64_t _selectorForMask[16] = {0, + 0, + 1, + 3, + 7, + 15, + 31, + 63, + 127, + 255, + 1023, + 4095, + 32767, + 1048575, + 1073741823, + 1152921504606846975}; + +} // namespace + +uint64_t Simple8b::encodeSimple8b(uint8_t selector, const std::vector<uint64_t>& values) { + if (selector > _maxSelector || selector < _minSelector) + return errCode; + + uint8_t bitsPerInteger = _selectorForBitsPerInteger[selector]; + uint8_t integersCoded = _selectorForIntegersCoded[selector]; + + uint64_t encodedWord = (uint64_t)selector << _dataSize; + + for (uint8_t i = 0; i < integersCoded; ++i) { + uint8_t shiftSize = _dataSize - bitsPerInteger * (i + 1); + encodedWord += values[i] << shiftSize; + } + + return encodedWord; +} + + +std::vector<uint64_t> Simple8b::decodeSimple8b(const uint64_t simple8bWord) { + std::vector<uint64_t> values; + + uint8_t selector = (simple8bWord & _selectorMask) >> _dataSize; + + if (selector < _minSelector) + return values; + + uint8_t bitsPerInteger = _selectorForBitsPerInteger[selector]; + uint8_t integersCoded = _selectorForIntegersCoded[selector]; + + for (int8_t i = integersCoded - 1; i >= 0; --i) { + uint8_t startIdx = bitsPerInteger * i; + + // If there are dirty bits, shift over them. + if (selector == 8 || selector == 9) + startIdx += 4; + + uint64_t mask = _selectorForMask[selector] << startIdx; + uint64_t value = (simple8bWord & mask) >> startIdx; + + values.push_back(value); + } + + return values; +} + +} // namespace mongo diff --git a/src/mongo/bson/util/simple8b.h b/src/mongo/bson/util/simple8b.h new file mode 100644 index 00000000000..bc9df2c954e --- /dev/null +++ b/src/mongo/bson/util/simple8b.h @@ -0,0 +1,65 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
/*
 * (Remainder of the simple8b.h license header that begins on the preceding line.)
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#pragma once

#include <cmath>
#include <cstdint>  // uint8_t / uint64_t used in the declarations below
#include <iostream>
#include <vector>


namespace mongo {

/**
 * As of now, Simple8b is a static class that can encode a series of integers and a selector value
 * into a single 64 bit Simple8b word, and decode such a word back into its integers.
 */
class Simple8b {
public:
    // A valid Simple8b should never have a selector value of 1, so we will use it as an error code.
    // The value is the selector 1 placed in the top four bits with all data bits zero.
    static constexpr uint64_t errCode = 0x1000000000000000;

    /**
     * encodeSimple8b takes a selector and a vector of integers to be compressed into a 64 bit word.
     * The values will be stored from left to right.
     * If there are wasted bits, they will be placed at the very end on the right.
     * For now, we will assume that all ints in the vector are greater or equal to zero.
     * We will also assume that the selector and all values will fit into the 64 bit word.
     * Returns the encoded Simple8b word if the inputs are valid and errCode otherwise.
     */
    static uint64_t encodeSimple8b(uint8_t selector, const std::vector<uint64_t>& values);

    /**
     * decodeSimple8b decodes a simple8b word into a vector of integers.
     * Only when the selector is invalid will the returned vector be empty.
     */
    static std::vector<uint64_t> decodeSimple8b(uint64_t simple8bWord);
};

}  // namespace mongo

// Start of the next file in this commit (diff header and the head of its license text, kept verbatim):
// diff --git a/src/mongo/bson/util/simple8b_test.cpp b/src/mongo/bson/util/simple8b_test.cpp new file mode 100644 index 00000000000..b16166ba45d --- /dev/null +++ b/src/mongo/bson/util/simple8b_test.cpp @@ -0,0 +1,119 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so.
If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <vector> + +#include "mongo/bson/util/simple8b.h" +#include "mongo/unittest/unittest.h" + +using namespace mongo; + +void areVectorsEqual(std::vector<uint64_t> actualVector, std::vector<uint64_t> expectedVector) { + ASSERT_EQ(actualVector.size(), expectedVector.size()); + + for (unsigned i = 0; i < actualVector.size(); ++i) { + ASSERT_EQ(actualVector[i], expectedVector[i]); + } +} + +TEST(Simple8b, EncodeOneValueTest) { + uint8_t selector = 15; + std::vector<uint64_t> values = {1}; + uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values); + ASSERT_EQUALS(simple8bWord, 0xF000000000000001); +} + +TEST(Simple8b, DecodeOneValueTest) { + uint64_t simple8bWord = 0xF000000000000001; + std::vector<uint64_t> values = Simple8b::decodeSimple8b(simple8bWord); + std::vector<uint64_t> expectedValues = {1}; + areVectorsEqual(values, expectedValues); +} + +TEST(Simple8b, EncodeMultipleValuesTest) { + uint8_t selector = 13; + std::vector<uint64_t> values = {1, 2, 3}; + uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values); + ASSERT_EQUALS(simple8bWord, 0xD000010000200003); +} + +TEST(Simple8b, DecodeMultipleValuesTest) { + uint64_t simple8bWord = 0xD000010000200003; + std::vector<uint64_t> values = Simple8b::decodeSimple8b(simple8bWord); + std::vector<uint64_t> expectedValues = {1, 2, 3}; + areVectorsEqual(values, expectedValues); +} + +TEST(Simple8b, EncodeMaxValuesTest) { + uint8_t selector = 2; + std::vector<uint64_t> values(60, 1); + uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values); + ASSERT_EQUALS(simple8bWord, 0x2FFFFFFFFFFFFFFF); +} + +TEST(Simple8b, DecodeMaxValuesTest) { + uint64_t simple8bWord = 0x2FFFFFFFFFFFFFFF; + std::vector<uint64_t> values = Simple8b::decodeSimple8b(simple8bWord); + 
std::vector<uint64_t> expectedValues(60, 1); + areVectorsEqual(values, expectedValues); +} + +TEST(Simple8b, EncodeWithTrailingDirtyBitsTest) { + uint8_t selector = 9; + std::vector<uint64_t> values(7, 1); + uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values); + ASSERT_EQUALS(simple8bWord, 0x9010101010101010); // 4 bits is dirty. +} + +TEST(Simple8b, DecodeWithTrailingDirtyBitsTest) { + uint64_t simple8bWord = 0x9010101010101010; // 4 bits is dirty. + std::vector<uint64_t> values = Simple8b::decodeSimple8b(simple8bWord); + std::vector<uint64_t> expectedValues(7, 1); + areVectorsEqual(values, expectedValues); +} + +TEST(Simple8b, InvalidEncodeOneSelectorTest) { + uint8_t selector = 0; + std::vector<uint64_t> values = {}; + uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values); + ASSERT_EQUALS(simple8bWord, 0x1000000000000000); +} + +TEST(Simple8b, InvalidEncodeSixteenSelectorTest) { + uint8_t selector = 16; + std::vector<uint64_t> values = {}; + uint64_t simple8bWord = Simple8b::encodeSimple8b(selector, values); + ASSERT_EQUALS(simple8bWord, 0x1000000000000000); +} + +TEST(Simple8b, InvalidDecodeOneSelectorTest) { + uint64_t simple8bWord = 0x1000000000000000; + std::vector<uint64_t> values = Simple8b::decodeSimple8b(simple8bWord); + ASSERT_EQUALS(values.size(), 0); +} |