summary refs log tree commit diff
diff options
context:
space:
mode:
author Jackson Xie <jackson.xie@mongodb.com> 2021-06-15 20:16:31 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-06-22 17:47:24 +0000
commit d7158b8a3c46da624025bac6c6edd8e4eb969733 (patch)
tree da3e38695362f1a6c89c6054a964fad690676ad1
parent 08fb6c3d1130dbf12325798104cc81e1e93dfabb (diff)
download mongo-d7158b8a3c46da624025bac6c6edd8e4eb969733.tar.gz
SERVER-57643: Create Basic Decoder and Encoder Functions
-rw-r--r-- src/mongo/bson/util/SConscript 12
-rw-r--r-- src/mongo/bson/util/simple8b.cpp 115
-rw-r--r-- src/mongo/bson/util/simple8b.h 65
-rw-r--r-- src/mongo/bson/util/simple8b_test.cpp 119
4 files changed, 311 insertions, 0 deletions
diff --git a/src/mongo/bson/util/SConscript b/src/mongo/bson/util/SConscript
index 4ffcf2a6bbc..81150f5e28a 100644
--- a/src/mongo/bson/util/SConscript
+++ b/src/mongo/bson/util/SConscript
@@ -24,6 +24,16 @@ env.Library(
],
)
+# Builds the 'simple8b' library from simple8b.cpp; its only declared LIBDEPS entry is mongo/base.
+env.Library(
+ target='simple8b',
+ source=[
+ 'simple8b.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ ],
+)
+
env.CppUnitTest(
target='bson_util_test',
source=[
@@ -31,10 +41,12 @@ env.CppUnitTest(
'bson_extract_test.cpp',
'builder_test.cpp',
'bitstream_builder_test.cpp',
+ 'simple8b_test.cpp'
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
'bitstream_builder',
'bson_extract',
+ 'simple8b',
],
)
diff --git a/src/mongo/bson/util/simple8b.cpp b/src/mongo/bson/util/simple8b.cpp
new file mode 100644
index 00000000000..006fc22a3d2
--- /dev/null
+++ b/src/mongo/bson/util/simple8b.cpp
@@ -0,0 +1,115 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/bson/util/simple8b.h"
+
+namespace mongo {
+
+namespace {
+// Layout of a Simple8b word: the selector lives in the top 4 bits and the remaining 60 bits hold
+// the packed data.
+constexpr uint8_t _selectorSize = 4;
+constexpr uint8_t _dataSize = 60;
+constexpr uint64_t _selectorMask = 0xF000000000000000;
+
+// Smallest and largest selector values that describe a data-carrying block.
+constexpr uint8_t _minSelector = 2;
+constexpr uint8_t _maxSelector = 15;
+
+// Indexed by selector: the width, in bits, of each integer stored in the block.
+constexpr uint8_t _selectorForBitsPerInteger[16] = {
+    0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60};
+
+// Indexed by selector: the number of integers stored in the block.
+constexpr uint8_t _selectorForIntegersCoded[16] = {
+    240, 120, 60, 30, 20, 15, 12, 10, 8, 7, 6, 5, 4, 3, 2, 1};
+
+// Indexed by selector: a mask covering exactly one stored integer, i.e. 2^bitsPerInteger - 1.
+constexpr uint64_t _selectorForMask[16] = {0x0,
+                                           0x0,
+                                           0x1,
+                                           0x3,
+                                           0x7,
+                                           0xF,
+                                           0x1F,
+                                           0x3F,
+                                           0x7F,
+                                           0xFF,
+                                           0x3FF,
+                                           0xFFF,
+                                           0x7FFF,
+                                           0xFFFFF,
+                                           0x3FFFFFFF,
+                                           0x0FFFFFFFFFFFFFFF};
+
+}  // namespace
+
+// Packs 'values' into one Simple8b word. The selector is written to the top 4 bits and the values
+// fill the 60 data bits from the most significant end downwards, so any bits the selector leaves
+// unused end up at the least significant end.
+//
+// Returns errCode when the selector lies outside [_minSelector, _maxSelector], when fewer values
+// are supplied than the selector's block holds, or when a value needs more bits than the selector
+// allots per integer. Values beyond the block's capacity are ignored.
+uint64_t Simple8b::encodeSimple8b(uint8_t selector, const std::vector<uint64_t>& values) {
+    if (selector > _maxSelector || selector < _minSelector)
+        return errCode;
+
+    const uint8_t bitsPerInteger = _selectorForBitsPerInteger[selector];
+    const uint8_t integersCoded = _selectorForIntegersCoded[selector];
+    const uint64_t valueMask = _selectorForMask[selector];
+
+    // Every slot is read from 'values' below, so the caller must have supplied enough of them.
+    if (values.size() < integersCoded)
+        return errCode;
+
+    uint64_t encodedWord = static_cast<uint64_t>(selector) << _dataSize;
+
+    for (uint8_t i = 0; i < integersCoded; ++i) {
+        // A value wider than its slot would spill into the neighbouring slot or the selector.
+        if (values[i] > valueMask)
+            return errCode;
+
+        const uint8_t shiftSize = _dataSize - bitsPerInteger * (i + 1);
+        encodedWord |= values[i] << shiftSize;
+    }
+
+    return encodedWord;
+}
+
+
+// Unpacks a Simple8b word into the integers it stores, most significant slot first. Returns an
+// empty vector when the word's selector is below _minSelector.
+std::vector<uint64_t> Simple8b::decodeSimple8b(const uint64_t simple8bWord) {
+    std::vector<uint64_t> values;
+
+    const uint8_t selector = (simple8bWord & _selectorMask) >> _dataSize;
+    if (selector < _minSelector)
+        return values;
+
+    const uint8_t bitsPerInteger = _selectorForBitsPerInteger[selector];
+    const uint8_t integersCoded = _selectorForIntegersCoded[selector];
+    const uint64_t valueMask = _selectorForMask[selector];
+
+    // When the slots do not fill all 60 data bits, the leftover bits sit at the least significant
+    // end and must be skipped. Computing the count from the tables (4 for selectors 8 and 9, 0 for
+    // the others) avoids a separate hard-coded list of selectors.
+    const uint8_t unusedBits = _dataSize - bitsPerInteger * integersCoded;
+
+    values.reserve(integersCoded);
+    for (int8_t i = integersCoded - 1; i >= 0; --i) {
+        const uint8_t startIdx = unusedBits + bitsPerInteger * i;
+        values.push_back((simple8bWord >> startIdx) & valueMask);
+    }
+
+    return values;
+}
+
+} // namespace mongo
diff --git a/src/mongo/bson/util/simple8b.h b/src/mongo/bson/util/simple8b.h
new file mode 100644
index 00000000000..bc9df2c954e
--- /dev/null
+++ b/src/mongo/bson/util/simple8b.h
@@ -0,0 +1,65 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <cmath>
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+
+namespace mongo {
+
+/**
+ * As of now, Simple8b is a class of static helpers that pack a selector value and a series of
+ * unsigned integers into a single 64 bit Simple8b word, and unpack such a word again.
+ */
+class Simple8b {
+public:
+    // A valid Simple8b word should never have a selector value of 1, so the word with selector 1
+    // and no data bits set is used as the error code. It is constexpr so that, under C++17, the
+    // member is implicitly inline and may be bound to a reference without a separate definition.
+    static constexpr uint64_t errCode = 0x1000000000000000;
+
+    /**
+     * encodeSimple8b compresses a selector and a vector of integers into one 64 bit word.
+     * The values are stored from left to right (most significant data bits first); if the
+     * selector leaves bits unused, they are the rightmost bits of the word.
+     * All values are unsigned. The caller must supply at least as many values as the selector's
+     * block holds, and each value must fit in the selector's bit width.
+     * Returns the encoded Simple8b word if the inputs are valid and errCode otherwise.
+     */
+    static uint64_t encodeSimple8b(uint8_t selector, const std::vector<uint64_t>& values);
+
+    /**
+     * decodeSimple8b decodes a Simple8b word into a vector of integers, in the order in which they
+     * were stored. The returned vector is empty only when the word's selector is invalid (0 or 1).
+     */
+    static std::vector<uint64_t> decodeSimple8b(uint64_t simple8bWord);
+};
+
+} // namespace mongo
diff --git a/src/mongo/bson/util/simple8b_test.cpp b/src/mongo/bson/util/simple8b_test.cpp
new file mode 100644
index 00000000000..b16166ba45d
--- /dev/null
+++ b/src/mongo/bson/util/simple8b_test.cpp
@@ -0,0 +1,119 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <vector>
+
+#include "mongo/bson/util/simple8b.h"
+#include "mongo/unittest/unittest.h"
+
+using namespace mongo;
+
+// Asserts that both vectors have the same length and hold equal elements at every index.
+// The vectors are taken by const reference so that calling the helper does not copy them.
+void areVectorsEqual(const std::vector<uint64_t>& actualVector,
+                     const std::vector<uint64_t>& expectedVector) {
+    ASSERT_EQ(actualVector.size(), expectedVector.size());
+
+    // The sizes were asserted equal above, so indexing both vectors by i stays in bounds.
+    for (std::size_t i = 0; i < actualVector.size(); ++i) {
+        ASSERT_EQ(actualVector[i], expectedVector[i]);
+    }
+}
+
+// Encoding the single value 1 with selector 15 sets the selector nibble and the lowest data bit.
+TEST(Simple8b, EncodeOneValueTest) {
+    const std::vector<uint64_t> input{1};
+    const uint64_t encoded = Simple8b::encodeSimple8b(15, input);
+    ASSERT_EQUALS(encoded, 0xF000000000000001);
+}
+
+// A word with selector 15 whose data bits equal 1 decodes to the single value 1.
+TEST(Simple8b, DecodeOneValueTest) {
+    const std::vector<uint64_t> expected{1};
+    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(0xF000000000000001);
+    areVectorsEqual(decoded, expected);
+}
+
+// Selector 13 packs three values; they are laid out from the most significant data bits down.
+TEST(Simple8b, EncodeMultipleValuesTest) {
+    const std::vector<uint64_t> input{1, 2, 3};
+    const uint64_t encoded = Simple8b::encodeSimple8b(13, input);
+    ASSERT_EQUALS(encoded, 0xD000010000200003);
+}
+
+// Decoding a selector 13 word returns its three values in the order they were stored.
+TEST(Simple8b, DecodeMultipleValuesTest) {
+    const std::vector<uint64_t> expected{1, 2, 3};
+    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(0xD000010000200003);
+    areVectorsEqual(decoded, expected);
+}
+
+// Selector 2 stores sixty 1 bit values, so sixty ones set every data bit of the word.
+TEST(Simple8b, EncodeMaxValuesTest) {
+    const std::vector<uint64_t> input(60, 1);
+    const uint64_t encoded = Simple8b::encodeSimple8b(2, input);
+    ASSERT_EQUALS(encoded, 0x2FFFFFFFFFFFFFFF);
+}
+
+// A selector 2 word with every data bit set decodes to sixty values, each equal to 1.
+TEST(Simple8b, DecodeMaxValuesTest) {
+    const std::vector<uint64_t> expected(60, 1);
+    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(0x2FFFFFFFFFFFFFFF);
+    areVectorsEqual(decoded, expected);
+}
+
+// Selector 9 stores seven 8 bit values (56 bits), leaving the lowest 4 data bits unused.
+TEST(Simple8b, EncodeWithTrailingDirtyBitsTest) {
+    const std::vector<uint64_t> input(7, 1);
+    const uint64_t encoded = Simple8b::encodeSimple8b(9, input);
+    ASSERT_EQUALS(encoded, 0x9010101010101010);  // The trailing 4 bits are unused.
+}
+
+// Decoding a selector 9 word skips the 4 unused trailing bits and yields seven values of 1.
+TEST(Simple8b, DecodeWithTrailingDirtyBitsTest) {
+    const std::vector<uint64_t> expected(7, 1);
+    // The trailing 4 bits of this word are unused.
+    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(0x9010101010101010);
+    areVectorsEqual(decoded, expected);
+}
+
+// Selectors 0 and 1 are both below the smallest encodable selector, so each must produce the
+// error code. Selector 1 is checked explicitly because it is the case this test is named after.
+TEST(Simple8b, InvalidEncodeOneSelectorTest) {
+    const std::vector<uint64_t> values = {};
+    ASSERT_EQUALS(Simple8b::encodeSimple8b(0, values), 0x1000000000000000);
+    ASSERT_EQUALS(Simple8b::encodeSimple8b(1, values), 0x1000000000000000);
+}
+
+// Selector 16 is above the largest selector, so encoding must produce the error code.
+TEST(Simple8b, InvalidEncodeSixteenSelectorTest) {
+    const std::vector<uint64_t> noValues{};
+    const uint64_t encoded = Simple8b::encodeSimple8b(16, noValues);
+    ASSERT_EQUALS(encoded, 0x1000000000000000);
+}
+
+// A word whose selector is 1 is invalid, so decoding it yields no values.
+TEST(Simple8b, InvalidDecodeOneSelectorTest) {
+    const std::vector<uint64_t> decoded = Simple8b::decodeSimple8b(0x1000000000000000);
+    ASSERT_EQUALS(decoded.size(), 0);
+}