summaryrefslogtreecommitdiff
path: root/src/mongo/bson/util
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2021-11-16 14:46:14 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-11-16 15:27:27 +0000
commit718cd16474ad9e75e6ab4b553554d16dce80b34f (patch)
tree6596ddf1b66946d31ea5032207464997cc35c9f0 /src/mongo/bson/util
parent57e351357ea73450df84b905674b127508aeb064 (diff)
downloadmongo-718cd16474ad9e75e6ab4b553554d16dce80b34f.tar.gz
SERVER-61502 Only allow RLE on value 0 after flush
Diffstat (limited to 'src/mongo/bson/util')
-rw-r--r--src/mongo/bson/util/simple8b.cpp4
-rw-r--r--src/mongo/bson/util/simple8b_test.cpp97
2 files changed, 100 insertions, 1 deletions
diff --git a/src/mongo/bson/util/simple8b.cpp b/src/mongo/bson/util/simple8b.cpp
index 3dc77036e34..8105b247cfb 100644
--- a/src/mongo/bson/util/simple8b.cpp
+++ b/src/mongo/bson/util/simple8b.cpp
@@ -458,8 +458,10 @@ void Simple8bBuilder<T>::flush() {
// There are no more words in _pendingValues and RLE is possible.
// However the _rleCount is 0 because we have not read any of the values in the next word.
_rleCount = 0;
- _lastValueInPrevWord = {};
}
+
+ // Always reset _lastValueInPrevWord. After a flush, RLE may only start on the value 0.
+ _lastValueInPrevWord = {};
}
template <typename T>
diff --git a/src/mongo/bson/util/simple8b_test.cpp b/src/mongo/bson/util/simple8b_test.cpp
index 361196a21de..222aa7e5bdc 100644
--- a/src/mongo/bson/util/simple8b_test.cpp
+++ b/src/mongo/bson/util/simple8b_test.cpp
@@ -1193,6 +1193,103 @@ TEST(Simple8b, RleFlushResetsPossibleSelectors) {
assertValuesEqual(s8b, expectedInts);
}
+TEST(Simple8b, FlushResetsLastInPreviousWhenFlushingRle) {
+ BufBuilder buffer;
+ Simple8bBuilder<uint64_t> builder([&buffer](uint64_t simple8bBlock) {
+ buffer.appendNum(simple8bBlock);
+ return true;
+ });
+
+ // Append 150 1s and flush. Expected output: one word holding 30 1s followed by one RLE word.
+ // This checks that the last written value is reset when an RLE word is the last thing flushed.
+ for (int i = 0; i < 150; ++i) {
+ ASSERT_TRUE(builder.append(1));
+ }
+ builder.flush();
+
+ // The last written value is only used for RLE, so append 120 more 1s and make sure this does
+ // _NOT_ start RLE, as a flush occurred in between. Four plain words of 30 1s are expected.
+ for (int i = 0; i < 120; ++i) {
+ ASSERT_TRUE(builder.append(1));
+ }
+ builder.flush();
+
+ auto size = buffer.len();
+ auto sharedBuffer = buffer.release();
+
+ std::vector<uint8_t> simple8bBlockThirty1s = {0x52, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
+ std::vector<uint8_t> simple8bBlockRLE = {0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+ std::vector<uint8_t> expectedBinary = simple8bBlockThirty1s;
+ expectedBinary.insert(expectedBinary.end(), simple8bBlockRLE.begin(), simple8bBlockRLE.end());
+ for (int i = 0; i < 4; ++i) {
+ expectedBinary.insert(
+ expectedBinary.end(), simple8bBlockThirty1s.begin(), simple8bBlockThirty1s.end());
+ }
+
+ ASSERT_EQ(size, expectedBinary.size());
+ ASSERT_EQ(memcmp(sharedBuffer.get(), expectedBinary.data(), size), 0);
+
+ Simple8b<uint64_t> s8b(sharedBuffer.get(), size);
+ assertValuesEqual(s8b, std::vector<boost::optional<uint64_t>>(270, 1));
+}
+
+TEST(Simple8b, FlushResetsLastInPreviousWhenFlushingRleZeroRleAfter) {
+ BufBuilder buffer;
+ Simple8bBuilder<uint64_t> builder([&buffer](uint64_t simple8bBlock) {
+ buffer.appendNum(simple8bBlock);
+ return true;
+ });
+
+ // Append 150 1s and flush. Expected output: one word holding 30 1s followed by one RLE word.
+ // This checks that the last written value is reset when an RLE word is the last thing flushed.
+ for (int i = 0; i < 150; ++i) {
+ ASSERT_TRUE(builder.append(1));
+ }
+ builder.flush();
+ auto sizeAfterFlush = buffer.len();
+
+ // Append 120 0s after the flush. They should be encoded as a single RLE block.
+ for (int i = 0; i < 120; ++i) {
+ ASSERT_TRUE(builder.append(0));
+ }
+ builder.flush();
+
+ auto size = buffer.len();
+ auto sharedBuffer = buffer.release();
+
+ std::vector<uint8_t> simple8bBlockThirty1s = {0x52, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
+ std::vector<uint8_t> simple8bBlockRLE = {0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+ std::vector<uint8_t> expectedBinary = simple8bBlockThirty1s;
+ for (int i = 0; i < 2; ++i) {
+ expectedBinary.insert(
+ expectedBinary.end(), simple8bBlockRLE.begin(), simple8bBlockRLE.end());
+ }
+
+ ASSERT_EQ(size, expectedBinary.size());
+ ASSERT_EQ(memcmp(sharedBuffer.get(), expectedBinary.data(), size), 0);
+
+ {
+ // Reading the whole binary as one stream interprets every value as 1, because we wrote an
+ // RLE block immediately after a block containing 1 values.
+ Simple8b<uint64_t> s8b(sharedBuffer.get(), size);
+ assertValuesEqual(s8b, std::vector<boost::optional<uint64_t>>(270, 1));
+ }
+
+ // In practice the binary is split into two parts, and the second part is initialized with the
+ // previous value that tells it how its RLE block should be interpreted.
+ {
+ Simple8b<uint64_t> s8b(sharedBuffer.get(), sizeAfterFlush);
+ assertValuesEqual(s8b, std::vector<boost::optional<uint64_t>>(150, 1));
+ }
+ {
+ Simple8b<uint64_t> s8b(
+ sharedBuffer.get() + sizeAfterFlush, size - sizeAfterFlush, 0 /* previous */);
+ assertValuesEqual(s8b, std::vector<boost::optional<uint64_t>>(120, 0));
+ }
+}
+
TEST(Simple8b, EightSelectorLargeMax) {
// Selector 8 value
// 1111 + 124 zeros