summaryrefslogtreecommitdiff
path: root/src/mongo/bson/util
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2021-08-11 11:17:19 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-08-24 02:41:29 +0000
commit2873b99f1f7477d7053504dcd3f0caa68745f506 (patch)
treef2fc0df393b67a118b3c3203df2b6edd2ef7e7ff /src/mongo/bson/util
parenta241a584d81ade104f7a2e37ca7edfdc974a47cc (diff)
downloadmongo-2873b99f1f7477d7053504dcd3f0caa68745f506.tar.gz
SERVER-58577 Add BSONColumn to interpret BSON Binary Subtype 7 data.
BSONColumn provides forward iteration over BSON Binary Subtype 7 data Decompressed BSONElement from deltas need to be re-materialized. BSONColumn manage this memory and any BSONElement reference has the same lifetime as BSONColumn.
Diffstat (limited to 'src/mongo/bson/util')
-rw-r--r--src/mongo/bson/util/SConscript1
-rw-r--r--src/mongo/bson/util/bsoncolumn.cpp465
-rw-r--r--src/mongo/bson/util/bsoncolumn.h223
-rw-r--r--src/mongo/bson/util/bsoncolumn_test.cpp568
-rw-r--r--src/mongo/bson/util/bsoncolumn_util.cpp64
-rw-r--r--src/mongo/bson/util/bsoncolumn_util.h45
-rw-r--r--src/mongo/bson/util/bsoncolumnbuilder.cpp84
-rw-r--r--src/mongo/bson/util/bsoncolumnbuilder.h1
-rw-r--r--src/mongo/bson/util/simple8b.cpp25
-rw-r--r--src/mongo/bson/util/simple8b.h6
-rw-r--r--src/mongo/bson/util/simple8b_type_util.cpp33
-rw-r--r--src/mongo/bson/util/simple8b_type_util.h2
-rw-r--r--src/mongo/bson/util/simple8b_type_util_test.cpp10
13 files changed, 1347 insertions, 180 deletions
diff --git a/src/mongo/bson/util/SConscript b/src/mongo/bson/util/SConscript
index f5fb665919d..7be350eb69d 100644
--- a/src/mongo/bson/util/SConscript
+++ b/src/mongo/bson/util/SConscript
@@ -19,6 +19,7 @@ env.Library(
source=[
'bsoncolumn.cpp',
'bsoncolumnbuilder.cpp',
+ 'bsoncolumn_util.cpp',
'simple8b.cpp',
'simple8b_type_util.cpp',
],
diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp
index a0e48c1fbfa..4ad5f16738c 100644
--- a/src/mongo/bson/util/bsoncolumn.cpp
+++ b/src/mongo/bson/util/bsoncolumn.cpp
@@ -28,3 +28,468 @@
*/
#include "mongo/bson/util/bsoncolumn.h"
+#include "mongo/bson/util/bsoncolumn_util.h"
+#include "mongo/bson/util/simple8b_type_util.h"
+
+#include <algorithm>
+
+namespace mongo {
+using namespace bsoncolumn;
+
+namespace {
+// Start capacity for memory blocks allocated by ElementStorage
+constexpr int kStartCapacity = 128;
+
+// Max capacity for memory blocks allocated by ElementStorage
+constexpr int kMaxCapacity = 1024 * 32;
+
+// Memory offset to get to BSONElement value when field name is an empty string.
+constexpr int kElementValueOffset = 2;
+
+// Sentinel to indicate end index for BSONColumn Iterator.
+constexpr size_t kEndIndex = 0xFFFFFFFFFFFFFFFF;
+
+// Lookup table to go from Control byte (high 4 bits) to scale index.
+constexpr uint8_t kInvalidScaleIndex = 0xFF;
+constexpr std::array<uint8_t, 16> kControlToScaleIndex = {
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ kInvalidScaleIndex,
+ Simple8bTypeUtil::kMemoryAsInteger, // 0b1000
+ 0, // 0b1001
+ 1, // 0b1010
+ 2, // 0b1011
+ 3, // 0b1100
+ 4, // 0b1101
+ kInvalidScaleIndex,
+ kInvalidScaleIndex};
+
+} // namespace
+
+BSONColumn::ElementStorage::Element::Element(char* buffer, int valueSize)
+ : _buffer(buffer), _valueSize(valueSize) {}
+
+char* BSONColumn::ElementStorage::Element::value() {
+ // Skip over type byte and null terminator for field name
+ return _buffer + kElementValueOffset;
+}
+
+int BSONColumn::ElementStorage::Element::size() const {
+ return _valueSize;
+}
+
+BSONElement BSONColumn::ElementStorage::Element::element() const {
+ return {_buffer, 1, _valueSize + kElementValueOffset, BSONElement::CachedSizeTag{}};
+}
+
+BSONColumn::ElementStorage::Element BSONColumn::ElementStorage::allocate(BSONType type,
+ int valueSize) {
+ // Size needed for this BSONElement
+ int size = valueSize + kElementValueOffset;
+
+ // If current block don't have enough capacity we need to allocate a new one
+ if (_capacity - _pos < size) {
+ // Keep track of current block if it exists.
+ if (_block) {
+ _blocks.push_back(std::move(_block));
+ }
+
+ // Double block size while keeping it within [kStartCapacity, kMaxCapacity] range, unless a
+ // size larger than kMaxCapacity is requested.
+ _capacity = std::max(std::clamp(_capacity * 2, kStartCapacity, kMaxCapacity), size);
+ _block = std::make_unique<char[]>(_capacity);
+ _pos = 0;
+ }
+
+ // Write type and null terminator in the first two bytes
+ _block[_pos] = type;
+ _block[_pos + 1] = '\0';
+
+ // Construct the Element, current block will have enough size at this point
+ Element elem(_block.get() + _pos, valueSize);
+
+ // Increment the used size and return
+ _pos += size;
+ return elem;
+}
+
+BSONColumn::Iterator::Iterator(BSONColumn& column, const char* pos, const char* end)
+ : _column(column), _control(pos), _end(end) {}
+
+BSONColumn::Iterator& BSONColumn::Iterator::operator++() {
+ // We need to setup iterator state even if this is not the first time we iterate in case we need
+ // to decompress elements further along
+ ++_index;
+
+ // Get reference to last non-skipped element. Needed to apply delta.
+ const auto& lastVal = _column._decompressed.at(_lastValueIndex);
+
+ // Traverse current Simple8b block for 64bit values if it exists
+ if (_decoder64 && ++_decoder64->pos != _decoder64->end) {
+ _loadDelta(lastVal, *_decoder64->pos);
+ return *this;
+ }
+
+ // Traverse current Simple8b block for 128bit values if it exists
+ if (_decoder128 && ++_decoder128->pos != _decoder128->end) {
+ _loadDelta(lastVal, *_decoder128->pos);
+ return *this;
+ }
+
+ // We don't have any more delta values in current block so we need to move to the next control
+ // byte.
+ if (_literal(*_control)) {
+ // If we were positioned on a literal, move to the byte after
+ _control += lastVal.size();
+ } else {
+ // If we were positioned on Simple-8b blocks, move to the byte after then
+ uint8_t blocks = (*_control & 0x0F) + 1;
+ _control += sizeof(uint64_t) * blocks + 1;
+ }
+
+ // Validate that we are not reading out of bounds
+ uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _control < _end);
+
+ // Load new control byte
+ _loadControl(lastVal);
+
+ return *this;
+}
+
+BSONColumn::Iterator BSONColumn::Iterator::operator++(int) {
+ auto ret = *this;
+ operator++();
+ return ret;
+}
+
+bool BSONColumn::Iterator::operator==(const Iterator& rhs) const {
+ return _index == rhs._index;
+}
+bool BSONColumn::Iterator::operator!=(const Iterator& rhs) const {
+ return !operator==(rhs);
+}
+
+void BSONColumn::Iterator::_loadControl(const BSONElement& prev) {
+ // Load current control byte, it can be either a literal or Simple-8b deltas
+ uint8_t control = *_control;
+ if (_literal(control)) {
+ // If we detect EOO we are done, set values to be the same as an end iterator
+ if (control == EOO) {
+ _control += 1;
+ _index = kEndIndex;
+ _column._fullyDecompressed = true;
+ return;
+ }
+
+ // Load BSONElement from the literal and set last encoded in case we need to calculate
+ // deltas from this literal
+ BSONElement literalElem(_control, 1, -1, BSONElement::CachedSizeTag{});
+ switch (literalElem.type()) {
+ case BinData: {
+ int size;
+ const char* binary = literalElem.binData(size);
+ if (size <= 16) {
+ _lastEncodedValue128 = Simple8bTypeUtil::encodeBinary(binary, size);
+ }
+ break;
+ }
+ case jstOID:
+ _lastEncodedValue64 = Simple8bTypeUtil::encodeObjectId(literalElem.__oid());
+ break;
+ case Date:
+ _lastEncodedValue64 = literalElem.date().toMillisSinceEpoch();
+ break;
+ case Bool:
+ _lastEncodedValue64 = literalElem.boolean();
+ break;
+ case NumberInt:
+ _lastEncodedValue64 = literalElem._numberInt();
+ break;
+ case NumberLong:
+ _lastEncodedValue64 = literalElem._numberLong();
+ break;
+ case bsonTimestamp:
+ _lastEncodedValue64 = 0;
+ _lastEncodedValueForDeltaOfDelta = literalElem.timestamp().asULL();
+ break;
+ case NumberDecimal:
+ _lastEncodedValue128 =
+ Simple8bTypeUtil::encodeDecimal128(literalElem._numberDecimal());
+ break;
+ default:
+ break;
+ };
+
+ // Store at and and reset any previous decoders
+ _storeElementIfNeeded(literalElem);
+
+ _lastValueIndex = _index;
+ _decoder64 = boost::none;
+ _decoder128 = boost::none;
+
+ // Remember index to last literal to speed up "random access".
+ if (_column._indexLastLiteral < _index) {
+ _column._controlLastLiteral = _control;
+ _column._indexLastLiteral = _index;
+ }
+
+ return;
+ }
+
+ // Simple-8b delta block, load its scale factor and validate for sanity
+ _scaleIndex = kControlToScaleIndex[(control & 0xF0) >> 4];
+ uassert(ErrorCodes::BadValue,
+ "Invalid control byte in BSON Column",
+ _scaleIndex != kInvalidScaleIndex);
+
+ // If Double, scale last value according to this scale factor
+ auto type = prev.type();
+ if (type == NumberDouble) {
+ auto encoded = Simple8bTypeUtil::encodeDouble(prev._numberDouble(), _scaleIndex);
+ uassert(ErrorCodes::BadValue, "Invalid double encoding in BSON Column", encoded);
+ _lastEncodedValue64 = *encoded;
+ }
+
+ // Setup decoder for this range of Simple-8b blocks
+ uint8_t blocks = _numSimple8bBlocks(control);
+ auto size = sizeof(uint64_t) * blocks;
+ uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _control + size + 1 < _end);
+
+ // Instantiate decoder and load first value, every Simple-8b block should have at least one
+ // value
+ if (!uses128bit(type)) {
+ _decoder64.emplace(_control + 1, size);
+ _loadDelta(prev, *_decoder64->pos);
+ } else {
+ _decoder128.emplace(_control + 1, size);
+ _loadDelta(prev, *_decoder128->pos);
+ }
+}
+
+void BSONColumn::Iterator::_loadDelta(const BSONElement& prev,
+ const boost::optional<uint64_t>& delta) {
+ // boost::none represent skip, just append EOO BSONElement.
+ if (!delta) {
+ _storeElementIfNeeded(BSONElement());
+ return;
+ }
+
+ // We have an actual value, remember this index for future delta lookups.
+ _lastValueIndex = _index;
+ BSONType type = prev.type();
+
+ // If we have a zero delta no need to allocate a new Element, we can just use previous.
+ bool deltaOfDelta = usesDeltaOfDelta(type);
+ if (!deltaOfDelta && *delta == 0) {
+ _storeElementIfNeeded(prev);
+ return;
+ }
+
+ // Expand delta or delta-of-delta as last encoded.
+ _lastEncodedValue64 = expandDelta(_lastEncodedValue64, Simple8bTypeUtil::decodeInt64(*delta));
+ if (deltaOfDelta) {
+ _lastEncodedValueForDeltaOfDelta =
+ expandDelta(_lastEncodedValueForDeltaOfDelta, _lastEncodedValue64);
+ }
+
+ // Decoder state is now setup, no need to create BSONElement if already exist decompressed
+ if (!_needStoreElement()) {
+ return;
+ }
+
+ // Allocate a new BSONElement that fits same value size as previous
+ ElementStorage::Element elem = _column._elementStorage.allocate(type, prev.valuesize());
+
+ // Write value depending on type
+ switch (type) {
+ case NumberDouble:
+ DataView(elem.value())
+ .write<LittleEndian<double>>(
+ Simple8bTypeUtil::decodeDouble(_lastEncodedValue64, _scaleIndex));
+ break;
+ case jstOID: {
+ Simple8bTypeUtil::decodeObjectIdInto(
+ elem.value(), _lastEncodedValue64, prev.__oid().getInstanceUnique());
+ } break;
+ case Date:
+ case NumberLong:
+ DataView(elem.value()).write<LittleEndian<long long>>(_lastEncodedValue64);
+ break;
+ case Bool:
+ DataView(elem.value()).write<LittleEndian<char>>(_lastEncodedValue64);
+ break;
+ case NumberInt:
+ DataView(elem.value()).write<LittleEndian<int>>(_lastEncodedValue64);
+ break;
+ case bsonTimestamp: {
+ DataView(elem.value()).write<LittleEndian<long long>>(_lastEncodedValueForDeltaOfDelta);
+ } break;
+ default:
+ // unhandled for now
+ break;
+ }
+
+ // Append our written BSONElement to decompressed values
+ _column._decompressed.push_back(elem.element());
+}
+
+void BSONColumn::Iterator::_loadDelta(const BSONElement& prev,
+ const boost::optional<uint128_t>& delta) {
+ // boost::none represent skip, just append EOO BSONElement.
+ if (!delta) {
+ _storeElementIfNeeded(BSONElement());
+ return;
+ }
+
+ // We have an actual value, remember this index for future delta lookups.
+ _lastValueIndex = _index;
+ BSONType type = prev.type();
+
+ // If we have a zero delta no need to allocate a new Element, we can just use previous.
+ if (*delta == 0) {
+ _storeElementIfNeeded(prev);
+ return;
+ }
+
+ // Expand delta as last encoded.
+ _lastEncodedValue128 =
+ expandDelta(_lastEncodedValue128, Simple8bTypeUtil::decodeInt128(*delta));
+
+ // Decoder state is now setup, no need to create BSONElement if already exist decompressed
+ if (!_needStoreElement()) {
+ return;
+ }
+
+ // Allocate a new BSONElement that fits same value size as previous
+ ElementStorage::Element elem = _column._elementStorage.allocate(type, prev.valuesize());
+
+ // Write value depending on type
+ switch (type) {
+ case BinData:
+ // The first 5 bytes in binData is a count and subType, copy them from previous
+ memcpy(elem.value(), prev.value(), 5);
+ Simple8bTypeUtil::decodeBinary(
+ _lastEncodedValue128, elem.value() + 5, prev.valuestrsize());
+ break;
+ case NumberDecimal: {
+ Decimal128 d128 = Simple8bTypeUtil::decodeDecimal128(_lastEncodedValue128);
+ Decimal128::Value d128Val = d128.getValue();
+ DataView(elem.value()).write<LittleEndian<long long>>(d128Val.low64);
+ DataView(elem.value() + sizeof(long long))
+ .write<LittleEndian<long long>>(d128Val.high64);
+ break;
+ }
+ default:
+ // unhandled for now
+ break;
+ }
+
+ _column._decompressed.push_back(elem.element());
+}
+
+bool BSONColumn::Iterator::_needStoreElement() const {
+ return _index == _column._decompressed.size();
+}
+
+void BSONColumn::Iterator::_storeElementIfNeeded(BSONElement elem) {
+ if (_needStoreElement()) {
+ _column._decompressed.emplace_back(elem);
+ }
+}
+
+BSONColumn::BSONColumn(BSONElement bin) {
+ tassert(5857700,
+ "Invalid BSON type for column",
+ bin.type() == BSONType::BinData && bin.binDataType() == BinDataType::Column);
+ _binary = bin.binData(_size);
+ _controlLastLiteral = _binary;
+ uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _size > 0);
+}
+
+BSONColumn::Iterator BSONColumn::begin() {
+ Iterator it{*this, _binary, _binary + _size};
+ it._loadControl(BSONElement());
+ return it;
+}
+
+BSONColumn::Iterator BSONColumn::end() {
+ Iterator it{*this, _binary + _size, _binary + _size};
+ it._index = kEndIndex;
+ return it;
+}
+
+BSONElement BSONColumn::operator[](size_t index) {
+ // If index is already decompressed, we can just return the element
+ if (index < _decompressed.size()) {
+ return _decompressed[index];
+ }
+
+ // No more elements to be found if we are fully decompressed, return EOO
+ if (_fullyDecompressed)
+ return BSONElement();
+
+ // We can begin iterating from last known literal
+ Iterator it{*this, _controlLastLiteral, _binary + _size};
+ it._index = _indexLastLiteral;
+ it._loadControl(BSONElement()); // previous doesn't matter when we load literals
+
+ // Traverse until we reach desired index or end
+ auto e = end();
+ for (size_t i = _indexLastLiteral; it != e && i < index; ++it, ++i) {
+ }
+
+ // Return EOO if not found
+ if (it == e)
+ return BSONElement();
+
+ return *it;
+}
+
+size_t BSONColumn::size() const {
+ // Size is known when we are fully decompressed
+ if (_fullyDecompressed)
+ return _decompressed.size();
+
+ // We can begin iterating from last known literal
+ auto e = _binary + _size;
+ size_t num = _indexLastLiteral;
+
+ // Iterate as long as we are within the Column binary
+ const char* pos = _controlLastLiteral;
+ while (pos < e) {
+ uint8_t control = *pos;
+ if (Iterator::_literal(control)) {
+ // We are done at EOO
+ if (control == EOO) {
+ break;
+ }
+
+ // Count our literal and set our position to the byte after
+ ++num;
+ pos += BSONElement(pos, 1, -1, BSONElement::CachedSizeTag{}).size();
+ } else {
+ // Count elements in Simple-8b blocks, no need to decompress
+ uint8_t blocks = Iterator::_numSimple8bBlocks(control);
+ int blockSize = blocks * sizeof(uint64_t);
+ uassert(
+ ErrorCodes::BadValue, "Invalid BSON Column encoding", (pos + blockSize + 1) < e);
+ Simple8b<uint128_t> s8b(pos + 1, blockSize);
+ for (auto it = s8b.begin(), s8bEnd = s8b.end(); it != s8bEnd; it.advanceBlock()) {
+ num += it.blockSize();
+ }
+
+ // Set our position to the byte after the Simple-8b blocks
+ pos += blockSize + 1;
+ }
+ }
+
+ return num;
+}
+
+
+} // namespace mongo
diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h
index d3f38aaf98d..c43143521d1 100644
--- a/src/mongo/bson/util/bsoncolumn.h
+++ b/src/mongo/bson/util/bsoncolumn.h
@@ -28,3 +28,226 @@
*/
#pragma once
+
+#include "mongo/bson/bsonelement.h"
+#include "mongo/bson/util/simple8b.h"
+
+#include <deque>
+#include <vector>
+
+namespace mongo {
+
+/**
+ * The BSONColumn class represents a reference to a BSONElement of BinDataType 7, which can
+ * efficiently store any BSONArray and also allows for missing values. At a high level, two
+ * optimizations are applied:
+ * - implied field names: do not store decimal keys representing index keys.
+ * - delta compression using Simple-8b: store difference between subsequent scalars of the same
+ * type
+ *
+ * The BSONColumn will not take ownership of the BinData element, but otherwise implements
+ * an interface similar to BSONObj. Because iterators over the BSONColumn need to rematerialize
+ * deltas, they use additional storage owned by the BSONColumn for this. As all iterators will
+ * create new delta's in the same order, they share a single ElementStore, with a worst-case memory
+ * usage bounded to a total size on the order of that of the size of the expanded BSONColumn.
+ */
+class BSONColumn {
+public:
+ BSONColumn(BSONElement bin);
+
+ /**
+ * Forward iterator type to access BSONElement from BSONColumn.
+ *
+ * Default-constructed BSONElement (EOO type) represent missing value.
+ * Returned BSONElement are owned by BSONColumn instance and should not be kept after the
+ * BSONColumn instance goes out of scope.
+ */
+ class Iterator {
+ public:
+ friend class BSONColumn;
+
+ // typedefs expected in iterators
+ using iterator_category = std::forward_iterator_tag;
+ using difference_type = ptrdiff_t;
+ using value_type = BSONElement;
+ using pointer = const BSONElement*;
+ using reference = const BSONElement&;
+
+ reference operator*() const {
+ return _column._decompressed.at(_index);
+ }
+ pointer operator->() const {
+ return &operator*();
+ }
+
+ // pre-increment operator
+ Iterator& operator++();
+
+ // post-increment operator
+ Iterator operator++(int);
+
+ bool operator==(const Iterator& rhs) const;
+ bool operator!=(const Iterator& rhs) const;
+
+ private:
+ Iterator(BSONColumn& column, const char* pos, const char* end);
+
+ // Loads current control byte
+ void _loadControl(const BSONElement& prev);
+
+ // Loads delta value
+ void _loadDelta(const BSONElement& prev, const boost::optional<uint64_t>& delta);
+ void _loadDelta(const BSONElement& prev, const boost::optional<uint128_t>& delta);
+
+ // Helpers to determine if we need to store uncompressed element when advancing iterator
+ bool _needStoreElement() const;
+ void _storeElementIfNeeded(BSONElement elem);
+
+ // Checks if control byte is literal
+ static bool _literal(uint8_t control) {
+ return (control & 0xE0) == 0;
+ }
+
+ // Returns number of Simple-8b blocks from control byte
+ static uint8_t _numSimple8bBlocks(uint8_t control) {
+ return (control & 0x0F) + 1;
+ }
+
+ // Reference to BSONColumn this Iterator is created from
+ BSONColumn& _column;
+
+ // Current iterator position
+ size_t _index = 0;
+
+ // Last index observed with a non-skipped value
+ size_t _lastValueIndex = 0;
+
+ // Last encoded values used to calculate delta and delta-of-delta
+ int64_t _lastEncodedValue64 = 0;
+ int64_t _lastEncodedValueForDeltaOfDelta = 0;
+ int128_t _lastEncodedValue128 = 0;
+
+ // Current control byte on iterator position
+ const char* _control;
+
+ // End of BSONColumn memory block, we may not dereference any memory passed this.
+ const char* _end;
+
+ // Helper to create Simple8b decoding iterators for 64bit and 128bit value types.
+ template <typename T>
+ struct Decoder {
+ Decoder(const char* buf, size_t size)
+ : s8b(buf, size), pos(s8b.begin()), end(s8b.end()) {}
+
+ Simple8b<T> s8b;
+ typename Simple8b<T>::Iterator pos;
+ typename Simple8b<T>::Iterator end;
+ };
+
+ // Decoders, only one should be instantiated at a time.
+ boost::optional<Decoder<uint64_t>> _decoder64;
+ boost::optional<Decoder<uint128_t>> _decoder128;
+
+ // Current scale index
+ uint8_t _scaleIndex;
+ };
+
+ /**
+ * Forward iterator access.
+ *
+ * Iterator value is EOO
+ *
+ * Iterators materialize compressed BSONElement as they iterate over the compressed binary.
+ * It is NOT safe to do this from multiple threads concurrently.
+ *
+ * Throws BadValue if invalid encoding is encountered.
+ */
+ Iterator begin();
+ Iterator end();
+
+ /**
+ * Element lookup by index
+ *
+ * Returns EOO if index represent skipped element or is out of bounds.
+ * O(1) time complexity if element has been previously accessed
+ * O(N) time complexity otherwise
+ *
+ * Materializes compressed BSONElement as needed. It is NOT safe to do this from multiple
+ * threads concurrently.
+ *
+ * Throws BadValue if invalid encoding is encountered.
+ */
+ BSONElement operator[](size_t index);
+
+ /**
+ * Number of elements stored in this BSONColumn
+ *
+ * O(1) time complexity if BSONColumn is fully decompressed
+ * O(N) time complexity otherwise
+ *
+ * Throws BadValue if invalid encoding is encountered
+ */
+ size_t size() const;
+
+private:
+ /**
+ * BSONElement storage, owns materialised BSONElement returned by BSONColumn.
+ * Allocates memory in blocks which double in size as they grow.
+ */
+ class ElementStorage {
+ public:
+ class Element {
+ public:
+ Element(char* buffer, int valueSize);
+
+ /**
+ * Returns a pointer for writing a BSONElement value.
+ */
+ char* value();
+
+ /**
+ * Size for the pointer returned by value()
+ */
+ int size() const;
+
+ /**
+ * Constructs a BSONElement from the owned buffer.
+ */
+ BSONElement element() const;
+
+ private:
+ char* _buffer;
+ int _valueSize;
+ };
+
+ /**
+ * Allocates a BSONElement of provided type and value size. Field name is set to empty
+ * string.
+ */
+ Element allocate(BSONType type, int valueSize);
+
+ private:
+ // Full memory blocks that are kept alive.
+ std::vector<std::unique_ptr<char[]>> _blocks;
+
+ // Current memory block
+ std::unique_ptr<char[]> _block;
+
+ // Capacity of current memory block
+ int _capacity = 0;
+
+ // Position to first unused byte in current memory block
+ int _pos = 0;
+ };
+
+ std::deque<BSONElement> _decompressed;
+ ElementStorage _elementStorage;
+
+ const char* _binary;
+ int _size;
+
+ const char* _controlLastLiteral;
+ size_t _indexLastLiteral = 0;
+ bool _fullyDecompressed = false;
+};
+} // namespace mongo
diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp
index f1aa7228dfa..a1e790e46e9 100644
--- a/src/mongo/bson/util/bsoncolumn_test.cpp
+++ b/src/mongo/bson/util/bsoncolumn_test.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/bson/util/bsoncolumn.h"
#include "mongo/bson/util/bsoncolumnbuilder.h"
#include "mongo/bson/util/simple8b_type_util.h"
@@ -39,6 +40,13 @@ namespace {
class BSONColumnTest : public unittest::Test {
public:
+ BSONElement createBSONColumn(const char* buffer, int size) {
+ BSONObjBuilder ob;
+ ob.appendBinData(""_sd, size, BinDataType::Column, buffer);
+ _elementMemory.emplace_front(ob.obj());
+ return _elementMemory.front().firstElement();
+ }
+
template <typename T>
BSONElement _createElement(T val) {
BSONObjBuilder ob;
@@ -137,13 +145,21 @@ public:
return _elementMemory.front().firstElement();
}
- static uint128_t deltaBinData(BSONElement val, BSONElement prev) {
+ static boost::optional<uint128_t> deltaBinData(BSONElement val, BSONElement prev) {
if (val.binaryEqualValues(prev)) {
- return 0;
+ return uint128_t(0);
}
- return Simple8bTypeUtil::encodeInt128(
- Simple8bTypeUtil::encodeBinary(val.valuestr(), val.valuestrsize()) -
- Simple8bTypeUtil::encodeBinary(prev.valuestr(), prev.valuestrsize()));
+
+ int valSize;
+ int prevSize;
+ const char* valBinary = val.binData(valSize);
+ const char* prevBinary = prev.binData(prevSize);
+ if (valSize != prevSize || valSize > 16) {
+ return boost::none;
+ }
+
+ return Simple8bTypeUtil::encodeInt128(Simple8bTypeUtil::encodeBinary(valBinary, valSize) -
+ Simple8bTypeUtil::encodeBinary(prevBinary, prevSize));
}
static uint64_t deltaInt32(BSONElement val, BSONElement prev) {
@@ -318,6 +334,85 @@ public:
ASSERT_EQ(memcmp(columnBinary.data, buf, columnBinary.length), 0);
}
+ static void verifyDecompression(BSONBinData columnBinary,
+ const std::vector<BSONElement>& expected) {
+ BSONObjBuilder obj;
+ obj.append(""_sd, columnBinary);
+ BSONElement columnElement = obj.done().firstElement();
+
+ // Verify that we can traverse BSONColumn twice and extract values on the second pass
+ {
+ BSONColumn col(columnElement);
+ ASSERT_EQ(col.size(), expected.size());
+ ASSERT_EQ(std::distance(col.begin(), col.end()), expected.size());
+ ASSERT_EQ(col.size(), expected.size());
+
+ auto it = col.begin();
+ for (auto elem : expected) {
+ BSONElement other = *(it++);
+ ASSERT(elem.binaryEqualValues(other));
+ }
+ }
+
+ // Verify that we can traverse BSONColumn and extract values on the first pass
+ {
+ BSONColumn col(columnElement);
+
+ auto it = col.begin();
+ for (auto elem : expected) {
+ BSONElement other = *(it++);
+ ASSERT(elem.binaryEqualValues(other));
+ }
+ }
+
+ // Verify operator[] when accessing in order
+ {
+ BSONColumn col(columnElement);
+
+ for (size_t i = 0; i < expected.size(); ++i) {
+ ASSERT(expected[i].binaryEqualValues(col[i]));
+ }
+ }
+
+ // Verify operator[] when accessing in reverse order
+ {
+ BSONColumn col(columnElement);
+
+ for (int i = (int)expected.size() - 1; i >= 0; --i) {
+ ASSERT(expected[i].binaryEqualValues(col[i]));
+ }
+ }
+
+ // Verify that we can continue traverse with new iterators when we stop before end
+ {
+ BSONColumn col(columnElement);
+
+ for (size_t e = 0; e < expected.size(); ++e) {
+ auto it = col.begin();
+ for (size_t i = 0; i < e; ++i, ++it) {
+ ASSERT(expected[i].binaryEqualValues(*it));
+ }
+ ASSERT_EQ(col.size(), expected.size());
+ }
+ }
+
+ // Verify that we can have multiple iterators on the same thread
+ {
+ BSONColumn col(columnElement);
+
+ auto it1 = col.begin();
+ auto it2 = col.begin();
+ auto itEnd = col.end();
+
+ for (; it1 != itEnd && it2 != itEnd; ++it1, ++it2) {
+ ASSERT(it1->binaryEqualValues(*it2));
+ ASSERT_EQ(&*it1, &*it2); // Iterators should point to same reference
+ }
+
+ ASSERT(it1 == it2);
+ }
+ }
+
const boost::optional<uint64_t> kDeltaForBinaryEqualValues = Simple8bTypeUtil::encodeInt64(0);
private:
@@ -337,7 +432,9 @@ TEST_F(BSONColumnTest, BasicValue) {
appendSimple8bBlock64(expected, 0);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elem, elem});
}
TEST_F(BSONColumnTest, BasicSkip) {
@@ -353,7 +450,9 @@ TEST_F(BSONColumnTest, BasicSkip) {
appendSimple8bBlock64(expected, boost::none);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elem, BSONElement()});
}
TEST_F(BSONColumnTest, OnlySkip) {
@@ -366,7 +465,9 @@ TEST_F(BSONColumnTest, OnlySkip) {
appendSimple8bBlock64(expected, boost::none);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {BSONElement()});
}
TEST_F(BSONColumnTest, ValueAfterSkip) {
@@ -382,7 +483,9 @@ TEST_F(BSONColumnTest, ValueAfterSkip) {
appendLiteral(expected, elem);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {BSONElement(), elem});
}
@@ -400,7 +503,9 @@ TEST_F(BSONColumnTest, LargeDeltaIsLiteral) {
appendLiteral(expected, second);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second});
}
TEST_F(BSONColumnTest, LargeDeltaIsLiteralAfterSimple8b) {
@@ -423,7 +528,9 @@ TEST_F(BSONColumnTest, LargeDeltaIsLiteralAfterSimple8b) {
appendSimple8bBlock64(expected, deltaInt64(large, large));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {zero, zero, large, large});
}
TEST_F(BSONColumnTest, OverBlockCount) {
@@ -454,7 +561,9 @@ TEST_F(BSONColumnTest, OverBlockCount) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, TypeChangeAfterLiteral) {
@@ -471,7 +580,9 @@ TEST_F(BSONColumnTest, TypeChangeAfterLiteral) {
appendLiteral(expected, elemInt64);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, elemInt64});
}
TEST_F(BSONColumnTest, TypeChangeAfterSimple8b) {
@@ -491,7 +602,9 @@ TEST_F(BSONColumnTest, TypeChangeAfterSimple8b) {
appendLiteral(expected, elemInt64);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, elemInt32, elemInt64});
}
TEST_F(BSONColumnTest, Simple8bAfterTypeChange) {
@@ -511,38 +624,9 @@ TEST_F(BSONColumnTest, Simple8bAfterTypeChange) {
appendSimple8bBlock64(expected, 0);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
-}
-
-TEST_F(BSONColumnTest, Decimal128Base) {
- BSONColumnBuilder cb("test"_sd);
-
- auto elemDec128 = createElementDecimal128(Decimal128());
-
- cb.append(elemDec128);
-
- BufBuilder expected;
- appendLiteral(expected, elemDec128);
- appendEOO(expected);
-
- verifyBinary(cb.finalize(), expected);
-}
-
-TEST_F(BSONColumnTest, Decimal128Delta) {
- BSONColumnBuilder cb("test"_sd);
-
- auto elemDec128 = createElementDecimal128(Decimal128(1));
-
- cb.append(elemDec128);
- cb.append(elemDec128);
-
- BufBuilder expected;
- appendLiteral(expected, elemDec128);
- appendSimple8bControl(expected, 0b1000, 0b0000);
- appendSimple8bBlock128(expected, deltaDecimal128(elemDec128, elemDec128));
- appendEOO(expected);
-
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, elemInt64, elemInt64});
}
TEST_F(BSONColumnTest, BasicDouble) {
@@ -559,7 +643,9 @@ TEST_F(BSONColumnTest, BasicDouble) {
appendSimple8bBlock64(expected, deltaDouble(d2, d1, 1));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {d1, d2});
}
TEST_F(BSONColumnTest, DoubleSameScale) {
@@ -581,7 +667,9 @@ TEST_F(BSONColumnTest, DoubleSameScale) {
expected, deltaDouble(elems.begin() + 1, elems.end(), elems.front(), 1), 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleIncreaseScaleFromLiteral) {
@@ -598,7 +686,9 @@ TEST_F(BSONColumnTest, DoubleIncreaseScaleFromLiteral) {
appendSimple8bBlock64(expected, deltaDouble(d2, d1, 10));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {d1, d2});
}
TEST_F(BSONColumnTest, DoubleLiteralAndScaleAfterSkip) {
@@ -618,7 +708,9 @@ TEST_F(BSONColumnTest, DoubleLiteralAndScaleAfterSkip) {
appendSimple8bBlock64(expected, deltaDouble(d2, d1, 10));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {BSONElement(), d1, d2});
}
TEST_F(BSONColumnTest, DoubleIncreaseScaleFromLiteralAfterSkip) {
@@ -640,7 +732,9 @@ TEST_F(BSONColumnTest, DoubleIncreaseScaleFromLiteralAfterSkip) {
appendSimple8bBlocks64(expected, expectedVals, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {d1, BSONElement(), BSONElement(), d2});
}
TEST_F(BSONColumnTest, DoubleIncreaseScaleFromDeltaWithRescale) {
@@ -663,7 +757,9 @@ TEST_F(BSONColumnTest, DoubleIncreaseScaleFromDeltaWithRescale) {
expected, deltaDouble(elems.begin() + 1, elems.end(), elems.front(), 10), 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleIncreaseScaleFromDeltaNoRescale) {
@@ -693,7 +789,9 @@ TEST_F(BSONColumnTest, DoubleIncreaseScaleFromDeltaNoRescale) {
expected, deltaDouble(deltaEnd, elems.end(), *(deltaEnd - 1), 100000000), 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlock) {
@@ -722,7 +820,9 @@ TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlock) {
appendSimple8bBlocks64(expected, deltaDouble(deltaEnd, elems.end(), *(deltaEnd - 1), 1), 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlockUsingSkip) {
@@ -756,7 +856,9 @@ TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlockUsingSkip) {
appendSimple8bBlocks64(expected, deltaDouble(deltaEnd, elems.end(), *deltaBegin, 1), 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlockThenScaleBackUp) {
@@ -780,7 +882,9 @@ TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlockThenScaleBackUp) {
expected, deltaDouble(elems.begin() + 1, elems.end(), elems.front(), 100000000), 2);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlockUsingSkipThenScaleBackUp) {
@@ -808,7 +912,9 @@ TEST_F(BSONColumnTest, DoubleDecreaseScaleAfterBlockUsingSkipThenScaleBackUp) {
expected, deltaDouble(elems.begin() + 1, elems.end(), elems.front(), 100000000), 2);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleUnscalable) {
@@ -833,7 +939,9 @@ TEST_F(BSONColumnTest, DoubleUnscalable) {
appendSimple8bBlocks64(expected, expectedVals, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, elems);
}
TEST_F(BSONColumnTest, DoubleSignalingNaN) {
@@ -857,7 +965,9 @@ TEST_F(BSONColumnTest, DoubleSignalingNaN) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elem, nan});
}
TEST_F(BSONColumnTest, DoubleQuietNaN) {
@@ -879,7 +989,9 @@ TEST_F(BSONColumnTest, DoubleQuietNaN) {
}
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elem, nan});
}
TEST_F(BSONColumnTest, DoubleInfinity) {
@@ -901,7 +1013,9 @@ TEST_F(BSONColumnTest, DoubleInfinity) {
}
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elem, inf});
}
TEST_F(BSONColumnTest, DoubleDenorm) {
@@ -923,7 +1037,9 @@ TEST_F(BSONColumnTest, DoubleDenorm) {
}
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elem, denorm});
}
TEST_F(BSONColumnTest, DoubleIntegerOverflow) {
@@ -944,6 +1060,45 @@ TEST_F(BSONColumnTest, DoubleIntegerOverflow) {
appendSimple8bControl(expected, 0b1000, 0b0000);
appendSimple8bBlock64(expected, deltaDoubleMemory(e2, e1));
appendEOO(expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {e1, e2});
+}
+
+TEST_F(BSONColumnTest, Decimal128Base) {
+ BSONColumnBuilder cb("test"_sd);
+
+ auto elemDec128 = createElementDecimal128(Decimal128());
+
+ cb.append(elemDec128);
+
+ BufBuilder expected;
+ appendLiteral(expected, elemDec128);
+ appendEOO(expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemDec128});
+}
+
+TEST_F(BSONColumnTest, Decimal128Delta) {
+ BSONColumnBuilder cb("test"_sd);
+
+ auto elemDec128 = createElementDecimal128(Decimal128(1));
+
+ cb.append(elemDec128);
+ cb.append(elemDec128);
+
+ BufBuilder expected;
+ appendLiteral(expected, elemDec128);
+ appendSimple8bControl(expected, 0b1000, 0b0000);
+ appendSimple8bBlock128(expected, deltaDecimal128(elemDec128, elemDec128));
+ appendEOO(expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemDec128, elemDec128});
}
TEST_F(BSONColumnTest, DecimalNonZeroDelta) {
@@ -959,7 +1114,10 @@ TEST_F(BSONColumnTest, DecimalNonZeroDelta) {
appendSimple8bControl(expected, 0b1000, 0b0000);
appendSimple8bBlock128(expected, deltaDecimal128(elemDec128Max, elemDec128Zero));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemDec128Zero, elemDec128Max});
}
TEST_F(BSONColumnTest, DecimalMaxMin) {
@@ -975,7 +1133,10 @@ TEST_F(BSONColumnTest, DecimalMaxMin) {
appendSimple8bControl(expected, 0b1000, 0b0000);
appendSimple8bBlock128(expected, deltaDecimal128(elemDec128Max, elemDec128Zero));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemDec128Zero, elemDec128Max});
}
TEST_F(BSONColumnTest, DecimalMultiElement) {
@@ -999,7 +1160,11 @@ TEST_F(BSONColumnTest, DecimalMultiElement) {
deltaDecimal128(elemDec128One, elemDec128Zero)};
appendSimple8bBlocks128(expected, valuesToAppend, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(
+ binData, {elemDec128Zero, elemDec128Max, elemDec128Zero, elemDec128Zero, elemDec128One});
}
TEST_F(BSONColumnTest, DecimalMultiElementSkips) {
@@ -1027,7 +1192,17 @@ TEST_F(BSONColumnTest, DecimalMultiElementSkips) {
deltaDecimal128(elemDec128One, elemDec128Zero)};
appendSimple8bBlocks128(expected, valuesToAppend, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData,
+ {elemDec128Zero,
+ elemDec128Max,
+ BSONElement(),
+ BSONElement(),
+ elemDec128Zero,
+ elemDec128Zero,
+ elemDec128One});
}
TEST_F(BSONColumnTest, BasicObjectId) {
@@ -1051,7 +1226,9 @@ TEST_F(BSONColumnTest, BasicObjectId) {
appendSimple8bBlocks64(expected, expectedDeltas, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second, third});
}
TEST_F(BSONColumnTest, ObjectIdDifferentProcessUnique) {
@@ -1068,7 +1245,9 @@ TEST_F(BSONColumnTest, ObjectIdDifferentProcessUnique) {
appendLiteral(expected, second);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second});
}
TEST_F(BSONColumnTest, ObjectIdAfterChangeBack) {
@@ -1099,7 +1278,9 @@ TEST_F(BSONColumnTest, ObjectIdAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, elemInt32, first, second});
}
TEST_F(BSONColumnTest, Simple8bTimestamp) {
@@ -1120,7 +1301,9 @@ TEST_F(BSONColumnTest, Simple8bTimestamp) {
appendSimple8bBlocks64(expected, expectedDeltaOfDeltas, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second});
}
TEST_F(BSONColumnTest, Simple8bTimestampNegativeDeltaOfDelta) {
@@ -1142,7 +1325,9 @@ TEST_F(BSONColumnTest, Simple8bTimestampNegativeDeltaOfDelta) {
appendSimple8bBlocks64(expected, expectedDeltaOfDeltas, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, third});
}
TEST_F(BSONColumnTest, Simple8bTimestampAfterChangeBack) {
@@ -1172,7 +1357,9 @@ TEST_F(BSONColumnTest, Simple8bTimestampAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, elemInt32, first, second});
}
TEST_F(BSONColumnTest, LargeDeltaOfDeltaTimestamp) {
@@ -1189,7 +1376,9 @@ TEST_F(BSONColumnTest, LargeDeltaOfDeltaTimestamp) {
appendLiteral(expected, second);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second});
}
TEST_F(BSONColumnTest, LargeDeltaOfDeltaIsLiteralAfterSimple8bTimestamp) {
@@ -1218,7 +1407,9 @@ TEST_F(BSONColumnTest, LargeDeltaOfDeltaIsLiteralAfterSimple8bTimestamp) {
appendSimple8bBlocks64(expected, expectedDeltaOfDeltas, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {zero, zero, large, large, semiLarge});
}
TEST_F(BSONColumnTest, DateBasic) {
@@ -1238,7 +1429,9 @@ TEST_F(BSONColumnTest, DateBasic) {
_appendSimple8bBlocks(expected, expectedDeltaOfDeltas, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second});
}
TEST_F(BSONColumnTest, DateAfterChangeBack) {
@@ -1259,7 +1452,9 @@ TEST_F(BSONColumnTest, DateAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, date, date});
}
TEST_F(BSONColumnTest, DateLargeDelta) {
@@ -1276,7 +1471,9 @@ TEST_F(BSONColumnTest, DateLargeDelta) {
appendLiteral(expected, second);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second});
}
TEST_F(BSONColumnTest, BoolBasic) {
@@ -1298,7 +1495,9 @@ TEST_F(BSONColumnTest, BoolBasic) {
_appendSimple8bBlocks(expected, expectedDeltaOfDeltas, 1);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {trueBson, trueBson, falseBson, trueBson});
}
TEST_F(BSONColumnTest, BoolAfterChangeBack) {
@@ -1319,7 +1518,9 @@ TEST_F(BSONColumnTest, BoolAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, trueBson, trueBson});
}
TEST_F(BSONColumnTest, UndefinedBasic) {
@@ -1335,7 +1536,9 @@ TEST_F(BSONColumnTest, UndefinedBasic) {
appendSimple8bBlock64(expected, kDeltaForBinaryEqualValues);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, first});
}
TEST_F(BSONColumnTest, UndefinedAfterChangeBack) {
@@ -1356,7 +1559,9 @@ TEST_F(BSONColumnTest, UndefinedAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, undefined, undefined});
}
TEST_F(BSONColumnTest, NullBasic) {
@@ -1372,7 +1577,9 @@ TEST_F(BSONColumnTest, NullBasic) {
appendSimple8bBlock64(expected, kDeltaForBinaryEqualValues);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, first});
}
TEST_F(BSONColumnTest, NullAfterChangeBack) {
@@ -1393,7 +1600,9 @@ TEST_F(BSONColumnTest, NullAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, null, null});
}
TEST_F(BSONColumnTest, RegexBasic) {
@@ -1412,7 +1621,9 @@ TEST_F(BSONColumnTest, RegexBasic) {
appendSimple8bBlock64(expected, kDeltaForBinaryEqualValues);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second});
}
TEST_F(BSONColumnTest, RegexAfterChangeBack) {
@@ -1433,7 +1644,9 @@ TEST_F(BSONColumnTest, RegexAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, regex, regex});
}
TEST_F(BSONColumnTest, DBRefBasic) {
@@ -1453,7 +1666,9 @@ TEST_F(BSONColumnTest, DBRefBasic) {
appendSimple8bBlock64(expected, kDeltaForBinaryEqualValues);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second});
}
TEST_F(BSONColumnTest, DBRefAfterChangeBack) {
@@ -1475,7 +1690,9 @@ TEST_F(BSONColumnTest, DBRefAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, dbRef, dbRef});
}
TEST_F(BSONColumnTest, CodeWScopeBasic) {
@@ -1494,7 +1711,9 @@ TEST_F(BSONColumnTest, CodeWScopeBasic) {
appendSimple8bBlock64(expected, kDeltaForBinaryEqualValues);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second});
}
TEST_F(BSONColumnTest, CodeWScopeAfterChangeBack) {
@@ -1515,7 +1734,9 @@ TEST_F(BSONColumnTest, CodeWScopeAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, codeWScope, codeWScope});
}
TEST_F(BSONColumnTest, SymbolBasic) {
@@ -1534,7 +1755,9 @@ TEST_F(BSONColumnTest, SymbolBasic) {
appendSimple8bBlock64(expected, kDeltaForBinaryEqualValues);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {first, second, second});
}
TEST_F(BSONColumnTest, SymbolAfterChangeBack) {
@@ -1555,7 +1778,9 @@ TEST_F(BSONColumnTest, SymbolAfterChangeBack) {
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemInt32, symbol, symbol});
}
TEST_F(BSONColumnTest, BinDataBase) {
@@ -1569,7 +1794,9 @@ TEST_F(BSONColumnTest, BinDataBase) {
appendLiteral(expected, elemBinData);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData});
}
TEST_F(BSONColumnTest, BinDataOdd) {
@@ -1583,7 +1810,9 @@ TEST_F(BSONColumnTest, BinDataOdd) {
appendLiteral(expected, elemBinData);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData});
}
TEST_F(BSONColumnTest, BinDataDelta) {
@@ -1600,7 +1829,9 @@ TEST_F(BSONColumnTest, BinDataDelta) {
appendSimple8bBlock128(expected, deltaBinData(elemBinData, elemBinData));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData, elemBinData});
}
TEST_F(BSONColumnTest, BinDataDeltaShouldFail) {
@@ -1619,7 +1850,9 @@ TEST_F(BSONColumnTest, BinDataDeltaShouldFail) {
appendLiteral(expected, elemBinDataLong);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData, elemBinDataLong});
}
TEST_F(BSONColumnTest, BinDataDeltaCheckSkips) {
@@ -1637,15 +1870,17 @@ TEST_F(BSONColumnTest, BinDataDeltaCheckSkips) {
BufBuilder expected;
appendLiteral(expected, elemBinData);
- appendSimple8bControl(expected, 0b1000, 0b0000);
+ appendSimple8bControl(expected, 0b1000, 0b0001);
std::vector<boost::optional<uint128_t>> expectedValues = {
deltaBinData(elemBinDataLong, elemBinData),
boost::none,
deltaBinData(elemBinData, elemBinDataLong)};
- appendSimple8bBlocks128(expected, expectedValues, 1);
+ appendSimple8bBlocks128(expected, expectedValues, 2);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData, elemBinDataLong, BSONElement(), elemBinData});
}
TEST_F(BSONColumnTest, BinDataLargerThan16) {
@@ -1666,7 +1901,9 @@ TEST_F(BSONColumnTest, BinDataLargerThan16) {
appendLiteral(expected, elemBinDataLong);
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData, elemBinDataLong});
}
TEST_F(BSONColumnTest, BinDataEqualTo16) {
@@ -1688,7 +1925,9 @@ TEST_F(BSONColumnTest, BinDataEqualTo16) {
appendSimple8bBlock128(expected, deltaBinData(elemBinDataLong, elemBinData));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData, elemBinDataLong});
}
TEST_F(BSONColumnTest, BinDataLargerThan16SameValue) {
@@ -1706,8 +1945,135 @@ TEST_F(BSONColumnTest, BinDataLargerThan16SameValue) {
appendSimple8bBlock128(expected, deltaBinData(elemBinData, elemBinData));
appendEOO(expected);
- verifyBinary(cb.finalize(), expected);
+ auto binData = cb.finalize();
+ verifyBinary(binData, expected);
+ verifyDecompression(binData, {elemBinData, elemBinData});
+}
+
+TEST_F(BSONColumnTest, InvalidControlByte) {
+ auto elem = createElementInt32(0);
+
+ BufBuilder expected;
+ appendLiteral(expected, elem);
+ appendSimple8bControl(expected, 0b0010, 0b0000);
+ appendSimple8bBlock64(expected, deltaInt32(elem, elem));
+ appendEOO(expected);
+
+ try {
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+ ASSERT(false);
+
+ } catch (DBException&) {
+ }
}
+TEST_F(BSONColumnTest, InvalidSize) {
+ auto elem = createElementInt32(0);
+
+ BufBuilder expected;
+ appendLiteral(expected, elem);
+ appendSimple8bControl(expected, 0b1000, 0b0000);
+ appendSimple8bBlock64(expected, deltaInt32(elem, elem));
+ appendEOO(expected);
+
+ try {
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len() - 2));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+ ASSERT(false);
+
+ } catch (DBException&) {
+ }
+}
+
+TEST_F(BSONColumnTest, InvalidDoubleScale) {
+ auto d1 = createElementDouble(1.11);
+ auto d2 = createElementDouble(1.12);
+
+ BufBuilder expected;
+ appendLiteral(expected, d1);
+ appendSimple8bControl(expected, 0b1001, 0b0000);
+ appendSimple8bBlock64(expected, deltaDouble(d2, d1, 100));
+ appendEOO(expected);
+
+ try {
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+ ASSERT(false);
+
+ } catch (DBException&) {
+ }
+}
+
+TEST_F(BSONColumnTest, MissingEOO) {
+ auto elem = createElementInt32(0);
+
+ BufBuilder expected;
+ appendLiteral(expected, elem);
+
+ try {
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+ ASSERT(false);
+
+ } catch (DBException&) {
+ }
+}
+
+TEST_F(BSONColumnTest, EmptyBuffer) {
+ BufBuilder expected;
+
+ try {
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+ ASSERT(false);
+
+ } catch (DBException&) {
+ }
+}
+
+TEST_F(BSONColumnTest, InvalidSimple8b) {
+ // A Simple8b block with an invalid selector doesn't throw an error, but make sure we can handle
+ // it gracefully. Check so we don't read out of bounds and can iterate.
+
+ auto elem = createElementInt32(0);
+
+ BufBuilder expected;
+ appendLiteral(expected, elem);
+ appendSimple8bControl(expected, 0b1000, 0b0000);
+ uint64_t invalidSimple8b = 0;
+ expected.appendNum(invalidSimple8b);
+ appendEOO(expected);
+
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+}
+
+TEST_F(BSONColumnTest, NoLiteralStart) {
+ // Starting the stream with a delta block doesn't throw an error. Make sure we handle it
+ // gracefully even if the values we extracted may not be meaningful. Check so we don't read out
+ // of bounds and can iterate.
+
+ auto elem = createElementInt32(0);
+
+ BufBuilder expected;
+ appendLiteral(expected, elem);
+ appendSimple8bControl(expected, 0b1000, 0b0000);
+ uint64_t invalidSimple8b = 0;
+ expected.appendNum(invalidSimple8b);
+ appendEOO(expected);
+
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ for (auto it = col.begin(), e = col.end(); it != e; ++it) {
+ }
+}
+
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/bson/util/bsoncolumn_util.cpp b/src/mongo/bson/util/bsoncolumn_util.cpp
new file mode 100644
index 00000000000..921df59e921
--- /dev/null
+++ b/src/mongo/bson/util/bsoncolumn_util.cpp
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/bson/util/bsoncolumn_util.h"
+
+namespace mongo::bsoncolumn {
+bool usesDeltaOfDelta(BSONType type) {
+ return type == bsonTimestamp;
+}
+
+bool uses128bit(BSONType type) {
+ return type == NumberDecimal || type == BinData;
+}
+
+int64_t calcDelta(int64_t val, int64_t prev) {
+ // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped
+ // around instead of undefined behavior.
+ return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev));
+}
+
+int128_t calcDelta(int128_t val, int128_t prev) {
+ // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped
+ // around instead of undefined behavior.
+ return static_cast<int128_t>(static_cast<uint128_t>(val) - static_cast<uint128_t>(prev));
+}
+
+int64_t expandDelta(int64_t prev, int64_t delta) {
+ // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around
+ // instead of undefined behavior.
+ return static_cast<int64_t>(static_cast<uint64_t>(prev) + static_cast<uint64_t>(delta));
+}
+
+int128_t expandDelta(int128_t prev, int128_t delta) {
+ // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around
+ // instead of undefined behavior.
+ return static_cast<int128_t>(static_cast<uint128_t>(prev) + static_cast<uint128_t>(delta));
+}
+} // namespace mongo::bsoncolumn
diff --git a/src/mongo/bson/util/bsoncolumn_util.h b/src/mongo/bson/util/bsoncolumn_util.h
new file mode 100644
index 00000000000..89be5b2bb4b
--- /dev/null
+++ b/src/mongo/bson/util/bsoncolumn_util.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/bson/bsontypes.h"
+#include "mongo/platform/int128.h"
+
+namespace mongo::bsoncolumn {
+bool usesDeltaOfDelta(BSONType type);
+bool uses128bit(BSONType type);
+
+int64_t calcDelta(int64_t val, int64_t prev);
+int128_t calcDelta(int128_t val, int128_t prev);
+
+int64_t expandDelta(int64_t prev, int64_t delta);
+int128_t expandDelta(int128_t prev, int128_t delta);
+
+} // namespace mongo::bsoncolumn
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.cpp b/src/mongo/bson/util/bsoncolumnbuilder.cpp
index 73a2f4b7431..3e35b3fda29 100644
--- a/src/mongo/bson/util/bsoncolumnbuilder.cpp
+++ b/src/mongo/bson/util/bsoncolumnbuilder.cpp
@@ -28,12 +28,15 @@
*/
#include "mongo/bson/util/bsoncolumnbuilder.h"
+#include "mongo/bson/util/bsoncolumn_util.h"
#include "mongo/bson/util/simple8b_type_util.h"
#include <memory>
namespace mongo {
+using namespace bsoncolumn;
+
namespace {
static constexpr uint8_t kMaxCount = 16;
static constexpr uint8_t kCountMask = 0x0F;
@@ -42,18 +45,6 @@ static constexpr uint8_t kControlMask = 0xF0;
static constexpr std::array<uint8_t, Simple8bTypeUtil::kMemoryAsInteger + 1>
kControlByteForScaleIndex = {0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0x80};
-int64_t calcDelta(int64_t val, int64_t prev) {
- // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped
- // around instead of undefined behavior.
- return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev));
-}
-
-int64_t expandDelta(int64_t prev, int64_t delta) {
- // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around
- // instead of undefined behavior.
- return static_cast<int64_t>(static_cast<uint64_t>(prev) + static_cast<uint64_t>(delta));
-}
-
// Encodes the double with the lowest possible scale index. In worst case we will interpret the
// memory as integer which is guaranteed to succeed.
std::pair<int64_t, uint8_t> scaleAndEncodeDouble(double value, uint8_t minScaleIndex) {
@@ -91,13 +82,13 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
_storePrevious(elem);
_simple8bBuilder128.flush();
_simple8bBuilder64.flush();
- _storeWith128 = elem.type() == NumberDecimal || elem.type() == BinData;
+ _storeWith128 = uses128bit(type);
_writeLiteralFromPrevious();
return *this;
}
// Store delta in Simple-8b if types match
- bool compressed = !_usesDeltaOfDelta(type) && elem.binaryEqualValues(previous);
+ bool compressed = !usesDeltaOfDelta(type) && elem.binaryEqualValues(previous);
if (compressed) {
if (_storeWith128) {
_simple8bBuilder128.append(0);
@@ -112,18 +103,19 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
int128_t delta = 0;
switch (type) {
case BinData: {
- encodingPossible =
- elem.valuestrsize() == previous.valuestrsize() && elem.valuestrsize() <= 16;
+ int size;
+ const char* binary = elem.binData(size);
+ encodingPossible = size == previous.valuestrsize() && elem.valuestrsize() <= 16;
if (!encodingPossible)
break;
- int128_t curEncoded =
- Simple8bTypeUtil::encodeBinary(elem.valuestr(), elem.valuestrsize());
- delta = curEncoded - _prevEncoded128;
+
+ int128_t curEncoded = Simple8bTypeUtil::encodeBinary(binary, size);
+ delta = calcDelta(curEncoded, _prevEncoded128);
_prevEncoded128 = curEncoded;
} break;
case NumberDecimal: {
int128_t curEncoded = Simple8bTypeUtil::encodeDecimal128(elem._numberDecimal());
- delta = curEncoded - _prevEncoded128;
+ delta = calcDelta(curEncoded, _prevEncoded128);
_prevEncoded128 = curEncoded;
} break;
default:
@@ -144,12 +136,16 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
case NumberLong:
value = calcDelta(elem._numberLong(), previous._numberLong());
break;
- case jstOID:
+ case jstOID: {
encodingPossible = _objectIdDeltaPossible(elem, previous);
- if (encodingPossible)
- value = calcDelta(Simple8bTypeUtil::encodeObjectId(elem.OID()),
- Simple8bTypeUtil::encodeObjectId(previous.OID()));
+ if (!encodingPossible)
+ break;
+
+ int64_t curEncoded = Simple8bTypeUtil::encodeObjectId(elem.OID());
+ value = calcDelta(curEncoded, _prevEncoded64);
+ _prevEncoded64 = curEncoded;
break;
+ }
case bsonTimestamp: {
int64_t currTimestampDelta =
calcDelta(elem.timestamp().asULL(), previous.timestamp().asULL());
@@ -158,10 +154,11 @@ BSONColumnBuilder& BSONColumnBuilder::append(BSONElement elem) {
break;
}
case Date:
- value = elem.date().toMillisSinceEpoch() - previous.date().toMillisSinceEpoch();
+ value = calcDelta(elem.date().toMillisSinceEpoch(),
+ previous.date().toMillisSinceEpoch());
break;
case Bool:
- value = elem.boolean() - previous.boolean();
+ value = calcDelta(elem.boolean(), previous.boolean());
break;
case Undefined:
case jstNULL:
@@ -374,29 +371,34 @@ void BSONColumnBuilder::_writeLiteralFromPrevious() {
// appending next value.
auto prevElem = _previous();
_bufBuilder.appendBuf(_prev.get(), _prevSize);
+
+ // Reset state
_controlByteOffset = 0;
- // There is no previous timestamp delta. Set to default.
+ _scaleIndex = Simple8bTypeUtil::kMemoryAsInteger;
_prevDelta = 0;
+
+ // Initialize previous encoded when needed
switch (prevElem.type()) {
- case BinData:
- if (prevElem.valuestrsize() <= 16)
- _prevEncoded128 =
- Simple8bTypeUtil::encodeBinary(prevElem.valuestr(), prevElem.valuestrsize());
+ case NumberDouble:
+ _lastValueInPrevBlock = prevElem._numberDouble();
+ std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
+ break;
+ case BinData: {
+ int size;
+ const char* binary = prevElem.binData(size);
+ if (size <= 16)
+ _prevEncoded128 = Simple8bTypeUtil::encodeBinary(binary, size);
break;
+ }
case NumberDecimal:
_prevEncoded128 = Simple8bTypeUtil::encodeDecimal128(prevElem._numberDecimal());
break;
+ case jstOID:
+ _prevEncoded64 = Simple8bTypeUtil::encodeObjectId(prevElem.__oid());
+ break;
default:
break;
}
-
- // Set scale factor for this literal and values needed to append values
- if (_prev[0] == NumberDouble) {
- _lastValueInPrevBlock = prevElem._numberDouble();
- std::tie(_prevEncoded64, _scaleIndex) = scaleAndEncodeDouble(_lastValueInPrevBlock, 0);
- } else {
- _scaleIndex = Simple8bTypeUtil::kMemoryAsInteger;
- }
}
void BSONColumnBuilder::_incrementSimple8bCount() {
@@ -448,10 +450,6 @@ Simple8bWriteFn BSONColumnBuilder::_createBufferWriter() {
};
}
-bool BSONColumnBuilder::_usesDeltaOfDelta(BSONType type) {
- return type == bsonTimestamp;
-}
-
bool BSONColumnBuilder::_objectIdDeltaPossible(BSONElement elem, BSONElement prev) {
return !memcmp(prev.OID().getInstanceUnique().bytes,
elem.OID().getInstanceUnique().bytes,
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.h b/src/mongo/bson/util/bsoncolumnbuilder.h
index d4d76c0e89a..70f26f43e37 100644
--- a/src/mongo/bson/util/bsoncolumnbuilder.h
+++ b/src/mongo/bson/util/bsoncolumnbuilder.h
@@ -80,7 +80,6 @@ private:
void _storePrevious(BSONElement elem);
void _writeLiteralFromPrevious();
void _incrementSimple8bCount();
- bool _usesDeltaOfDelta(BSONType type);
bool _objectIdDeltaPossible(BSONElement elem, BSONElement prev);
// Helper to append doubles to this Column builder. Returns true if append was successful and
diff --git a/src/mongo/bson/util/simple8b.cpp b/src/mongo/bson/util/simple8b.cpp
index 4b0de312a66..98fca714193 100644
--- a/src/mongo/bson/util/simple8b.cpp
+++ b/src/mongo/bson/util/simple8b.cpp
@@ -212,11 +212,12 @@ constexpr std::array<std::array<uint64_t, 16>, 4> kDecodeMask = {
0}};
// The number of meaningful bits for each selector. This does not include any trailing zero bits.
+// We use 64 bits for all invalid selectors, this is to make sure iteration does not get stuck.
constexpr std::array<std::array<uint8_t, 16>, 4> kBitsPerIntForSelector = {
- std::array<uint8_t, 16>{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60, 0},
- std::array<uint8_t, 16>{0, 2, 3, 4, 5, 7, 10, 14, 24, 52, 0, 0, 0, 0, 0, 0},
- std::array<uint8_t, 16>{0, 4, 5, 7, 10, 14, 24, 52, 0, 0, 0, 0, 0, 0, 0, 0},
- std::array<uint8_t, 16>{0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 9, 13, 23, 51, 0, 0}};
+ std::array<uint8_t, 16>{64, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60, 64},
+ std::array<uint8_t, 16>{64, 2, 3, 4, 5, 7, 10, 14, 24, 52, 64, 64, 64, 64, 64, 64},
+ std::array<uint8_t, 16>{64, 4, 5, 7, 10, 14, 24, 52, 0, 0, 64, 64, 64, 64, 64, 64},
+ std::array<uint8_t, 16>{64, 0, 0, 0, 0, 0, 0, 0, 4, 6, 9, 13, 23, 51, 64, 64}};
// The number of integers coded for each selector.
constexpr std::array<std::array<uint8_t, 16>, 4> kIntsStoreForSelector = {
@@ -723,7 +724,7 @@ void Simple8bBuilder<T>::setWriteCallback(Simple8bWriteFn writer) {
}
template <typename T>
-Simple8b<T>::Iterator::Iterator(const uint64_t* pos, const uint64_t* end)
+Simple8b<T>::Iterator::Iterator(const char* pos, const char* end)
: _pos(pos), _end(end), _value(0), _rleRemaining(0), _shift(0) {
if (pos != end) {
_loadBlock();
@@ -732,7 +733,7 @@ Simple8b<T>::Iterator::Iterator(const uint64_t* pos, const uint64_t* end)
template <typename T>
void Simple8b<T>::Iterator::_loadBlock() {
- _current = LittleEndian<uint64_t>::load(*_pos);
+ _current = ConstDataView(_pos).read<LittleEndian<uint64_t>>();
_selector = _current & kBaseSelectorMask;
uint8_t selectorExtension = ((_current >> kSelectorBits) & kBaseSelectorMask);
@@ -823,7 +824,7 @@ typename Simple8b<T>::Iterator& Simple8b<T>::Iterator::operator++() {
template <typename T>
typename Simple8b<T>::Iterator& Simple8b<T>::Iterator::advanceBlock() {
- ++_pos;
+ _pos += sizeof(uint64_t);
if (_pos == _end) {
_rleRemaining = 0;
_shift = 0;
@@ -845,18 +846,18 @@ bool Simple8b<T>::Iterator::operator!=(const Simple8b::Iterator& rhs) const {
}
template <typename T>
-Simple8b<T>::Simple8b(const char* buffer, int size) : _buffer(buffer), _size(size) {}
+Simple8b<T>::Simple8b(const char* buffer, int size) : _buffer(buffer), _size(size) {
+ invariant(size % sizeof(uint64_t) == 0);
+}
template <typename T>
typename Simple8b<T>::Iterator Simple8b<T>::begin() const {
- return {reinterpret_cast<const uint64_t*>(_buffer),
- reinterpret_cast<const uint64_t*>(_buffer + _size)};
+ return {_buffer, _buffer + _size};
}
template <typename T>
typename Simple8b<T>::Iterator Simple8b<T>::end() const {
- return {reinterpret_cast<const uint64_t*>(_buffer + _size),
- reinterpret_cast<const uint64_t*>(_buffer + _size)};
+ return {_buffer + _size, _buffer + _size};
}
template class Simple8b<uint64_t>;
diff --git a/src/mongo/bson/util/simple8b.h b/src/mongo/bson/util/simple8b.h
index 46beaa26627..9fad90c6f28 100644
--- a/src/mongo/bson/util/simple8b.h
+++ b/src/mongo/bson/util/simple8b.h
@@ -322,7 +322,7 @@ public:
bool operator!=(const Iterator& rhs) const;
private:
- Iterator(const uint64_t* pos, const uint64_t* end);
+ Iterator(const char* pos, const char* end);
/**
* Loads the current Simple8b block into the iterator
@@ -335,8 +335,8 @@ public:
*/
uint16_t _rleCountInCurrent(uint8_t selectorExtension) const;
- const uint64_t* _pos;
- const uint64_t* _end;
+ const char* _pos;
+ const char* _end;
// Current Simple8b block in native endian
uint64_t _current;
diff --git a/src/mongo/bson/util/simple8b_type_util.cpp b/src/mongo/bson/util/simple8b_type_util.cpp
index 32b9fe4b558..fec46ad80be 100644
--- a/src/mongo/bson/util/simple8b_type_util.cpp
+++ b/src/mongo/bson/util/simple8b_type_util.cpp
@@ -82,26 +82,30 @@ int64_t Simple8bTypeUtil::encodeObjectId(const OID& oid) {
return LittleEndian<uint64_t>::load(encoded);
}
-OID Simple8bTypeUtil::decodeObjectId(int64_t val, OID::InstanceUnique processUnique) {
- unsigned char objId[OID::kOIDSize];
-
+void Simple8bTypeUtil::decodeObjectIdInto(char* buffer,
+ int64_t val,
+ OID::InstanceUnique processUnique) {
val = LittleEndian<uint64_t>::store(val);
uint8_t* encodedBytes = reinterpret_cast<uint8_t*>(&val);
// Set Timestamp and Counter variables together.
- objId[0] = encodedBytes[6]; // Timestamp index 0.
- objId[1] = encodedBytes[4]; // Timestamp index 1.
- objId[2] = encodedBytes[2]; // Timestamp index 2.
- objId[3] = encodedBytes[0]; // Timestamp index 3.
- objId[9] = encodedBytes[5]; // Counter index 0;
- objId[10] = encodedBytes[3]; // Counter index 1.
- objId[11] = encodedBytes[1]; // Counter index 2.
+ buffer[0] = encodedBytes[6]; // Timestamp index 0.
+ buffer[1] = encodedBytes[4]; // Timestamp index 1.
+ buffer[2] = encodedBytes[2]; // Timestamp index 2.
+ buffer[3] = encodedBytes[0]; // Timestamp index 3.
+ buffer[9] = encodedBytes[5]; // Counter index 0;
+ buffer[10] = encodedBytes[3]; // Counter index 1.
+ buffer[11] = encodedBytes[1]; // Counter index 2.
// Finally set Process Unique.
std::copy(processUnique.bytes,
processUnique.bytes + OID::kInstanceUniqueSize,
- objId + OID::kTimestampSize);
+ buffer + OID::kTimestampSize);
+}
+OID Simple8bTypeUtil::decodeObjectId(int64_t val, OID::InstanceUnique processUnique) {
+ unsigned char objId[OID::kOIDSize];
+ decodeObjectIdInto(reinterpret_cast<char*>(objId), val, processUnique);
return OID(objId);
}
@@ -186,8 +190,9 @@ Decimal128 Simple8bTypeUtil::decodeDecimal128(int128_t val) {
}
int128_t Simple8bTypeUtil::encodeBinary(const char* val, size_t size) {
- char arr[16] = {};
+ char arr[16];
memcpy(arr, val, size);
+ memset(arr + size, 0, sizeof(arr) - size);
uint64_t low = ConstDataView(arr).read<LittleEndian<uint64_t>>();
uint64_t high = ConstDataView(arr + 8).read<LittleEndian<uint64_t>>();
return absl::MakeInt128(high, low);
@@ -202,10 +207,6 @@ void Simple8bTypeUtil::decodeBinary(int128_t val, char* result, size_t size) {
} else {
memcpy(result, &low, size);
}
- if (size < 16) {
- // Set the position at end of binary to be always one.
- result[size] = 1;
- }
}
} // namespace mongo
diff --git a/src/mongo/bson/util/simple8b_type_util.h b/src/mongo/bson/util/simple8b_type_util.h
index 345e66ce81a..bf59d48b47a 100644
--- a/src/mongo/bson/util/simple8b_type_util.h
+++ b/src/mongo/bson/util/simple8b_type_util.h
@@ -61,7 +61,9 @@ public:
// then rearrange the bytes to:
// | Byte Usage | TS3 | C2 | TS2 | C1 | TS1 | C0 | TS0 |
// | Byte Index | 0 | 1 | 2 | 3 | 4 | 5 | 6 |
+ // The buffer passed to decodeObjectIdInto() must have at least OID::kOIDSize size.
static int64_t encodeObjectId(const OID& oid);
+ static void decodeObjectIdInto(char* buffer, int64_t val, OID::InstanceUnique processUnique);
static OID decodeObjectId(int64_t val, OID::InstanceUnique processUnique);
// These methods add floating point support for encoding and decoding with simple8b up to 8
diff --git a/src/mongo/bson/util/simple8b_type_util_test.cpp b/src/mongo/bson/util/simple8b_type_util_test.cpp
index 90ff2a4dd71..9b3ae3e19de 100644
--- a/src/mongo/bson/util/simple8b_type_util_test.cpp
+++ b/src/mongo/bson/util/simple8b_type_util_test.cpp
@@ -57,12 +57,14 @@ void assertDecimal128Equal(Decimal128 val) {
void assertBinaryEqual(char* val, size_t size, int128_t expected) {
int128_t encodeResult = Simple8bTypeUtil::encodeBinary(val, size);
ASSERT_EQUALS(encodeResult, expected);
- char charPtr[16] = {1};
+
+ // Initialize to something non zero so we can verify that we did not write out of bounds
+ char charPtr[17];
+ char unused = 'x';
+ memset(charPtr, unused, sizeof(charPtr));
Simple8bTypeUtil::decodeBinary(encodeResult, charPtr, size);
ASSERT_EQUALS(std::memcmp(charPtr, val, size), 0);
- if (size <= 16) {
- ASSERT_EQUALS(charPtr[size], 1);
- }
+ ASSERT_EQUALS(charPtr[size], unused);
}
TEST(Simple8bTypeUtil, EncodeAndDecodePositiveSignedInt) {