diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2021-12-09 18:37:46 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-12-09 19:02:11 +0000 |
commit | 6ea30e6fce2cea6a1080156cf9a5b11037f1fdb1 (patch) | |
tree | deac03f9f32f4eda64c900960ffde21d76f38003 /src/mongo/bson/util | |
parent | c56c062cd24b8af98ef8a094d2e6a60a89845cc9 (diff) | |
download | mongo-6ea30e6fce2cea6a1080156cf9a5b11037f1fdb1.tar.gz |
SERVER-61954 Improve decoding performance for BSONColumn
* Use pre-increment in the bucket unpacker and cache the end iterator
* Improve inlining for bsoncolumn_util
* Cache some more variables so that less needs to be recalculated when
incrementing the iterator
Diffstat (limited to 'src/mongo/bson/util')
-rw-r--r-- | src/mongo/bson/util/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.cpp | 63 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.h | 22 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn_util.cpp | 64 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn_util.h | 32 |
5 files changed, 69 insertions, 113 deletions
diff --git a/src/mongo/bson/util/SConscript b/src/mongo/bson/util/SConscript index 9251a640497..e759ec9873f 100644 --- a/src/mongo/bson/util/SConscript +++ b/src/mongo/bson/util/SConscript @@ -19,7 +19,6 @@ env.Library( source=[ 'bsoncolumn.cpp', 'bsoncolumnbuilder.cpp', - 'bsoncolumn_util.cpp', 'simple8b.cpp', 'simple8b_type_util.cpp', ], diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp index 7abb5b4e570..ce288893f2d 100644 --- a/src/mongo/bson/util/bsoncolumn.cpp +++ b/src/mongo/bson/util/bsoncolumn.cpp @@ -272,13 +272,11 @@ void BSONColumn::Iterator::_initialize(size_t index) { return; } - _states.resize(1); const BSONElement* current = nullptr; if (index < _column->_decompressed.size()) { current = &_column->_decompressed[index]; - _states.front()._lastValue = *current; + _state._lastValue = *current; } - // If we are at EOO then start at end. if (*_control == EOO) { _handleEOO(); @@ -286,7 +284,7 @@ void BSONColumn::Iterator::_initialize(size_t index) { } // previous doesn't matter when we load literals - auto result = _states.front()._loadControl(*_column, _control, _end, current); + auto result = _state._loadControl(*_column, _control, _end, current); if (!current) { _column->_decompressed.push_back(result.element); } @@ -294,8 +292,6 @@ void BSONColumn::Iterator::_initialize(size_t index) { } void BSONColumn::Iterator::_initializeInterleaving() { - _states.clear(); - _interleaved = true; _interleavedReferenceObj = BSONObj(_control + 1); BSONObjTraversal t([](StringData fieldName, const BSONObj& obj) { return true; }, @@ -305,6 +301,7 @@ void BSONColumn::Iterator::_initializeInterleaving() { return true; }); t.traverse(_interleavedReferenceObj); + uassert(6067610, "Invalid BSONColumn encoding", !_states.empty()); _control += _interleavedReferenceObj.objsize() + 1; _incrementInterleaved(); @@ -315,10 +312,10 @@ BSONColumn::Iterator& BSONColumn::Iterator::operator++() { // to decompress elements further along ++_index; - if 
(_interleaved) { - _incrementInterleaved(); - } else { + if (_states.empty()) { _incrementRegular(); + } else { + _incrementInterleaved(); } return *this; @@ -331,12 +328,11 @@ BSONColumn::Iterator BSONColumn::Iterator::operator++(int) { } void BSONColumn::Iterator::_incrementRegular() { - DecodingState& state = _states.front(); + DecodingState& state = _state; // Get pointer to current element if we are already decompressed const BSONElement* current = _index < _column->_decompressed.size() ? &_column->_decompressed[_index] : nullptr; - // Traverse current Simple8b block for 64bit values if it exists if (state._decoder64 && ++state._decoder64->pos != state._decoder64->end) { auto elem = state._loadDelta(*_column, *state._decoder64->pos, current); @@ -402,13 +398,14 @@ void BSONColumn::Iterator::_incrementInterleaved() { // This handles writing the BSONObj size and EOO bytes for subobjects. auto stateIt = _states.begin(); auto stateEnd = _states.end(); + int processed = 0; BSONObjTraversal t( [this](StringData fieldName, const BSONObj& obj) { // Called every time we recurse into a subobject. It makes sure we write the size and // EOO bytes. return SubObjectAllocator(_column->_elementStorage, fieldName, obj); }, - [this, &stateIt, &stateEnd](const BSONElement& referenceField) { + [this, &stateIt, &stateEnd, &processed](const BSONElement& referenceField) { // Called for every scalar field in the reference interleaved BSONObj. We have as many // decoding states as scalars. uassert(6067603, "Invalid BSON Column interleaved encoding", stateIt != stateEnd); @@ -463,6 +460,7 @@ void BSONColumn::Iterator::_incrementInterleaved() { state._lastValue = elem; } + ++processed; return true; }); @@ -471,13 +469,10 @@ void BSONColumn::Iterator::_incrementInterleaved() { if (!res) { // Exit interleaved mode and load as regular. Re-instantiate the state and set last known // value. 
- _interleaved = false; _states.clear(); - _states.resize(1); - uassert(6067604, - "Invalid BSON Column interleaved encoding", - _index > 0 && _index - 1 < _column->_decompressed.size()); - _states.front()._lastValue = _column->_decompressed[_index - 1]; + uassert(6067604, "Invalid BSON Column interleaved encoding", processed == 0); + _state = {}; + _state._lastValue = _column->_decompressed[_index - 1]; _incrementRegular(); return; @@ -525,8 +520,9 @@ BSONColumn::Iterator BSONColumn::Iterator::moveTo(BSONColumn& column) { } void BSONColumn::Iterator::DecodingState::_loadLiteral(const BSONElement& elem) { - auto type = elem.type(); - switch (type) { + _lastType = elem.type(); + _deltaOfDelta = usesDeltaOfDelta(_lastType); + switch (_lastType) { case String: case Code: _lastEncodedValue128 = @@ -562,7 +558,7 @@ void BSONColumn::Iterator::DecodingState::_loadLiteral(const BSONElement& elem) default: break; }; - if (usesDeltaOfDelta(type)) { + if (_deltaOfDelta) { _lastEncodedValueForDeltaOfDelta = _lastEncodedValue64; _lastEncodedValue64 = 0; } @@ -633,17 +629,14 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, return BSONElement(); } - BSONType type = _lastValue.type(); - // If we have a zero delta no need to allocate a new Element, we can just use previous. - bool deltaOfDelta = usesDeltaOfDelta(type); - if (!deltaOfDelta && *delta == 0) { + if (!_deltaOfDelta && *delta == 0) { return _lastValue; } // Expand delta or delta-of-delta as last encoded. 
_lastEncodedValue64 = expandDelta(_lastEncodedValue64, Simple8bTypeUtil::decodeInt64(*delta)); - if (deltaOfDelta) { + if (_deltaOfDelta) { _lastEncodedValueForDeltaOfDelta = expandDelta(_lastEncodedValueForDeltaOfDelta, _lastEncodedValue64); } @@ -654,14 +647,13 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, _lastValue = *current; return *current; } - // Allocate a new BSONElement that fits same value size as previous ElementStorage::Element elem = column._elementStorage.allocate( - type, _lastValue.fieldNameStringData(), _lastValue.valuesize()); + _lastType, _lastValue.fieldNameStringData(), _lastValue.valuesize()); // Write value depending on type - int64_t valueToWrite = deltaOfDelta ? _lastEncodedValueForDeltaOfDelta : _lastEncodedValue64; - switch (type) { + int64_t valueToWrite = _deltaOfDelta ? _lastEncodedValueForDeltaOfDelta : _lastEncodedValue64; + switch (_lastType) { case NumberDouble: DataView(elem.value()) .write<LittleEndian<double>>( @@ -701,8 +693,6 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, return BSONElement(); } - BSONType type = _lastValue.type(); - // If we have a zero delta no need to allocate a new Element, we can just use previous. 
if (*delta == 0) { return _lastValue; @@ -721,7 +711,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, // Write value depending on type auto elem = [&]() -> ElementStorage::Element { - switch (type) { + switch (_lastType) { case String: case Code: { Simple8bTypeUtil::SmallString ss = @@ -729,7 +719,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, // Add 5 bytes to size, strings begin with a 4 byte count and ends with a null // terminator auto elem = column._elementStorage.allocate( - type, _lastValue.fieldNameStringData(), ss.size + 5); + _lastType, _lastValue.fieldNameStringData(), ss.size + 5); // Write count, size includes null terminator DataView(elem.value()).write<LittleEndian<int32_t>>(ss.size + 1); // Write string value @@ -740,7 +730,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, } case BinData: { auto elem = column._elementStorage.allocate( - type, _lastValue.fieldNameStringData(), _lastValue.valuesize()); + _lastType, _lastValue.fieldNameStringData(), _lastValue.valuesize()); // The first 5 bytes in binData is a count and subType, copy them from previous memcpy(elem.value(), _lastValue.value(), 5); Simple8bTypeUtil::decodeBinary( @@ -749,7 +739,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column, } case NumberDecimal: { auto elem = column._elementStorage.allocate( - type, _lastValue.fieldNameStringData(), _lastValue.valuesize()); + _lastType, _lastValue.fieldNameStringData(), _lastValue.valuesize()); Decimal128 d128 = Simple8bTypeUtil::decodeDecimal128(_lastEncodedValue128); Decimal128::Value d128Val = d128.getValue(); DataView(elem.value()).write<LittleEndian<long long>>(d128Val.low64); @@ -787,7 +777,6 @@ BSONColumn::BSONColumn(BSONBinData bin, StringData name) { void BSONColumn::_init() { uassert(6067609, "Invalid BSON Column encoding", _size > 0); - _elementCount = 
ConstDataView(_binary).read<LittleEndian<uint32_t>>(); _maxDecodingStartPos._control = _binary; } diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h index 09aee9505c8..f69b52c2f92 100644 --- a/src/mongo/bson/util/bsoncolumn.h +++ b/src/mongo/bson/util/bsoncolumn.h @@ -34,7 +34,6 @@ #include "mongo/bson/bsonobj.h" #include "mongo/bson/util/simple8b.h" -#include <boost/container/small_vector.hpp> #include <deque> #include <memory> #include <vector> @@ -194,30 +193,32 @@ public: boost::optional<Decoder<uint128_t>> _decoder128; // Last encoded values used to calculate delta and delta-of-delta + BSONType _lastType; + bool _deltaOfDelta; + BSONElement _lastValue; int64_t _lastEncodedValue64 = 0; int64_t _lastEncodedValueForDeltaOfDelta = 0; int128_t _lastEncodedValue128 = 0; - BSONElement _lastValue; - // Current scale index uint8_t _scaleIndex; }; - // Interleaved decoding states. When in regular mode we just have one. - boost::container::small_vector<DecodingState, 1> _states; + // Decoding states. Interleaved mode is active when '_states' is not empty. When in regular + // mode we use '_state'. + DecodingState _state; + std::vector<DecodingState> _states; + // Interleaving reference object read when encountered the interleaving start control byte. // We setup a decoding state for each scalar field in this object. The object hierarchy is // used to re-construct with full objects with the correct hierachy to the user. BSONObj _interleavedReferenceObj; - // Boolean to indicate if we are in interleaved mode or not. - bool _interleaved = false; }; /** * Forward iterator access. * - * Iterator value is EOO + * Iterator value is EOO when element is skipped. * * Iterators materialize compressed BSONElement as they iterate over the compressed binary. * It is NOT safe to do this from multiple threads concurrently. @@ -380,7 +381,8 @@ private: }; /** - * Initializes the BSONColumn. '_binary', '_size' and '_name' must be set before calling this. 
+ * Validates the BSONColumn on init(). Should be the last call in the constructor when all + * members are initialized. */ void _init(); @@ -392,8 +394,6 @@ private: const char* _binary; int _size; - uint32_t _elementCount; - struct DecodingStartPosition { void setIfLarger(size_t index, const char* control); diff --git a/src/mongo/bson/util/bsoncolumn_util.cpp b/src/mongo/bson/util/bsoncolumn_util.cpp deleted file mode 100644 index 51c5a74feb1..00000000000 --- a/src/mongo/bson/util/bsoncolumn_util.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Copyright (C) 2021-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/bson/util/bsoncolumn_util.h" - -namespace mongo::bsoncolumn { -bool usesDeltaOfDelta(BSONType type) { - return type == jstOID || type == Date || type == bsonTimestamp; -} - -bool uses128bit(BSONType type) { - return type == NumberDecimal || type == BinData || type == String || type == Code; -} - -int64_t calcDelta(int64_t val, int64_t prev) { - // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped - // around instead of undefined behavior. - return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev)); -} - -int128_t calcDelta(int128_t val, int128_t prev) { - // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped - // around instead of undefined behavior. - return static_cast<int128_t>(static_cast<uint128_t>(val) - static_cast<uint128_t>(prev)); -} - -int64_t expandDelta(int64_t prev, int64_t delta) { - // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around - // instead of undefined behavior. - return static_cast<int64_t>(static_cast<uint64_t>(prev) + static_cast<uint64_t>(delta)); -} - -int128_t expandDelta(int128_t prev, int128_t delta) { - // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around - // instead of undefined behavior. 
- return static_cast<int128_t>(static_cast<uint128_t>(prev) + static_cast<uint128_t>(delta)); -} -} // namespace mongo::bsoncolumn diff --git a/src/mongo/bson/util/bsoncolumn_util.h b/src/mongo/bson/util/bsoncolumn_util.h index 3233abbb1d4..e7fe0d6aae4 100644 --- a/src/mongo/bson/util/bsoncolumn_util.h +++ b/src/mongo/bson/util/bsoncolumn_util.h @@ -52,4 +52,36 @@ int128_t calcDelta(int128_t val, int128_t prev); int64_t expandDelta(int64_t prev, int64_t delta); int128_t expandDelta(int128_t prev, int128_t delta); +inline bool usesDeltaOfDelta(BSONType type) { + return type == jstOID || type == Date || type == bsonTimestamp; +} + +inline bool uses128bit(BSONType type) { + return type == NumberDecimal || type == BinData || type == String || type == Code; +} + +inline int64_t calcDelta(int64_t val, int64_t prev) { + // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped + // around instead of undefined behavior. + return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev)); +} + +inline int128_t calcDelta(int128_t val, int128_t prev) { + // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped + // around instead of undefined behavior. + return static_cast<int128_t>(static_cast<uint128_t>(val) - static_cast<uint128_t>(prev)); +} + +inline int64_t expandDelta(int64_t prev, int64_t delta) { + // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around + // instead of undefined behavior. + return static_cast<int64_t>(static_cast<uint64_t>(prev) + static_cast<uint64_t>(delta)); +} + +inline int128_t expandDelta(int128_t prev, int128_t delta) { + // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around + // instead of undefined behavior. + return static_cast<int128_t>(static_cast<uint128_t>(prev) + static_cast<uint128_t>(delta)); +} + } // namespace mongo::bsoncolumn |