summaryrefslogtreecommitdiff
path: root/src/mongo/bson/util
diff options
context:
space:
mode:
author: Henrik Edin <henrik.edin@mongodb.com>, 2021-12-09 18:37:46 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com>, 2021-12-09 19:02:11 +0000
commit: 6ea30e6fce2cea6a1080156cf9a5b11037f1fdb1 (patch)
tree: deac03f9f32f4eda64c900960ffde21d76f38003 /src/mongo/bson/util
parent: c56c062cd24b8af98ef8a094d2e6a60a89845cc9 (diff)
download: mongo-6ea30e6fce2cea6a1080156cf9a5b11037f1fdb1.tar.gz
SERVER-61954 Improve decoding performance for BSONColumn
* Use pre-increment in the bucket unpacker and cache the end iterator.
* Improve inlining for bsoncolumn_util.
* Cache more variables so that less needs to be recalculated when incrementing the iterator.
Diffstat (limited to 'src/mongo/bson/util')
-rw-r--r-- src/mongo/bson/util/SConscript | 1
-rw-r--r-- src/mongo/bson/util/bsoncolumn.cpp | 63
-rw-r--r-- src/mongo/bson/util/bsoncolumn.h | 22
-rw-r--r-- src/mongo/bson/util/bsoncolumn_util.cpp | 64
-rw-r--r-- src/mongo/bson/util/bsoncolumn_util.h | 32
5 files changed, 69 insertions, 113 deletions
diff --git a/src/mongo/bson/util/SConscript b/src/mongo/bson/util/SConscript
index 9251a640497..e759ec9873f 100644
--- a/src/mongo/bson/util/SConscript
+++ b/src/mongo/bson/util/SConscript
@@ -19,7 +19,6 @@ env.Library(
source=[
'bsoncolumn.cpp',
'bsoncolumnbuilder.cpp',
- 'bsoncolumn_util.cpp',
'simple8b.cpp',
'simple8b_type_util.cpp',
],
diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp
index 7abb5b4e570..ce288893f2d 100644
--- a/src/mongo/bson/util/bsoncolumn.cpp
+++ b/src/mongo/bson/util/bsoncolumn.cpp
@@ -272,13 +272,11 @@ void BSONColumn::Iterator::_initialize(size_t index) {
return;
}
- _states.resize(1);
const BSONElement* current = nullptr;
if (index < _column->_decompressed.size()) {
current = &_column->_decompressed[index];
- _states.front()._lastValue = *current;
+ _state._lastValue = *current;
}
-
// If we are at EOO then start at end.
if (*_control == EOO) {
_handleEOO();
@@ -286,7 +284,7 @@ void BSONColumn::Iterator::_initialize(size_t index) {
}
// previous doesn't matter when we load literals
- auto result = _states.front()._loadControl(*_column, _control, _end, current);
+ auto result = _state._loadControl(*_column, _control, _end, current);
if (!current) {
_column->_decompressed.push_back(result.element);
}
@@ -294,8 +292,6 @@ void BSONColumn::Iterator::_initialize(size_t index) {
}
void BSONColumn::Iterator::_initializeInterleaving() {
- _states.clear();
- _interleaved = true;
_interleavedReferenceObj = BSONObj(_control + 1);
BSONObjTraversal t([](StringData fieldName, const BSONObj& obj) { return true; },
@@ -305,6 +301,7 @@ void BSONColumn::Iterator::_initializeInterleaving() {
return true;
});
t.traverse(_interleavedReferenceObj);
+ uassert(6067610, "Invalid BSONColumn encoding", !_states.empty());
_control += _interleavedReferenceObj.objsize() + 1;
_incrementInterleaved();
@@ -315,10 +312,10 @@ BSONColumn::Iterator& BSONColumn::Iterator::operator++() {
// to decompress elements further along
++_index;
- if (_interleaved) {
- _incrementInterleaved();
- } else {
+ if (_states.empty()) {
_incrementRegular();
+ } else {
+ _incrementInterleaved();
}
return *this;
@@ -331,12 +328,11 @@ BSONColumn::Iterator BSONColumn::Iterator::operator++(int) {
}
void BSONColumn::Iterator::_incrementRegular() {
- DecodingState& state = _states.front();
+ DecodingState& state = _state;
// Get pointer to current element if we are already decompressed
const BSONElement* current =
_index < _column->_decompressed.size() ? &_column->_decompressed[_index] : nullptr;
-
// Traverse current Simple8b block for 64bit values if it exists
if (state._decoder64 && ++state._decoder64->pos != state._decoder64->end) {
auto elem = state._loadDelta(*_column, *state._decoder64->pos, current);
@@ -402,13 +398,14 @@ void BSONColumn::Iterator::_incrementInterleaved() {
// This handles writing the BSONObj size and EOO bytes for subobjects.
auto stateIt = _states.begin();
auto stateEnd = _states.end();
+ int processed = 0;
BSONObjTraversal t(
[this](StringData fieldName, const BSONObj& obj) {
// Called every time we recurse into a subobject. It makes sure we write the size and
// EOO bytes.
return SubObjectAllocator(_column->_elementStorage, fieldName, obj);
},
- [this, &stateIt, &stateEnd](const BSONElement& referenceField) {
+ [this, &stateIt, &stateEnd, &processed](const BSONElement& referenceField) {
// Called for every scalar field in the reference interleaved BSONObj. We have as many
// decoding states as scalars.
uassert(6067603, "Invalid BSON Column interleaved encoding", stateIt != stateEnd);
@@ -463,6 +460,7 @@ void BSONColumn::Iterator::_incrementInterleaved() {
state._lastValue = elem;
}
+ ++processed;
return true;
});
@@ -471,13 +469,10 @@ void BSONColumn::Iterator::_incrementInterleaved() {
if (!res) {
// Exit interleaved mode and load as regular. Re-instantiate the state and set last known
// value.
- _interleaved = false;
_states.clear();
- _states.resize(1);
- uassert(6067604,
- "Invalid BSON Column interleaved encoding",
- _index > 0 && _index - 1 < _column->_decompressed.size());
- _states.front()._lastValue = _column->_decompressed[_index - 1];
+ uassert(6067604, "Invalid BSON Column interleaved encoding", processed == 0);
+ _state = {};
+ _state._lastValue = _column->_decompressed[_index - 1];
_incrementRegular();
return;
@@ -525,8 +520,9 @@ BSONColumn::Iterator BSONColumn::Iterator::moveTo(BSONColumn& column) {
}
void BSONColumn::Iterator::DecodingState::_loadLiteral(const BSONElement& elem) {
- auto type = elem.type();
- switch (type) {
+ _lastType = elem.type();
+ _deltaOfDelta = usesDeltaOfDelta(_lastType);
+ switch (_lastType) {
case String:
case Code:
_lastEncodedValue128 =
@@ -562,7 +558,7 @@ void BSONColumn::Iterator::DecodingState::_loadLiteral(const BSONElement& elem)
default:
break;
};
- if (usesDeltaOfDelta(type)) {
+ if (_deltaOfDelta) {
_lastEncodedValueForDeltaOfDelta = _lastEncodedValue64;
_lastEncodedValue64 = 0;
}
@@ -633,17 +629,14 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
return BSONElement();
}
- BSONType type = _lastValue.type();
-
// If we have a zero delta no need to allocate a new Element, we can just use previous.
- bool deltaOfDelta = usesDeltaOfDelta(type);
- if (!deltaOfDelta && *delta == 0) {
+ if (!_deltaOfDelta && *delta == 0) {
return _lastValue;
}
// Expand delta or delta-of-delta as last encoded.
_lastEncodedValue64 = expandDelta(_lastEncodedValue64, Simple8bTypeUtil::decodeInt64(*delta));
- if (deltaOfDelta) {
+ if (_deltaOfDelta) {
_lastEncodedValueForDeltaOfDelta =
expandDelta(_lastEncodedValueForDeltaOfDelta, _lastEncodedValue64);
}
@@ -654,14 +647,13 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
_lastValue = *current;
return *current;
}
-
// Allocate a new BSONElement that fits same value size as previous
ElementStorage::Element elem = column._elementStorage.allocate(
- type, _lastValue.fieldNameStringData(), _lastValue.valuesize());
+ _lastType, _lastValue.fieldNameStringData(), _lastValue.valuesize());
// Write value depending on type
- int64_t valueToWrite = deltaOfDelta ? _lastEncodedValueForDeltaOfDelta : _lastEncodedValue64;
- switch (type) {
+ int64_t valueToWrite = _deltaOfDelta ? _lastEncodedValueForDeltaOfDelta : _lastEncodedValue64;
+ switch (_lastType) {
case NumberDouble:
DataView(elem.value())
.write<LittleEndian<double>>(
@@ -701,8 +693,6 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
return BSONElement();
}
- BSONType type = _lastValue.type();
-
// If we have a zero delta no need to allocate a new Element, we can just use previous.
if (*delta == 0) {
return _lastValue;
@@ -721,7 +711,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
// Write value depending on type
auto elem = [&]() -> ElementStorage::Element {
- switch (type) {
+ switch (_lastType) {
case String:
case Code: {
Simple8bTypeUtil::SmallString ss =
@@ -729,7 +719,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
// Add 5 bytes to size, strings begin with a 4 byte count and ends with a null
// terminator
auto elem = column._elementStorage.allocate(
- type, _lastValue.fieldNameStringData(), ss.size + 5);
+ _lastType, _lastValue.fieldNameStringData(), ss.size + 5);
// Write count, size includes null terminator
DataView(elem.value()).write<LittleEndian<int32_t>>(ss.size + 1);
// Write string value
@@ -740,7 +730,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
}
case BinData: {
auto elem = column._elementStorage.allocate(
- type, _lastValue.fieldNameStringData(), _lastValue.valuesize());
+ _lastType, _lastValue.fieldNameStringData(), _lastValue.valuesize());
// The first 5 bytes in binData is a count and subType, copy them from previous
memcpy(elem.value(), _lastValue.value(), 5);
Simple8bTypeUtil::decodeBinary(
@@ -749,7 +739,7 @@ BSONElement BSONColumn::Iterator::DecodingState::_loadDelta(BSONColumn& column,
}
case NumberDecimal: {
auto elem = column._elementStorage.allocate(
- type, _lastValue.fieldNameStringData(), _lastValue.valuesize());
+ _lastType, _lastValue.fieldNameStringData(), _lastValue.valuesize());
Decimal128 d128 = Simple8bTypeUtil::decodeDecimal128(_lastEncodedValue128);
Decimal128::Value d128Val = d128.getValue();
DataView(elem.value()).write<LittleEndian<long long>>(d128Val.low64);
@@ -787,7 +777,6 @@ BSONColumn::BSONColumn(BSONBinData bin, StringData name) {
void BSONColumn::_init() {
uassert(6067609, "Invalid BSON Column encoding", _size > 0);
- _elementCount = ConstDataView(_binary).read<LittleEndian<uint32_t>>();
_maxDecodingStartPos._control = _binary;
}
diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h
index 09aee9505c8..f69b52c2f92 100644
--- a/src/mongo/bson/util/bsoncolumn.h
+++ b/src/mongo/bson/util/bsoncolumn.h
@@ -34,7 +34,6 @@
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/util/simple8b.h"
-#include <boost/container/small_vector.hpp>
#include <deque>
#include <memory>
#include <vector>
@@ -194,30 +193,32 @@ public:
boost::optional<Decoder<uint128_t>> _decoder128;
// Last encoded values used to calculate delta and delta-of-delta
+ BSONType _lastType;
+ bool _deltaOfDelta;
+ BSONElement _lastValue;
int64_t _lastEncodedValue64 = 0;
int64_t _lastEncodedValueForDeltaOfDelta = 0;
int128_t _lastEncodedValue128 = 0;
- BSONElement _lastValue;
-
// Current scale index
uint8_t _scaleIndex;
};
- // Interleaved decoding states. When in regular mode we just have one.
- boost::container::small_vector<DecodingState, 1> _states;
+ // Decoding states. Interleaved mode is active when '_states' is not empty. When in regular
+ // mode we use '_state'.
+ DecodingState _state;
+ std::vector<DecodingState> _states;
+
// Interleaving reference object, read when the interleaving start control byte is encountered.
// We set up a decoding state for each scalar field in this object. The object hierarchy is
// used to reconstruct full objects with the correct hierarchy for the user.
BSONObj _interleavedReferenceObj;
- // Boolean to indicate if we are in interleaved mode or not.
- bool _interleaved = false;
};
/**
* Forward iterator access.
*
- * Iterator value is EOO
+ * Iterator value is EOO when element is skipped.
*
* Iterators materialize compressed BSONElement as they iterate over the compressed binary.
* It is NOT safe to do this from multiple threads concurrently.
@@ -380,7 +381,8 @@ private:
};
/**
- * Initializes the BSONColumn. '_binary', '_size' and '_name' must be set before calling this.
+ * Validates the BSONColumn on init(). Should be the last call in the constructor when all
+ * members are initialized.
*/
void _init();
@@ -392,8 +394,6 @@ private:
const char* _binary;
int _size;
- uint32_t _elementCount;
-
struct DecodingStartPosition {
void setIfLarger(size_t index, const char* control);
diff --git a/src/mongo/bson/util/bsoncolumn_util.cpp b/src/mongo/bson/util/bsoncolumn_util.cpp
deleted file mode 100644
index 51c5a74feb1..00000000000
--- a/src/mongo/bson/util/bsoncolumn_util.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/bson/util/bsoncolumn_util.h"
-
-namespace mongo::bsoncolumn {
-bool usesDeltaOfDelta(BSONType type) {
- return type == jstOID || type == Date || type == bsonTimestamp;
-}
-
-bool uses128bit(BSONType type) {
- return type == NumberDecimal || type == BinData || type == String || type == Code;
-}
-
-int64_t calcDelta(int64_t val, int64_t prev) {
- // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped
- // around instead of undefined behavior.
- return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev));
-}
-
-int128_t calcDelta(int128_t val, int128_t prev) {
- // Do the subtraction as unsigned and cast back to signed to get overflow defined to wrapped
- // around instead of undefined behavior.
- return static_cast<int128_t>(static_cast<uint128_t>(val) - static_cast<uint128_t>(prev));
-}
-
-int64_t expandDelta(int64_t prev, int64_t delta) {
- // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around
- // instead of undefined behavior.
- return static_cast<int64_t>(static_cast<uint64_t>(prev) + static_cast<uint64_t>(delta));
-}
-
-int128_t expandDelta(int128_t prev, int128_t delta) {
- // Do the addition as unsigned and cast back to signed to get overflow defined to wrapped around
- // instead of undefined behavior.
- return static_cast<int128_t>(static_cast<uint128_t>(prev) + static_cast<uint128_t>(delta));
-}
-} // namespace mongo::bsoncolumn
diff --git a/src/mongo/bson/util/bsoncolumn_util.h b/src/mongo/bson/util/bsoncolumn_util.h
index 3233abbb1d4..e7fe0d6aae4 100644
--- a/src/mongo/bson/util/bsoncolumn_util.h
+++ b/src/mongo/bson/util/bsoncolumn_util.h
@@ -52,4 +52,36 @@ int128_t calcDelta(int128_t val, int128_t prev);
int64_t expandDelta(int64_t prev, int64_t delta);
int128_t expandDelta(int128_t prev, int128_t delta);
+inline bool usesDeltaOfDelta(BSONType type) {
+ return type == jstOID || type == Date || type == bsonTimestamp;
+}
+
+inline bool uses128bit(BSONType type) {
+ return type == NumberDecimal || type == BinData || type == String || type == Code;
+}
+
+inline int64_t calcDelta(int64_t val, int64_t prev) {
+ // Do the subtraction as unsigned and cast back to signed so that overflow is defined to wrap
+ // around instead of being undefined behavior.
+ return static_cast<int64_t>(static_cast<uint64_t>(val) - static_cast<uint64_t>(prev));
+}
+
+inline int128_t calcDelta(int128_t val, int128_t prev) {
+ // Do the subtraction as unsigned and cast back to signed so that overflow is defined to wrap
+ // around instead of being undefined behavior.
+ return static_cast<int128_t>(static_cast<uint128_t>(val) - static_cast<uint128_t>(prev));
+}
+
+inline int64_t expandDelta(int64_t prev, int64_t delta) {
+ // Do the addition as unsigned and cast back to signed so that overflow is defined to wrap
+ // around instead of being undefined behavior.
+ return static_cast<int64_t>(static_cast<uint64_t>(prev) + static_cast<uint64_t>(delta));
+}
+
+inline int128_t expandDelta(int128_t prev, int128_t delta) {
+ // Do the addition as unsigned and cast back to signed so that overflow is defined to wrap
+ // around instead of being undefined behavior.
+ return static_cast<int128_t>(static_cast<uint128_t>(prev) + static_cast<uint128_t>(delta));
+}
+
} // namespace mongo::bsoncolumn