diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2021-10-28 13:02:44 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-28 13:29:33 +0000 |
commit | 6e98a462d6e3f2222bc53bc948b60721e7879999 (patch) | |
tree | 503f8fa634ca07e7cde8dfb7b4a7914f94f17dd9 /src/mongo/bson | |
parent | a8408f0d115740981f9ed9ec10ca20aa58776902 (diff) | |
download | mongo-6e98a462d6e3f2222bc53bc948b60721e7879999.tar.gz |
SERVER-60676 Improve robustness when parsing invalid BSONColumn binary or compressed v2 buckets
* Changed BSONColumn::operator[] interface so we can differentiate skipped element from out of bounds
* Replaced BadValue error codes in BSONColumn for unique assertion ids
* Throw exception when interleaved mode exits unexpectedly when it shouldn't
* Test and handle buckets where the compressed columns contain different amount of fields
Diffstat (limited to 'src/mongo/bson')
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.cpp | 27 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.h | 12 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn_test.cpp | 33 |
3 files changed, 52 insertions, 20 deletions
diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp index 481ad6885bb..042e34c9184 100644 --- a/src/mongo/bson/util/bsoncolumn.cpp +++ b/src/mongo/bson/util/bsoncolumn.cpp @@ -352,7 +352,7 @@ void BSONColumn::Iterator::_incrementRegular() { // We don't have any more delta values in current block so we need to load next control byte. // Validate that we are not reading out of bounds - uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _control < _end); + uassert(6067602, "Invalid BSON Column encoding", _control < _end); // Decoders are exhausted, load next control byte. If we are at EOO then decoding is done. if (*_control == EOO) { @@ -408,9 +408,7 @@ void BSONColumn::Iterator::_incrementInterleaved() { [this, &stateIt, &stateEnd](const BSONElement& referenceField) { // Called for every scalar field in the reference interleaved BSONObj. We have as many // decoding states as scalars. - uassert(ErrorCodes::BadValue, - "Invalid BSON Column interleaved encoding", - stateIt != stateEnd); + uassert(6067603, "Invalid BSON Column interleaved encoding", stateIt != stateEnd); auto& state = *(stateIt++); // Remember the iterator position before writing anything. This is to detect that @@ -473,6 +471,9 @@ void BSONColumn::Iterator::_incrementInterleaved() { _interleaved = false; _states.clear(); _states.resize(1); + uassert(6067604, + "Invalid BSON Column interleaved encoding", + _index > 0 && _index - 1 < _column->_decompressed.size()); _states.front()._lastValue = _column->_decompressed[_index - 1]; _incrementRegular(); @@ -480,7 +481,7 @@ void BSONColumn::Iterator::_incrementInterleaved() { } // There should have been as many interleaved states as scalar fields. - uassert(ErrorCodes::BadValue, "Invalid BSON Column interleaved encoding", stateIt == stateEnd); + uassert(6067605, "Invalid BSON Column interleaved encoding", stateIt == stateEnd); // If this element has been decompressed in a previous iteration we don't need to store it in // our decompressed list. @@ -576,22 +577,20 @@ BSONColumn::Iterator::DecodingState::_loadControl(BSONColumn& column, // Simple-8b delta block, load its scale factor and validate for sanity _scaleIndex = kControlToScaleIndex[(control & 0xF0) >> 4]; - uassert(ErrorCodes::BadValue, - "Invalid control byte in BSON Column", - _scaleIndex != kInvalidScaleIndex); + uassert(6067606, "Invalid control byte in BSON Column", _scaleIndex != kInvalidScaleIndex); // If Double, scale last value according to this scale factor auto type = _lastValue.type(); if (type == NumberDouble) { auto encoded = Simple8bTypeUtil::encodeDouble(_lastValue._numberDouble(), _scaleIndex); - uassert(ErrorCodes::BadValue, "Invalid double encoding in BSON Column", encoded); + uassert(6067607, "Invalid double encoding in BSON Column", encoded); _lastEncodedValue64 = *encoded; } // Setup decoder for this range of Simple-8b blocks uint8_t blocks = _numSimple8bBlocks(control); int size = sizeof(uint64_t) * blocks; - uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", buffer + size + 1 < end); + uassert(6067608, "Invalid BSON Column encoding", buffer + size + 1 < end); // Instantiate decoder and load first value, every Simple-8b block should have at least one // value @@ -757,7 +756,7 @@ BSONColumn::BSONColumn(BSONElement bin) { "Invalid BSON type for column", bin.type() == BSONType::BinData && bin.binDataType() == BinDataType::Column); _binary = bin.binData(_size); - uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _size > 0); + uassert(6067609, "Invalid BSON Column encoding", _size > 0); _elementCount = ConstDataView(_binary).read<LittleEndian<uint32_t>>(); _maxDecodingStartPos._control = _binary; _name = bin.fieldNameStringData().toString(); @@ -775,7 +774,7 @@ BSONColumn::Iterator BSONColumn::end() { return it; } -BSONElement BSONColumn::operator[](size_t index) { +boost::optional<const BSONElement&> BSONColumn::operator[](size_t index) { // If index is already decompressed, we can just return the element if (index < _decompressed.size()) { return _decompressed[index]; @@ -783,7 +782,7 @@ BSONElement BSONColumn::operator[](size_t index) { // No more elements to be found if we are fully decompressed, return EOO if (_fullyDecompressed) - return BSONElement(); + return boost::none; // We can begin iterating from last known literal Iterator it{*this, _maxDecodingStartPos._control, _binary + _size}; @@ -796,7 +795,7 @@ BSONElement BSONColumn::operator[](size_t index) { // Return EOO if not found if (it == e) - return BSONElement(); + return boost::none; return *it; } diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h index 2c1f99a1055..716a8b20f26 100644 --- a/src/mongo/bson/util/bsoncolumn.h +++ b/src/mongo/bson/util/bsoncolumn.h @@ -217,7 +217,7 @@ public: * Iterators materialize compressed BSONElement as they iterate over the compressed binary. * It is NOT safe to do this from multiple threads concurrently. * - * Throws BadValue if invalid encoding is encountered. + * Throws if invalid encoding is encountered. */ Iterator begin(); Iterator end(); @@ -225,22 +225,26 @@ public: /** * Element lookup by index * - * Returns EOO if index represent skipped element or is out of bounds. + * Returns EOO if index represent skipped element. + * Returns boost::none if index is out of bounds. + * * O(1) time complexity if element has been previously accessed * O(N) time complexity otherwise * * Materializes compressed BSONElement as needed. It is NOT safe to do this from multiple * threads concurrently. * - * Throws BadValue if invalid encoding is encountered. + * Throws if invalid encoding is encountered. */ - BSONElement operator[](size_t index); + boost::optional<const BSONElement&> operator[](size_t index); /** * Number of elements stored in this BSONColumn * * O(1) time complexity if BSONColumn is fully decompressed (iteration reached end). * O(N) time complexity otherwise, will fully decompress BSONColumn. + * + * * Throws if invalid encoding is encountered. */ size_t size(); diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp index 69178921b9e..3fee7dff6cf 100644 --- a/src/mongo/bson/util/bsoncolumn_test.cpp +++ b/src/mongo/bson/util/bsoncolumn_test.cpp @@ -403,7 +403,7 @@ public: BSONColumn col(columnElement); for (size_t i = 0; i < expected.size(); ++i) { - ASSERT(expected[i].binaryEqualValues(col[i])); + ASSERT(expected[i].binaryEqualValues(*col[i])); } } @@ -412,7 +412,7 @@ public: BSONColumn col(columnElement); for (int i = (int)expected.size() - 1; i >= 0; --i) { - ASSERT(expected[i].binaryEqualValues(col[i])); + ASSERT(expected[i].binaryEqualValues(*col[i])); } } @@ -3210,6 +3210,35 @@ TEST_F(BSONColumnTest, NoLiteralStart) { } } +TEST_F(BSONColumnTest, InvalidInterleavedCount) { + // This test sets up an interleaved reference object with two fields but only provides one + // interleaved substream. + BufBuilder expected; + appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1)); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlocks64(expected, {kDeltaForBinaryEqualValues}, 1); + appendEOO(expected); + appendEOO(expected); + + BSONColumn col(createBSONColumn(expected.buf(), expected.len())); + ASSERT_THROWS(std::distance(col.begin(), col.end()), DBException); +} + +TEST_F(BSONColumnTest, InvalidInterleavedWhenAlreadyInterleaved) { + // This tests that we handle the interleaved start byte when already in interleaved mode. + + BufBuilder expected; + appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1)); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlocks64(expected, {kDeltaForBinaryEqualValues}, 1); + appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1)); + appendEOO(expected); + appendEOO(expected); + + BSONColumn col(createBSONColumn(expected.buf(), expected.len())); + ASSERT_THROWS(std::distance(col.begin(), col.end()), DBException); +} + TEST_F(BSONColumnTest, AppendMinKey) { BSONColumnBuilder cb("test"); ASSERT_THROWS_CODE(cb.append(createElementMinKey()), DBException, ErrorCodes::InvalidBSONType); |