diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.cpp | 27 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn.h | 12 | ||||
-rw-r--r-- | src/mongo/bson/util/bsoncolumn_test.cpp | 33 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker_test.cpp | 106 |
5 files changed, 168 insertions, 24 deletions
diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp index 481ad6885bb..042e34c9184 100644 --- a/src/mongo/bson/util/bsoncolumn.cpp +++ b/src/mongo/bson/util/bsoncolumn.cpp @@ -352,7 +352,7 @@ void BSONColumn::Iterator::_incrementRegular() { // We don't have any more delta values in current block so we need to load next control byte. // Validate that we are not reading out of bounds - uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _control < _end); + uassert(6067602, "Invalid BSON Column encoding", _control < _end); // Decoders are exhausted, load next control byte. If we are at EOO then decoding is done. if (*_control == EOO) { @@ -408,9 +408,7 @@ void BSONColumn::Iterator::_incrementInterleaved() { [this, &stateIt, &stateEnd](const BSONElement& referenceField) { // Called for every scalar field in the reference interleaved BSONObj. We have as many // decoding states as scalars. - uassert(ErrorCodes::BadValue, - "Invalid BSON Column interleaved encoding", - stateIt != stateEnd); + uassert(6067603, "Invalid BSON Column interleaved encoding", stateIt != stateEnd); auto& state = *(stateIt++); // Remember the iterator position before writing anything. This is to detect that @@ -473,6 +471,9 @@ void BSONColumn::Iterator::_incrementInterleaved() { _interleaved = false; _states.clear(); _states.resize(1); + uassert(6067604, + "Invalid BSON Column interleaved encoding", + _index > 0 && _index - 1 < _column->_decompressed.size()); _states.front()._lastValue = _column->_decompressed[_index - 1]; _incrementRegular(); @@ -480,7 +481,7 @@ void BSONColumn::Iterator::_incrementInterleaved() { } // There should have been as many interleaved states as scalar fields. - uassert(ErrorCodes::BadValue, "Invalid BSON Column interleaved encoding", stateIt == stateEnd); + uassert(6067605, "Invalid BSON Column interleaved encoding", stateIt == stateEnd); // If this element has been decompressed in a previous iteration we don't need to store it in // our decompressed list. @@ -576,22 +577,20 @@ BSONColumn::Iterator::DecodingState::_loadControl(BSONColumn& column, // Simple-8b delta block, load its scale factor and validate for sanity _scaleIndex = kControlToScaleIndex[(control & 0xF0) >> 4]; - uassert(ErrorCodes::BadValue, - "Invalid control byte in BSON Column", - _scaleIndex != kInvalidScaleIndex); + uassert(6067606, "Invalid control byte in BSON Column", _scaleIndex != kInvalidScaleIndex); // If Double, scale last value according to this scale factor auto type = _lastValue.type(); if (type == NumberDouble) { auto encoded = Simple8bTypeUtil::encodeDouble(_lastValue._numberDouble(), _scaleIndex); - uassert(ErrorCodes::BadValue, "Invalid double encoding in BSON Column", encoded); + uassert(6067607, "Invalid double encoding in BSON Column", encoded); _lastEncodedValue64 = *encoded; } // Setup decoder for this range of Simple-8b blocks uint8_t blocks = _numSimple8bBlocks(control); int size = sizeof(uint64_t) * blocks; - uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", buffer + size + 1 < end); + uassert(6067608, "Invalid BSON Column encoding", buffer + size + 1 < end); // Instantiate decoder and load first value, every Simple-8b block should have at least one // value @@ -757,7 +756,7 @@ BSONColumn::BSONColumn(BSONElement bin) { "Invalid BSON type for column", bin.type() == BSONType::BinData && bin.binDataType() == BinDataType::Column); _binary = bin.binData(_size); - uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _size > 0); + uassert(6067609, "Invalid BSON Column encoding", _size > 0); _elementCount = ConstDataView(_binary).read<LittleEndian<uint32_t>>(); _maxDecodingStartPos._control = _binary; _name = bin.fieldNameStringData().toString(); @@ -775,7 +774,7 @@ BSONColumn::Iterator BSONColumn::end() { return it; } -BSONElement BSONColumn::operator[](size_t index) { +boost::optional<const BSONElement&> BSONColumn::operator[](size_t index) { // If index is already decompressed, we can just return the element if (index < _decompressed.size()) { return _decompressed[index]; @@ -783,7 +782,7 @@ BSONElement BSONColumn::operator[](size_t index) { // No more elements to be found if we are fully decompressed, return EOO if (_fullyDecompressed) - return BSONElement(); + return boost::none; // We can begin iterating from last known literal Iterator it{*this, _maxDecodingStartPos._control, _binary + _size}; @@ -796,7 +795,7 @@ BSONElement BSONColumn::operator[](size_t index) { // Return EOO if not found if (it == e) - return BSONElement(); + return boost::none; return *it; } diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h index 2c1f99a1055..716a8b20f26 100644 --- a/src/mongo/bson/util/bsoncolumn.h +++ b/src/mongo/bson/util/bsoncolumn.h @@ -217,7 +217,7 @@ public: * Iterators materialize compressed BSONElement as they iterate over the compressed binary. * It is NOT safe to do this from multiple threads concurrently. * - * Throws BadValue if invalid encoding is encountered. + * Throws if invalid encoding is encountered. */ Iterator begin(); Iterator end(); @@ -225,22 +225,26 @@ public: /** * Element lookup by index * - * Returns EOO if index represent skipped element or is out of bounds. + * Returns EOO if index represent skipped element. + * Returns boost::none if index is out of bounds. + * * O(1) time complexity if element has been previously accessed * O(N) time complexity otherwise * * Materializes compressed BSONElement as needed. It is NOT safe to do this from multiple * threads concurrently. * - * Throws BadValue if invalid encoding is encountered. + * Throws if invalid encoding is encountered. */ - BSONElement operator[](size_t index); + boost::optional<const BSONElement&> operator[](size_t index); /** * Number of elements stored in this BSONColumn * * O(1) time complexity if BSONColumn is fully decompressed (iteration reached end). * O(N) time complexity otherwise, will fully decompress BSONColumn. + * + * * Throws if invalid encoding is encountered. */ size_t size(); diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp index 69178921b9e..3fee7dff6cf 100644 --- a/src/mongo/bson/util/bsoncolumn_test.cpp +++ b/src/mongo/bson/util/bsoncolumn_test.cpp @@ -403,7 +403,7 @@ public: BSONColumn col(columnElement); for (size_t i = 0; i < expected.size(); ++i) { - ASSERT(expected[i].binaryEqualValues(col[i])); + ASSERT(expected[i].binaryEqualValues(*col[i])); } } @@ -412,7 +412,7 @@ public: BSONColumn col(columnElement); for (int i = (int)expected.size() - 1; i >= 0; --i) { - ASSERT(expected[i].binaryEqualValues(col[i])); + ASSERT(expected[i].binaryEqualValues(*col[i])); } } @@ -3210,6 +3210,35 @@ TEST_F(BSONColumnTest, NoLiteralStart) { } } +TEST_F(BSONColumnTest, InvalidInterleavedCount) { + // This test sets up an interleaved reference object with two fields but only provides one + // interleaved substream. + BufBuilder expected; + appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1)); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlocks64(expected, {kDeltaForBinaryEqualValues}, 1); + appendEOO(expected); + appendEOO(expected); + + BSONColumn col(createBSONColumn(expected.buf(), expected.len())); + ASSERT_THROWS(std::distance(col.begin(), col.end()), DBException); +} + +TEST_F(BSONColumnTest, InvalidInterleavedWhenAlreadyInterleaved) { + // This tests that we handle the interleaved start byte when already in interleaved mode. + + BufBuilder expected; + appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1)); + appendSimple8bControl(expected, 0b1000, 0b0000); + appendSimple8bBlocks64(expected, {kDeltaForBinaryEqualValues}, 1); + appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1)); + appendEOO(expected); + appendEOO(expected); + + BSONColumn col(createBSONColumn(expected.buf(), expected.len())); + ASSERT_THROWS(std::distance(col.begin(), col.end()), DBException); +} + TEST_F(BSONColumnTest, AppendMinKey) { BSONColumnBuilder cb("test"); ASSERT_THROWS_CODE(cb.append(createElementMinKey()), DBException, ErrorCodes::InvalidBSONType); diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp index ba7d5554630..87aa9f18c20 100644 --- a/src/mongo/db/exec/bucket_unpacker.cpp +++ b/src/mongo/db/exec/bucket_unpacker.cpp @@ -277,6 +277,9 @@ bool BucketUnpackerV2::getNext(MutableDocument& measurement, } for (auto& fieldColumn : _fieldColumns) { + uassert(6067601, + "Bucket unexpectedly contained fewer values than count", + fieldColumn.it != fieldColumn.column.end()); const BSONElement& elem = *(fieldColumn.it++); // EOO represents missing field if (!elem.eoo()) { @@ -296,9 +299,10 @@ void BucketUnpackerV2::extractSingleMeasurement(MutableDocument& measurement, bool includeTimeField, bool includeMetaField) { if (includeTimeField) { - BSONElement val = _timeColumn.column[j]; - uassert(6067500, "Bucket unexpectedly contained fewer values than count", !val.eoo()); - measurement.addField(_timeColumn.column.name(), Value{val}); + auto val = _timeColumn.column[j]; + uassert( + 6067500, "Bucket unexpectedly contained fewer values than count", val && !val->eoo()); + measurement.addField(_timeColumn.column.name(), Value{*val}); } if (includeMetaField && !metaValue.isNull()) { @@ -307,7 +311,9 @@ void BucketUnpackerV2::extractSingleMeasurement(MutableDocument& measurement, if (includeTimeField) { for (auto& fieldColumn : _fieldColumns) { - measurement.addField(fieldColumn.column.name(), Value{fieldColumn.column[j]}); + auto val = fieldColumn.column[j]; + uassert(6067600, "Bucket unexpectedly contained fewer values than count", val); + measurement.addField(fieldColumn.column.name(), Value{*val}); } } } diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp index bd2bb8289b0..68c1ccdf847 100644 --- a/src/mongo/db/exec/bucket_unpacker_test.cpp +++ b/src/mongo/db/exec/bucket_unpacker_test.cpp @@ -30,6 +30,8 @@ #include "mongo/platform/basic.h" #include "mongo/bson/json.h" +#include "mongo/bson/util/bsoncolumn.h" +#include "mongo/bson/util/bsoncolumnbuilder.h" #include "mongo/db/exec/bucket_unpacker.h" #include "mongo/db/exec/document_value/document_value_test_util.h" #include "mongo/db/timeseries/bucket_compression.h" @@ -128,6 +130,43 @@ public: } return root.obj(); } + + // Modifies the 'data.<fieldName>' field for a v2 compressed bucket. Rebuilds the compressed + // column with the last element removed. + BSONObj modifyCompressedBucketRemoveLastInField(BSONObj compressedBucket, + StringData fieldName) { + BSONObjBuilder root; + for (auto&& elem : compressedBucket) { + if (elem.fieldNameStringData() != "data"_sd) { + root.append(elem); + continue; + } + + BSONObjBuilder dataBuilder(root.subobjStart("data"_sd)); + for (auto&& dataElem : elem.Obj()) { + if (dataElem.fieldNameStringData() != fieldName) { + dataBuilder.append(dataElem); + continue; + } + + BSONColumn col(dataElem); + int num = col.size() - 1; + ASSERT(num >= 0); + + BSONColumnBuilder builder(fieldName); + for (int i = 0; i < num; ++i) { + auto elem = *col[i]; + if (!elem.eoo()) + builder.append(elem); + else + builder.skip(); + } + + dataBuilder.append(fieldName, builder.finalize()); + } + } + return root.obj(); + } }; TEST_F(BucketUnpackerTest, UnpackBasicIncludeAllMeasurementFields) { @@ -856,5 +895,72 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountMissing) { ASSERT_FALSE(unpacker.hasNext()); } +TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchDataField) { + std::set<std::string> fields{ + "_id", kUserDefinedMetaName.toString(), kUserDefinedTimeName.toString(), "a", "b"}; + + auto bucket = fromjson( + "{control: {'version': 1}, meta: {'m1': 999, 'm2': 9999}, data: {_id: {'0':1, '1':2}, " + "time: {'0':1, '1':2}, " + "a:{'0':1, '1':2}, b:{'1':1}}}"); + + auto compressedBucket = timeseries::compressBucket(bucket, "time"_sd); + // Remove an element in the "a" field. + auto modifiedCompressedBucket = + modifyCompressedBucketRemoveLastInField(*compressedBucket, "a"_sd); + + auto unpacker = makeBucketUnpacker(std::move(fields), + BucketUnpacker::Behavior::kInclude, + std::move(modifiedCompressedBucket), + kUserDefinedMetaName.toString()); + + auto doc0 = Document{fromjson("{time: 1, myMeta: {m1: 999, m2: 9999}, _id: 1, a: 1}")}; + + ASSERT_EQ(unpacker.numberOfMeasurements(), 2); + ASSERT_DOCUMENT_EQ(unpacker.extractSingleMeasurement(0), doc0); + // We will now uassert when trying to get the tampered document + ASSERT_THROWS_CODE(unpacker.extractSingleMeasurement(1), AssertionException, 6067600); + + ASSERT_TRUE(unpacker.hasNext()); + assertGetNext(unpacker, doc0); + + ASSERT_TRUE(unpacker.hasNext()); + // We will now uassert when trying to get the tampered document + ASSERT_THROWS_CODE(unpacker.getNext(), AssertionException, 6067601); +} + +TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchTimeField) { + std::set<std::string> fields{ + "_id", kUserDefinedMetaName.toString(), kUserDefinedTimeName.toString(), "a", "b"}; + + auto bucket = fromjson( + "{control: {'version': 1}, meta: {'m1': 999, 'm2': 9999}, data: {_id: {'0':1, '1':2}, " + "time: {'0':1, '1':2}, " + "a:{'0':1, '1':2}, b:{'1':1}}}"); + + auto compressedBucket = timeseries::compressBucket(bucket, "time"_sd); + // Remove an element in the time field + auto modifiedCompressedBucket = + modifyCompressedBucketRemoveLastInField(*compressedBucket, "time"_sd); + + auto unpacker = makeBucketUnpacker(std::move(fields), + BucketUnpacker::Behavior::kInclude, + std::move(modifiedCompressedBucket), + kUserDefinedMetaName.toString()); + + auto doc0 = Document{fromjson("{time: 1, myMeta: {m1: 999, m2: 9999}, _id: 1, a: 1}")}; + + ASSERT_EQ(unpacker.numberOfMeasurements(), 2); + ASSERT_DOCUMENT_EQ(unpacker.extractSingleMeasurement(0), doc0); + // We will now uassert when trying to get the tampered document + ASSERT_THROWS_CODE(unpacker.extractSingleMeasurement(1), AssertionException, 6067500); + + ASSERT_TRUE(unpacker.hasNext()); + assertGetNext(unpacker, doc0); + + // When time is modified it will look like there is no more elements when iterating + ASSERT_FALSE(unpacker.hasNext()); +} + } // namespace } // namespace mongo |