summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2021-10-28 13:02:44 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-28 13:29:33 +0000
commit6e98a462d6e3f2222bc53bc948b60721e7879999 (patch)
tree503f8fa634ca07e7cde8dfb7b4a7914f94f17dd9 /src
parenta8408f0d115740981f9ed9ec10ca20aa58776902 (diff)
downloadmongo-6e98a462d6e3f2222bc53bc948b60721e7879999.tar.gz
SERVER-60676 Improve robustness when parsing invalid BSONColumn binary or compressed v2 buckets
* Changed BSONColumn::operator[] interface so we can differentiate skipped element from out of bounds * Replaced BadValue error codes in BSONColumn for unique assertion ids * Throw exception when interleaved mode exits unexpectedly when it shouldn't * Test and handle buckets where the compressed columns contain different amount of fields
Diffstat (limited to 'src')
-rw-r--r--src/mongo/bson/util/bsoncolumn.cpp27
-rw-r--r--src/mongo/bson/util/bsoncolumn.h12
-rw-r--r--src/mongo/bson/util/bsoncolumn_test.cpp33
-rw-r--r--src/mongo/db/exec/bucket_unpacker.cpp14
-rw-r--r--src/mongo/db/exec/bucket_unpacker_test.cpp106
5 files changed, 168 insertions, 24 deletions
diff --git a/src/mongo/bson/util/bsoncolumn.cpp b/src/mongo/bson/util/bsoncolumn.cpp
index 481ad6885bb..042e34c9184 100644
--- a/src/mongo/bson/util/bsoncolumn.cpp
+++ b/src/mongo/bson/util/bsoncolumn.cpp
@@ -352,7 +352,7 @@ void BSONColumn::Iterator::_incrementRegular() {
// We don't have any more delta values in current block so we need to load next control byte.
// Validate that we are not reading out of bounds
- uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _control < _end);
+ uassert(6067602, "Invalid BSON Column encoding", _control < _end);
// Decoders are exhausted, load next control byte. If we are at EOO then decoding is done.
if (*_control == EOO) {
@@ -408,9 +408,7 @@ void BSONColumn::Iterator::_incrementInterleaved() {
[this, &stateIt, &stateEnd](const BSONElement& referenceField) {
// Called for every scalar field in the reference interleaved BSONObj. We have as many
// decoding states as scalars.
- uassert(ErrorCodes::BadValue,
- "Invalid BSON Column interleaved encoding",
- stateIt != stateEnd);
+ uassert(6067603, "Invalid BSON Column interleaved encoding", stateIt != stateEnd);
auto& state = *(stateIt++);
// Remember the iterator position before writing anything. This is to detect that
@@ -473,6 +471,9 @@ void BSONColumn::Iterator::_incrementInterleaved() {
_interleaved = false;
_states.clear();
_states.resize(1);
+ uassert(6067604,
+ "Invalid BSON Column interleaved encoding",
+ _index > 0 && _index - 1 < _column->_decompressed.size());
_states.front()._lastValue = _column->_decompressed[_index - 1];
_incrementRegular();
@@ -480,7 +481,7 @@ void BSONColumn::Iterator::_incrementInterleaved() {
}
// There should have been as many interleaved states as scalar fields.
- uassert(ErrorCodes::BadValue, "Invalid BSON Column interleaved encoding", stateIt == stateEnd);
+ uassert(6067605, "Invalid BSON Column interleaved encoding", stateIt == stateEnd);
// If this element has been decompressed in a previous iteration we don't need to store it in
// our decompressed list.
@@ -576,22 +577,20 @@ BSONColumn::Iterator::DecodingState::_loadControl(BSONColumn& column,
// Simple-8b delta block, load its scale factor and validate for sanity
_scaleIndex = kControlToScaleIndex[(control & 0xF0) >> 4];
- uassert(ErrorCodes::BadValue,
- "Invalid control byte in BSON Column",
- _scaleIndex != kInvalidScaleIndex);
+ uassert(6067606, "Invalid control byte in BSON Column", _scaleIndex != kInvalidScaleIndex);
// If Double, scale last value according to this scale factor
auto type = _lastValue.type();
if (type == NumberDouble) {
auto encoded = Simple8bTypeUtil::encodeDouble(_lastValue._numberDouble(), _scaleIndex);
- uassert(ErrorCodes::BadValue, "Invalid double encoding in BSON Column", encoded);
+ uassert(6067607, "Invalid double encoding in BSON Column", encoded);
_lastEncodedValue64 = *encoded;
}
// Setup decoder for this range of Simple-8b blocks
uint8_t blocks = _numSimple8bBlocks(control);
int size = sizeof(uint64_t) * blocks;
- uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", buffer + size + 1 < end);
+ uassert(6067608, "Invalid BSON Column encoding", buffer + size + 1 < end);
// Instantiate decoder and load first value, every Simple-8b block should have at least one
// value
@@ -757,7 +756,7 @@ BSONColumn::BSONColumn(BSONElement bin) {
"Invalid BSON type for column",
bin.type() == BSONType::BinData && bin.binDataType() == BinDataType::Column);
_binary = bin.binData(_size);
- uassert(ErrorCodes::BadValue, "Invalid BSON Column encoding", _size > 0);
+ uassert(6067609, "Invalid BSON Column encoding", _size > 0);
_elementCount = ConstDataView(_binary).read<LittleEndian<uint32_t>>();
_maxDecodingStartPos._control = _binary;
_name = bin.fieldNameStringData().toString();
@@ -775,7 +774,7 @@ BSONColumn::Iterator BSONColumn::end() {
return it;
}
-BSONElement BSONColumn::operator[](size_t index) {
+boost::optional<const BSONElement&> BSONColumn::operator[](size_t index) {
// If index is already decompressed, we can just return the element
if (index < _decompressed.size()) {
return _decompressed[index];
@@ -783,7 +782,7 @@ BSONElement BSONColumn::operator[](size_t index) {
// No more elements to be found if we are fully decompressed, return EOO
if (_fullyDecompressed)
- return BSONElement();
+ return boost::none;
// We can begin iterating from last known literal
Iterator it{*this, _maxDecodingStartPos._control, _binary + _size};
@@ -796,7 +795,7 @@ BSONElement BSONColumn::operator[](size_t index) {
// Return EOO if not found
if (it == e)
- return BSONElement();
+ return boost::none;
return *it;
}
diff --git a/src/mongo/bson/util/bsoncolumn.h b/src/mongo/bson/util/bsoncolumn.h
index 2c1f99a1055..716a8b20f26 100644
--- a/src/mongo/bson/util/bsoncolumn.h
+++ b/src/mongo/bson/util/bsoncolumn.h
@@ -217,7 +217,7 @@ public:
* Iterators materialize compressed BSONElement as they iterate over the compressed binary.
* It is NOT safe to do this from multiple threads concurrently.
*
- * Throws BadValue if invalid encoding is encountered.
+ * Throws if invalid encoding is encountered.
*/
Iterator begin();
Iterator end();
@@ -225,22 +225,26 @@ public:
/**
* Element lookup by index
*
- * Returns EOO if index represent skipped element or is out of bounds.
+ * Returns EOO if index represent skipped element.
+ * Returns boost::none if index is out of bounds.
+ *
* O(1) time complexity if element has been previously accessed
* O(N) time complexity otherwise
*
* Materializes compressed BSONElement as needed. It is NOT safe to do this from multiple
* threads concurrently.
*
- * Throws BadValue if invalid encoding is encountered.
+ * Throws if invalid encoding is encountered.
*/
- BSONElement operator[](size_t index);
+ boost::optional<const BSONElement&> operator[](size_t index);
/**
* Number of elements stored in this BSONColumn
*
* O(1) time complexity if BSONColumn is fully decompressed (iteration reached end).
* O(N) time complexity otherwise, will fully decompress BSONColumn.
+ *
+ * * Throws if invalid encoding is encountered.
*/
size_t size();
diff --git a/src/mongo/bson/util/bsoncolumn_test.cpp b/src/mongo/bson/util/bsoncolumn_test.cpp
index 69178921b9e..3fee7dff6cf 100644
--- a/src/mongo/bson/util/bsoncolumn_test.cpp
+++ b/src/mongo/bson/util/bsoncolumn_test.cpp
@@ -403,7 +403,7 @@ public:
BSONColumn col(columnElement);
for (size_t i = 0; i < expected.size(); ++i) {
- ASSERT(expected[i].binaryEqualValues(col[i]));
+ ASSERT(expected[i].binaryEqualValues(*col[i]));
}
}
@@ -412,7 +412,7 @@ public:
BSONColumn col(columnElement);
for (int i = (int)expected.size() - 1; i >= 0; --i) {
- ASSERT(expected[i].binaryEqualValues(col[i]));
+ ASSERT(expected[i].binaryEqualValues(*col[i]));
}
}
@@ -3210,6 +3210,35 @@ TEST_F(BSONColumnTest, NoLiteralStart) {
}
}
+TEST_F(BSONColumnTest, InvalidInterleavedCount) {
+ // This test sets up an interleaved reference object with two fields but only provides one
+ // interleaved substream.
+ BufBuilder expected;
+ appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1));
+ appendSimple8bControl(expected, 0b1000, 0b0000);
+ appendSimple8bBlocks64(expected, {kDeltaForBinaryEqualValues}, 1);
+ appendEOO(expected);
+ appendEOO(expected);
+
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ ASSERT_THROWS(std::distance(col.begin(), col.end()), DBException);
+}
+
+TEST_F(BSONColumnTest, InvalidInterleavedWhenAlreadyInterleaved) {
+ // This tests that we handle the interleaved start byte when already in interleaved mode.
+
+ BufBuilder expected;
+ appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1));
+ appendSimple8bControl(expected, 0b1000, 0b0000);
+ appendSimple8bBlocks64(expected, {kDeltaForBinaryEqualValues}, 1);
+ appendInterleavedStart(expected, BSON("a" << 1 << "b" << 1));
+ appendEOO(expected);
+ appendEOO(expected);
+
+ BSONColumn col(createBSONColumn(expected.buf(), expected.len()));
+ ASSERT_THROWS(std::distance(col.begin(), col.end()), DBException);
+}
+
TEST_F(BSONColumnTest, AppendMinKey) {
BSONColumnBuilder cb("test");
ASSERT_THROWS_CODE(cb.append(createElementMinKey()), DBException, ErrorCodes::InvalidBSONType);
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index ba7d5554630..87aa9f18c20 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -277,6 +277,9 @@ bool BucketUnpackerV2::getNext(MutableDocument& measurement,
}
for (auto& fieldColumn : _fieldColumns) {
+ uassert(6067601,
+ "Bucket unexpectedly contained fewer values than count",
+ fieldColumn.it != fieldColumn.column.end());
const BSONElement& elem = *(fieldColumn.it++);
// EOO represents missing field
if (!elem.eoo()) {
@@ -296,9 +299,10 @@ void BucketUnpackerV2::extractSingleMeasurement(MutableDocument& measurement,
bool includeTimeField,
bool includeMetaField) {
if (includeTimeField) {
- BSONElement val = _timeColumn.column[j];
- uassert(6067500, "Bucket unexpectedly contained fewer values than count", !val.eoo());
- measurement.addField(_timeColumn.column.name(), Value{val});
+ auto val = _timeColumn.column[j];
+ uassert(
+ 6067500, "Bucket unexpectedly contained fewer values than count", val && !val->eoo());
+ measurement.addField(_timeColumn.column.name(), Value{*val});
}
if (includeMetaField && !metaValue.isNull()) {
@@ -307,7 +311,9 @@ void BucketUnpackerV2::extractSingleMeasurement(MutableDocument& measurement,
if (includeTimeField) {
for (auto& fieldColumn : _fieldColumns) {
- measurement.addField(fieldColumn.column.name(), Value{fieldColumn.column[j]});
+ auto val = fieldColumn.column[j];
+ uassert(6067600, "Bucket unexpectedly contained fewer values than count", val);
+ measurement.addField(fieldColumn.column.name(), Value{*val});
}
}
}
diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp
index bd2bb8289b0..68c1ccdf847 100644
--- a/src/mongo/db/exec/bucket_unpacker_test.cpp
+++ b/src/mongo/db/exec/bucket_unpacker_test.cpp
@@ -30,6 +30,8 @@
#include "mongo/platform/basic.h"
#include "mongo/bson/json.h"
+#include "mongo/bson/util/bsoncolumn.h"
+#include "mongo/bson/util/bsoncolumnbuilder.h"
#include "mongo/db/exec/bucket_unpacker.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
#include "mongo/db/timeseries/bucket_compression.h"
@@ -128,6 +130,43 @@ public:
}
return root.obj();
}
+
+ // Modifies the 'data.<fieldName>' field for a v2 compressed bucket. Rebuilds the compressed
+ // column with the last element removed.
+ BSONObj modifyCompressedBucketRemoveLastInField(BSONObj compressedBucket,
+ StringData fieldName) {
+ BSONObjBuilder root;
+ for (auto&& elem : compressedBucket) {
+ if (elem.fieldNameStringData() != "data"_sd) {
+ root.append(elem);
+ continue;
+ }
+
+ BSONObjBuilder dataBuilder(root.subobjStart("data"_sd));
+ for (auto&& dataElem : elem.Obj()) {
+ if (dataElem.fieldNameStringData() != fieldName) {
+ dataBuilder.append(dataElem);
+ continue;
+ }
+
+ BSONColumn col(dataElem);
+ int num = col.size() - 1;
+ ASSERT(num >= 0);
+
+ BSONColumnBuilder builder(fieldName);
+ for (int i = 0; i < num; ++i) {
+ auto elem = *col[i];
+ if (!elem.eoo())
+ builder.append(elem);
+ else
+ builder.skip();
+ }
+
+ dataBuilder.append(fieldName, builder.finalize());
+ }
+ }
+ return root.obj();
+ }
};
TEST_F(BucketUnpackerTest, UnpackBasicIncludeAllMeasurementFields) {
@@ -856,5 +895,72 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountMissing) {
ASSERT_FALSE(unpacker.hasNext());
}
+TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchDataField) {
+ std::set<std::string> fields{
+ "_id", kUserDefinedMetaName.toString(), kUserDefinedTimeName.toString(), "a", "b"};
+
+ auto bucket = fromjson(
+ "{control: {'version': 1}, meta: {'m1': 999, 'm2': 9999}, data: {_id: {'0':1, '1':2}, "
+ "time: {'0':1, '1':2}, "
+ "a:{'0':1, '1':2}, b:{'1':1}}}");
+
+ auto compressedBucket = timeseries::compressBucket(bucket, "time"_sd);
+ // Remove an element in the "a" field.
+ auto modifiedCompressedBucket =
+ modifyCompressedBucketRemoveLastInField(*compressedBucket, "a"_sd);
+
+ auto unpacker = makeBucketUnpacker(std::move(fields),
+ BucketUnpacker::Behavior::kInclude,
+ std::move(modifiedCompressedBucket),
+ kUserDefinedMetaName.toString());
+
+ auto doc0 = Document{fromjson("{time: 1, myMeta: {m1: 999, m2: 9999}, _id: 1, a: 1}")};
+
+ ASSERT_EQ(unpacker.numberOfMeasurements(), 2);
+ ASSERT_DOCUMENT_EQ(unpacker.extractSingleMeasurement(0), doc0);
+ // We will now uassert when trying to get the tampered document
+ ASSERT_THROWS_CODE(unpacker.extractSingleMeasurement(1), AssertionException, 6067600);
+
+ ASSERT_TRUE(unpacker.hasNext());
+ assertGetNext(unpacker, doc0);
+
+ ASSERT_TRUE(unpacker.hasNext());
+ // We will now uassert when trying to get the tampered document
+ ASSERT_THROWS_CODE(unpacker.getNext(), AssertionException, 6067601);
+}
+
+TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchTimeField) {
+ std::set<std::string> fields{
+ "_id", kUserDefinedMetaName.toString(), kUserDefinedTimeName.toString(), "a", "b"};
+
+ auto bucket = fromjson(
+ "{control: {'version': 1}, meta: {'m1': 999, 'm2': 9999}, data: {_id: {'0':1, '1':2}, "
+ "time: {'0':1, '1':2}, "
+ "a:{'0':1, '1':2}, b:{'1':1}}}");
+
+ auto compressedBucket = timeseries::compressBucket(bucket, "time"_sd);
+ // Remove an element in the time field
+ auto modifiedCompressedBucket =
+ modifyCompressedBucketRemoveLastInField(*compressedBucket, "time"_sd);
+
+ auto unpacker = makeBucketUnpacker(std::move(fields),
+ BucketUnpacker::Behavior::kInclude,
+ std::move(modifiedCompressedBucket),
+ kUserDefinedMetaName.toString());
+
+ auto doc0 = Document{fromjson("{time: 1, myMeta: {m1: 999, m2: 9999}, _id: 1, a: 1}")};
+
+ ASSERT_EQ(unpacker.numberOfMeasurements(), 2);
+ ASSERT_DOCUMENT_EQ(unpacker.extractSingleMeasurement(0), doc0);
+ // We will now uassert when trying to get the tampered document
+ ASSERT_THROWS_CODE(unpacker.extractSingleMeasurement(1), AssertionException, 6067500);
+
+ ASSERT_TRUE(unpacker.hasNext());
+ assertGetNext(unpacker, doc0);
+
+ // When time is modified it will look like there is no more elements when iterating
+ ASSERT_FALSE(unpacker.hasNext());
+}
+
} // namespace
} // namespace mongo