diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2021-09-16 09:29:47 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-09-16 15:05:58 +0000 |
commit | a7001db2c3a43c6632904a7a4c34f4d99a61d295 (patch) | |
tree | cb6f90e4aadc387597c057ca21e6db39effcd775 /src/mongo/db/exec | |
parent | aa36306977363038be233195ac5645edb3d71dab (diff) | |
download | mongo-a7001db2c3a43c6632904a7a4c34f4d99a61d295.tar.gz |
SERVER-58578 Timeseries bucket compression
All fields under data will be compressed using BSON Binary Subtype 7.
Measurements are sorted by time field.
Buckets are compressed in a separate update operation when the
BucketCatalog is closing the bucket and no further writes to it will be
performed.
Compression operation are transparant for the user, if it fails for any
reason the bucket will be left uncompressed.
Diffstat (limited to 'src/mongo/db/exec')
-rw-r--r-- | src/mongo/db/exec/SConscript | 3 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker_test.cpp | 86 |
2 files changed, 15 insertions, 74 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index 963d12c5372..88655a990db 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -151,7 +151,7 @@ env.CppUnitTest( ], LIBDEPS=[ "$BUILD_DIR/mongo/base", - "$BUILD_DIR/mongo/bson/util/bson_column", + "$BUILD_DIR/mongo/bson/util/bson_column", "$BUILD_DIR/mongo/db/auth/authmocks", "$BUILD_DIR/mongo/db/query/collation/collator_factory_mock", "$BUILD_DIR/mongo/db/query/collation/collator_interface_mock", @@ -159,6 +159,7 @@ env.CppUnitTest( "$BUILD_DIR/mongo/db/query_exec", "$BUILD_DIR/mongo/db/record_id_helpers", "$BUILD_DIR/mongo/db/service_context_d_test_fixture", + "$BUILD_DIR/mongo/db/timeseries/bucket_compression", "$BUILD_DIR/mongo/dbtests/mocklib", "$BUILD_DIR/mongo/util/clock_source_mock", "document_value/document_value", diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp index fcb285dc94b..85e2bfb45d5 100644 --- a/src/mongo/db/exec/bucket_unpacker_test.cpp +++ b/src/mongo/db/exec/bucket_unpacker_test.cpp @@ -30,9 +30,9 @@ #include "mongo/platform/basic.h" #include "mongo/bson/json.h" -#include "mongo/bson/util/bsoncolumnbuilder.h" #include "mongo/db/exec/bucket_unpacker.h" #include "mongo/db/exec/document_value/document_value_test_util.h" +#include "mongo/db/timeseries/bucket_compression.h" #include "mongo/unittest/unittest.h" namespace mongo { @@ -102,65 +102,6 @@ public: BSONObj obj = root.obj(); return {obj, "time"_sd}; } - - // Simple bucket compressor. Does not handle data fields out of order and does not sort fields - // on time. - // TODO (SERVER-58578): Replace with real bucket compressor - BSONObj compress(BSONObj uncompressed, StringData timeField) { - // Rewrite data fields as columns. - BSONObjBuilder builder; - for (auto& elem : uncompressed) { - if (elem.fieldNameStringData() == "control") { - BSONObjBuilder control(builder.subobjStart("control")); - - // Set right version, leave other control fields unchanged - for (const auto& controlField : elem.Obj()) { - if (controlField.fieldNameStringData() == "version") { - control.append("version", 2); - } else { - control.append(controlField); - } - } - - continue; - } - if (elem.fieldNameStringData() != "data") { - // Non-data fields can be unchanged. - builder.append(elem); - continue; - } - - BSONObjBuilder dataBuilder = builder.subobjStart("data"); - std::list<BSONColumnBuilder> columnBuilders; - size_t numTimeFields = 0; - for (auto& column : elem.Obj()) { - // Compress all data fields - columnBuilders.emplace_back(column.fieldNameStringData()); - auto& columnBuilder = columnBuilders.back(); - - for (auto& measurement : column.Obj()) { - int index = std::atoi(measurement.fieldName()); - for (int i = columnBuilder.size(); i < index; ++i) { - columnBuilder.skip(); - } - columnBuilder.append(measurement); - } - if (columnBuilder.fieldName() == timeField) { - numTimeFields = columnBuilder.size(); - } - } - - for (auto& builder : columnBuilders) { - for (size_t i = builder.size(); i < numTimeFields; ++i) { - builder.skip(); - } - - BSONBinData binData = builder.finalize(); - dataBuilder.append(builder.fieldName(), binData); - } - } - return builder.obj(); - } }; TEST_F(BucketUnpackerTest, UnpackBasicIncludeAllMeasurementFields) { @@ -212,7 +153,7 @@ TEST_F(BucketUnpackerTest, ExcludeASingleField) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) { @@ -238,7 +179,7 @@ TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) { @@ -266,7 +207,7 @@ TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther) { @@ -292,7 +233,7 @@ TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther) }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) { @@ -321,7 +262,7 @@ TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) { @@ -346,7 +287,7 @@ TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, UnorderedRowKeysDoesntAffectMaterialization) { @@ -404,7 +345,7 @@ TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadata) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadataUnorderedKeys) { @@ -459,7 +400,7 @@ TEST_F(BucketUnpackerTest, ExcludedMetaFieldDoesntMaterializeMetadataWhenBucketH }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) { @@ -478,7 +419,7 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) { @@ -498,7 +439,7 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) { @@ -525,7 +466,7 @@ TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) { @@ -557,7 +498,7 @@ TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) { }; test(bucket); - test(compress(bucket, "time"_sd)); + test(*timeseries::compressBucket(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, EmptyDataRegionInBucketIsTolerated) { @@ -576,7 +517,6 @@ TEST_F(BucketUnpackerTest, EmptyDataRegionInBucketIsTolerated) { }; test(bucket); - test(compress(bucket, "time"_sd)); } TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnEmptyBucket) { |