summaryrefslogtreecommitdiff
path: root/src/mongo/db/exec
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2021-09-16 09:29:47 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-09-16 15:05:58 +0000
commita7001db2c3a43c6632904a7a4c34f4d99a61d295 (patch)
treecb6f90e4aadc387597c057ca21e6db39effcd775 /src/mongo/db/exec
parentaa36306977363038be233195ac5645edb3d71dab (diff)
downloadmongo-a7001db2c3a43c6632904a7a4c34f4d99a61d295.tar.gz
SERVER-58578 Timeseries bucket compression
All fields under data will be compressed using BSON Binary Subtype 7. Measurements are sorted by time field. Buckets are compressed in a separate update operation when the BucketCatalog is closing the bucket and no further writes to it will be performed. Compression operation are transparant for the user, if it fails for any reason the bucket will be left uncompressed.
Diffstat (limited to 'src/mongo/db/exec')
-rw-r--r--src/mongo/db/exec/SConscript3
-rw-r--r--src/mongo/db/exec/bucket_unpacker_test.cpp86
2 files changed, 15 insertions, 74 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index 963d12c5372..88655a990db 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -151,7 +151,7 @@ env.CppUnitTest(
],
LIBDEPS=[
"$BUILD_DIR/mongo/base",
- "$BUILD_DIR/mongo/bson/util/bson_column",
+ "$BUILD_DIR/mongo/bson/util/bson_column",
"$BUILD_DIR/mongo/db/auth/authmocks",
"$BUILD_DIR/mongo/db/query/collation/collator_factory_mock",
"$BUILD_DIR/mongo/db/query/collation/collator_interface_mock",
@@ -159,6 +159,7 @@ env.CppUnitTest(
"$BUILD_DIR/mongo/db/query_exec",
"$BUILD_DIR/mongo/db/record_id_helpers",
"$BUILD_DIR/mongo/db/service_context_d_test_fixture",
+ "$BUILD_DIR/mongo/db/timeseries/bucket_compression",
"$BUILD_DIR/mongo/dbtests/mocklib",
"$BUILD_DIR/mongo/util/clock_source_mock",
"document_value/document_value",
diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp
index fcb285dc94b..85e2bfb45d5 100644
--- a/src/mongo/db/exec/bucket_unpacker_test.cpp
+++ b/src/mongo/db/exec/bucket_unpacker_test.cpp
@@ -30,9 +30,9 @@
#include "mongo/platform/basic.h"
#include "mongo/bson/json.h"
-#include "mongo/bson/util/bsoncolumnbuilder.h"
#include "mongo/db/exec/bucket_unpacker.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
+#include "mongo/db/timeseries/bucket_compression.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -102,65 +102,6 @@ public:
BSONObj obj = root.obj();
return {obj, "time"_sd};
}
-
- // Simple bucket compressor. Does not handle data fields out of order and does not sort fields
- // on time.
- // TODO (SERVER-58578): Replace with real bucket compressor
- BSONObj compress(BSONObj uncompressed, StringData timeField) {
- // Rewrite data fields as columns.
- BSONObjBuilder builder;
- for (auto& elem : uncompressed) {
- if (elem.fieldNameStringData() == "control") {
- BSONObjBuilder control(builder.subobjStart("control"));
-
- // Set right version, leave other control fields unchanged
- for (const auto& controlField : elem.Obj()) {
- if (controlField.fieldNameStringData() == "version") {
- control.append("version", 2);
- } else {
- control.append(controlField);
- }
- }
-
- continue;
- }
- if (elem.fieldNameStringData() != "data") {
- // Non-data fields can be unchanged.
- builder.append(elem);
- continue;
- }
-
- BSONObjBuilder dataBuilder = builder.subobjStart("data");
- std::list<BSONColumnBuilder> columnBuilders;
- size_t numTimeFields = 0;
- for (auto& column : elem.Obj()) {
- // Compress all data fields
- columnBuilders.emplace_back(column.fieldNameStringData());
- auto& columnBuilder = columnBuilders.back();
-
- for (auto& measurement : column.Obj()) {
- int index = std::atoi(measurement.fieldName());
- for (int i = columnBuilder.size(); i < index; ++i) {
- columnBuilder.skip();
- }
- columnBuilder.append(measurement);
- }
- if (columnBuilder.fieldName() == timeField) {
- numTimeFields = columnBuilder.size();
- }
- }
-
- for (auto& builder : columnBuilders) {
- for (size_t i = builder.size(); i < numTimeFields; ++i) {
- builder.skip();
- }
-
- BSONBinData binData = builder.finalize();
- dataBuilder.append(builder.fieldName(), binData);
- }
- }
- return builder.obj();
- }
};
TEST_F(BucketUnpackerTest, UnpackBasicIncludeAllMeasurementFields) {
@@ -212,7 +153,7 @@ TEST_F(BucketUnpackerTest, ExcludeASingleField) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) {
@@ -238,7 +179,7 @@ TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) {
@@ -266,7 +207,7 @@ TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther) {
@@ -292,7 +233,7 @@ TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther)
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) {
@@ -321,7 +262,7 @@ TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) {
@@ -346,7 +287,7 @@ TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, UnorderedRowKeysDoesntAffectMaterialization) {
@@ -404,7 +345,7 @@ TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadata) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadataUnorderedKeys) {
@@ -459,7 +400,7 @@ TEST_F(BucketUnpackerTest, ExcludedMetaFieldDoesntMaterializeMetadataWhenBucketH
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) {
@@ -478,7 +419,7 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) {
@@ -498,7 +439,7 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) {
@@ -525,7 +466,7 @@ TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) {
@@ -557,7 +498,7 @@ TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
+ test(*timeseries::compressBucket(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, EmptyDataRegionInBucketIsTolerated) {
@@ -576,7 +517,6 @@ TEST_F(BucketUnpackerTest, EmptyDataRegionInBucketIsTolerated) {
};
test(bucket);
- test(compress(bucket, "time"_sd));
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnEmptyBucket) {