diff options
author | Matt Boros <matt.boros@mongodb.com> | 2022-02-10 17:05:24 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-10 17:34:57 +0000 |
commit | d89391ea2b1f1dfc6da9a012c4371d65c47717b0 (patch) | |
tree | 9808f2d85fcc9583315da4d75457dc63d691127b | |
parent | 6471c0558b198baeff86e7c87ca856ecc43957c7 (diff) | |
download | mongo-d89391ea2b1f1dfc6da9a012c4371d65c47717b0.tar.gz |
SERVER-63010 Ensure that unpacking measurements doesn't overwrite pushedown addFields that are computed on meta data
-rw-r--r-- | jstests/core/timeseries/timeseries_project.js | 84 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker.cpp | 171 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker.h | 104 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker_test.cpp | 157 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp | 32 |
6 files changed, 422 insertions, 152 deletions
diff --git a/jstests/core/timeseries/timeseries_project.js b/jstests/core/timeseries/timeseries_project.js new file mode 100644 index 00000000000..b47df95524c --- /dev/null +++ b/jstests/core/timeseries/timeseries_project.js @@ -0,0 +1,84 @@ +/** + * Test the behavior of $project on time-series collections. + * + * @tags: [ + * does_not_support_stepdowns, + * does_not_support_transactions, + * requires_fcv_52, + * ] + */ +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/timeseries.js"); + +const coll = db.timeseries_project; +coll.drop(); +assert.commandWorked( + db.createCollection(coll.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}})); + +const docDate = ISODate(); + +assert.commandWorked(coll.insert({_id: 0, time: docDate, meta: 4, a: {b: 1}, b: 3, c: [{}, {}]})); + +// Check that measurements being unpacked don't overwrite metadata projection pushdown fields. +let result = + coll.aggregate([{ + $project: + {a: 1, b: "$meta", c: {$multiply: [2, "$meta"]}, d: {$multiply: [2, "$meta"]}} + }]) + .toArray(); +assert.docEq(result, [{_id: 0, a: {b: 1}, b: 4, c: 8, d: 8}]); + +// Same as above, but keep the rest of the document. +result = coll.aggregate([{$set: {b: "$meta"}}]).toArray(); +assert.docEq(result, [{_id: 0, time: docDate, meta: 4, a: {b: 1}, b: 4, c: [{}, {}]}]); + +// Check that nested meta project is not overwritten by the unpacked value. +result = coll.aggregate([{$project: {"a.b": "$meta"}}]).toArray(); +assert.docEq(result, [{_id: 0, a: {b: 4}}]); + +// Check that meta project pushed down writes to each value in an array. +result = coll.aggregate([{$project: {"c.a": "$meta"}}]).toArray(); +assert.docEq(result, [{_id: 0, c: [{a: 4}, {a: 4}]}]); + +// Replace meta field with unpacked field. +result = coll.aggregate([{$project: {"meta": "$b"}}]).toArray(); +assert.docEq(result, [{_id: 0, meta: 3}]); + +// Replace meta field with time field. +result = coll.aggregate([{$project: {"meta": "$time"}}]).toArray(); +assert.docEq(result, [{_id: 0, meta: docDate}]); + +// Replace meta field with constant. +result = coll.aggregate([{$project: {"meta": {$const: 5}}}]).toArray(); +assert.docEq(result, [{_id: 0, meta: 5}]); + +// Make sure the time field can be overwritten by the meta field correctly. +result = coll.aggregate([{$set: {time: "$meta"}}]).toArray(); +assert.docEq(result, [{_id: 0, time: 4, meta: 4, a: {b: 1}, b: 3, c: [{}, {}]}]); + +// Check that the time field can be overwritten by the an unpacked field correctly. +result = coll.aggregate([{$set: {time: "$b"}}]).toArray(); +assert.docEq(result, [{_id: 0, time: 3, meta: 4, a: {b: 1}, b: 3, c: [{}, {}]}]); + +// Make sure the time field can be overwritten by a constant correctly. +result = coll.aggregate([{$project: {time: {$const: 5}}}]).toArray(); +assert.docEq(result, [{_id: 0, time: 5}]); + +// Test that a pushed down meta field projection can correctly be excluded. +result = coll.aggregate([{$set: {b: "$meta"}}, {$unset: "a"}]).toArray(); +assert.docEq(result, [{_id: 0, time: docDate, meta: 4, b: 4, c: [{}, {}]}]); + +// Exclude behavior for time field. +result = coll.aggregate([{$set: {b: "$time"}}, {$unset: "a"}]).toArray(); +assert.docEq(result, [{_id: 0, time: docDate, meta: 4, b: docDate, c: [{}, {}]}]); + +// Exclude behavior for consecutive projects. +result = coll.aggregate([{$set: {b: "$meta"}}, {$unset: "meta"}]).toArray(); +assert.docEq(result, [{_id: 0, time: docDate, a: {b: 1}, b: 4, c: [{}, {}]}]); + +// Test that an exclude does not overwrite meta field pushdown. +result = coll.aggregate([{$unset: "b"}, {$set: {b: "$meta"}}]).toArray(); +assert.docEq(result, [{_id: 0, time: docDate, meta: 4, a: {b: 1}, b: 4, c: [{}, {}]}]); +})();
\ No newline at end of file diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp index 782d2ff5235..e47ec3b2648 100644 --- a/src/mongo/db/exec/bucket_unpacker.cpp +++ b/src/mongo/db/exec/bucket_unpacker.cpp @@ -48,6 +48,7 @@ public: virtual void extractSingleMeasurement(MutableDocument& measurement, int j, const BucketSpec& spec, + const std::set<std::string>& unpackFieldsToIncludeExclude, BucketUnpacker::Behavior behavior, const BSONObj& bucket, const Value& metaValue, @@ -99,6 +100,7 @@ public: void extractSingleMeasurement(MutableDocument& measurement, int j, const BucketSpec& spec, + const std::set<std::string>& unpackFieldsToIncludeExclude, BucketUnpacker::Behavior behavior, const BSONObj& bucket, const Value& metaValue, @@ -181,14 +183,16 @@ bool BucketUnpackerV1::getNext(MutableDocument& measurement, return _timeFieldIter.more(); } -void BucketUnpackerV1::extractSingleMeasurement(MutableDocument& measurement, - int j, - const BucketSpec& spec, - BucketUnpacker::Behavior behavior, - const BSONObj& bucket, - const Value& metaValue, - bool includeTimeField, - bool includeMetaField) { +void BucketUnpackerV1::extractSingleMeasurement( + MutableDocument& measurement, + int j, + const BucketSpec& spec, + const std::set<std::string>& unpackFieldsToIncludeExclude, + BucketUnpacker::Behavior behavior, + const BSONObj& bucket, + const Value& metaValue, + bool includeTimeField, + bool includeMetaField) { auto rowKey = std::to_string(j); auto targetIdx = StringData{rowKey}; auto&& dataRegion = bucket.getField(timeseries::kBucketDataFieldName).Obj(); @@ -199,7 +203,7 @@ void BucketUnpackerV1::extractSingleMeasurement(MutableDocument& measurement, for (auto&& dataElem : dataRegion) { auto colName = dataElem.fieldNameStringData(); - if (!determineIncludeField(colName, behavior, spec)) { + if (!determineIncludeField(colName, behavior, unpackFieldsToIncludeExclude)) { continue; } auto value = dataElem[targetIdx]; @@ -230,6 +234,7 @@ public: void extractSingleMeasurement(MutableDocument& measurement, int j, const BucketSpec& spec, + const std::set<std::string>& unpackFieldsToIncludeExclude, BucketUnpacker::Behavior behavior, const BSONObj& bucket, const Value& metaValue, @@ -306,14 +311,16 @@ bool BucketUnpackerV2::getNext(MutableDocument& measurement, return _timeColumn.it != _timeColumn.end; } -void BucketUnpackerV2::extractSingleMeasurement(MutableDocument& measurement, - int j, - const BucketSpec& spec, - BucketUnpacker::Behavior behavior, - const BSONObj& bucket, - const Value& metaValue, - bool includeTimeField, - bool includeMetaField) { +void BucketUnpackerV2::extractSingleMeasurement( + MutableDocument& measurement, + int j, + const BucketSpec& spec, + const std::set<std::string>& unpackFieldsToIncludeExclude, + BucketUnpacker::Behavior behavior, + const BSONObj& bucket, + const Value& metaValue, + bool includeTimeField, + bool includeMetaField) { if (includeTimeField) { auto val = _timeColumn.column[j]; uassert( @@ -340,32 +347,14 @@ std::size_t BucketUnpackerV2::numberOfFields() { return kFixedFieldNumber + _fieldColumns.size(); } -/** - * Erase computed meta projection fields if they are present in the exclusion field set. - */ -void eraseExcludedComputedMetaProjFields(BucketUnpacker::Behavior unpackerBehavior, - BucketSpec* bucketSpec) { - if (unpackerBehavior == BucketUnpacker::Behavior::kExclude && - bucketSpec->computedMetaProjFields.size() > 0) { - for (auto it = bucketSpec->computedMetaProjFields.begin(); - it != bucketSpec->computedMetaProjFields.end();) { - if (bucketSpec->fieldSet.find(*it) != bucketSpec->fieldSet.end()) { - it = bucketSpec->computedMetaProjFields.erase(it); - } else { - it++; - } - } - } -} - } // namespace BucketSpec::BucketSpec(const std::string& timeField, const boost::optional<std::string>& metaField, const std::set<std::string>& fields, - const std::vector<std::string>& computedProjections) - : fieldSet(fields), - computedMetaProjFields(computedProjections), + const std::set<std::string>& computedProjections) + : _fieldSet(fields), + _computedMetaProjFields(computedProjections), _timeField(timeField), _timeFieldHashed(FieldNameHasher().hashedFieldName(_timeField)), _metaField(metaField) { @@ -375,8 +364,8 @@ BucketSpec::BucketSpec(const std::string& timeField, } BucketSpec::BucketSpec(const BucketSpec& other) - : fieldSet(other.fieldSet), - computedMetaProjFields(other.computedMetaProjFields), + : _fieldSet(other._fieldSet), + _computedMetaProjFields(other._computedMetaProjFields), _timeField(other._timeField), _timeFieldHashed(HashedFieldName{_timeField, other._timeFieldHashed->hash()}), _metaField(other._metaField) { @@ -386,8 +375,8 @@ BucketSpec::BucketSpec(const BucketSpec& other) } BucketSpec::BucketSpec(BucketSpec&& other) - : fieldSet(std::move(other.fieldSet)), - computedMetaProjFields(std::move(other.computedMetaProjFields)), + : _fieldSet(std::move(other._fieldSet)), + _computedMetaProjFields(std::move(other._computedMetaProjFields)), _timeField(std::move(other._timeField)), _timeFieldHashed(HashedFieldName{_timeField, other._timeFieldHashed->hash()}), _metaField(std::move(other._metaField)) { @@ -398,8 +387,8 @@ BucketSpec::BucketSpec(BucketSpec&& other) BucketSpec& BucketSpec::operator=(const BucketSpec& other) { if (&other != this) { - fieldSet = other.fieldSet; - computedMetaProjFields = other.computedMetaProjFields; + _fieldSet = other._fieldSet; + _computedMetaProjFields = other._computedMetaProjFields; _timeField = other._timeField; _timeFieldHashed = HashedFieldName{_timeField, other._timeFieldHashed->hash()}; _metaField = other._metaField; @@ -453,14 +442,22 @@ BucketUnpacker::BucketUnpacker(BucketSpec spec, Behavior unpackerBehavior) { void BucketUnpacker::addComputedMetaProjFields(const std::vector<StringData>& computedFieldNames) { for (auto&& field : computedFieldNames) { - _spec.computedMetaProjFields.emplace_back(field.toString()); + _spec.addComputedMetaProjFields(field); // If we're already specifically including fields, we need to add the computed fields to - // the included field set to ensure they are unpacked. + // the included field set to indicate they're in the output doc. if (_unpackerBehavior == BucketUnpacker::Behavior::kInclude) { - _spec.fieldSet.emplace(field); + _spec.addIncludeExcludeField(field); + } else { + // Since exclude is applied after addComputedMetaProjFields, we must erase the new field + // from the include/exclude fields so this doesn't get removed. + _spec.removeIncludeExcludeField(field.toString()); } } + + // Recalculate _includeTimeField, since both computedMetaProjFields and fieldSet may have + // changed. + determineIncludeTimeField(); } Document BucketUnpacker::getNext() { @@ -475,7 +472,7 @@ Document BucketUnpacker::getNext() { measurement, _spec, _metaValue, _includeTimeField, _includeMetaField); // Add computed meta projections. - for (auto&& name : _spec.computedMetaProjFields) { + for (auto&& name : _spec.computedMetaProjFields()) { measurement.addField(name, Value{_computedMetaProjections[name]}); } @@ -492,6 +489,7 @@ Document BucketUnpacker::extractSingleMeasurement(int j) { _unpackingImpl->extractSingleMeasurement(measurement, j, _spec, + fieldsToIncludeExcludeDuringUnpack(), _unpackerBehavior, _bucket, _metaValue, @@ -499,7 +497,7 @@ Document BucketUnpacker::extractSingleMeasurement(int j) { _includeMetaField); // Add computed meta projections. - for (auto&& name : _spec.computedMetaProjFields) { + for (auto&& name : _spec.computedMetaProjFields()) { measurement.addField(name, Value{_computedMetaProjections[name]}); } @@ -578,16 +576,15 @@ void BucketUnpacker::reset(BSONObj&& bucket) { // Includes a field when '_unpackerBehavior' is 'kInclude' and it's found in 'fieldSet' or // _unpackerBehavior is 'kExclude' and it's not found in 'fieldSet'. - if (determineIncludeField(colName, _unpackerBehavior, _spec)) { + if (determineIncludeField( + colName, _unpackerBehavior, fieldsToIncludeExcludeDuringUnpack())) { _unpackingImpl->addField(elem); } } // Update computed meta projections with values from this bucket. - if (!_spec.computedMetaProjFields.empty()) { - for (auto&& name : _spec.computedMetaProjFields) { - _computedMetaProjections[name] = _bucket[name]; - } + for (auto&& name : _spec.computedMetaProjFields()) { + _computedMetaProjections[name] = _bucket[name]; } // Save the measurement count for the bucket. @@ -630,13 +627,71 @@ int BucketUnpacker::computeMeasurementCount(const BSONObj& bucket, StringData ti } } +void BucketUnpacker::determineIncludeTimeField() { + const bool isInclude = _unpackerBehavior == BucketUnpacker::Behavior::kInclude; + const bool fieldSetContainsTime = + _spec.fieldSet().find(_spec.timeField()) != _spec.fieldSet().end(); + + const auto& metaProjFields = _spec.computedMetaProjFields(); + const bool metaProjContains = metaProjFields.find(_spec.timeField()) != metaProjFields.cend(); + + // If computedMetaProjFields contains the time field, we exclude it from unpacking no matter + // what, since it will be overwritten anyway. + _includeTimeField = isInclude == fieldSetContainsTime && !metaProjContains; +} + +void BucketUnpacker::eraseMetaFromFieldSetAndDetermineIncludeMeta() { + if (!_spec.metaField() || + _spec.computedMetaProjFields().find(*_spec.metaField()) != + _spec.computedMetaProjFields().cend()) { + _includeMetaField = false; + } else if (auto itr = _spec.fieldSet().find(*_spec.metaField()); + itr != _spec.fieldSet().end()) { + _spec.removeIncludeExcludeField(*_spec.metaField()); + _includeMetaField = _unpackerBehavior == BucketUnpacker::Behavior::kInclude; + } else { + _includeMetaField = _unpackerBehavior == BucketUnpacker::Behavior::kExclude; + } +} + +void BucketUnpacker::eraseExcludedComputedMetaProjFields() { + if (_unpackerBehavior == BucketUnpacker::Behavior::kExclude) { + for (const auto& field : _spec.fieldSet()) { + _spec.eraseFromComputedMetaProjFields(field); + } + } +} + void BucketUnpacker::setBucketSpecAndBehavior(BucketSpec&& bucketSpec, Behavior behavior) { - _includeMetaField = eraseMetaFromFieldSetAndDetermineIncludeMeta(behavior, &bucketSpec); - _includeTimeField = determineIncludeTimeField(behavior, &bucketSpec); _unpackerBehavior = behavior; - - eraseExcludedComputedMetaProjFields(behavior, &bucketSpec); _spec = std::move(bucketSpec); + + eraseMetaFromFieldSetAndDetermineIncludeMeta(); + determineIncludeTimeField(); + eraseExcludedComputedMetaProjFields(); +} + +const std::set<std::string>& BucketUnpacker::fieldsToIncludeExcludeDuringUnpack() { + if (_unpackFieldsToIncludeExclude) { + return *_unpackFieldsToIncludeExclude; + } + + _unpackFieldsToIncludeExclude = std::set<std::string>(); + const auto& metaProjFields = _spec.computedMetaProjFields(); + if (_unpackerBehavior == BucketUnpacker::Behavior::kInclude) { + // For include, we unpack fieldSet - metaProjFields. + for (auto&& field : _spec.fieldSet()) { + if (metaProjFields.find(field) == metaProjFields.cend()) { + _unpackFieldsToIncludeExclude->insert(field); + } + } + } else { + // For exclude, we unpack everything but fieldSet + metaProjFields. + _unpackFieldsToIncludeExclude->insert(_spec.fieldSet().begin(), _spec.fieldSet().end()); + _unpackFieldsToIncludeExclude->insert(metaProjFields.begin(), metaProjFields.end()); + } + + return *_unpackFieldsToIncludeExclude; } const std::set<StringData> BucketUnpacker::reservedBucketFieldNames = { diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h index 2f5fd70b785..c28b9c340c0 100644 --- a/src/mongo/db/exec/bucket_unpacker.h +++ b/src/mongo/db/exec/bucket_unpacker.h @@ -38,7 +38,16 @@ namespace mongo { /** - * Carries parameters for unpacking a bucket. + * Carries parameters for unpacking a bucket. The order of operations applied to determine which + * fields are in the final document are: + * If we are in include mode: + * 1. Unpack all fields from the bucket. + * 2. Remove any fields not in _fieldSet, since we are in include mode. + * 3. Add fields from _computedMetaProjFields. + * If we are in exclude mode: + * 1. Unpack all fields from the bucket. + * 2. Add fields from _computedMetaProjFields. + * 3. Remove any fields in _fieldSet, since we are in exclude mode. */ class BucketSpec { public: @@ -46,7 +55,7 @@ public: BucketSpec(const std::string& timeField, const boost::optional<std::string>& metaField, const std::set<std::string>& fields = {}, - const std::vector<std::string>& computedProjections = {}); + const std::set<std::string>& computedProjections = {}); BucketSpec(const BucketSpec&); BucketSpec(BucketSpec&&); @@ -64,14 +73,42 @@ public: const boost::optional<std::string>& metaField() const; boost::optional<HashedFieldName> metaFieldHashed() const; + void setFieldSet(std::set<std::string>& fieldSet) { + _fieldSet = std::move(fieldSet); + } + + void addIncludeExcludeField(const StringData& field) { + _fieldSet.emplace(field); + } + + void removeIncludeExcludeField(const std::string& field) { + _fieldSet.erase(field); + } + + const std::set<std::string>& fieldSet() const { + return _fieldSet; + } + + void addComputedMetaProjFields(const StringData& field) { + _computedMetaProjFields.emplace(field); + } + + const std::set<std::string>& computedMetaProjFields() const { + return _computedMetaProjFields; + } + + void eraseFromComputedMetaProjFields(const std::string& field) { + _computedMetaProjFields.erase(field); + } + +private: // The set of field names in the data region that should be included or excluded. - std::set<std::string> fieldSet; + std::set<std::string> _fieldSet; - // Vector of computed meta field projection names. Added at the end of materialized + // Set of computed meta field projection names. Added at the end of materialized // measurements. - std::vector<std::string> computedMetaProjFields; + std::set<std::string> _computedMetaProjFields; -private: std::string _timeField; boost::optional<HashedFieldName> _timeFieldHashed; @@ -170,9 +207,23 @@ public: // Add computed meta projection names to the bucket specification. void addComputedMetaProjFields(const std::vector<StringData>& computedFieldNames); + // Fill _spec.unpackFieldsToIncludeExclude with final list of fields to include/exclude during + // unpacking. Only calculates the list the first time it is called. + const std::set<std::string>& fieldsToIncludeExcludeDuringUnpack(); + class UnpackingImpl; private: + // Determines if timestamp values should be included in the materialized measurements. + void determineIncludeTimeField(); + + // Removes metaField from the field set and determines whether metaField should be + // included in the materialized measurements. + void eraseMetaFromFieldSetAndDetermineIncludeMeta(); + + // Erase computed meta projection fields if they are present in the exclusion field set. + void eraseExcludedComputedMetaProjFields(); + BucketSpec _spec; Behavior _unpackerBehavior; @@ -200,44 +251,21 @@ private: // The number of measurements in the bucket. int32_t _numberOfMeasurements = 0; -}; -/** - * Removes metaField from the field set and returns a boolean indicating whether metaField should be - * included in the materialized measurements. Always returns false if metaField does not exist. - */ -inline bool eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior unpackerBehavior, - BucketSpec* bucketSpec) { - if (!bucketSpec->metaField() || - std::find(bucketSpec->computedMetaProjFields.cbegin(), - bucketSpec->computedMetaProjFields.cend(), - *bucketSpec->metaField()) != bucketSpec->computedMetaProjFields.cend()) { - return false; - } else if (auto itr = bucketSpec->fieldSet.find(*bucketSpec->metaField()); - itr != bucketSpec->fieldSet.end()) { - bucketSpec->fieldSet.erase(itr); - return unpackerBehavior == BucketUnpacker::Behavior::kInclude; - } else { - return unpackerBehavior == BucketUnpacker::Behavior::kExclude; - } -} - -/** - * Determines if timestamp values should be included in the materialized measurements. - */ -inline bool determineIncludeTimeField(BucketUnpacker::Behavior unpackerBehavior, - BucketSpec* bucketSpec) { - return (unpackerBehavior == BucketUnpacker::Behavior::kInclude) == - (bucketSpec->fieldSet.find(bucketSpec->timeField()) != bucketSpec->fieldSet.end()); -} + // Final list of fields to include/exclude during unpacking. This is computed once during the + // first doGetNext call so we don't have to recalculate every time we reach a new bucket. + boost::optional<std::set<std::string>> _unpackFieldsToIncludeExclude = boost::none; +}; /** * Determines if an arbitrary field should be included in the materialized measurements. */ inline bool determineIncludeField(StringData fieldName, BucketUnpacker::Behavior unpackerBehavior, - const BucketSpec& bucketSpec) { - return (unpackerBehavior == BucketUnpacker::Behavior::kInclude) == - (bucketSpec.fieldSet.find(fieldName.toString()) != bucketSpec.fieldSet.end()); + const std::set<std::string>& unpackFieldsToIncludeExclude) { + const bool isInclude = unpackerBehavior == BucketUnpacker::Behavior::kInclude; + const bool unpackFieldsContains = unpackFieldsToIncludeExclude.find(fieldName.toString()) != + unpackFieldsToIncludeExclude.cend(); + return isInclude == unpackFieldsContains; } } // namespace mongo diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp index b736ee7ebba..8ee0f4e05f5 100644 --- a/src/mongo/db/exec/bucket_unpacker_test.cpp +++ b/src/mongo/db/exec/bucket_unpacker_test.cpp @@ -600,65 +600,148 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnEmptyBucket) { TEST_F(BucketUnpackerTest, EraseMetaFromFieldSetAndDetermineIncludeMeta) { // Tests a missing 'metaField' in the spec. std::set<std::string> empFields{}; - auto spec = BucketSpec{kUserDefinedTimeName.toString(), boost::none, std::move(empFields)}; - ASSERT_FALSE( - eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude, &spec)); + auto bucket = fromjson(R"( +{ + control: {version: 1}, + data: { + _id: {'0':4, '1':5, '2':6}, + time: {'0':4, '1': 5, '2': 6} + } +})"); + auto unpacker = makeBucketUnpacker(empFields, + BucketUnpacker::Behavior::kInclude, + std::move(bucket), + kUserDefinedMetaName.toString()); // Tests a spec with 'metaField' in include list. std::set<std::string> fields{kUserDefinedMetaName.toString()}; + auto specWithMetaInclude = BucketSpec{ kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)}; - ASSERT_TRUE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude, - &specWithMetaInclude)); - ASSERT_EQ(specWithMetaInclude.fieldSet.count(kUserDefinedMetaName.toString()), 0); - - std::set<std::string> fieldsNoMeta{"foo"}; - auto specWithFooInclude = BucketSpec{ - kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fieldsNoMeta)}; - ASSERT_TRUE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kExclude, - &specWithFooInclude)); - ASSERT_FALSE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude, - &specWithFooInclude)); + // This calls eraseMetaFromFieldSetAndDetermineIncludeMeta. + unpacker.setBucketSpecAndBehavior(std::move(specWithMetaInclude), + BucketUnpacker::Behavior::kInclude); + ASSERT_TRUE(unpacker.includeMetaField()); + ASSERT_EQ(unpacker.bucketSpec().fieldSet().count(kUserDefinedMetaName.toString()), 0); + + std::set<std::string> fieldsNoMetaInclude{"foo"}; + auto specWithFooInclude = BucketSpec{kUserDefinedTimeName.toString(), + kUserDefinedMetaName.toString(), + std::move(fieldsNoMetaInclude)}; + + std::set<std::string> fieldsNoMetaExclude{"foo"}; + auto specWithFooExclude = BucketSpec{kUserDefinedTimeName.toString(), + kUserDefinedMetaName.toString(), + std::move(fieldsNoMetaExclude)}; + + unpacker.setBucketSpecAndBehavior(std::move(specWithFooExclude), + BucketUnpacker::Behavior::kExclude); + ASSERT_TRUE(unpacker.includeMetaField()); + unpacker.setBucketSpecAndBehavior(std::move(specWithFooInclude), + BucketUnpacker::Behavior::kInclude); + ASSERT_FALSE(unpacker.includeMetaField()); // Tests a spec with 'metaField' not in exclude list. std::set<std::string> excludeFields{}; - auto specWithMetaExclude = BucketSpec{ + auto specMetaExclude = BucketSpec{ kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(excludeFields)}; - ASSERT_TRUE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kExclude, - &specWithMetaExclude)); - ASSERT_FALSE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude, - &specWithMetaExclude)); + auto specMetaInclude = specMetaExclude; + unpacker.setBucketSpecAndBehavior(std::move(specMetaExclude), + BucketUnpacker::Behavior::kExclude); + ASSERT_TRUE(unpacker.includeMetaField()); + unpacker.setBucketSpecAndBehavior(std::move(specMetaInclude), + BucketUnpacker::Behavior::kInclude); + ASSERT_FALSE(unpacker.includeMetaField()); } TEST_F(BucketUnpackerTest, DetermineIncludeTimeField) { - std::set<std::string> fields{kUserDefinedTimeName.toString()}; - auto spec = BucketSpec{ - kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)}; - ASSERT_TRUE(determineIncludeTimeField(BucketUnpacker::Behavior::kInclude, &spec)); - ASSERT_FALSE(determineIncludeTimeField(BucketUnpacker::Behavior::kExclude, &spec)); + auto bucket = fromjson(R"( +{ + control: {version: 1}, + data: { + _id: {'0':4, '1':5, '2':6}, + time: {'0':4, '1': 5, '2': 6} + } +})"); + std::set<std::string> unpackerFields{kUserDefinedTimeName.toString()}; + auto unpacker = makeBucketUnpacker(unpackerFields, + BucketUnpacker::Behavior::kInclude, + std::move(bucket), + kUserDefinedMetaName.toString()); + + std::set<std::string> includeFields{kUserDefinedTimeName.toString()}; + auto includeSpec = BucketSpec{ + kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(includeFields)}; + // This calls determineIncludeTimeField. + unpacker.setBucketSpecAndBehavior(std::move(includeSpec), BucketUnpacker::Behavior::kInclude); + ASSERT_TRUE(unpacker.includeTimeField()); + + std::set<std::string> excludeFields{kUserDefinedTimeName.toString()}; + auto excludeSpec = BucketSpec{ + kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(excludeFields)}; + unpacker.setBucketSpecAndBehavior(std::move(excludeSpec), BucketUnpacker::Behavior::kExclude); + ASSERT_FALSE(unpacker.includeTimeField()); } -TEST_F(BucketUnpackerTest, DetermineIncludeField) { +TEST_F(BucketUnpackerTest, DetermineIncludeFieldIncludeMode) { std::string includedMeasurementField = "measurementField1"; std::string excludedMeasurementField = "measurementField2"; std::set<std::string> fields{kUserDefinedTimeName.toString(), includedMeasurementField}; + + auto bucket = Document{{"_id", 1}, + {"control", Document{{"version", 1}}}, + {"meta", Document{{"m1", 999}, {"m2", 9999}}}, + {"data", Document{}}} + .toBson(); + auto spec = BucketSpec{ kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)}; - ASSERT_TRUE(determineIncludeField( - kUserDefinedTimeName.toString(), BucketUnpacker::Behavior::kInclude, spec)); - ASSERT_FALSE(determineIncludeField( - kUserDefinedTimeName.toString(), BucketUnpacker::Behavior::kExclude, spec)); + BucketUnpacker includeUnpacker; + includeUnpacker.setBucketSpecAndBehavior(std::move(spec), BucketUnpacker::Behavior::kInclude); + // Need to call reset so that the private method calculateFieldsToIncludeExcludeDuringUnpack() + // is called, and _unpackFieldsToIncludeExclude gets filled with fields. + includeUnpacker.reset(std::move(bucket)); + // Now the spec knows which fields to include/exclude. + + ASSERT_TRUE(determineIncludeField(kUserDefinedTimeName, + BucketUnpacker::Behavior::kInclude, + includeUnpacker.fieldsToIncludeExcludeDuringUnpack())); + ASSERT_TRUE(determineIncludeField(includedMeasurementField, + BucketUnpacker::Behavior::kInclude, + includeUnpacker.fieldsToIncludeExcludeDuringUnpack())); + ASSERT_FALSE(determineIncludeField(excludedMeasurementField, + BucketUnpacker::Behavior::kInclude, + includeUnpacker.fieldsToIncludeExcludeDuringUnpack())); +} + +TEST_F(BucketUnpackerTest, DetermineIncludeFieldExcludeMode) { + std::string includedMeasurementField = "measurementField1"; + std::string excludedMeasurementField = "measurementField2"; + std::set<std::string> fields{kUserDefinedTimeName.toString(), includedMeasurementField}; + + auto bucket = Document{{"_id", 1}, + {"control", Document{{"version", 1}}}, + {"meta", Document{{"m1", 999}, {"m2", 9999}}}, + {"data", Document{}}} + .toBson(); - ASSERT_TRUE( - determineIncludeField(includedMeasurementField, BucketUnpacker::Behavior::kInclude, spec)); - ASSERT_FALSE( - determineIncludeField(includedMeasurementField, BucketUnpacker::Behavior::kExclude, spec)); + auto spec = BucketSpec{ + kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)}; - ASSERT_FALSE( - determineIncludeField(excludedMeasurementField, BucketUnpacker::Behavior::kInclude, spec)); - ASSERT_TRUE( - determineIncludeField(excludedMeasurementField, BucketUnpacker::Behavior::kExclude, spec)); + BucketUnpacker excludeUnpacker; + excludeUnpacker.setBucketSpecAndBehavior(std::move(spec), BucketUnpacker::Behavior::kExclude); + excludeUnpacker.reset(std::move(bucket)); + + ASSERT_FALSE(determineIncludeField(kUserDefinedTimeName, + BucketUnpacker::Behavior::kExclude, + excludeUnpacker.fieldsToIncludeExcludeDuringUnpack())); + ASSERT_FALSE(determineIncludeField(includedMeasurementField, + BucketUnpacker::Behavior::kExclude, + excludeUnpacker.fieldsToIncludeExcludeDuringUnpack())); + ASSERT_TRUE(determineIncludeField(excludedMeasurementField, + BucketUnpacker::Behavior::kExclude, + excludeUnpacker.fieldsToIncludeExcludeDuringUnpack())); } /** diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index ee695db3c96..803b1d833bd 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -214,7 +214,7 @@ void optimizePrefix(Pipeline::SourceContainer::iterator itr, Pipeline::SourceCon // Returns whether 'field' depends on a pushed down $addFields or computed $project. bool fieldIsComputed(BucketSpec spec, std::string field) { return std::any_of( - spec.computedMetaProjFields.begin(), spec.computedMetaProjFields.end(), [&](auto& s) { + spec.computedMetaProjFields().begin(), spec.computedMetaProjFields().end(), [&](auto& s) { return s == field || expression::isPathPrefixOf(field, s) || expression::isPathPrefixOf(s, field); }); @@ -268,7 +268,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF uassert(5346503, "include or exclude field element must be a single-element field path", field.find('.') == std::string::npos); - bucketSpec.fieldSet.emplace(field); + bucketSpec.addIncludeExcludeField(field); } unpackerBehavior = fieldName == kInclude ? BucketUnpacker::Behavior::kInclude : BucketUnpacker::Behavior::kExclude; @@ -314,7 +314,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF uassert(5509902, "computedMetaProjFields field element must be a single-element field path", field.find('.') == std::string::npos); - bucketSpec.computedMetaProjFields.emplace_back(field); + bucketSpec.addComputedMetaProjFields(field); } } else { uasserted(5346506, @@ -379,18 +379,18 @@ void DocumentSourceInternalUnpackBucket::serializeToArray( MutableDocument out; auto behavior = _bucketUnpacker.behavior() == BucketUnpacker::Behavior::kInclude ? kInclude : kExclude; - auto&& spec = _bucketUnpacker.bucketSpec(); + const auto& spec = _bucketUnpacker.bucketSpec(); std::vector<Value> fields; - for (auto&& field : spec.fieldSet) { + for (auto&& field : spec.fieldSet()) { fields.emplace_back(field); } if (((_bucketUnpacker.includeMetaField() && _bucketUnpacker.behavior() == BucketUnpacker::Behavior::kInclude) || (!_bucketUnpacker.includeMetaField() && _bucketUnpacker.behavior() == BucketUnpacker::Behavior::kExclude && spec.metaField())) && - std::find(spec.computedMetaProjFields.cbegin(), - spec.computedMetaProjFields.cend(), - *spec.metaField()) == spec.computedMetaProjFields.cend()) + std::find(spec.computedMetaProjFields().cbegin(), + spec.computedMetaProjFields().cend(), + *spec.metaField()) == spec.computedMetaProjFields().cend()) fields.emplace_back(*spec.metaField()); out.addField(behavior, Value{std::move(fields)}); @@ -400,11 +400,11 @@ void DocumentSourceInternalUnpackBucket::serializeToArray( } out.addField(kBucketMaxSpanSeconds, Value{_bucketMaxSpanSeconds}); - if (!spec.computedMetaProjFields.empty()) + if (!spec.computedMetaProjFields().empty()) out.addField("computedMetaProjFields", Value{[&] { std::vector<Value> compFields; - std::transform(spec.computedMetaProjFields.cbegin(), - spec.computedMetaProjFields.cend(), + std::transform(spec.computedMetaProjFields().cbegin(), + spec.computedMetaProjFields().cend(), std::back_inserter(compFields), [](auto&& projString) { return Value{projString}; }); return compFields; @@ -506,7 +506,7 @@ void DocumentSourceInternalUnpackBucket::internalizeProject(const BSONObj& proje // Update '_bucketUnpacker' state with the new fields and behavior. auto spec = _bucketUnpacker.bucketSpec(); - spec.fieldSet = std::move(fields); + spec.setFieldSet(fields); _bucketUnpacker.setBucketSpecAndBehavior(std::move(spec), isInclusion ? BucketUnpacker::Behavior::kInclude : BucketUnpacker::Behavior::kExclude); @@ -514,7 +514,7 @@ void DocumentSourceInternalUnpackBucket::internalizeProject(const BSONObj& proje std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProjectToInternalize( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) const { - if (std::next(itr) == container->end() || !_bucketUnpacker.bucketSpec().fieldSet.empty()) { + if (std::next(itr) == container->end() || !_bucketUnpacker.bucketSpec().fieldSet().empty()) { // There is no project to internalize or there are already fields being included/excluded. return {BSONObj{}, false}; } diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp index dbd56d36e20..06217344dad 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp @@ -238,7 +238,7 @@ TEST_F(InternalUnpackBucketExecTest, UnpackNeitherIncludeNorExcludeDefaultsToEmp auto source = DocumentSourceMock::createForTest( { R"({ - control: {'version': 1}, + control: {'version': 1}, meta: {'m1': 999, 'm2': 9999}, data: { _id: {'0':1, '1':2}, @@ -248,7 +248,7 @@ TEST_F(InternalUnpackBucketExecTest, UnpackNeitherIncludeNorExcludeDefaultsToEmp } })", R"({ - control: {'version': 1}, + control: {'version': 1}, meta: {m1: 9, m2: 9, m3: 9}, data: { _id: {'0':3, '1':4}, @@ -662,7 +662,7 @@ TEST_F(InternalUnpackBucketExecTest, BucketUnpackerHandlesMissingMetadata) { { R"( { - control: {'version': 1}, + control: {'version': 1}, meta: { 'm1': 999, 'm2': 9999 }, @@ -673,7 +673,7 @@ TEST_F(InternalUnpackBucketExecTest, BucketUnpackerHandlesMissingMetadata) { })", R"( { - control: {'version': 1}, + control: {'version': 1}, data: { _id: {'1':4, '0':5, '2':6}, time: {'1':4, '0': 5, '2': 6} @@ -883,9 +883,19 @@ TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsIncludeMeta) { ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson); } -TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFields) { +TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldsInclude) { auto bson = fromjson( - "{$_internalUnpackBucket: {exclude: [], timeField: 'time', metaField: 'meta', " + "{$_internalUnpackBucket: {include: [], timeField: 'time', metaField: 'meta', " + "bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['a', 'b', 'c']}}"); + auto array = std::vector<Value>{}; + DocumentSourceInternalUnpackBucket::createFromBsonInternal(bson.firstElement(), getExpCtx()) + ->serializeToArray(array); + ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson); +} + +TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldsIncludeWithCompute) { + auto bson = fromjson( + "{$_internalUnpackBucket: {include: ['a', 'b', 'c'], timeField: 'time', metaField: 'meta', " "bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['a', 'b', 'c']}}"); auto array = std::vector<Value>{}; DocumentSourceInternalUnpackBucket::createFromBsonInternal(bson.firstElement(), getExpCtx()) @@ -893,6 +903,16 @@ TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFields) { ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson); } +TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldsExclude) { + auto bson = fromjson( + "{$_internalUnpackBucket: {exclude: [], timeField: 'time', metaField: 'meta', " + "bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['a']}}"); + auto array = std::vector<Value>{}; + DocumentSourceInternalUnpackBucket::createFromBsonInternal(bson.firstElement(), getExpCtx()) + ->serializeToArray(array); + ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson); +} + TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldOverridingMeta) { auto bson = fromjson( "{$_internalUnpackBucket: {exclude: [], timeField: 'time', metaField: 'meta', " |