summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Boros <matt.boros@mongodb.com>2022-02-10 17:05:24 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-02-10 17:34:57 +0000
commitd89391ea2b1f1dfc6da9a012c4371d65c47717b0 (patch)
tree9808f2d85fcc9583315da4d75457dc63d691127b
parent6471c0558b198baeff86e7c87ca856ecc43957c7 (diff)
downloadmongo-d89391ea2b1f1dfc6da9a012c4371d65c47717b0.tar.gz
SERVER-63010 Ensure that unpacking measurements doesn't overwrite pushedown addFields that are computed on meta data
-rw-r--r--jstests/core/timeseries/timeseries_project.js84
-rw-r--r--src/mongo/db/exec/bucket_unpacker.cpp171
-rw-r--r--src/mongo/db/exec/bucket_unpacker.h104
-rw-r--r--src/mongo/db/exec/bucket_unpacker_test.cpp157
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp26
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp32
6 files changed, 422 insertions, 152 deletions
diff --git a/jstests/core/timeseries/timeseries_project.js b/jstests/core/timeseries/timeseries_project.js
new file mode 100644
index 00000000000..b47df95524c
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_project.js
@@ -0,0 +1,84 @@
+/**
+ * Test the behavior of $project on time-series collections.
+ *
+ * @tags: [
+ * does_not_support_stepdowns,
+ * does_not_support_transactions,
+ * requires_fcv_52,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js");
+
+const coll = db.timeseries_project;
+coll.drop();
+assert.commandWorked(
+ db.createCollection(coll.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}}));
+
+const docDate = ISODate();
+
+assert.commandWorked(coll.insert({_id: 0, time: docDate, meta: 4, a: {b: 1}, b: 3, c: [{}, {}]}));
+
+// Check that measurements being unpacked don't overwrite metadata projection pushdown fields.
+let result =
+ coll.aggregate([{
+ $project:
+ {a: 1, b: "$meta", c: {$multiply: [2, "$meta"]}, d: {$multiply: [2, "$meta"]}}
+ }])
+ .toArray();
+assert.docEq(result, [{_id: 0, a: {b: 1}, b: 4, c: 8, d: 8}]);
+
+// Same as above, but keep the rest of the document.
+result = coll.aggregate([{$set: {b: "$meta"}}]).toArray();
+assert.docEq(result, [{_id: 0, time: docDate, meta: 4, a: {b: 1}, b: 4, c: [{}, {}]}]);
+
+// Check that nested meta project is not overwritten by the unpacked value.
+result = coll.aggregate([{$project: {"a.b": "$meta"}}]).toArray();
+assert.docEq(result, [{_id: 0, a: {b: 4}}]);
+
+// Check that meta project pushed down writes to each value in an array.
+result = coll.aggregate([{$project: {"c.a": "$meta"}}]).toArray();
+assert.docEq(result, [{_id: 0, c: [{a: 4}, {a: 4}]}]);
+
+// Replace meta field with unpacked field.
+result = coll.aggregate([{$project: {"meta": "$b"}}]).toArray();
+assert.docEq(result, [{_id: 0, meta: 3}]);
+
+// Replace meta field with time field.
+result = coll.aggregate([{$project: {"meta": "$time"}}]).toArray();
+assert.docEq(result, [{_id: 0, meta: docDate}]);
+
+// Replace meta field with constant.
+result = coll.aggregate([{$project: {"meta": {$const: 5}}}]).toArray();
+assert.docEq(result, [{_id: 0, meta: 5}]);
+
+// Make sure the time field can be overwritten by the meta field correctly.
+result = coll.aggregate([{$set: {time: "$meta"}}]).toArray();
+assert.docEq(result, [{_id: 0, time: 4, meta: 4, a: {b: 1}, b: 3, c: [{}, {}]}]);
+
+// Check that the time field can be overwritten by the an unpacked field correctly.
+result = coll.aggregate([{$set: {time: "$b"}}]).toArray();
+assert.docEq(result, [{_id: 0, time: 3, meta: 4, a: {b: 1}, b: 3, c: [{}, {}]}]);
+
+// Make sure the time field can be overwritten by a constant correctly.
+result = coll.aggregate([{$project: {time: {$const: 5}}}]).toArray();
+assert.docEq(result, [{_id: 0, time: 5}]);
+
+// Test that a pushed down meta field projection can correctly be excluded.
+result = coll.aggregate([{$set: {b: "$meta"}}, {$unset: "a"}]).toArray();
+assert.docEq(result, [{_id: 0, time: docDate, meta: 4, b: 4, c: [{}, {}]}]);
+
+// Exclude behavior for time field.
+result = coll.aggregate([{$set: {b: "$time"}}, {$unset: "a"}]).toArray();
+assert.docEq(result, [{_id: 0, time: docDate, meta: 4, b: docDate, c: [{}, {}]}]);
+
+// Exclude behavior for consecutive projects.
+result = coll.aggregate([{$set: {b: "$meta"}}, {$unset: "meta"}]).toArray();
+assert.docEq(result, [{_id: 0, time: docDate, a: {b: 1}, b: 4, c: [{}, {}]}]);
+
+// Test that an exclude does not overwrite meta field pushdown.
+result = coll.aggregate([{$unset: "b"}, {$set: {b: "$meta"}}]).toArray();
+assert.docEq(result, [{_id: 0, time: docDate, meta: 4, a: {b: 1}, b: 4, c: [{}, {}]}]);
+})(); \ No newline at end of file
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index 782d2ff5235..e47ec3b2648 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -48,6 +48,7 @@ public:
virtual void extractSingleMeasurement(MutableDocument& measurement,
int j,
const BucketSpec& spec,
+ const std::set<std::string>& unpackFieldsToIncludeExclude,
BucketUnpacker::Behavior behavior,
const BSONObj& bucket,
const Value& metaValue,
@@ -99,6 +100,7 @@ public:
void extractSingleMeasurement(MutableDocument& measurement,
int j,
const BucketSpec& spec,
+ const std::set<std::string>& unpackFieldsToIncludeExclude,
BucketUnpacker::Behavior behavior,
const BSONObj& bucket,
const Value& metaValue,
@@ -181,14 +183,16 @@ bool BucketUnpackerV1::getNext(MutableDocument& measurement,
return _timeFieldIter.more();
}
-void BucketUnpackerV1::extractSingleMeasurement(MutableDocument& measurement,
- int j,
- const BucketSpec& spec,
- BucketUnpacker::Behavior behavior,
- const BSONObj& bucket,
- const Value& metaValue,
- bool includeTimeField,
- bool includeMetaField) {
+void BucketUnpackerV1::extractSingleMeasurement(
+ MutableDocument& measurement,
+ int j,
+ const BucketSpec& spec,
+ const std::set<std::string>& unpackFieldsToIncludeExclude,
+ BucketUnpacker::Behavior behavior,
+ const BSONObj& bucket,
+ const Value& metaValue,
+ bool includeTimeField,
+ bool includeMetaField) {
auto rowKey = std::to_string(j);
auto targetIdx = StringData{rowKey};
auto&& dataRegion = bucket.getField(timeseries::kBucketDataFieldName).Obj();
@@ -199,7 +203,7 @@ void BucketUnpackerV1::extractSingleMeasurement(MutableDocument& measurement,
for (auto&& dataElem : dataRegion) {
auto colName = dataElem.fieldNameStringData();
- if (!determineIncludeField(colName, behavior, spec)) {
+ if (!determineIncludeField(colName, behavior, unpackFieldsToIncludeExclude)) {
continue;
}
auto value = dataElem[targetIdx];
@@ -230,6 +234,7 @@ public:
void extractSingleMeasurement(MutableDocument& measurement,
int j,
const BucketSpec& spec,
+ const std::set<std::string>& unpackFieldsToIncludeExclude,
BucketUnpacker::Behavior behavior,
const BSONObj& bucket,
const Value& metaValue,
@@ -306,14 +311,16 @@ bool BucketUnpackerV2::getNext(MutableDocument& measurement,
return _timeColumn.it != _timeColumn.end;
}
-void BucketUnpackerV2::extractSingleMeasurement(MutableDocument& measurement,
- int j,
- const BucketSpec& spec,
- BucketUnpacker::Behavior behavior,
- const BSONObj& bucket,
- const Value& metaValue,
- bool includeTimeField,
- bool includeMetaField) {
+void BucketUnpackerV2::extractSingleMeasurement(
+ MutableDocument& measurement,
+ int j,
+ const BucketSpec& spec,
+ const std::set<std::string>& unpackFieldsToIncludeExclude,
+ BucketUnpacker::Behavior behavior,
+ const BSONObj& bucket,
+ const Value& metaValue,
+ bool includeTimeField,
+ bool includeMetaField) {
if (includeTimeField) {
auto val = _timeColumn.column[j];
uassert(
@@ -340,32 +347,14 @@ std::size_t BucketUnpackerV2::numberOfFields() {
return kFixedFieldNumber + _fieldColumns.size();
}
-/**
- * Erase computed meta projection fields if they are present in the exclusion field set.
- */
-void eraseExcludedComputedMetaProjFields(BucketUnpacker::Behavior unpackerBehavior,
- BucketSpec* bucketSpec) {
- if (unpackerBehavior == BucketUnpacker::Behavior::kExclude &&
- bucketSpec->computedMetaProjFields.size() > 0) {
- for (auto it = bucketSpec->computedMetaProjFields.begin();
- it != bucketSpec->computedMetaProjFields.end();) {
- if (bucketSpec->fieldSet.find(*it) != bucketSpec->fieldSet.end()) {
- it = bucketSpec->computedMetaProjFields.erase(it);
- } else {
- it++;
- }
- }
- }
-}
-
} // namespace
BucketSpec::BucketSpec(const std::string& timeField,
const boost::optional<std::string>& metaField,
const std::set<std::string>& fields,
- const std::vector<std::string>& computedProjections)
- : fieldSet(fields),
- computedMetaProjFields(computedProjections),
+ const std::set<std::string>& computedProjections)
+ : _fieldSet(fields),
+ _computedMetaProjFields(computedProjections),
_timeField(timeField),
_timeFieldHashed(FieldNameHasher().hashedFieldName(_timeField)),
_metaField(metaField) {
@@ -375,8 +364,8 @@ BucketSpec::BucketSpec(const std::string& timeField,
}
BucketSpec::BucketSpec(const BucketSpec& other)
- : fieldSet(other.fieldSet),
- computedMetaProjFields(other.computedMetaProjFields),
+ : _fieldSet(other._fieldSet),
+ _computedMetaProjFields(other._computedMetaProjFields),
_timeField(other._timeField),
_timeFieldHashed(HashedFieldName{_timeField, other._timeFieldHashed->hash()}),
_metaField(other._metaField) {
@@ -386,8 +375,8 @@ BucketSpec::BucketSpec(const BucketSpec& other)
}
BucketSpec::BucketSpec(BucketSpec&& other)
- : fieldSet(std::move(other.fieldSet)),
- computedMetaProjFields(std::move(other.computedMetaProjFields)),
+ : _fieldSet(std::move(other._fieldSet)),
+ _computedMetaProjFields(std::move(other._computedMetaProjFields)),
_timeField(std::move(other._timeField)),
_timeFieldHashed(HashedFieldName{_timeField, other._timeFieldHashed->hash()}),
_metaField(std::move(other._metaField)) {
@@ -398,8 +387,8 @@ BucketSpec::BucketSpec(BucketSpec&& other)
BucketSpec& BucketSpec::operator=(const BucketSpec& other) {
if (&other != this) {
- fieldSet = other.fieldSet;
- computedMetaProjFields = other.computedMetaProjFields;
+ _fieldSet = other._fieldSet;
+ _computedMetaProjFields = other._computedMetaProjFields;
_timeField = other._timeField;
_timeFieldHashed = HashedFieldName{_timeField, other._timeFieldHashed->hash()};
_metaField = other._metaField;
@@ -453,14 +442,22 @@ BucketUnpacker::BucketUnpacker(BucketSpec spec, Behavior unpackerBehavior) {
void BucketUnpacker::addComputedMetaProjFields(const std::vector<StringData>& computedFieldNames) {
for (auto&& field : computedFieldNames) {
- _spec.computedMetaProjFields.emplace_back(field.toString());
+ _spec.addComputedMetaProjFields(field);
// If we're already specifically including fields, we need to add the computed fields to
- // the included field set to ensure they are unpacked.
+ // the included field set to indicate they're in the output doc.
if (_unpackerBehavior == BucketUnpacker::Behavior::kInclude) {
- _spec.fieldSet.emplace(field);
+ _spec.addIncludeExcludeField(field);
+ } else {
+ // Since exclude is applied after addComputedMetaProjFields, we must erase the new field
+ // from the include/exclude fields so this doesn't get removed.
+ _spec.removeIncludeExcludeField(field.toString());
}
}
+
+ // Recalculate _includeTimeField, since both computedMetaProjFields and fieldSet may have
+ // changed.
+ determineIncludeTimeField();
}
Document BucketUnpacker::getNext() {
@@ -475,7 +472,7 @@ Document BucketUnpacker::getNext() {
measurement, _spec, _metaValue, _includeTimeField, _includeMetaField);
// Add computed meta projections.
- for (auto&& name : _spec.computedMetaProjFields) {
+ for (auto&& name : _spec.computedMetaProjFields()) {
measurement.addField(name, Value{_computedMetaProjections[name]});
}
@@ -492,6 +489,7 @@ Document BucketUnpacker::extractSingleMeasurement(int j) {
_unpackingImpl->extractSingleMeasurement(measurement,
j,
_spec,
+ fieldsToIncludeExcludeDuringUnpack(),
_unpackerBehavior,
_bucket,
_metaValue,
@@ -499,7 +497,7 @@ Document BucketUnpacker::extractSingleMeasurement(int j) {
_includeMetaField);
// Add computed meta projections.
- for (auto&& name : _spec.computedMetaProjFields) {
+ for (auto&& name : _spec.computedMetaProjFields()) {
measurement.addField(name, Value{_computedMetaProjections[name]});
}
@@ -578,16 +576,15 @@ void BucketUnpacker::reset(BSONObj&& bucket) {
// Includes a field when '_unpackerBehavior' is 'kInclude' and it's found in 'fieldSet' or
// _unpackerBehavior is 'kExclude' and it's not found in 'fieldSet'.
- if (determineIncludeField(colName, _unpackerBehavior, _spec)) {
+ if (determineIncludeField(
+ colName, _unpackerBehavior, fieldsToIncludeExcludeDuringUnpack())) {
_unpackingImpl->addField(elem);
}
}
// Update computed meta projections with values from this bucket.
- if (!_spec.computedMetaProjFields.empty()) {
- for (auto&& name : _spec.computedMetaProjFields) {
- _computedMetaProjections[name] = _bucket[name];
- }
+ for (auto&& name : _spec.computedMetaProjFields()) {
+ _computedMetaProjections[name] = _bucket[name];
}
// Save the measurement count for the bucket.
@@ -630,13 +627,71 @@ int BucketUnpacker::computeMeasurementCount(const BSONObj& bucket, StringData ti
}
}
+void BucketUnpacker::determineIncludeTimeField() {
+ const bool isInclude = _unpackerBehavior == BucketUnpacker::Behavior::kInclude;
+ const bool fieldSetContainsTime =
+ _spec.fieldSet().find(_spec.timeField()) != _spec.fieldSet().end();
+
+ const auto& metaProjFields = _spec.computedMetaProjFields();
+ const bool metaProjContains = metaProjFields.find(_spec.timeField()) != metaProjFields.cend();
+
+ // If computedMetaProjFields contains the time field, we exclude it from unpacking no matter
+ // what, since it will be overwritten anyway.
+ _includeTimeField = isInclude == fieldSetContainsTime && !metaProjContains;
+}
+
+void BucketUnpacker::eraseMetaFromFieldSetAndDetermineIncludeMeta() {
+ if (!_spec.metaField() ||
+ _spec.computedMetaProjFields().find(*_spec.metaField()) !=
+ _spec.computedMetaProjFields().cend()) {
+ _includeMetaField = false;
+ } else if (auto itr = _spec.fieldSet().find(*_spec.metaField());
+ itr != _spec.fieldSet().end()) {
+ _spec.removeIncludeExcludeField(*_spec.metaField());
+ _includeMetaField = _unpackerBehavior == BucketUnpacker::Behavior::kInclude;
+ } else {
+ _includeMetaField = _unpackerBehavior == BucketUnpacker::Behavior::kExclude;
+ }
+}
+
+void BucketUnpacker::eraseExcludedComputedMetaProjFields() {
+ if (_unpackerBehavior == BucketUnpacker::Behavior::kExclude) {
+ for (const auto& field : _spec.fieldSet()) {
+ _spec.eraseFromComputedMetaProjFields(field);
+ }
+ }
+}
+
void BucketUnpacker::setBucketSpecAndBehavior(BucketSpec&& bucketSpec, Behavior behavior) {
- _includeMetaField = eraseMetaFromFieldSetAndDetermineIncludeMeta(behavior, &bucketSpec);
- _includeTimeField = determineIncludeTimeField(behavior, &bucketSpec);
_unpackerBehavior = behavior;
-
- eraseExcludedComputedMetaProjFields(behavior, &bucketSpec);
_spec = std::move(bucketSpec);
+
+ eraseMetaFromFieldSetAndDetermineIncludeMeta();
+ determineIncludeTimeField();
+ eraseExcludedComputedMetaProjFields();
+}
+
+const std::set<std::string>& BucketUnpacker::fieldsToIncludeExcludeDuringUnpack() {
+ if (_unpackFieldsToIncludeExclude) {
+ return *_unpackFieldsToIncludeExclude;
+ }
+
+ _unpackFieldsToIncludeExclude = std::set<std::string>();
+ const auto& metaProjFields = _spec.computedMetaProjFields();
+ if (_unpackerBehavior == BucketUnpacker::Behavior::kInclude) {
+ // For include, we unpack fieldSet - metaProjFields.
+ for (auto&& field : _spec.fieldSet()) {
+ if (metaProjFields.find(field) == metaProjFields.cend()) {
+ _unpackFieldsToIncludeExclude->insert(field);
+ }
+ }
+ } else {
+ // For exclude, we unpack everything but fieldSet + metaProjFields.
+ _unpackFieldsToIncludeExclude->insert(_spec.fieldSet().begin(), _spec.fieldSet().end());
+ _unpackFieldsToIncludeExclude->insert(metaProjFields.begin(), metaProjFields.end());
+ }
+
+ return *_unpackFieldsToIncludeExclude;
}
const std::set<StringData> BucketUnpacker::reservedBucketFieldNames = {
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 2f5fd70b785..c28b9c340c0 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -38,7 +38,16 @@
namespace mongo {
/**
- * Carries parameters for unpacking a bucket.
+ * Carries parameters for unpacking a bucket. The order of operations applied to determine which
+ * fields are in the final document are:
+ * If we are in include mode:
+ * 1. Unpack all fields from the bucket.
+ * 2. Remove any fields not in _fieldSet, since we are in include mode.
+ * 3. Add fields from _computedMetaProjFields.
+ * If we are in exclude mode:
+ * 1. Unpack all fields from the bucket.
+ * 2. Add fields from _computedMetaProjFields.
+ * 3. Remove any fields in _fieldSet, since we are in exclude mode.
*/
class BucketSpec {
public:
@@ -46,7 +55,7 @@ public:
BucketSpec(const std::string& timeField,
const boost::optional<std::string>& metaField,
const std::set<std::string>& fields = {},
- const std::vector<std::string>& computedProjections = {});
+ const std::set<std::string>& computedProjections = {});
BucketSpec(const BucketSpec&);
BucketSpec(BucketSpec&&);
@@ -64,14 +73,42 @@ public:
const boost::optional<std::string>& metaField() const;
boost::optional<HashedFieldName> metaFieldHashed() const;
+ void setFieldSet(std::set<std::string>& fieldSet) {
+ _fieldSet = std::move(fieldSet);
+ }
+
+ void addIncludeExcludeField(const StringData& field) {
+ _fieldSet.emplace(field);
+ }
+
+ void removeIncludeExcludeField(const std::string& field) {
+ _fieldSet.erase(field);
+ }
+
+ const std::set<std::string>& fieldSet() const {
+ return _fieldSet;
+ }
+
+ void addComputedMetaProjFields(const StringData& field) {
+ _computedMetaProjFields.emplace(field);
+ }
+
+ const std::set<std::string>& computedMetaProjFields() const {
+ return _computedMetaProjFields;
+ }
+
+ void eraseFromComputedMetaProjFields(const std::string& field) {
+ _computedMetaProjFields.erase(field);
+ }
+
+private:
// The set of field names in the data region that should be included or excluded.
- std::set<std::string> fieldSet;
+ std::set<std::string> _fieldSet;
- // Vector of computed meta field projection names. Added at the end of materialized
+ // Set of computed meta field projection names. Added at the end of materialized
// measurements.
- std::vector<std::string> computedMetaProjFields;
+ std::set<std::string> _computedMetaProjFields;
-private:
std::string _timeField;
boost::optional<HashedFieldName> _timeFieldHashed;
@@ -170,9 +207,23 @@ public:
// Add computed meta projection names to the bucket specification.
void addComputedMetaProjFields(const std::vector<StringData>& computedFieldNames);
+ // Fill _spec.unpackFieldsToIncludeExclude with final list of fields to include/exclude during
+ // unpacking. Only calculates the list the first time it is called.
+ const std::set<std::string>& fieldsToIncludeExcludeDuringUnpack();
+
class UnpackingImpl;
private:
+ // Determines if timestamp values should be included in the materialized measurements.
+ void determineIncludeTimeField();
+
+ // Removes metaField from the field set and determines whether metaField should be
+ // included in the materialized measurements.
+ void eraseMetaFromFieldSetAndDetermineIncludeMeta();
+
+ // Erase computed meta projection fields if they are present in the exclusion field set.
+ void eraseExcludedComputedMetaProjFields();
+
BucketSpec _spec;
Behavior _unpackerBehavior;
@@ -200,44 +251,21 @@ private:
// The number of measurements in the bucket.
int32_t _numberOfMeasurements = 0;
-};
-/**
- * Removes metaField from the field set and returns a boolean indicating whether metaField should be
- * included in the materialized measurements. Always returns false if metaField does not exist.
- */
-inline bool eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior unpackerBehavior,
- BucketSpec* bucketSpec) {
- if (!bucketSpec->metaField() ||
- std::find(bucketSpec->computedMetaProjFields.cbegin(),
- bucketSpec->computedMetaProjFields.cend(),
- *bucketSpec->metaField()) != bucketSpec->computedMetaProjFields.cend()) {
- return false;
- } else if (auto itr = bucketSpec->fieldSet.find(*bucketSpec->metaField());
- itr != bucketSpec->fieldSet.end()) {
- bucketSpec->fieldSet.erase(itr);
- return unpackerBehavior == BucketUnpacker::Behavior::kInclude;
- } else {
- return unpackerBehavior == BucketUnpacker::Behavior::kExclude;
- }
-}
-
-/**
- * Determines if timestamp values should be included in the materialized measurements.
- */
-inline bool determineIncludeTimeField(BucketUnpacker::Behavior unpackerBehavior,
- BucketSpec* bucketSpec) {
- return (unpackerBehavior == BucketUnpacker::Behavior::kInclude) ==
- (bucketSpec->fieldSet.find(bucketSpec->timeField()) != bucketSpec->fieldSet.end());
-}
+ // Final list of fields to include/exclude during unpacking. This is computed once during the
+ // first doGetNext call so we don't have to recalculate every time we reach a new bucket.
+ boost::optional<std::set<std::string>> _unpackFieldsToIncludeExclude = boost::none;
+};
/**
* Determines if an arbitrary field should be included in the materialized measurements.
*/
inline bool determineIncludeField(StringData fieldName,
BucketUnpacker::Behavior unpackerBehavior,
- const BucketSpec& bucketSpec) {
- return (unpackerBehavior == BucketUnpacker::Behavior::kInclude) ==
- (bucketSpec.fieldSet.find(fieldName.toString()) != bucketSpec.fieldSet.end());
+ const std::set<std::string>& unpackFieldsToIncludeExclude) {
+ const bool isInclude = unpackerBehavior == BucketUnpacker::Behavior::kInclude;
+ const bool unpackFieldsContains = unpackFieldsToIncludeExclude.find(fieldName.toString()) !=
+ unpackFieldsToIncludeExclude.cend();
+ return isInclude == unpackFieldsContains;
}
} // namespace mongo
diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp
index b736ee7ebba..8ee0f4e05f5 100644
--- a/src/mongo/db/exec/bucket_unpacker_test.cpp
+++ b/src/mongo/db/exec/bucket_unpacker_test.cpp
@@ -600,65 +600,148 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnEmptyBucket) {
TEST_F(BucketUnpackerTest, EraseMetaFromFieldSetAndDetermineIncludeMeta) {
// Tests a missing 'metaField' in the spec.
std::set<std::string> empFields{};
- auto spec = BucketSpec{kUserDefinedTimeName.toString(), boost::none, std::move(empFields)};
- ASSERT_FALSE(
- eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude, &spec));
+ auto bucket = fromjson(R"(
+{
+ control: {version: 1},
+ data: {
+ _id: {'0':4, '1':5, '2':6},
+ time: {'0':4, '1': 5, '2': 6}
+ }
+})");
+ auto unpacker = makeBucketUnpacker(empFields,
+ BucketUnpacker::Behavior::kInclude,
+ std::move(bucket),
+ kUserDefinedMetaName.toString());
// Tests a spec with 'metaField' in include list.
std::set<std::string> fields{kUserDefinedMetaName.toString()};
+
auto specWithMetaInclude = BucketSpec{
kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)};
- ASSERT_TRUE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude,
- &specWithMetaInclude));
- ASSERT_EQ(specWithMetaInclude.fieldSet.count(kUserDefinedMetaName.toString()), 0);
-
- std::set<std::string> fieldsNoMeta{"foo"};
- auto specWithFooInclude = BucketSpec{
- kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fieldsNoMeta)};
- ASSERT_TRUE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kExclude,
- &specWithFooInclude));
- ASSERT_FALSE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude,
- &specWithFooInclude));
+ // This calls eraseMetaFromFieldSetAndDetermineIncludeMeta.
+ unpacker.setBucketSpecAndBehavior(std::move(specWithMetaInclude),
+ BucketUnpacker::Behavior::kInclude);
+ ASSERT_TRUE(unpacker.includeMetaField());
+ ASSERT_EQ(unpacker.bucketSpec().fieldSet().count(kUserDefinedMetaName.toString()), 0);
+
+ std::set<std::string> fieldsNoMetaInclude{"foo"};
+ auto specWithFooInclude = BucketSpec{kUserDefinedTimeName.toString(),
+ kUserDefinedMetaName.toString(),
+ std::move(fieldsNoMetaInclude)};
+
+ std::set<std::string> fieldsNoMetaExclude{"foo"};
+ auto specWithFooExclude = BucketSpec{kUserDefinedTimeName.toString(),
+ kUserDefinedMetaName.toString(),
+ std::move(fieldsNoMetaExclude)};
+
+ unpacker.setBucketSpecAndBehavior(std::move(specWithFooExclude),
+ BucketUnpacker::Behavior::kExclude);
+ ASSERT_TRUE(unpacker.includeMetaField());
+ unpacker.setBucketSpecAndBehavior(std::move(specWithFooInclude),
+ BucketUnpacker::Behavior::kInclude);
+ ASSERT_FALSE(unpacker.includeMetaField());
// Tests a spec with 'metaField' not in exclude list.
std::set<std::string> excludeFields{};
- auto specWithMetaExclude = BucketSpec{
+ auto specMetaExclude = BucketSpec{
kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(excludeFields)};
- ASSERT_TRUE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kExclude,
- &specWithMetaExclude));
- ASSERT_FALSE(eraseMetaFromFieldSetAndDetermineIncludeMeta(BucketUnpacker::Behavior::kInclude,
- &specWithMetaExclude));
+ auto specMetaInclude = specMetaExclude;
+ unpacker.setBucketSpecAndBehavior(std::move(specMetaExclude),
+ BucketUnpacker::Behavior::kExclude);
+ ASSERT_TRUE(unpacker.includeMetaField());
+ unpacker.setBucketSpecAndBehavior(std::move(specMetaInclude),
+ BucketUnpacker::Behavior::kInclude);
+ ASSERT_FALSE(unpacker.includeMetaField());
}
TEST_F(BucketUnpackerTest, DetermineIncludeTimeField) {
- std::set<std::string> fields{kUserDefinedTimeName.toString()};
- auto spec = BucketSpec{
- kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)};
- ASSERT_TRUE(determineIncludeTimeField(BucketUnpacker::Behavior::kInclude, &spec));
- ASSERT_FALSE(determineIncludeTimeField(BucketUnpacker::Behavior::kExclude, &spec));
+ auto bucket = fromjson(R"(
+{
+ control: {version: 1},
+ data: {
+ _id: {'0':4, '1':5, '2':6},
+ time: {'0':4, '1': 5, '2': 6}
+ }
+})");
+ std::set<std::string> unpackerFields{kUserDefinedTimeName.toString()};
+ auto unpacker = makeBucketUnpacker(unpackerFields,
+ BucketUnpacker::Behavior::kInclude,
+ std::move(bucket),
+ kUserDefinedMetaName.toString());
+
+ std::set<std::string> includeFields{kUserDefinedTimeName.toString()};
+ auto includeSpec = BucketSpec{
+ kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(includeFields)};
+ // This calls determineIncludeTimeField.
+ unpacker.setBucketSpecAndBehavior(std::move(includeSpec), BucketUnpacker::Behavior::kInclude);
+ ASSERT_TRUE(unpacker.includeTimeField());
+
+ std::set<std::string> excludeFields{kUserDefinedTimeName.toString()};
+ auto excludeSpec = BucketSpec{
+ kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(excludeFields)};
+ unpacker.setBucketSpecAndBehavior(std::move(excludeSpec), BucketUnpacker::Behavior::kExclude);
+ ASSERT_FALSE(unpacker.includeTimeField());
}
-TEST_F(BucketUnpackerTest, DetermineIncludeField) {
+TEST_F(BucketUnpackerTest, DetermineIncludeFieldIncludeMode) {
std::string includedMeasurementField = "measurementField1";
std::string excludedMeasurementField = "measurementField2";
std::set<std::string> fields{kUserDefinedTimeName.toString(), includedMeasurementField};
+
+ auto bucket = Document{{"_id", 1},
+ {"control", Document{{"version", 1}}},
+ {"meta", Document{{"m1", 999}, {"m2", 9999}}},
+ {"data", Document{}}}
+ .toBson();
+
auto spec = BucketSpec{
kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)};
- ASSERT_TRUE(determineIncludeField(
- kUserDefinedTimeName.toString(), BucketUnpacker::Behavior::kInclude, spec));
- ASSERT_FALSE(determineIncludeField(
- kUserDefinedTimeName.toString(), BucketUnpacker::Behavior::kExclude, spec));
+ BucketUnpacker includeUnpacker;
+ includeUnpacker.setBucketSpecAndBehavior(std::move(spec), BucketUnpacker::Behavior::kInclude);
+ // Need to call reset so that the private method calculateFieldsToIncludeExcludeDuringUnpack()
+ // is called, and _unpackFieldsToIncludeExclude gets filled with fields.
+ includeUnpacker.reset(std::move(bucket));
+ // Now the spec knows which fields to include/exclude.
+
+ ASSERT_TRUE(determineIncludeField(kUserDefinedTimeName,
+ BucketUnpacker::Behavior::kInclude,
+ includeUnpacker.fieldsToIncludeExcludeDuringUnpack()));
+ ASSERT_TRUE(determineIncludeField(includedMeasurementField,
+ BucketUnpacker::Behavior::kInclude,
+ includeUnpacker.fieldsToIncludeExcludeDuringUnpack()));
+ ASSERT_FALSE(determineIncludeField(excludedMeasurementField,
+ BucketUnpacker::Behavior::kInclude,
+ includeUnpacker.fieldsToIncludeExcludeDuringUnpack()));
+}
+
+TEST_F(BucketUnpackerTest, DetermineIncludeFieldExcludeMode) {
+ std::string includedMeasurementField = "measurementField1";
+ std::string excludedMeasurementField = "measurementField2";
+ std::set<std::string> fields{kUserDefinedTimeName.toString(), includedMeasurementField};
+
+ auto bucket = Document{{"_id", 1},
+ {"control", Document{{"version", 1}}},
+ {"meta", Document{{"m1", 999}, {"m2", 9999}}},
+ {"data", Document{}}}
+ .toBson();
- ASSERT_TRUE(
- determineIncludeField(includedMeasurementField, BucketUnpacker::Behavior::kInclude, spec));
- ASSERT_FALSE(
- determineIncludeField(includedMeasurementField, BucketUnpacker::Behavior::kExclude, spec));
+ auto spec = BucketSpec{
+ kUserDefinedTimeName.toString(), kUserDefinedMetaName.toString(), std::move(fields)};
- ASSERT_FALSE(
- determineIncludeField(excludedMeasurementField, BucketUnpacker::Behavior::kInclude, spec));
- ASSERT_TRUE(
- determineIncludeField(excludedMeasurementField, BucketUnpacker::Behavior::kExclude, spec));
+ BucketUnpacker excludeUnpacker;
+ excludeUnpacker.setBucketSpecAndBehavior(std::move(spec), BucketUnpacker::Behavior::kExclude);
+ excludeUnpacker.reset(std::move(bucket));
+
+ ASSERT_FALSE(determineIncludeField(kUserDefinedTimeName,
+ BucketUnpacker::Behavior::kExclude,
+ excludeUnpacker.fieldsToIncludeExcludeDuringUnpack()));
+ ASSERT_FALSE(determineIncludeField(includedMeasurementField,
+ BucketUnpacker::Behavior::kExclude,
+ excludeUnpacker.fieldsToIncludeExcludeDuringUnpack()));
+ ASSERT_TRUE(determineIncludeField(excludedMeasurementField,
+ BucketUnpacker::Behavior::kExclude,
+ excludeUnpacker.fieldsToIncludeExcludeDuringUnpack()));
}
/**
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index ee695db3c96..803b1d833bd 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -214,7 +214,7 @@ void optimizePrefix(Pipeline::SourceContainer::iterator itr, Pipeline::SourceCon
// Returns whether 'field' depends on a pushed down $addFields or computed $project.
bool fieldIsComputed(BucketSpec spec, std::string field) {
return std::any_of(
- spec.computedMetaProjFields.begin(), spec.computedMetaProjFields.end(), [&](auto& s) {
+ spec.computedMetaProjFields().begin(), spec.computedMetaProjFields().end(), [&](auto& s) {
return s == field || expression::isPathPrefixOf(field, s) ||
expression::isPathPrefixOf(s, field);
});
@@ -268,7 +268,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5346503,
"include or exclude field element must be a single-element field path",
field.find('.') == std::string::npos);
- bucketSpec.fieldSet.emplace(field);
+ bucketSpec.addIncludeExcludeField(field);
}
unpackerBehavior = fieldName == kInclude ? BucketUnpacker::Behavior::kInclude
: BucketUnpacker::Behavior::kExclude;
@@ -314,7 +314,7 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5509902,
"computedMetaProjFields field element must be a single-element field path",
field.find('.') == std::string::npos);
- bucketSpec.computedMetaProjFields.emplace_back(field);
+ bucketSpec.addComputedMetaProjFields(field);
}
} else {
uasserted(5346506,
@@ -379,18 +379,18 @@ void DocumentSourceInternalUnpackBucket::serializeToArray(
MutableDocument out;
auto behavior =
_bucketUnpacker.behavior() == BucketUnpacker::Behavior::kInclude ? kInclude : kExclude;
- auto&& spec = _bucketUnpacker.bucketSpec();
+ const auto& spec = _bucketUnpacker.bucketSpec();
std::vector<Value> fields;
- for (auto&& field : spec.fieldSet) {
+ for (auto&& field : spec.fieldSet()) {
fields.emplace_back(field);
}
if (((_bucketUnpacker.includeMetaField() &&
_bucketUnpacker.behavior() == BucketUnpacker::Behavior::kInclude) ||
(!_bucketUnpacker.includeMetaField() &&
_bucketUnpacker.behavior() == BucketUnpacker::Behavior::kExclude && spec.metaField())) &&
- std::find(spec.computedMetaProjFields.cbegin(),
- spec.computedMetaProjFields.cend(),
- *spec.metaField()) == spec.computedMetaProjFields.cend())
+ std::find(spec.computedMetaProjFields().cbegin(),
+ spec.computedMetaProjFields().cend(),
+ *spec.metaField()) == spec.computedMetaProjFields().cend())
fields.emplace_back(*spec.metaField());
out.addField(behavior, Value{std::move(fields)});
@@ -400,11 +400,11 @@ void DocumentSourceInternalUnpackBucket::serializeToArray(
}
out.addField(kBucketMaxSpanSeconds, Value{_bucketMaxSpanSeconds});
- if (!spec.computedMetaProjFields.empty())
+ if (!spec.computedMetaProjFields().empty())
out.addField("computedMetaProjFields", Value{[&] {
std::vector<Value> compFields;
- std::transform(spec.computedMetaProjFields.cbegin(),
- spec.computedMetaProjFields.cend(),
+ std::transform(spec.computedMetaProjFields().cbegin(),
+ spec.computedMetaProjFields().cend(),
std::back_inserter(compFields),
[](auto&& projString) { return Value{projString}; });
return compFields;
@@ -506,7 +506,7 @@ void DocumentSourceInternalUnpackBucket::internalizeProject(const BSONObj& proje
// Update '_bucketUnpacker' state with the new fields and behavior.
auto spec = _bucketUnpacker.bucketSpec();
- spec.fieldSet = std::move(fields);
+ spec.setFieldSet(fields);
_bucketUnpacker.setBucketSpecAndBehavior(std::move(spec),
isInclusion ? BucketUnpacker::Behavior::kInclude
: BucketUnpacker::Behavior::kExclude);
@@ -514,7 +514,7 @@ void DocumentSourceInternalUnpackBucket::internalizeProject(const BSONObj& proje
std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProjectToInternalize(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) const {
- if (std::next(itr) == container->end() || !_bucketUnpacker.bucketSpec().fieldSet.empty()) {
+ if (std::next(itr) == container->end() || !_bucketUnpacker.bucketSpec().fieldSet().empty()) {
// There is no project to internalize or there are already fields being included/excluded.
return {BSONObj{}, false};
}
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp
index dbd56d36e20..06217344dad 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp
@@ -238,7 +238,7 @@ TEST_F(InternalUnpackBucketExecTest, UnpackNeitherIncludeNorExcludeDefaultsToEmp
auto source = DocumentSourceMock::createForTest(
{
R"({
- control: {'version': 1},
+ control: {'version': 1},
meta: {'m1': 999, 'm2': 9999},
data: {
_id: {'0':1, '1':2},
@@ -248,7 +248,7 @@ TEST_F(InternalUnpackBucketExecTest, UnpackNeitherIncludeNorExcludeDefaultsToEmp
}
})",
R"({
- control: {'version': 1},
+ control: {'version': 1},
meta: {m1: 9, m2: 9, m3: 9},
data: {
_id: {'0':3, '1':4},
@@ -662,7 +662,7 @@ TEST_F(InternalUnpackBucketExecTest, BucketUnpackerHandlesMissingMetadata) {
{
R"(
{
- control: {'version': 1},
+ control: {'version': 1},
meta: {
'm1': 999, 'm2': 9999
},
@@ -673,7 +673,7 @@ TEST_F(InternalUnpackBucketExecTest, BucketUnpackerHandlesMissingMetadata) {
})",
R"(
{
- control: {'version': 1},
+ control: {'version': 1},
data: {
_id: {'1':4, '0':5, '2':6},
time: {'1':4, '0': 5, '2': 6}
@@ -883,9 +883,19 @@ TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsIncludeMeta) {
ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson);
}
-TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFields) {
+TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldsInclude) {
auto bson = fromjson(
- "{$_internalUnpackBucket: {exclude: [], timeField: 'time', metaField: 'meta', "
+ "{$_internalUnpackBucket: {include: [], timeField: 'time', metaField: 'meta', "
+ "bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['a', 'b', 'c']}}");
+ auto array = std::vector<Value>{};
+ DocumentSourceInternalUnpackBucket::createFromBsonInternal(bson.firstElement(), getExpCtx())
+ ->serializeToArray(array);
+ ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson);
+}
+
+TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldsIncludeWithCompute) {
+ auto bson = fromjson(
+ "{$_internalUnpackBucket: {include: ['a', 'b', 'c'], timeField: 'time', metaField: 'meta', "
"bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['a', 'b', 'c']}}");
auto array = std::vector<Value>{};
DocumentSourceInternalUnpackBucket::createFromBsonInternal(bson.firstElement(), getExpCtx())
@@ -893,6 +903,16 @@ TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFields) {
ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson);
}
+TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldsExclude) {
+ auto bson = fromjson(
+ "{$_internalUnpackBucket: {exclude: [], timeField: 'time', metaField: 'meta', "
+ "bucketMaxSpanSeconds: 3600, computedMetaProjFields: ['a']}}");
+ auto array = std::vector<Value>{};
+ DocumentSourceInternalUnpackBucket::createFromBsonInternal(bson.firstElement(), getExpCtx())
+ ->serializeToArray(array);
+ ASSERT_BSONOBJ_EQ(array[0].getDocument().toBson(), bson);
+}
+
TEST_F(InternalUnpackBucketExecTest, ParserRoundtripsComputedMetaProjFieldOverridingMeta) {
auto bson = fromjson(
"{$_internalUnpackBucket: {exclude: [], timeField: 'time', metaField: 'meta', "