diff options
-rw-r--r-- | jstests/core/timeseries/timeseries_internal_bucket_geo_within.js | 46 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp | 66 |
2 files changed, 103 insertions, 9 deletions
diff --git a/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js b/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js index d411f8d37c4..34929ed18ae 100644 --- a/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js +++ b/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js @@ -73,4 +73,50 @@ assert.commandWorked(coll.insert([ results = coll.aggregate(pipeline).toArray(); // Two documents should match the given query. assert.eq(results.length, 2, results); + +// Test a scenario where the control fields do not properly summarize the events: +// 'a' is a mixture of objects and scalars. +coll.drop(); +assert.commandWorked( + db.createCollection(coll.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}})); +assert.commandWorked(coll.insert([ + {time: ISODate(), a: 5}, + {time: ISODate(), a: {b: {type: "Point", coordinates: [0, 0]}}}, + {time: ISODate(), a: ISODate('2020-01-01')}, +])); +results = coll.aggregate([ + {$match: {'a.b': {$geoWithin: {$centerSphere: [[0, 0], 100]}}}}, + ]) + .toArray(); +assert.eq(results.length, 1, results); +assert.docEq(results[0].a, {b: {type: "Point", coordinates: [0, 0]}}); + +// Test a scenario where $geoWithin does implicit array traversal. +coll.drop(); +assert.commandWorked( + db.createCollection(coll.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}})); +assert.commandWorked(coll.insert([ + { + time: ISODate(), + a: [ + 12345, + {type: "Point", coordinates: [180, 0]}, + {"1": {type: "Point", coordinates: [0, 0]}}, + ] + }, +])); +results = coll.aggregate([ + // The bucket-level predicate does not do any implicit array traversal, so 'a.1' + // refers to the point [180, 0]. (So it rejects the bucket.) But $geoWithin does + // do implicit array traversal, so 'a.1' refers to the "1" field of any element of + // 'a'. (So it should include the event.) + {$match: {'a.1': {$geoWithin: {$centerSphere: [[0, 0], 1]}}}}, + ]) + .toArray(); +assert.eq(results.length, 1, results); +assert.docEq(results[0].a, [ + 12345, + {type: "Point", coordinates: [180, 0]}, + {"1": {type: "Point", coordinates: [0, 0]}}, +]); }());
\ No newline at end of file diff --git a/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp b/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp index dfe7e7d5ddf..2b24baa1672 100644 --- a/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp +++ b/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp @@ -28,11 +28,16 @@ */ -#include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include <boost/optional.hpp> + +#include "mongo/platform/basic.h" + +#include "mongo/bson/bsonobj.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/geo/geoparser.h" +#include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/pipeline/field_path.h" #include "mongo/db/timeseries/timeseries_constants.h" -#include "mongo/platform/basic.h" #include "third_party/s2/s2cellid.h" #include "third_party/s2/s2cellunion.h" @@ -76,17 +81,60 @@ bool InternalBucketGeoWithinMatchExpression::matches(const MatchableDocument* do return _matchesBSONObj(doc->toBSON()); } +namespace { + +/** + * Dereference a path, treating each component of the path as an object field-name. + * + * If we try to traverse through an array or scalar, return boost::none to indicate that + * dereferencing the path failed. + * + * However, traversing through missing succeeds, returning missing. + * If this function returns missing, then the path only traversed through objects and missing. + */ +boost::optional<BSONElement> derefPath(const BSONObj& obj, const FieldPath& fieldPath) { + // Dereferencing the first path component always succeeds, because we start with an object, + // and because paths always have at least one component. + auto elem = obj[fieldPath.front()]; + // Then we have zero or more path components. + for (size_t i = 1; i < fieldPath.getPathLength(); ++i) { + if (elem.eoo()) + return BSONElement{}; + if (elem.type() != BSONType::Object) + return boost::none; + elem = elem.Obj()[fieldPath.getFieldName(i)]; + } + return elem; +} + +} // namespace + bool InternalBucketGeoWithinMatchExpression::_matchesBSONObj(const BSONObj& obj) const { - auto controlMinElm = dotted_path_support::extractElementAtPath( - obj, str::stream() << timeseries::kControlMinFieldNamePrefix << _field); - auto controlMaxElm = dotted_path_support::extractElementAtPath( - obj, str::stream() << timeseries::kControlMaxFieldNamePrefix << _field); + // Look up the path in control.min and control.max. + // If it goes through an array, return true to avoid dealing with implicit array traversal. + // If it goes through a scalar, return true to avoid dealing with mixed types. + auto controlMinElmOptional = derefPath(obj, timeseries::kControlMinFieldNamePrefix + _field); + if (!controlMinElmOptional) + return true; - std::string path = str::stream() << timeseries::kControlMinFieldNamePrefix << _field; + auto controlMaxElmOptional = derefPath(obj, timeseries::kControlMaxFieldNamePrefix + _field); + if (!controlMaxElmOptional) + return true; - if (controlMinElm.eoo() || controlMaxElm.eoo() || !controlMinElm.isABSONObj() || - !controlMaxElm.isABSONObj()) + auto controlMinElm = *controlMinElmOptional; + auto controlMaxElm = *controlMaxElmOptional; + + if (controlMinElm.eoo() && controlMaxElm.eoo()) { + // If both min and max are missing, and we got here only traversing through objects and + // missing, then this path is missing on every event in the bucket. return false; + } + if (controlMinElm.eoo() || controlMaxElm.eoo() || !controlMinElm.isABSONObj() || + !controlMaxElm.isABSONObj()) { + // If either min or max is missing or a scalar, we may have mixed types, so conservatively + // return true. + return true; + } if (controlMinElm.type() != controlMaxElm.type()) return true; |