summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Percy <david.percy@mongodb.com>2021-10-04 19:35:14 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-05 00:22:41 +0000
commit87ed51fe2d571b0c55366e53dfde99608c6ca612 (patch)
tree2b2ccfb8095b0d7cad51fbd429b3a42cb153cdb2
parent4a230c070cb187604d07d04598a912b4feca937d (diff)
downloadmongo-87ed51fe2d571b0c55366e53dfde99608c6ca612.tar.gz
SERVER-60445 Fix $_internalBucketGeoWithin with mixed types
-rw-r--r--jstests/core/timeseries/timeseries_internal_bucket_geo_within.js46
-rw-r--r--src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp66
2 files changed, 103 insertions, 9 deletions
diff --git a/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js b/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js
index d411f8d37c4..34929ed18ae 100644
--- a/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js
+++ b/jstests/core/timeseries/timeseries_internal_bucket_geo_within.js
@@ -73,4 +73,50 @@ assert.commandWorked(coll.insert([
results = coll.aggregate(pipeline).toArray();
// Two documents should match the given query.
assert.eq(results.length, 2, results);
+
+// Test a scenario where the control fields do not properly summarize the events:
+// 'a' is a mixture of objects and scalars.
+coll.drop();
+assert.commandWorked(
+ db.createCollection(coll.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}}));
+assert.commandWorked(coll.insert([
+ {time: ISODate(), a: 5},
+ {time: ISODate(), a: {b: {type: "Point", coordinates: [0, 0]}}},
+ {time: ISODate(), a: ISODate('2020-01-01')},
+]));
+results = coll.aggregate([
+ {$match: {'a.b': {$geoWithin: {$centerSphere: [[0, 0], 100]}}}},
+ ])
+ .toArray();
+assert.eq(results.length, 1, results);
+assert.docEq(results[0].a, {b: {type: "Point", coordinates: [0, 0]}});
+
+// Test a scenario where $geoWithin does implicit array traversal.
+coll.drop();
+assert.commandWorked(
+ db.createCollection(coll.getName(), {timeseries: {timeField: 'time', metaField: 'meta'}}));
+assert.commandWorked(coll.insert([
+ {
+ time: ISODate(),
+ a: [
+ 12345,
+ {type: "Point", coordinates: [180, 0]},
+ {"1": {type: "Point", coordinates: [0, 0]}},
+ ]
+ },
+]));
+results = coll.aggregate([
+ // The bucket-level predicate does not do any implicit array traversal, so 'a.1'
+ // refers to the point [180, 0]. (So it rejects the bucket.) But $geoWithin does
+ // do implicit array traversal, so 'a.1' refers to the "1" field of any element of
+ // 'a'. (So it should include the event.)
+ {$match: {'a.1': {$geoWithin: {$centerSphere: [[0, 0], 1]}}}},
+ ])
+ .toArray();
+assert.eq(results.length, 1, results);
+assert.docEq(results[0].a, [
+ 12345,
+ {type: "Point", coordinates: [180, 0]},
+ {"1": {type: "Point", coordinates: [0, 0]}},
+]);
}()); \ No newline at end of file
diff --git a/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp b/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp
index dfe7e7d5ddf..2b24baa1672 100644
--- a/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp
+++ b/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp
@@ -28,11 +28,16 @@
*/
-#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
+#include <boost/optional.hpp>
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/bsonobj.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/geo/geoparser.h"
+#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
+#include "mongo/db/pipeline/field_path.h"
#include "mongo/db/timeseries/timeseries_constants.h"
-#include "mongo/platform/basic.h"
#include "third_party/s2/s2cellid.h"
#include "third_party/s2/s2cellunion.h"
@@ -76,17 +81,60 @@ bool InternalBucketGeoWithinMatchExpression::matches(const MatchableDocument* do
return _matchesBSONObj(doc->toBSON());
}
+namespace {
+
+/**
+ * Dereference a path, treating each component of the path as an object field-name.
+ *
+ * If we try to traverse through an array or scalar, return boost::none to indicate that
+ * dereferencing the path failed.
+ *
+ * However, traversing through missing succeeds, returning missing.
+ * If this function returns missing, then the path only traversed through objects and missing.
+ */
+boost::optional<BSONElement> derefPath(const BSONObj& obj, const FieldPath& fieldPath) {
+ // Dereferencing the first path component always succeeds, because we start with an object,
+ // and because paths always have at least one component.
+ auto elem = obj[fieldPath.front()];
+ // Then we have zero or more path components.
+ for (size_t i = 1; i < fieldPath.getPathLength(); ++i) {
+ if (elem.eoo())
+ return BSONElement{};
+ if (elem.type() != BSONType::Object)
+ return boost::none;
+ elem = elem.Obj()[fieldPath.getFieldName(i)];
+ }
+ return elem;
+}
+
+} // namespace
+
bool InternalBucketGeoWithinMatchExpression::_matchesBSONObj(const BSONObj& obj) const {
- auto controlMinElm = dotted_path_support::extractElementAtPath(
- obj, str::stream() << timeseries::kControlMinFieldNamePrefix << _field);
- auto controlMaxElm = dotted_path_support::extractElementAtPath(
- obj, str::stream() << timeseries::kControlMaxFieldNamePrefix << _field);
+ // Look up the path in control.min and control.max.
+ // If it goes through an array, return true to avoid dealing with implicit array traversal.
+ // If it goes through a scalar, return true to avoid dealing with mixed types.
+ auto controlMinElmOptional = derefPath(obj, timeseries::kControlMinFieldNamePrefix + _field);
+ if (!controlMinElmOptional)
+ return true;
- std::string path = str::stream() << timeseries::kControlMinFieldNamePrefix << _field;
+ auto controlMaxElmOptional = derefPath(obj, timeseries::kControlMaxFieldNamePrefix + _field);
+ if (!controlMaxElmOptional)
+ return true;
- if (controlMinElm.eoo() || controlMaxElm.eoo() || !controlMinElm.isABSONObj() ||
- !controlMaxElm.isABSONObj())
+ auto controlMinElm = *controlMinElmOptional;
+ auto controlMaxElm = *controlMaxElmOptional;
+
+ if (controlMinElm.eoo() && controlMaxElm.eoo()) {
+ // If both min and max are missing, and we got here only traversing through objects and
+ // missing, then this path is missing on every event in the bucket.
return false;
+ }
+ if (controlMinElm.eoo() || controlMaxElm.eoo() || !controlMinElm.isABSONObj() ||
+ !controlMaxElm.isABSONObj()) {
+ // If either min or max is missing or a scalar, we may have mixed types, so conservatively
+ // return true.
+ return true;
+ }
if (controlMinElm.type() != controlMaxElm.type())
return true;