diff options
author | David Percy <david.percy@mongodb.com> | 2021-11-19 02:57:32 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-19 03:36:22 +0000 |
commit | 86b8db8755d10f86a28a843f501007fe05ff0324 (patch) | |
tree | 4f3674aa871a2d25597110ac2f724fd80fa8d1d5 | |
parent | a687be84420c5e8d7ad0ad565baef40b4726e654 (diff) | |
download | mongo-86b8db8755d10f86a28a843f501007fe05ff0324.tar.gz |
SERVER-58602 Implement $geoNear on time-series measurements
15 files changed, 1332 insertions, 72 deletions
diff --git a/jstests/core/timeseries/libs/geo.js b/jstests/core/timeseries/libs/geo.js new file mode 100644 index 00000000000..3c3e5b6db91 --- /dev/null +++ b/jstests/core/timeseries/libs/geo.js @@ -0,0 +1,25 @@ +// Helper for generating random geo data, used in time-series tests. + +function randomLongLat() { + // Sample points uniformly on a sphere in two steps: + // 1. Sample uniformly from a unit ball (the volume within a sphere). + // 2. Project onto a sphere. + + for (;;) { + const x = 2 * Random.rand() - 1; + const y = 2 * Random.rand() - 1; + const z = 2 * Random.rand() - 1; + if (x * x + y * y + z * z > 1) { + // This point is outside the unit ball: skip it. + continue; + } + + // Taking the [long, lat], ignoring distance from the origin, has the effect of projecting + // onto a sphere. + const longRadians = Math.atan2(y, x); + const latRadians = Math.atan2(z, Math.sqrt(x * x + y * y)); + const long = longRadians * 180 / Math.PI; + const lat = latRadians * 180 / Math.PI; + return [long, lat]; + } +}
\ No newline at end of file diff --git a/jstests/core/timeseries/timeseries_geonear.js b/jstests/core/timeseries/timeseries_geonear.js index bbc5dd4b751..d396112e467 100644 --- a/jstests/core/timeseries/timeseries_geonear.js +++ b/jstests/core/timeseries/timeseries_geonear.js @@ -1,6 +1,8 @@ /** - * Test that $geoNear, $near, $nearSphere, and $text are not allowed against timeseries collections - * and such queries fail cleanly. + * Test the behavior of $geoNear queries on time-series collections. + * + * Also test that $near, $nearSphere, and $text are not allowed against timeseries collections and + * such queries fail cleanly. * * @tags: [ * does_not_support_transactions, diff --git a/jstests/core/timeseries/timeseries_geonear_edge_case_measurements.js b/jstests/core/timeseries/timeseries_geonear_edge_case_measurements.js new file mode 100644 index 00000000000..1a17ca73541 --- /dev/null +++ b/jstests/core/timeseries/timeseries_geonear_edge_case_measurements.js @@ -0,0 +1,183 @@ +/** + * Test the behavior of $geoNear minDistance/maxDistance on time-series measurements. + * + * @tags: [ + * does_not_support_transactions, + * does_not_support_stepdowns, + * requires_fcv_51, + * requires_pipeline_optimization, + * requires_timeseries, + * ] + */ + +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/geo.js"); +load("jstests/core/timeseries/libs/timeseries.js"); + +if (!TimeseriesTest.timeseriesMetricIndexesEnabled(db.getMongo())) { + jsTestLog( + "Skipped test as the featureFlagTimeseriesMetricIndexes feature flag is not enabled."); + return; +} +Random.setRandomSeed(7813223789272959000); + +// Value is taken from geoconstants.h. +const earthRadiusMeters = (6378.1 * 1000); +const earthCircumferenceMeters = earthRadiusMeters * Math.PI * 2; + +// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/EPSILON +const epsilon = Math.pow(2, -52); + +function wrap(input, min, max) { + const span = max - min; + if (input > max) + input -= span; + if (input < min) + input += span; + assert.lte(input, max); + assert.gte(input, min); + return input; +} + +function clamp(input, min, max) { + return Math.max(min, Math.min(input, max)); +} + +const queryPoint = randomLongLat(); +jsTestLog("Query point: " + queryPoint); + +const clusterCenter = randomLongLat(); +jsTestLog("Cluster center: " + clusterCenter); + +// Determine how far away the center of the cluster is from the query point. +let sphereDist, flatDist; +{ + const temp = db.getCollection(jsTestName() + '_temp'); + temp.drop(); + assert.commandWorked(temp.insert({loc: clusterCenter})); + assert.commandWorked(temp.createIndex({loc: '2dsphere'})); + assert.commandWorked(temp.createIndex({loc: '2d'})); + + sphereDist = temp.aggregate({ + $geoNear: { + near: {type: "Point", coordinates: queryPoint}, + key: 'loc', + distanceField: 'dist', + } + }) + .toArray()[0] + .dist; + + flatDist = temp.aggregate({ + $geoNear: { + near: queryPoint, + key: 'loc', + distanceField: 'dist', + } + }) + .toArray()[0] + .dist; + + temp.drop(); +} +jsTestLog("Center point has a SPHERE distance of " + sphereDist); +jsTestLog("Center point has a FLAT distance of " + flatDist); + +// Create a cluster of points extremely close together. +const clusterPoints = []; +// We want the cluster of points to be close together, but not so close that their computed distance +// from the query point is the same. The smallest possible difference in distance is roughly +// distance*epsilon. We nudge each point by more than that to ensure they end up with different +// distances from the query point. +const sphereDistDegrees = sphereDist * 360 / earthCircumferenceMeters; +const deltas = [-2 * sphereDistDegrees * epsilon, 0, 2 * sphereDistDegrees * epsilon]; +for (const dx of deltas) { + for (const dy of deltas) { + const x = wrap(clusterCenter[0] + dx, -180, +180); + const y = clamp(clusterCenter[1] + dy, -90, +90); + clusterPoints.push([x, y]); + } +} +jsTestLog("Generated the following points:"); +printjson(clusterPoints); + +// Set up a normal and a time-series collection to compare results. +const coll = db.getCollection(jsTestName() + '_normal'); +const tsColl = db.getCollection(jsTestName() + '_timeseries'); +coll.drop(); +tsColl.drop(); +assert.commandWorked(coll.createIndex({loc: '2dsphere'})); +assert.commandWorked(coll.createIndex({loc: '2d'})); +assert.commandWorked(db.createCollection(tsColl.getName(), {timeseries: {timeField: 'time'}})); + +const docs = clusterPoints.map(point => ({ + time: ISODate(), + loc: point, + })); +assert.commandWorked(coll.insert(docs)); +assert.commandWorked(tsColl.insert(docs)); + +// Compare time-series vs non-time-series. +for (const minOrMax of ['maxDistance', 'minDistance']) { + const pipeline = [ + { + $geoNear: { + near: {type: "Point", coordinates: queryPoint}, + key: 'loc', + distanceField: 'dist', + [minOrMax]: sphereDist, + } + }, + {$sort: {'loc.0': 1, 'loc.1': 1}}, + { + $project: { + _id: 0, + loc: "$loc", + dist: "$dist", + } + } + ]; + const result = coll.aggregate(pipeline).toArray(); + // In most cases we expect the query to find some but not all the points. + // In rare cases (at a pole) the points could be clamped together. + jsTestLog("Spherical " + minOrMax + " query included " + result.length + " out of " + + docs.length + " points."); + + // Make sure the time-series results match. + const tsResult = tsColl.aggregate(pipeline).toArray(); + assert.sameMembers(result, tsResult); +} + +// Test the same thing for flat queries. +for (const minOrMax of ['maxDistance', 'minDistance']) { + const pipeline = [ + { + $geoNear: { + near: queryPoint, + key: 'loc', + distanceField: 'dist', + [minOrMax]: flatDist, + } + }, + {$sort: {'loc.0': 1, 'loc.1': 1}}, + { + $project: { + _id: 0, + loc: "$loc", + dist: "$dist", + } + } + ]; + const result = coll.aggregate(pipeline).toArray(); + // In most cases we expect the query to find some but not all the points. + // In rare cases (at a pole) the points could be clamped together. + jsTestLog("Flat " + minOrMax + " query included " + result.length + " out of " + docs.length + + " points."); + + // Make sure the time-series results match. + const tsResult = tsColl.aggregate(pipeline).toArray(); + assert.sameMembers(result, tsResult); +} +})(); diff --git a/jstests/core/timeseries/timeseries_geonear_measurements.js b/jstests/core/timeseries/timeseries_geonear_measurements.js new file mode 100644 index 00000000000..ddf2c8cf511 --- /dev/null +++ b/jstests/core/timeseries/timeseries_geonear_measurements.js @@ -0,0 +1,632 @@ +/** + * Test the behavior of $geoNear queries on time-series measurements. + * + * $geoNear has a lot of options, which can combine in surprising ways. For example, specifying + * the query point as GeoJSON implicitly makes it a spherical query, but with different units + * than if you pass 'spherical: true' explicitly. + * + * To ensure we get these cases right, this test runs all of its assertions on a normal collection + * first, to ensure the test itself is correct. Then it runs the same assertions on a timeseries + * collection (both with, and without, a 2dsphere index). + * + * @tags: [ + * does_not_support_transactions, + * does_not_support_stepdowns, + * requires_fcv_51, + * requires_pipeline_optimization, + * requires_timeseries, + * ] + */ + +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/timeseries.js"); +load("jstests/libs/analyze_plan.js"); + +if (!TimeseriesTest.timeseriesMetricIndexesEnabled(db.getMongo())) { + jsTestLog( + "Skipped test as the featureFlagTimeseriesMetricIndexes feature flag is not enabled."); + return; +} +Random.setRandomSeed(); + +// Value is taken from geoconstants.h. +const earthRadiusMeters = (6378.1 * 1000); +const earthCircumferenceMeters = earthRadiusMeters * Math.PI * 2; +function degreesToMeters(degrees) { + return degrees * (earthCircumferenceMeters / 360); +} + +function insertTestData(coll) { + // When these points are interpreted as spherical coordinates, [long, lat], + // the units are interpreted as degrees. + const nMeasurements = 10; + const docs = []; + for (let i = 0; i < nMeasurements; i++) { + docs.push({ + time: ISODate(), + loc: [0, i], + }); + } + // Include a few extra docs with no 'loc' field. + // $geoNear queries should exclude these even when there is no minDistance/maxDistance. + docs.push({time: ISODate(), abc: 123}); + docs.push({time: ISODate(), abc: 456}); + + // Insert in a random order to ensure queries are really sorting. + Array.shuffle(docs); + assert.commandWorked(coll.insert(docs)); +} + +function runFlatExamples(coll, isTimeseries) { + let pipeline, plan; + + // Make sure results are ordered by flat (planar) distance. + // [180, 0] is closer to [0, 0] than [0, 9], in flat geometry. + pipeline = [ + { + $geoNear: { + near: [180, 0], + key: 'loc', + distanceField: "distance", + } + }, + {$project: {_id: 0, loc: 1, distance: {$floor: "$distance"}}}, + {$limit: 1}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 0], distance: 180}, + ]); + + // For the rest of the examples, query from [0, 0] because it makes distances more convenient. + + // Test entire collection. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: "distance", + } + }, + {$project: {_id: 0, loc: 1, distance: 1}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 0], distance: 0}, + {loc: [0, 1], distance: 1}, + {loc: [0, 2], distance: 2}, + {loc: [0, 3], distance: 3}, + {loc: [0, 4], distance: 4}, + {loc: [0, 5], distance: 5}, + {loc: [0, 6], distance: 6}, + {loc: [0, 7], distance: 7}, + {loc: [0, 8], distance: 8}, + {loc: [0, 9], distance: 9}, + ]); + plan = coll.explain().aggregate(pipeline); + // Since we don't support '2d' index on time-series metrics, and '2dsphere' indexes can't + // answer flat queries, we always expect a collscan for timeseries. + if (isTimeseries) { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2D'), plan); + } + + // Limit number of results with $limit. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: "distance", + } + }, + {$limit: 5}, + {$project: {_id: 0, loc: 1, distance: 1}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 0], distance: 0}, + {loc: [0, 1], distance: 1}, + {loc: [0, 2], distance: 2}, + {loc: [0, 3], distance: 3}, + {loc: [0, 4], distance: 4}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2D'), plan); + } + + // Upper bound distance with maxDistance. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: "distance", + maxDistance: 6, + } + }, + {$project: {_id: 0, loc: 1, distance: 1}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 0], distance: 0}, + {loc: [0, 1], distance: 1}, + {loc: [0, 2], distance: 2}, + {loc: [0, 3], distance: 3}, + {loc: [0, 4], distance: 4}, + {loc: [0, 5], distance: 5}, + {loc: [0, 6], distance: 6}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2D'), plan); + } + + // Lower bound distance with minDistance. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: "distance", + minDistance: 3, + } + }, + {$project: {_id: 0, loc: 1, distance: 1}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 3], distance: 3}, + {loc: [0, 4], distance: 4}, + {loc: [0, 5], distance: 5}, + {loc: [0, 6], distance: 6}, + {loc: [0, 7], distance: 7}, + {loc: [0, 8], distance: 8}, + {loc: [0, 9], distance: 9}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2D'), plan); + } + + // Bound distance with both minDistance/maxDistance. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: "distance", + minDistance: 3, + maxDistance: 6, + } + }, + {$project: {_id: 0, loc: 1, distance: 1}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 3], distance: 3}, + {loc: [0, 4], distance: 4}, + {loc: [0, 5], distance: 5}, + {loc: [0, 6], distance: 6}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2D'), plan); + } + + // Test interaction of distanceMultiplier with minDistance/maxDistance. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: "distance", + distanceMultiplier: 10, + minDistance: 3, + maxDistance: 6, + } + }, + {$project: {_id: 0, loc: 1, distance: 1}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 3], distance: 30}, + {loc: [0, 4], distance: 40}, + {loc: [0, 5], distance: 50}, + {loc: [0, 6], distance: 60}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2D'), plan); + } + + // 'query' option is not allowed on time-series collection. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: 'distance', + query: {no_such_field: 123}, + } + }, + ]; + if (isTimeseries) { + assert.throwsWithCode(() => coll.aggregate(pipeline).toArray(), [ + // Must not specify 'query' for $geoNear on a time-series collection; use $match instead + 1938439, + 5860207, + ]); + } else { + assert.docEq([], coll.aggregate(pipeline).toArray()); + } + + // 'includeLocs' is not implemented. + pipeline = [ + { + $geoNear: { + near: [0, 0], + key: 'loc', + distanceField: 'distance', + includeLocs: "abc", + } + }, + ]; + if (isTimeseries) { + // $geoNear 'includeLocs' is not supported on time-series metrics + assert.throwsWithCode(() => coll.aggregate(pipeline).toArray(), 5860208); + } else { + assert.gt(coll.aggregate(pipeline).toArray().length, 0); + } +} + +function runSphereExamples(coll, isTimeseries, has2dsphereIndex, scaleResult, query) { + let pipeline, plan; + + // Test entire collection. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: "distance", + }) + }, + {$project: {_id: 0, loc: 1, distance: {$floor: {$multiply: [scaleResult, "$distance"]}}}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 9], distance: Math.floor(degreesToMeters(180 - 9))}, + {loc: [0, 8], distance: Math.floor(degreesToMeters(180 - 8))}, + {loc: [0, 7], distance: Math.floor(degreesToMeters(180 - 7))}, + {loc: [0, 6], distance: Math.floor(degreesToMeters(180 - 6))}, + {loc: [0, 5], distance: Math.floor(degreesToMeters(180 - 5))}, + {loc: [0, 4], distance: Math.floor(degreesToMeters(180 - 4))}, + {loc: [0, 3], distance: Math.floor(degreesToMeters(180 - 3))}, + {loc: [0, 2], distance: Math.floor(degreesToMeters(180 - 2))}, + {loc: [0, 1], distance: Math.floor(degreesToMeters(180 - 1))}, + {loc: [0, 0], distance: Math.floor(degreesToMeters(180 - 0))}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + // Without a maxDistance we have to unpack every bucket and sort the events. + // However, we do include a $geoWithin predicate to filter out any non-geo documents. + // This means we end up doing an index scan, which might or might not be beneficial + // depending on how many buckets it allows us to exclude. + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + } else { + // We progressively scan larger and larger portions of the index. + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // Limit number of results with $limit. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: "distance", + }) + }, + {$limit: 5}, + {$project: {_id: 0, loc: 1, distance: {$floor: {$multiply: [scaleResult, "$distance"]}}}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 9], distance: Math.floor(degreesToMeters(180 - 9))}, + {loc: [0, 8], distance: Math.floor(degreesToMeters(180 - 8))}, + {loc: [0, 7], distance: Math.floor(degreesToMeters(180 - 7))}, + {loc: [0, 6], distance: Math.floor(degreesToMeters(180 - 6))}, + {loc: [0, 5], distance: Math.floor(degreesToMeters(180 - 5))}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + // Without a maxDistance we have to unpack every bucket and sort the events. + // But, at least with a $limit stage we can do a top-k sort (although this doesn't + // seem to show up in explain()). + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // Upper bound distance with maxDistance. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: "distance", + maxDistance: Math.ceil(degreesToMeters(180 - 3)) / scaleResult, + }) + }, + {$project: {_id: 0, loc: 1, distance: {$floor: {$multiply: [scaleResult, "$distance"]}}}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 9], distance: Math.floor(degreesToMeters(180 - 9))}, + {loc: [0, 8], distance: Math.floor(degreesToMeters(180 - 8))}, + {loc: [0, 7], distance: Math.floor(degreesToMeters(180 - 7))}, + {loc: [0, 6], distance: Math.floor(degreesToMeters(180 - 6))}, + {loc: [0, 5], distance: Math.floor(degreesToMeters(180 - 5))}, + {loc: [0, 4], distance: Math.floor(degreesToMeters(180 - 4))}, + {loc: [0, 3], distance: Math.floor(degreesToMeters(180 - 3))}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + // With maxDistance we can generate a $geoWithin predicate, which can use an index when + // available. + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // Lower bound distance with minDistance. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: "distance", + minDistance: Math.floor(degreesToMeters(180 - 7)) / scaleResult, + }) + }, + {$project: {_id: 0, loc: 1, distance: {$floor: {$multiply: [scaleResult, "$distance"]}}}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 7], distance: Math.floor(degreesToMeters(180 - 7))}, + {loc: [0, 6], distance: Math.floor(degreesToMeters(180 - 6))}, + {loc: [0, 5], distance: Math.floor(degreesToMeters(180 - 5))}, + {loc: [0, 4], distance: Math.floor(degreesToMeters(180 - 4))}, + {loc: [0, 3], distance: Math.floor(degreesToMeters(180 - 3))}, + {loc: [0, 2], distance: Math.floor(degreesToMeters(180 - 2))}, + {loc: [0, 1], distance: Math.floor(degreesToMeters(180 - 1))}, + {loc: [0, 0], distance: Math.floor(degreesToMeters(180 - 0))}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // Bound distance with both minDistance/maxDistance. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: "distance", + minDistance: Math.floor(degreesToMeters(180 - 7)) / scaleResult, + maxDistance: Math.ceil(degreesToMeters(180 - 3)) / scaleResult, + }) + }, + {$project: {_id: 0, loc: 1, distance: {$floor: {$multiply: [scaleResult, "$distance"]}}}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 7], distance: Math.floor(degreesToMeters(180 - 7))}, + {loc: [0, 6], distance: Math.floor(degreesToMeters(180 - 6))}, + {loc: [0, 5], distance: Math.floor(degreesToMeters(180 - 5))}, + {loc: [0, 4], distance: Math.floor(degreesToMeters(180 - 4))}, + {loc: [0, 3], distance: Math.floor(degreesToMeters(180 - 3))}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // Test interaction of distanceMultiplier with minDistance/maxDistance. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: "distance", + distanceMultiplier: 10, + minDistance: Math.floor(degreesToMeters(180 - 7)) / scaleResult, + maxDistance: Math.ceil(degreesToMeters(180 - 3)) / scaleResult, + }) + }, + {$project: {_id: 0, loc: 1, distance: {$floor: {$multiply: [scaleResult, "$distance"]}}}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), [ + {loc: [0, 7], distance: Math.floor(10 * degreesToMeters(180 - 7))}, + {loc: [0, 6], distance: Math.floor(10 * degreesToMeters(180 - 6))}, + {loc: [0, 5], distance: Math.floor(10 * degreesToMeters(180 - 5))}, + {loc: [0, 4], distance: Math.floor(10 * degreesToMeters(180 - 4))}, + {loc: [0, 3], distance: Math.floor(10 * degreesToMeters(180 - 3))}, + ]); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // 'query' option is not allowed on time-series collections. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: 'distance', + query: {no_such_field: 123}, + }) + }, + ]; + if (isTimeseries) { + assert.throwsWithCode(() => coll.aggregate(pipeline).toArray(), [ + // Must not specify 'query' for $geoNear on a time-series collection; use $match instead + 1938439, + 5860207, + ]); + } else { + assert.docEq([], coll.aggregate(pipeline).toArray()); + } + // Instead use $match. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: 'distance', + }) + }, + {$match: {no_such_field: 456}}, + ]; + assert.docEq(coll.aggregate(pipeline).toArray(), []); + plan = coll.explain().aggregate(pipeline); + if (isTimeseries) { + if (has2dsphereIndex) { + assert(aggPlanHasStage(plan, 'IXSCAN'), plan); + } else { + assert(aggPlanHasStage(plan, 'COLLSCAN'), plan); + } + // The additional $match predicate should be pushed down and combined with the geo + // predicate. That is, the initial $cursor stage should include both. + const cursorStage = getAggPlanStage(plan, '$cursor'); + const parsedQuery = cursorStage.$cursor.queryPlanner.parsedQuery; + const parsedQueryString = tojson(parsedQuery); + const planString = tojson(plan); + assert.includes(parsedQueryString, 'no_such_field', planString); + assert.includes(parsedQueryString, '_internalBucketGeoWithin', planString); + } else { + assert(aggPlanHasStage(plan, 'GEO_NEAR_2DSPHERE'), plan); + } + + // 'includeLocs' is not implemented. + pipeline = [ + { + $geoNear: Object.assign({}, query, { + key: 'loc', + distanceField: 'distance', + includeLocs: "abc", + }) + }, + ]; + if (isTimeseries) { + // $geoNear 'includeLocs' is not supported on time-series metrics + assert.throwsWithCode(() => coll.aggregate(pipeline).toArray(), 5860208); + } else { + assert.gt(coll.aggregate(pipeline).toArray().length, 0); + } +} + +function runExamples(coll, isTimeseries, has2dsphereIndex) { + runFlatExamples(coll, isTimeseries, has2dsphereIndex); + + // There are three different ways to specify a spherical query. + + // GeoJSON, implicitly uses spherical geometry. + runSphereExamples( + coll, isTimeseries, has2dsphereIndex, 1, {near: {type: "Point", coordinates: [180, 0]}}); + + // GeoJSON, with explicit spherical: true. + runSphereExamples(coll, isTimeseries, has2dsphereIndex, 1, { + near: {type: "Point", coordinates: [180, 0]}, + spherical: true, + }); + + // [x, y] point, but with explicit spherical: true. + // In this case, the resulting distances are expressed in radians by default, so we scale up + // the query results by 'earthRadiusMeters' before comparing with the expectedresult. + // We also scale down the maxDistance/minDistance bounds. + runSphereExamples(coll, isTimeseries, has2dsphereIndex, earthRadiusMeters, { + near: [180, 0], + spherical: true, + }); +} + +// Test $geoNear query results in several contexts: +// 1. on a normal collection, with a 2dsphere index +// 2. on a timeseries collection, with no index +// 3. on a timeseries collection, with a 2dsphere index on measurements + +// 1. Test a normal collection with a 2dsphere index. +// This is our baseline that ensures runExamples() is making correct assertions. +{ + const coll = db.getCollection("timeseries_geonear_measurements_baseline"); + coll.drop(); + assert.commandWorked(coll.createIndex({loc: '2dsphere'})); + + // Actually, we also need a '2d' index for the flat examples to succeed. + assert.commandWorked(coll.createIndex({loc: '2d'})); + + insertTestData(coll); + runExamples(coll, false /* isTimeseries */, true /* has2dsphereIndex */); +} + +// 2. Test a timeseries collection, with no index. +// This ensures that the rewrite of $geoNear to $geoWithin + $sort is correct. +// But it's a naive / unrealistic query plan. +{ + const coll = db.getCollection("timeseries_geonear_measurements_noindex"); + coll.drop(); + assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField: 'time'}})); + + insertTestData(coll); + runExamples(coll, true /* isTimeseries */, false /* has2dsphereIndex */); +} + +// 3. Test a timeseries collection, with a 2dsphere index on measurements. +// This should work if $geoWithin is indexed correctly. +{ + const coll = db.getCollection("timeseries_geonear_measurements_indexed"); + coll.drop(); + assert.commandWorked(db.createCollection(coll.getName(), {timeseries: {timeField: 'time'}})); + assert.commandWorked(coll.createIndex({loc: '2dsphere'})); + + // Make sure the 2dsphere index exists. (If the collection is implicitly sharded then we will + // also see an implicitly created index.) + const buckets = db.getCollection('system.buckets.' + coll.getName()); + assert.sameMembers(buckets.getIndexKeys(), + FixtureHelpers.isSharded(buckets) + ? [{'data.loc': '2dsphere_bucket'}, {'control.min.time': 1}] + : [{'data.loc': '2dsphere_bucket'}]); + + insertTestData(coll); + runExamples(coll, true /* isTimeseries */, true /* has2dsphereIndex */); +} +})(); diff --git a/jstests/core/timeseries/timeseries_geonear_random_measurements.js b/jstests/core/timeseries/timeseries_geonear_random_measurements.js new file mode 100644 index 00000000000..27d0bf297ba --- /dev/null +++ b/jstests/core/timeseries/timeseries_geonear_random_measurements.js @@ -0,0 +1,121 @@ +/** + * Test the behavior of $geoNear queries on randomly chosen time-series measurements. + * + * @tags: [ + * does_not_support_transactions, + * does_not_support_stepdowns, + * requires_fcv_51, + * requires_pipeline_optimization, + * requires_timeseries, + * ] + */ + +(function() { +"use strict"; + +load("jstests/core/timeseries/libs/geo.js"); +load("jstests/core/timeseries/libs/timeseries.js"); + +if (!TimeseriesTest.timeseriesMetricIndexesEnabled(db.getMongo())) { + jsTestLog( + "Skipped test as the featureFlagTimeseriesMetricIndexes feature flag is not enabled."); + return; +} +Random.setRandomSeed(); + +// Value is taken from geoconstants.h. +const earthRadiusMeters = (6378.1 * 1000); +const earthCircumferenceMeters = earthRadiusMeters * Math.PI * 2; + +const numDocs = 100; +let docs = []; +while (docs.length < numDocs) { + const [long, lat] = randomLongLat(); + + // Each document will be used as the center of a query, so also include a randomly chosen + // minDistance and maxDistance. + const lenMeters = Random.rand() * earthCircumferenceMeters; + const minDistanceMeters = Random.rand() * (earthCircumferenceMeters - lenMeters); + const maxDistanceMeters = minDistanceMeters + lenMeters; + + // When interpreted as flat 2d coordinates, the points all lie in a rectangle of width 2pi + // and height pi. Choose a random min/max to use in flat queries. + const minDistanceFlat = Random.rand() * (Math.PI / 2); + const maxDistanceFlat = Random.rand() * (Math.PI / 2) + minDistanceFlat; + + docs.push({ + _id: docs.length, + time: ISODate(), + loc: [long, lat], + minDistanceMeters, + maxDistanceMeters, + minDistanceFlat, + maxDistanceFlat, + }); +} +jsTestLog("Generated the following documents:"); +printjson(docs); + +const coll = db.getCollection(jsTestName() + '_normal'); +const tsColl = db.getCollection(jsTestName() + '_timeseries'); +coll.drop(); +tsColl.drop(); +assert.commandWorked(coll.createIndex({loc: '2dsphere'})); +assert.commandWorked(coll.createIndex({loc: '2d'})); +assert.commandWorked(db.createCollection(tsColl.getName(), {timeseries: {timeField: 'time'}})); +assert.commandWorked(coll.insert(docs)); +assert.commandWorked(tsColl.insert(docs)); + +function assertSortedAscending(numbers) { + for (let i = 1; i < numbers.length; ++i) { + assert.lte(numbers[i - 1], numbers[i], 'Found two descending elements at position ' + i); + } +} + +// Run a query centered on each point we inserted. +// - The result-set should be the same for both collections. +// - Each result should be sorted by 'dist'. +// - The two result-sets may disagree on the order of ties, so we don't compare the order directly. +for (const doc of docs) { + print('Testing sphere query centered at ' + tojson(doc)); + const {minDistanceMeters, maxDistanceMeters, loc: [long, lat]} = doc; + const pipeline = [{ + $geoNear: { + near: {type: "Point", coordinates: [long, lat]}, + key: 'loc', + distanceField: 'dist', + spherical: true, + minDistance: minDistanceMeters, + maxDistance: maxDistanceMeters, + } + }]; + const result = coll.aggregate(pipeline).toArray(); + const tsResult = tsColl.aggregate(pipeline).toArray(); + assert.sameMembers(result, tsResult); + assertSortedAscending(result.map(d => d.dist)); + assertSortedAscending(tsResult.map(d => d.dist)); + print('Got ' + result.length + ' results'); +} + +// Do the same thing, but interpreting the points as lying in a plane. +for (const doc of docs) { + print('Testing flat query centered at ' + tojson(doc)); + const {minDistanceFlat, maxDistanceFlat, loc: [long, lat]} = doc; + const pipeline = [{ + $geoNear: { + near: [long, lat], + key: 'loc', + distanceField: 'dist', + spherical: false, + minDistance: minDistanceFlat, + maxDistance: maxDistanceFlat, + } + }]; + const result = coll.aggregate(pipeline).toArray(); + const tsResult = tsColl.aggregate(pipeline).toArray(); + assert.sameMembers(result, tsResult); + assertSortedAscending(result.map(d => d.dist)); + assertSortedAscending(tsResult.map(d => d.dist)); + print('Got ' + result.length + ' results'); +} +})(); diff --git a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js index 74749eddce0..2e53de7fb24 100644 --- a/jstests/core/timeseries/timeseries_metric_index_2dsphere.js +++ b/jstests/core/timeseries/timeseries_metric_index_2dsphere.js @@ -135,18 +135,19 @@ TimeseriesTest.run((insert) => { timeseriescoll.find({location: {$geoWithin: {$center: [[40, -70], .15]}}}).toArray().length, geoWithinPlan2d); - /* TODO (SERVER-58602): Enable this test once query planner can use 'GEO_2DSPHERE_BUCKET' index - with $geoNear by translating to a $geoWithin + $sort assert.eq(2, bucketscoll .aggregate([{ + assert.eq(4, + timeseriescoll + .aggregate([{ $geoNear: { near: {type: "Point", coordinates: [40.4, -70.4]}, distanceField: "dist", spherical: true, - key: 'data.location' + key: 'location' } }]) .toArray() .length, - "Failed to use 2dsphere index: " + tojson(twoDSphereBucketsIndexSpec));*/ + "Failed to use 2dsphere index: " + tojson(twoDSphereTimeseriesIndexSpec)); assert.commandWorked(timeseriescoll.dropIndex(twoDSphereTimeseriesIndexSpec)); }); diff --git a/jstests/core/timeseries/timeseries_text_geonear_disallowed.js b/jstests/core/timeseries/timeseries_text_geonear_disallowed.js index 89e96b52743..0b0fd3f708b 100644 --- a/jstests/core/timeseries/timeseries_text_geonear_disallowed.js +++ b/jstests/core/timeseries/timeseries_text_geonear_disallowed.js @@ -34,11 +34,11 @@ for (let i = 0; i < nMeasurements; i++) { assert.commandWorked(tsColl.insert(docToInsert)); } -// Test that $geoNear fails cleanly because it cannot be issued against a time-series collection. +// Test that unimplemented match exprs on time-series collections fail cleanly. +// $geoNear (the match expression; not to be confused with the aggregation stage) assert.commandFailedWithCode( assert.throws(() => tsColl.find({"tags.distance": {$geoNear: [0, 0]}}).itcount()), 5626500); -// Test that unimplemented match exprs on time-series collections fail cleanly. // $near assert.commandFailedWithCode( assert.throws(() => tsColl.find({"tags.distance": {$near: [0, 0]}}).itcount()), 5626500); diff --git a/src/mongo/db/pipeline/document_source_geo_near.cpp b/src/mongo/db/pipeline/document_source_geo_near.cpp index 5dd06cd5539..707bd096628 100644 --- a/src/mongo/db/pipeline/document_source_geo_near.cpp +++ b/src/mongo/db/pipeline/document_source_geo_near.cpp @@ -31,12 +31,19 @@ #include "mongo/platform/basic.h" +#include "mongo/db/geo/geoconstants.h" +#include "mongo/db/pipeline/document_source_add_fields.h" #include "mongo/db/pipeline/document_source_geo_near.h" +#include "mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h" +#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" +#include "mongo/db/pipeline/document_source_match.h" #include "mongo/db/exec/document_value/document.h" +#include "mongo/db/matcher/expression_geo.h" #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/lite_parsed_document_source.h" +#include "mongo/db/pipeline/pipeline.h" #include "mongo/logv2/log.h" namespace mongo { @@ -93,6 +100,209 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceGeoNear::optimize() { return this; } +Pipeline::SourceContainer::iterator DocumentSourceGeoNear::doOptimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + + // Currently this is the only rewrite. + itr = splitForTimeseries(itr, container); + + return itr; +} + +Pipeline::SourceContainer::iterator DocumentSourceGeoNear::splitForTimeseries( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + invariant(*itr == this); + + // Only do this rewrite if we are immediately following an $_internalUnpackBucket stage. + if (container->begin() == itr || + !dynamic_cast<DocumentSourceInternalUnpackBucket*>(std::prev(itr)->get())) + return std::next(itr); + + // If _nearGeometry is not a constant, do nothing. + // It might be constant in a future call to optimizeAt(), once any variables have been filled + // in. + _nearGeometry = _nearGeometry->optimize(); + auto nearConst = dynamic_cast<ExpressionConstant*>(_nearGeometry.get()); + if (!nearConst) + return std::next(itr); + + // If the user didn't specify a field name to query, do nothing. + // Normally when we use a DocumentSourceGeoNearCursor we infer this from the presence of an + // index, but when we use an explicit $sort there might not be an index involved. + if (!keyFieldPath) + return std::next(itr); + + tassert(5860206, "$geoNear distanceField unexpectedly null", distanceField); + + // In this case, we know: + // - there are stages before us + // - the query point is a known constant + // - the field name + + // It's fine for this error message to say "on a time-series collection" because we only get + // here when an $_internalUnpackBucket stage precedes us. + uassert(5860207, + "Must not specify 'query' for $geoNear on a time-series collection; use $match instead", + query.isEmpty()); + uassert( + 5860208, "$geoNear 'includeLocs' is not supported on time-series metrics", !includeLocs); + + // Replace the stage with $geoWithin, $computeGeoDistance, $sort. + + // Use GeoNearExpression to parse the arguments. This makes it easier to handle a variety of + // cases: for example, if the query point is GeoJSON, then 'spherical' is implicitly true. + GeoNearExpression nearExpr; + // asNearQuery() is something like '{fieldName: {$near: ...}}'. + // GeoNearExpression seems to want something like '{$near: ...}'. + auto nearQuery = asNearQuery(keyFieldPath->fullPath()).firstElement().Obj().getOwned(); + tassert(nearExpr.parseFrom(nearQuery)); + tassert(5860204, + "Unexpected GeoNearExpression field name after asNearQuery(): "_sd + nearExpr.field, + nearExpr.field == ""_sd); + + Pipeline::SourceContainer replacement; + // 1. $geoWithin maxDistance + // We always include a $geoWithin predicate, even if maxDistance covers the entire space, + // because it takes care of excluding documents that don't have the geo field we're querying. + if (nearExpr.centroid->crs == SPHERE) { + // {$match: {field: {$geoWithin: {$centerSphere: [[x, y], radiusRadians]}}}} + double x = nearExpr.centroid->oldPoint.x; + double y = nearExpr.centroid->oldPoint.y; + auto radiusRadians = [&](double radius) -> double { + if (nearExpr.unitsAreRadians) { + // In this mode, $geoNear interprets the given maxDistance as radians. + return radius; + } else { + // Otherwise it interprets maxDistance as meters. + auto maxDistanceMeters = radius; + return maxDistanceMeters / kRadiusOfEarthInMeters; + } + }; + replacement.push_back(DocumentSourceMatch::create( + BSON(keyFieldPath->fullPath() + << BSON("$geoWithin" + << BSON("$centerSphere" << BSON_ARRAY( + BSON_ARRAY(x << y) << radiusRadians(nearExpr.maxDistance))))), + pExpCtx)); + + if (minDistance) { + // Also include an inside-out $geoWithin. This query is imprecise due to rounding error, + // so we will include an additional, precise filter later in the pipeline. + double antipodalX = x < 0 ? x + 180 : x - 180; + double antipodalY = -y; + double insideOutRadiusRadians = M_PI - radiusRadians(nearExpr.minDistance); + if (insideOutRadiusRadians > 0) { + replacement.push_back(DocumentSourceMatch::create( + BSON(keyFieldPath->fullPath() + << BSON("$geoWithin" << BSON("$centerSphere" << BSON_ARRAY( + BSON_ARRAY(antipodalX << antipodalY) + << insideOutRadiusRadians)))), + pExpCtx)); + } + } + } else if (nearExpr.centroid->crs == FLAT) { + // {$match: {field: {$geoWithin: {$center: [[x, y], radius]}}}} + tassert(5860205, + "'isNearSphere' should have resulted in a SPHERE crs.", + !nearExpr.isNearSphere); + auto x = nearExpr.centroid->oldPoint.x; + auto y = nearExpr.centroid->oldPoint.y; + replacement.push_back(DocumentSourceMatch::create( + BSON(keyFieldPath->fullPath() + << BSON("$geoWithin" << BSON( + "$center" << BSON_ARRAY(BSON_ARRAY(x << y) << nearExpr.maxDistance)))), + pExpCtx)); + + if (std::isnormal(nearExpr.minDistance)) { + // $geoWithin includes its boundary, so a negated $geoWithin excludes the boundary. + // So we need to tweak the radius here to include those points on the boundary. + // This means this filter is approximate, so we'll include an additional filter for + // minDistance after unpacking. + + // Making the radius 1% smaller seems like a big enough tweak that we won't miss any + // boundary points, and a small enough tweak to still be selective. It also preserves + // the sign of minDistance (whereas subtracting an epsilon wouldn't, necessarily). + // Only do this when isnormal(minDistance), to ensure we have enough bits of precision. + auto radius = 0.99 * nearExpr.minDistance; + replacement.push_back(DocumentSourceMatch::create( + BSON(keyFieldPath->fullPath() << BSON( + "$not" << BSON("$geoWithin" << BSON("$center" << BSON_ARRAY( + BSON_ARRAY(x << y) << radius))))), + pExpCtx)); + } + } else { + tasserted(5860203, "Expected coordinate system to be either SPHERE or FLAT."); + } + + // 2. Compute geo distance. + { + auto multiplier = (distanceMultiplier ? *distanceMultiplier : 1.0); + if (nearExpr.unitsAreRadians) { + // In this mode, $geoNear would report distances in radians instead of meters. + // To imitate this behavior, we need to scale down here too. + multiplier /= kRadiusOfEarthInMeters; + } + + auto coords = nearExpr.centroid->crs == SPHERE + ? BSON("near" << BSON("type" + << "Point" + << "coordinates" + << BSON_ARRAY(nearExpr.centroid->oldPoint.x + << nearExpr.centroid->oldPoint.y))) + : BSON("near" << BSON_ARRAY(nearExpr.centroid->oldPoint.x + << nearExpr.centroid->oldPoint.y)); + tassert(5860220, "", coords.firstElement().isABSONObj()); + + auto centroid = std::make_unique<PointWithCRS>(); + tassert(GeoParser::parseQueryPoint(coords.firstElement(), centroid.get()) + .withContext("parsing centroid for $geoNear time-series rewrite")); + + replacement.push_back(make_intrusive<DocumentSourceInternalGeoNearDistance>( + pExpCtx, + keyFieldPath->fullPath(), + std::move(centroid), + coords.firstElement().Obj().getOwned(), + distanceField->fullPath(), + multiplier)); + } + + // 3. Filter precisely by minDistance / maxDistance. + if (minDistance) { + // 'minDistance' does not take 'distanceMultiplier' into account. + replacement.push_back(DocumentSourceMatch::create( + BSON(distanceField->fullPath() << BSON( + "$gte" << *minDistance * (distanceMultiplier ? *distanceMultiplier : 1.0))), + pExpCtx)); + } + if (maxDistance) { + // 'maxDistance' does not take 'distanceMultiplier' into account. + replacement.push_back(DocumentSourceMatch::create( + BSON(distanceField->fullPath() << BSON( + "$lte" << *maxDistance * (distanceMultiplier ? *distanceMultiplier : 1.0))), + pExpCtx)); + } + + // 4. $sort by geo distance. + { + // {$sort: {dist: 1}} + replacement.push_back(DocumentSourceSort::create(pExpCtx, + SortPattern({ + {true, *distanceField, nullptr}, + }))); + } + + LOGV2_DEBUG(5860209, + 5, + "$geoNear splitForTimeseries", + "pipeline"_attr = Pipeline::serializeContainer(*container), + "replacement"_attr = Pipeline::serializeContainer(replacement)); + + auto prev = std::prev(itr); + std::move(replacement.begin(), replacement.end(), std::inserter(*container, itr)); + container->erase(itr); + return std::next(prev); +} + intrusive_ptr<DocumentSourceGeoNear> DocumentSourceGeoNear::create( const intrusive_ptr<ExpressionContext>& pCtx) { intrusive_ptr<DocumentSourceGeoNear> source(new DocumentSourceGeoNear(pCtx)); diff --git a/src/mongo/db/pipeline/document_source_geo_near.h b/src/mongo/db/pipeline/document_source_geo_near.h index 40ef29a3861..83ddd02bb88 100644 --- a/src/mongo/db/pipeline/document_source_geo_near.h +++ b/src/mongo/db/pipeline/document_source_geo_near.h @@ -145,10 +145,24 @@ public: */ boost::optional<DistributedPlanLogic> distributedPlanLogic() final; +protected: + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; + private: explicit DocumentSourceGeoNear(const boost::intrusive_ptr<ExpressionContext>& pExpCtx); /** + * If this stage immediately follows an $_internalUnpackBucket, split it up into several stages + * including an explicit $sort. + * + * Does nothing if not immediately following an $_internalUnpackBucket. + */ + Pipeline::SourceContainer::iterator splitForTimeseries(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container); + + + /** * Parses the fields in the object 'options', throwing if an error occurs. */ void parseOptions(BSONObj options, const boost::intrusive_ptr<ExpressionContext>& pCtx); diff --git a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp index 7ef8f05b318..f643bcb32f3 100644 --- a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp +++ b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.cpp @@ -45,17 +45,34 @@ REGISTER_DOCUMENT_SOURCE(_internalComputeGeoNearDistance, boost::intrusive_ptr<DocumentSource> DocumentSourceInternalGeoNearDistance::createFromBson( BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx) { auto obj = elem.embeddedObjectUserCheck(); - tassert(5874500, + uassert(5874500, str::stream() << DocumentSourceInternalGeoNearDistance::kKeyFieldName << " field is required and must be a string", obj.hasField(DocumentSourceInternalGeoNearDistance::kKeyFieldName) && obj[DocumentSourceInternalGeoNearDistance::kKeyFieldName].type() == BSONType::String); - tassert(5874501, + uassert(5874501, str::stream() << DocumentSourceInternalGeoNearDistance::kNearFieldName << " field is required and must be an object or array", obj.hasField(DocumentSourceInternalGeoNearDistance::kNearFieldName) && obj[DocumentSourceInternalGeoNearDistance::kNearFieldName].isABSONObj()); + uassert(5874502, + str::stream() << DocumentSourceInternalGeoNearDistance::kDistanceFieldFieldName + << " field is required and must be a string", + obj.hasField(DocumentSourceInternalGeoNearDistance::kDistanceFieldFieldName) && + obj[DocumentSourceInternalGeoNearDistance::kDistanceFieldFieldName].type() == + BSONType::String); + uassert( + 5874503, + str::stream() << DocumentSourceInternalGeoNearDistance::kDistanceMultiplierFieldName + << " field is required and must be a number", + obj.hasField(DocumentSourceInternalGeoNearDistance::kDistanceMultiplierFieldName) && + obj[DocumentSourceInternalGeoNearDistance::kDistanceMultiplierFieldName].isNumber()); + int expectedNumArgs = 4; + uassert(5874510, + str::stream() << kStageName << " expected " << expectedNumArgs << " arguments but got " + << obj.nFields(), + obj.nFields() == expectedNumArgs); auto nearElm = obj[DocumentSourceInternalGeoNearDistance::kNearFieldName]; auto centroid = std::make_unique<PointWithCRS>(); @@ -66,20 +83,27 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalGeoNearDistance::crea pExpCtx, obj[DocumentSourceInternalGeoNearDistance::kKeyFieldName].String(), std::move(centroid), - nearElm.embeddedObject().getOwned()); + nearElm.embeddedObject().getOwned(), + obj[DocumentSourceInternalGeoNearDistance::kDistanceFieldFieldName].String(), + obj[DocumentSourceInternalGeoNearDistance::kDistanceMultiplierFieldName] + .numberDouble()); return out; } DocumentSourceInternalGeoNearDistance::DocumentSourceInternalGeoNearDistance( const boost::intrusive_ptr<ExpressionContext>& pExpCtx, - const std::string& key, + std::string key, std::unique_ptr<PointWithCRS> centroid, - const BSONObj& coords) + const BSONObj& coords, + std::string distanceField, + double distanceMultiplier) : DocumentSource(kStageName, pExpCtx), - _key(key), + _key(std::move(key)), _centroid(std::move(centroid)), - _coords(coords) {} + _coords(coords), + _distanceField(std::move(distanceField)), + _distanceMultiplier(distanceMultiplier) {} DocumentSource::GetNextResult DocumentSourceInternalGeoNearDistance::doGetNext() { auto next = pSource->getNext(); @@ -104,9 +128,10 @@ DocumentSource::GetNextResult DocumentSourceInternalGeoNearDistance::doGetNext() minDistance = nextDistance; } } + minDistance *= _distanceMultiplier; MutableDocument doc(next.releaseDocument()); - doc.metadata().setGeoNearDistance(minDistance); + doc.setNestedField(_distanceField, Value{minDistance}); return doc.freeze(); } @@ -119,6 +144,10 @@ Value DocumentSourceInternalGeoNearDistance::serialize( MutableDocument out; out.setField(DocumentSourceInternalGeoNearDistance::kNearFieldName, Value(_coords)); out.setField(DocumentSourceInternalGeoNearDistance::kKeyFieldName, Value(_key)); + out.setField(DocumentSourceInternalGeoNearDistance::kDistanceFieldFieldName, + Value(_distanceField.fullPath())); + out.setField(DocumentSourceInternalGeoNearDistance::kDistanceMultiplierFieldName, + Value(_distanceMultiplier)); return Value(DOC(getSourceName() << out.freeze())); } diff --git a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h index fe0eafdc7ee..1083d58bb7f 100644 --- a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h +++ b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance.h @@ -43,28 +43,42 @@ public: static constexpr StringData kStageName = "$_internalComputeGeoNearDistance"_sd; static constexpr StringData kNearFieldName = "near"_sd; static constexpr StringData kKeyFieldName = "key"_sd; + static constexpr StringData kDistanceFieldFieldName = "distanceField"_sd; + static constexpr StringData kDistanceMultiplierFieldName = "distanceMultiplier"_sd; static boost::intrusive_ptr<DocumentSource> createFromBson( BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); DocumentSourceInternalGeoNearDistance(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, - const std::string& key, + std::string key, std::unique_ptr<PointWithCRS> centroid, - const BSONObj& coords); + const BSONObj& coords, + std::string distanceField, + double distanceMultiplier); const char* getSourceName() const override { return kStageName.rawData(); } StageConstraints constraints(Pipeline::SplitState pipeState) const override { - return StageConstraints(StreamType::kStreaming, - PositionRequirement::kNone, - HostTypeRequirement::kNone, - DiskUseRequirement::kNoDiskUse, - FacetRequirement::kAllowed, - TransactionRequirement::kAllowed, - LookupRequirement::kAllowed, - UnionRequirement::kAllowed); + StageConstraints result = { + StreamType::kStreaming, + PositionRequirement::kNone, + HostTypeRequirement::kNone, + DiskUseRequirement::kNoDiskUse, + FacetRequirement::kAllowed, + TransactionRequirement::kAllowed, + LookupRequirement::kAllowed, + UnionRequirement::kAllowed, + }; + result.canSwapWithMatch = true; + return result; + } + + DocumentSource::GetModPathsReturn getModifiedPaths() const final { + return {GetModPathsReturn::Type::kFiniteSet, + std::set<std::string>{_distanceField.fullPath()}, + {}}; } boost::optional<DistributedPlanLogic> distributedPlanLogic() override { @@ -80,6 +94,8 @@ private: std::string _key; std::unique_ptr<PointWithCRS> _centroid; BSONObj _coords; // "near" option + FieldPath _distanceField; + double _distanceMultiplier; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp index 10bc7f7023d..687fe4f9e7a 100644 --- a/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_compute_geo_near_distance_test.cpp @@ -49,7 +49,9 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenOverlappingPoin type: "Point", coordinates: [1, 1] }, - key: "loc" + key: "loc", + distanceMultiplier: 1, + distanceField: "dist" }})"); auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( computeGeoSpec.firstElement(), getExpCtx()); @@ -64,7 +66,8 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenOverlappingPoin auto next = geoDist->getNext(); ASSERT_TRUE(next.isAdvanced()); auto doc = next.getDocument(); - ASSERT_EQUALS(doc.metadata().getGeoNearDistance(), 0); + ASSERT_EQUALS(doc["dist"].getType(), BSONType::NumberDouble); + ASSERT_EQUALS(doc["dist"].coerceToDouble(), 0); } TEST_F(DocumentSourceInternalGeoNearDistanceTest, SphericalDistanceBetweenTwoPoints) { @@ -74,7 +77,9 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, SphericalDistanceBetweenTwoPoi type: "Point", coordinates: [0, 1] }, - key: "loc" + key: "loc", + distanceMultiplier: 1, + distanceField: "dist" }})"); auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( computeGeoSpec.firstElement(), getExpCtx()); @@ -90,14 +95,17 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, SphericalDistanceBetweenTwoPoi ASSERT_TRUE(next.isAdvanced()); auto doc = next.getDocument(); const int meterToLatDegree = 111319; // Each degree of latitude is approximately 111km. - ASSERT_APPROX_EQUAL(doc.metadata().getGeoNearDistance(), meterToLatDegree, 300); + ASSERT_EQUALS(doc["dist"].getType(), BSONType::NumberDouble); + ASSERT_APPROX_EQUAL(doc["dist"].coerceToDouble(), meterToLatDegree, 300); } TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoLegacyPoints) { BSONObj computeGeoSpec = fromjson(R"( { $_internalComputeGeoNearDistance: { near: [1, 1], - key: "loc" + key: "loc", + distanceMultiplier: 1, + distanceField: "dist" }})"); auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( computeGeoSpec.firstElement(), getExpCtx()); @@ -108,7 +116,8 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoLegacyPoints auto next = geoDist->getNext(); ASSERT_TRUE(next.isAdvanced()); auto doc = next.getDocument(); - ASSERT_APPROX_EQUAL(doc.metadata().getGeoNearDistance(), 1.41421, 0.01); + ASSERT_EQUALS(doc["dist"].getType(), BSONType::NumberDouble); + ASSERT_APPROX_EQUAL(doc["dist"].coerceToDouble(), 1.41421, 0.01); } TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoMixedPointsSphereAndFlat) { @@ -118,7 +127,9 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoMixedPointsS type: "Point", coordinates: [0, 1] }, - key: "loc" + key: "loc", + distanceMultiplier: 1, + distanceField: "dist" }})"); auto geoDist = DocumentSourceInternalGeoNearDistance::createFromBson( computeGeoSpec.firstElement(), getExpCtx()); @@ -130,7 +141,8 @@ TEST_F(DocumentSourceInternalGeoNearDistanceTest, DistanceBetweenTwoMixedPointsS ASSERT_TRUE(next.isAdvanced()); auto doc = next.getDocument(); const int meterToLatDegree = 111319; // Each degree of latitude is approximately 111km. - ASSERT_APPROX_EQUAL(doc.metadata().getGeoNearDistance(), meterToLatDegree, 300); + ASSERT_EQUALS(doc["dist"].getType(), BSONType::NumberDouble); + ASSERT_APPROX_EQUAL(doc["dist"].coerceToDouble(), meterToLatDegree, 300); } } // namespace diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index 1c8af5fd76e..c6f353f3293 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -980,14 +980,60 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi } } + // Attempt to push geoNear on the metaField past $_internalUnpackBucket. + if (auto nextNear = dynamic_cast<DocumentSourceGeoNear*>(std::next(itr)->get())) { + // Currently we only support geo indexes on the meta field, and we enforce this by + // requiring the key field to be set so we can check before we try to look up indexes. + auto keyField = nextNear->getKeyField(); + uassert(5892921, + "Must specify 'key' option for $geoNear on a time-series collection", + keyField); + + // Currently we do not support query for $geoNear on a bucket + uassert( + 1938439, + "Must not specify 'query' for $geoNear on a time-series collection; use $match instead", + nextNear->getQuery().binaryEqual(BSONObj())); + + auto metaField = _bucketUnpacker.bucketSpec().metaField; + if (metaField && *metaField == keyField->front()) { + // Make sure we actually re-write the key field for the buckets collection so we can + // locate the index. + static const FieldPath baseMetaFieldPath{timeseries::kBucketMetaFieldName}; + nextNear->setKeyField(keyField->getPathLength() > 1 + ? baseMetaFieldPath.concat(keyField->tail()) + : baseMetaFieldPath); + + // Save the source, remove it, and then push it down. + auto source = *std::next(itr); + container->erase(std::next(itr)); + container->insert(itr, source); + return std::prev(itr) == container->begin() ? std::prev(itr) + : std::prev(std::prev(itr)); + } else { + // Don't push down query on measurements. + } + } + // Optimize the pipeline after this stage to merge $match stages and push them forward. if (!_optimizedEndOfPipeline) { _optimizedEndOfPipeline = true; - Pipeline::optimizeEndOfPipeline(itr, container); if (std::next(itr) == container->end()) { return container->end(); } + if (auto nextStage = dynamic_cast<DocumentSourceGeoNear*>(std::next(itr)->get())) { + // If the end of the pipeline starts with a $geoNear stage, make sure it gets optimized + // in a context where it knows there are other stages before it. It will split itself + // up into separate $match and $sort stages. But it doesn't split itself up when it's + // the first stage, because it expects to use a special DocumentSouceGeoNearCursor plan. + nextStage->optimizeAt(std::next(itr), container); + } + + Pipeline::optimizeEndOfPipeline(itr, container); + if (std::next(itr) == container->end()) { + return container->end(); + } } { // Check if we can avoid unpacking if we have a group stage with min/max aggregates. @@ -1031,41 +1077,6 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi } } - // Attempt to push geoNear on the metaField past $_internalUnpackBucket. - if (auto nextNear = dynamic_cast<DocumentSourceGeoNear*>(std::next(itr)->get())) { - // Currently we only support geo indexes on the meta field, and we enforce this by - // requiring the key field to be set so we can check before we try to look up indexes. - auto keyField = nextNear->getKeyField(); - uassert(5892921, - "Must specify 'key' option for $geoNear on a time-series collection", - keyField); - - auto metaField = _bucketUnpacker.bucketSpec().metaField; - uassert( - 4581294, - "Must specify part of metadata field as 'key' for $geoNear on a time-series collection", - metaField && *metaField == keyField->front()); - - // Currently we do not support query for $geoNear on a bucket - uassert( - 1938439, - "Must not specify 'query' for $geoNear on a time-series collection; use $match instead", - nextNear->getQuery().binaryEqual(BSONObj())); - - // Make sure we actually re-write the key field for the buckets collection so we can - // locate the index. - static const FieldPath baseMetaFieldPath{timeseries::kBucketMetaFieldName}; - nextNear->setKeyField(keyField->getPathLength() > 1 - ? baseMetaFieldPath.concat(keyField->tail()) - : baseMetaFieldPath); - - // Save the source, remove it, and then push it down. - auto source = *std::next(itr); - container->erase(std::next(itr)); - container->insert(itr, source); - return std::prev(itr) == container->begin() ? std::prev(itr) : std::prev(std::prev(itr)); - } - // Attempt to map predicates on bucketed fields to predicates on the control field. if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>(std::next(itr)->get()); nextMatch && !_triedBucketLevelFieldsPredicatesPushdown) { diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 486b2e3ac7d..1b35c35f90f 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -411,13 +411,16 @@ stdx::unordered_set<NamespaceString> Pipeline::getInvolvedCollections() const { return collectionNames; } -vector<Value> Pipeline::serialize() const { +vector<Value> Pipeline::serializeContainer(const SourceContainer& container) { vector<Value> serializedSources; - for (auto&& source : _sources) { + for (auto&& source : container) { source->serializeToArray(serializedSources); } return serializedSources; } +vector<Value> Pipeline::serialize() const { + return serializeContainer(_sources); +} vector<BSONObj> Pipeline::serializeToBson() const { const auto serialized = serialize(); diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h index dd52df4ab9e..e1ef7b6a0a4 100644 --- a/src/mongo/db/pipeline/pipeline.h +++ b/src/mongo/db/pipeline/pipeline.h @@ -271,6 +271,7 @@ public: */ std::vector<Value> serialize() const; std::vector<BSONObj> serializeToBson() const; + static std::vector<Value> serializeContainer(const SourceContainer& container); /** * Serializes the pipeline into BSON for explain/debug logging purposes. |