diff options
author | David Percy <david.percy@mongodb.com> | 2022-06-08 16:13:10 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-13 16:53:30 +0000 |
commit | b8f0fb561f5f6401dfc9a773777963f2f4bcb725 (patch) | |
tree | fd88a921ec6f7eabc1d6b47bc954901f18fe8825 | |
parent | 734ccd1e913d0f3df65bed4948a0694601a0cafa (diff) | |
download | mongo-b8f0fb561f5f6401dfc9a773777963f2f4bcb725.tar.gz |
SERVER-67072 Fix pushdown of time-series metadata predicates in $or
-rw-r--r-- | jstests/core/timeseries/timeseries_predicates.js | 125 | ||||
-rw-r--r-- | src/mongo/db/exec/bucket_unpacker.cpp | 52 |
2 files changed, 156 insertions, 21 deletions
diff --git a/jstests/core/timeseries/timeseries_predicates.js b/jstests/core/timeseries/timeseries_predicates.js index 54e63c0b7d5..a273cc8b128 100644 --- a/jstests/core/timeseries/timeseries_predicates.js +++ b/jstests/core/timeseries/timeseries_predicates.js @@ -39,7 +39,7 @@ function checkPredicateResult(predicate, documents) { function checkAllBucketings(predicate, documents) { for (const doc of documents) { doc._id = ObjectId(); - doc.time = ISODate(); + doc.time = doc.time || ISODate(); } // For N documents, there are 2^N ways to assign them to buckets A and B. @@ -232,4 +232,127 @@ checkAllBucketings({ {meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2020-02-01')}, {meta: {a: +1, b: -1}, x: 'asdf', time: ISODate('2019-12-31')}, ]); + +// Test $exists on meta, inside $or. +checkAllBucketings({ + $or: [ + {"meta.a": {$exists: true}}, + {"x": {$gt: 2}}, + ] +}, + [ + {meta: {a: 1}, x: 1}, + {meta: {a: 2}, x: 2}, + {meta: {a: 3}, x: 3}, + {meta: {a: 4}, x: 4}, + {meta: {}, x: 1}, + {meta: {}, x: 2}, + {meta: {}, x: 3}, + {meta: {}, x: 4}, + ]); + +// Test $in on meta, inside $or. +checkAllBucketings({ + $or: [ + {"meta.a": {$in: [1, 3]}}, + {"x": {$gt: 2}}, + ] +}, + [ + {meta: {a: 1}, x: 1}, + {meta: {a: 2}, x: 2}, + {meta: {a: 3}, x: 3}, + {meta: {a: 4}, x: 4}, + {meta: {}, x: 1}, + {meta: {}, x: 2}, + {meta: {}, x: 3}, + {meta: {}, x: 4}, + ]); + +// Test geo predicates on meta, inside $or. +for (const pred of ['$geoWithin', '$geoIntersects']) { + checkAllBucketings({ + $or: [ + { + "meta.location": { + [pred]: { + $geometry: { + type: "Polygon", + coordinates: [[ + [0, 0], + [0, 3], + [3, 3], + [3, 0], + [0, 0], + ]] + } + } + } + }, + {x: {$gt: 2}}, + ] + }, + [ + {meta: {location: [1, 1]}, x: 1}, + {meta: {location: [1, 1]}, x: 2}, + {meta: {location: [1, 1]}, x: 3}, + {meta: {location: [1, 1]}, x: 4}, + {meta: {location: [5, 5]}, x: 1}, + {meta: {location: [5, 5]}, x: 2}, + {meta: {location: [5, 5]}, x: 3}, + {meta: {location: [5, 5]}, x: 4}, + ]); +} + +// Test $mod on meta, inside $or. +// $mod is an example of a predicate that we don't handle specially in time-series optimizations: +// it can be pushed down if and only if it's on a metadata field. +checkAllBucketings({ + $or: [ + {"meta.a": {$mod: [2, 0]}}, + {"x": {$gt: 4}}, + ] +}, + [ + {meta: {a: 1}, x: 1}, + {meta: {a: 2}, x: 2}, + {meta: {a: 3}, x: 3}, + {meta: {a: 4}, x: 4}, + {meta: {a: 5}, x: 5}, + {meta: {a: 6}, x: 6}, + {meta: {a: 7}, x: 7}, + {meta: {a: 8}, x: 8}, + ]); + +// Test $elemMatch on meta, inside $or. +checkAllBucketings({ + $or: [ + {"meta.a": {$elemMatch: {b: 3}}}, + {"x": {$gt: 4}}, + ] +}, + [ + {x: 1, meta: {a: []}}, + {x: 2, meta: {a: [{b: 2}]}}, + {x: 3, meta: {a: [{b: 3}]}}, + {x: 4, meta: {a: [{b: 2}, {b: 3}]}}, + {x: 5, meta: {a: []}}, + {x: 6, meta: {a: [{b: 2}]}}, + {x: 7, meta: {a: [{b: 3}]}}, + {x: 8, meta: {a: [{b: 2}, {b: 3}]}}, + ]); +checkAllBucketings({ + $or: [ + {"meta.a": {$elemMatch: {b: 2, c: 3}}}, + {"x": {$gt: 3}}, + ] +}, + [ + {x: 1, meta: {a: []}}, + {x: 2, meta: {a: [{b: 2, c: 3}]}}, + {x: 3, meta: {a: [{b: 2}, {c: 3}]}}, + {x: 4, meta: {a: []}}, + {x: 5, meta: {a: [{b: 2, c: 3}]}}, + {x: 6, meta: {a: [{b: 2}, {c: 3}]}}, + ]); })(); diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp index 6d96dd51676..43ccca4a13a 100644 --- a/src/mongo/db/exec/bucket_unpacker.cpp +++ b/src/mongo/db/exec/bucket_unpacker.cpp @@ -240,29 +240,14 @@ std::unique_ptr<MatchExpression> createComparisonPredicate( policy, matchExpr, "can't handle string comparison with a non-default collation"_sd); } - // We must avoid mapping predicates on the meta field onto the control field. These should be - // mapped to the meta field instead. - // - // You might think these were handled earlier, by splitting the match expression into a - // metadata-only part, and measurement/time-only part. However, splitting a $match into two - // sequential $matches only works when splitting a conjunction. A predicate like - // {$or: [ {a: 5}, {meta.b: 5} ]} cannot be split, and can't be metadata-only, so we have to - // handle it here. + // This function only handles time and measurement predicates--not metadata. if (bucketSpec.metaField() && (matchExprPath == bucketSpec.metaField().get() || expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) { - - if (haveComputedMetaField) - return handleIneligible(policy, matchExpr, "can't handle a computed meta field"); - - if (!includeMetaField) - return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field"); - - auto result = matchExpr->shallowClone(); - expression::applyRenamesToExpression( - result.get(), - {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}}); - return result; + tasserted( + 6707200, + str::stream() << "createComparisonPredicate() does not handle metadata predicates: " + << matchExpr); } // We must avoid mapping predicates on fields computed via $addFields or a computed $project. @@ -456,6 +441,33 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField( tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr); + // If we have a leaf predicate on a meta field, we can map it to the bucket's meta field. + // This includes comparisons such as $eq and $lte, as well as other non-comparison predicates + // such as $exists, $mod, or $elemMatch. + // + // Metadata predicates are partially handled earlier, by splitting the match expression into a + // metadata-only part, and measurement/time-only part. However, splitting a $match into two + // sequential $matches only works when splitting a conjunction. A predicate like + // {$or: [ {a: 5}, {meta.b: 5} ]} can't be split, and can't be metadata-only, so we have to + // handle it here. + const auto matchExprPath = matchExpr->path(); + if (!matchExprPath.empty() && bucketSpec.metaField() && + (matchExprPath == bucketSpec.metaField().get() || + expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) { + + if (haveComputedMetaField) + return handleIneligible(policy, matchExpr, "can't handle a computed meta field"); + + if (!includeMetaField) + return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field"); + + auto result = matchExpr->shallowClone(); + expression::applyRenamesToExpression( + result.get(), + {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}}); + return result; + } + if (matchExpr->matchType() == MatchExpression::AND) { auto nextAnd = static_cast<const AndMatchExpression*>(matchExpr); auto andMatchExpr = std::make_unique<AndMatchExpression>(); |