diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2022-03-08 18:13:23 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-08 19:54:50 +0000 |
commit | 7a003039250c67c00b1db195a0d8fd8c3c770943 (patch) | |
tree | b7a254e2f6359e33a3a6cf95c9c46933e444d758 /src/mongo/db/matcher | |
parent | 682f784e93b1602f0dcd74115e2105e1433857f5 (diff) | |
download | mongo-7a003039250c67c00b1db195a0d8fd8c3c770943.tar.gz |
SERVER-63124 Add support for pushing filters into column index scan
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 207 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.h | 17 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo_test.cpp | 586 |
3 files changed, 808 insertions, 2 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index c3d0e777c1f..2016f4b6c39 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -27,7 +27,6 @@ * it in the license file. */ -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery #include "mongo/platform/basic.h" @@ -40,11 +39,11 @@ #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_tree.h" +#include "mongo/db/matcher/expression_type.h" #include "mongo/db/matcher/schema/expression_internal_schema_xor.h" #include "mongo/db/pipeline/dependencies.h" #include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/query/collation/collator_interface.h" -#include "mongo/logv2/log.h" namespace mongo { @@ -438,6 +437,191 @@ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchEx } } +bool tryAddExprHelper(StringData path, + std::unique_ptr<MatchExpression> me, + StringMap<std::unique_ptr<MatchExpression>>& out) { + auto& entryForPath = out[path]; + if (!entryForPath) { + // First predicate for this path, just put it in directly. + entryForPath = std::move(me); + } else { + // We have at least one predicate for this path already. Put all the predicates for the path + // into a giant $and clause. Note this might have to change once we start supporting $or + // predicates. + if (entryForPath->matchType() != MatchExpression::AND) { + // This is the second predicate, we need to make the $and and put in both predicates: + // {$and: [<existing>, 'me']}. 
+ auto andME = std::make_unique<AndMatchExpression>(); + andME->add(std::move(entryForPath)); + entryForPath = std::move(andME); + } + auto andME = checked_cast<AndMatchExpression*>(entryForPath.get()); + andME->add(std::move(me)); + } + return true; +} + +bool tryAddExpr(StringData path, + const MatchExpression* me, + StringMap<std::unique_ptr<MatchExpression>>& out) { + if (FieldRef(path).hasNumericPathComponents()) + return false; + + return tryAddExprHelper(path, me->shallowClone(), out); +} + +bool splitMatchExpressionForColumns(const MatchExpression* me, + StringMap<std::unique_ptr<MatchExpression>>& out) { + auto canCompareWith = [](const BSONElement& elem, bool isEQ) { + // Here we check whether the comparison can work with the given value. Objects and arrays + // are generally not permitted. Objects can't work because the paths will be split apart in + // the columnar index. We could do arrays of scalars since we would have all that + // information in the index, but it proved complex to integrate due to the interface with + // the matcher. It expects to get a BSONElement for the whole Array but we'd like to avoid + // materializing that. + // + // One exception to the above: We can support EQ with empty objects and empty arrays since + // those are more obviously correct. Maybe could also support LT and LTE, but those don't + // seem as important so are left for future work. + if (elem.type() == BSONType::Array || elem.type() == BSONType::Object) { + return isEQ && elem.Obj().isEmpty(); + } + + // We support all other types, except null, since it is equivalent to x==null || !exists(x). + return !elem.isNull(); + }; + switch (me->matchType()) { + // These are always safe since they will never match documents missing their field, or where + // the element is an object or array. 
+ case MatchExpression::REGEX: + case MatchExpression::MOD: + case MatchExpression::BITS_ALL_SET: + case MatchExpression::BITS_ALL_CLEAR: + case MatchExpression::BITS_ANY_SET: + case MatchExpression::BITS_ANY_CLEAR: + case MatchExpression::EXISTS: { + auto sub = checked_cast<const PathMatchExpression*>(me); + return tryAddExpr(sub->path(), me, out); + } + + case MatchExpression::LT: + case MatchExpression::GT: + case MatchExpression::EQ: + case MatchExpression::LTE: + case MatchExpression::GTE: { + auto sub = checked_cast<const ComparisonMatchExpressionBase*>(me); + if (!canCompareWith(sub->getData(), me->matchType() == MatchExpression::EQ)) + return false; + return tryAddExpr(sub->path(), me, out); + } + + + case MatchExpression::MATCH_IN: { + auto sub = checked_cast<const InMatchExpression*>(me); + // Note that $in treats regexes specially and stores them separately from the rest of + // the 'equalities'. We actually don't need to look at them here since any regex should + // be OK. A regex could only match a string, symbol, or other regex, any of which would + // be present in the columnar storage. + for (auto&& elem : sub->getEqualities()) { + if (!canCompareWith(elem, true)) + return false; + } + return tryAddExpr(sub->path(), me, out); + } + + case MatchExpression::TYPE_OPERATOR: { + auto sub = checked_cast<const TypeMatchExpression*>(me); + if (sub->typeSet().hasType(BSONType::EOO) || sub->typeSet().hasType(BSONType::Object) || + sub->typeSet().hasType(BSONType::Array)) + return false; + return tryAddExpr(sub->path(), me, out); + } + + case MatchExpression::AND: { + auto sub = checked_cast<const AndMatchExpression*>(me); + for (size_t i = 0, end = sub->numChildren(); i != end; i++) { + if (!splitMatchExpressionForColumns(sub->getChild(i), out)) { + return false; + } + } + return true; + } + + + case MatchExpression::NOT: { + // {$ne: null} pattern is known to be important in cases like those in SERVER-27646 and + // SERVER-36465. 
+ auto notExpr = checked_cast<const NotMatchExpression*>(me); + auto withinNot = notExpr->getChild(0); + + // Oddly, we parse {$ne: null} to a NOT -> EQ, but we parse {$not: {$eq: null}} into a + // more complex NOT -> AND -> EQ. Let's support both. + auto tryAddNENull = [&](const MatchExpression* negatedPred) { + if (negatedPred->matchType() != MatchExpression::EQ) { + return false; + } + auto eqPred = checked_cast<const EqualityMatchExpression*>(negatedPred); + if (eqPred->getData().isNull()) { + return tryAddExpr(eqPred->path(), me, out); + } + return false; + }; + if (tryAddNENull(withinNot)) { + // {$ne: null}. We had equality just under NOT. + return true; + } else if (withinNot->matchType() == MatchExpression::AND && + withinNot->numChildren() == 1 && tryAddNENull(withinNot->getChild(0))) { + // {$not: {$eq: null}}: NOT -> AND -> EQ. + return true; + } + // May be other cases, but left as future work. + return false; + } + + // We don't currently handle any of these cases, but some may be possible in the future. + case MatchExpression::ALWAYS_FALSE: + case MatchExpression::ALWAYS_TRUE: + case MatchExpression::ELEM_MATCH_OBJECT: + case MatchExpression::ELEM_MATCH_VALUE: // This one should be feasible. May be valuable. 
+ case MatchExpression::EXPRESSION: + case MatchExpression::GEO: + case MatchExpression::GEO_NEAR: + case MatchExpression::INTERNAL_2D_POINT_IN_ANNULUS: + case MatchExpression::INTERNAL_BUCKET_GEO_WITHIN: + case MatchExpression::INTERNAL_EXPR_EQ: // This one could be valuable for $lookup + case MatchExpression::INTERNAL_EXPR_GT: + case MatchExpression::INTERNAL_EXPR_GTE: + case MatchExpression::INTERNAL_EXPR_LT: + case MatchExpression::INTERNAL_EXPR_LTE: + case MatchExpression::INTERNAL_SCHEMA_ALLOWED_PROPERTIES: + case MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX: + case MatchExpression::INTERNAL_SCHEMA_BIN_DATA_ENCRYPTED_TYPE: + case MatchExpression::INTERNAL_SCHEMA_BIN_DATA_SUBTYPE: + case MatchExpression::INTERNAL_SCHEMA_COND: + case MatchExpression::INTERNAL_SCHEMA_EQ: + case MatchExpression::INTERNAL_SCHEMA_FMOD: + case MatchExpression::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX: + case MatchExpression::INTERNAL_SCHEMA_MAX_ITEMS: + case MatchExpression::INTERNAL_SCHEMA_MAX_LENGTH: + case MatchExpression::INTERNAL_SCHEMA_MAX_PROPERTIES: + case MatchExpression::INTERNAL_SCHEMA_MIN_ITEMS: + case MatchExpression::INTERNAL_SCHEMA_MIN_LENGTH: + case MatchExpression::INTERNAL_SCHEMA_MIN_PROPERTIES: + case MatchExpression::INTERNAL_SCHEMA_OBJECT_MATCH: + case MatchExpression::INTERNAL_SCHEMA_ROOT_DOC_EQ: + case MatchExpression::INTERNAL_SCHEMA_TYPE: + case MatchExpression::INTERNAL_SCHEMA_UNIQUE_ITEMS: + case MatchExpression::INTERNAL_SCHEMA_XOR: + case MatchExpression::NOR: + case MatchExpression::OR: + case MatchExpression::SIZE: + case MatchExpression::TEXT: + case MatchExpression::WHERE: + return false; + } + MONGO_UNREACHABLE; +} + } // namespace namespace expression { @@ -719,5 +903,24 @@ bool bidirectionalPathPrefixOf(StringData first, StringData second) { return first == second || expression::isPathPrefixOf(first, second) || expression::isPathPrefixOf(second, first); } + +boost::optional<StringMap<std::unique_ptr<MatchExpression>>> 
splitMatchExpressionForColumns( + const MatchExpression* me) { + boost::optional<StringMap<std::unique_ptr<MatchExpression>>> out; + out.emplace(); + if (!mongo::splitMatchExpressionForColumns(me, *out)) + out = {}; + return out; +} + +std::string filterMapToString(const StringMap<std::unique_ptr<MatchExpression>>& filterMap) { + StringBuilder sb; + sb << "{"; + for (auto&& [path, matchExpr] : filterMap) { + sb << path << ": " << matchExpr->toString() << ", "; + } + sb << "}"; + return sb.str(); +} } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index 31bc8f9e227..0fdbac756a0 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -157,5 +157,22 @@ splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, * {new: {$gt: 3}}. */ void applyRenamesToExpression(MatchExpression* expr, const StringMap<std::string>& renames); + +/** + * Split a MatchExpression into subexpressions targeted to separate columns. A document will match + * the query if all of the sub expressions match. Returns an empty optional if the entire match + * cannot be handled by the column store. + * + * This API will need to change in order to support more complex queries, such as $or and + * $elemMatch. + */ +boost::optional<StringMap<std::unique_ptr<MatchExpression>>> splitMatchExpressionForColumns( + const MatchExpression* me); + +/** + * Serializes this complex data structure for debugging purposes. 
+ */ +std::string filterMapToString(const StringMap<std::unique_ptr<MatchExpression>>&); + } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index 257fbc7a734..8679c759cd5 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -1562,4 +1562,590 @@ TEST(HasExistencePredicateOnPath, ReturnsFalseWhenExistsOnSubpath) { expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd)); } +TEST(SplitMatchExpressionForColumns, PreservesEmptyPredicates) { + ParsedMatchExpression empty("{}"); + auto splitUp = expression::splitMatchExpressionForColumns(empty.get()); + ASSERT(splitUp); + ASSERT(splitUp->size() == 0); +} + +TEST(SplitMatchExpressionForColumns, RejectsUnsupportedPredicates) { + { + // Future work. + ParsedMatchExpression orClause("{$or: [{a: 1}, {b: 2}]}"); + auto splitUp = expression::splitMatchExpressionForColumns(orClause.get()); + ASSERT(!splitUp); + } + + { + // Would match missing values, not safe for a columnar index. + ParsedMatchExpression alwaysTrue("{$alwaysTrue: 1}"); + auto splitUp = expression::splitMatchExpressionForColumns(alwaysTrue.get()); + ASSERT(!splitUp); + } + + { + // Future work. + ParsedMatchExpression exprClause("{$expr: {$eq: ['$x', 0]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(exprClause.get()); + ASSERT(!splitUp); + } +} + +// Test equality predicates that are safe to split (in contrast to next test). 
+TEST(SplitMatchExpressionForColumns, SplitsSafeEqualities) { + + { + ParsedMatchExpression singleEqualsNumber("{albatross: 1}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleEqualsNumber.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ) + << splitUp->at("albatross")->toString(); + } + + { + ParsedMatchExpression singleEqualsString("{albatross: 'flying'}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleEqualsString.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ) + << splitUp->at("albatross")->toString(); + } + + { + ParsedMatchExpression doubleEqualsNumber("{albatross: 1, blackbird: 2}"); + auto splitUp = expression::splitMatchExpressionForColumns(doubleEqualsNumber.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 2) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ) + << splitUp->at("albatross")->toString(); + ASSERT(splitUp->contains("blackbird")); + ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::EQ) + << splitUp->at("blackbird")->toString(); + } + + { + ParsedMatchExpression mixedEquals( + "{albatross: 1," + " blackbird: 'flying'," + " cowbird: {$eq: /oreo/}," + " duck: NumberInt(2)," + " eagle: NumberLong(50)," + " grackle: ObjectId('000000000000000000000000')," + " heron: true," + " ibis: false," + " jay: Timestamp(1, 0)," + " kiwi: NumberDecimal('22')," + " 'loggerhead shrike': {$minKey: 1}," + " mallard: {$maxKey: 1}}"); + auto splitUp = expression::splitMatchExpressionForColumns(mixedEquals.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 12) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + 
ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ) + << splitUp->at("albatross")->toString(); + ASSERT(splitUp->contains("blackbird")); + ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::EQ) + << splitUp->at("blackbird")->toString(); + } +} + + +TEST(SplitMatchExpressionForColumns, SupportsEqualityToEmptyObjects) { + { + ParsedMatchExpression equalsEmptyObj("{albatross: {}}"); + auto splitUp = expression::splitMatchExpressionForColumns(equalsEmptyObj.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ) + << splitUp->at("albatross")->toString(); + } +} + +TEST(SplitMatchExpressionForColumns, SupportsEqualityToEmptyArray) { + { + ParsedMatchExpression equalsEmptyArray("{albatross: []}"); + auto splitUp = expression::splitMatchExpressionForColumns(equalsEmptyArray.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ) + << splitUp->at("albatross")->toString(); + } +} + +TEST(SplitMatchExpressionForColumns, DoesNotSupportEqualsNull) { + { + ParsedMatchExpression equalsNull("{a: null}"); + auto splitUp = expression::splitMatchExpressionForColumns(equalsNull.get()); + ASSERT(!splitUp); + } +} + +TEST(SplitMatchExpressionForColumns, DoesSupportNotEqualsNull) { + { + ParsedMatchExpression neNull("{a: {$ne: null}}"); + auto splitUp = expression::splitMatchExpressionForColumns(neNull.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("a")); + ASSERT(splitUp->at("a")->matchType() == MatchExpression::NOT) + << splitUp->at("a")->toString(); + } + { + ParsedMatchExpression notEqualsNull("{a: {$not: {$eq: null}}}"); + auto splitUp = expression::splitMatchExpressionForColumns(notEqualsNull.get()); + ASSERT(splitUp) << 
notEqualsNull.get()->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("a")); + ASSERT(splitUp->at("a")->matchType() == MatchExpression::NOT) + << splitUp->at("a")->toString(); + } +} + +TEST(SplitMatchExpressionForColumns, DoesNotSupportCompoundEquals) { + { + ParsedMatchExpression implicitEqualsArray("{a: [1, 2]}"); + auto splitUp = expression::splitMatchExpressionForColumns(implicitEqualsArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression explicitEqualsArray("{a: {$eq: [1, 2]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(explicitEqualsArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression implicitEqualsObject("{a: {boats: 1, planes: 2}}"); + auto splitUp = expression::splitMatchExpressionForColumns(implicitEqualsObject.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression explicitEqualsObject("{a: {$eq: {boats: 1, planes: 2}}}"); + auto splitUp = expression::splitMatchExpressionForColumns(explicitEqualsObject.get()); + ASSERT(!splitUp); + } + // We should be able to do dotted path version though, as a potential workaround. + { + ParsedMatchExpression equalsDotted("{'a.boats': 1, 'a.planes': 2}"); + auto splitUp = expression::splitMatchExpressionForColumns(equalsDotted.get()); + ASSERT(splitUp); + ASSERT(splitUp->size() == 2); + ASSERT(splitUp->contains("a.boats")); + ASSERT(splitUp->at("a.boats")->matchType() == MatchExpression::EQ) + << splitUp->at("a.boats")->toString(); + ASSERT(splitUp->contains("a.planes")); + ASSERT(splitUp->at("a.planes")->matchType() == MatchExpression::EQ) + << splitUp->at("a.planes")->toString(); + } +} + +// Tests that comparisons (like $lt and $gte) have the same splitting rules as equality. 
+TEST(SplitMatchExpressionForColumns, SupportsComparisonsLikeEqualities) { + + { + ParsedMatchExpression singleLtNumber("{albatross: {$lt: 1}}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleLtNumber.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::LT) + << splitUp->at("albatross")->toString(); + } + { + ParsedMatchExpression singleLteNumber("{albatross: {$lte: 1}}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleLteNumber.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::LTE) + << splitUp->at("albatross")->toString(); + } + { + ParsedMatchExpression singleGtNumber("{albatross: {$gt: 1}}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleGtNumber.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::GT) + << splitUp->at("albatross")->toString(); + } + { + ParsedMatchExpression singleGteNumber("{albatross: {$gte: 1}}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleGteNumber.get()); + ASSERT(splitUp); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::GTE) + << splitUp->at("albatross")->toString(); + } + { + ParsedMatchExpression combinationPredicate( + "{" + " albatross: {$lt: 100}," + " blackbird: {$gt: 0}," + " cowbird: {$gte: 0, $lte: 100}" + "}"); + auto splitUp = expression::splitMatchExpressionForColumns(combinationPredicate.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::LT) + << 
splitUp->at("albatross")->toString(); + ASSERT(splitUp->contains("blackbird")); + ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::GT) + << splitUp->at("blackbird")->toString(); + ASSERT(splitUp->contains("cowbird")); + ASSERT(splitUp->at("cowbird")->matchType() == MatchExpression::AND) + << splitUp->at("cowbird")->toString(); + ASSERT_EQ(splitUp->size(), 3) << splitUp->size(); + } +} + +// While equality to [] or {} is OK, inequality is not so obvious. Left as future work. +TEST(SplitMatchExpressionForColumns, DoesNotSupportInequalitiesToObjectsOrArrays) { + { + ParsedMatchExpression ltArray("{albatross: {$lt: []}}"); + auto splitUp = expression::splitMatchExpressionForColumns(ltArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression ltObject("{albatross: {$lt: {}}}"); + auto splitUp = expression::splitMatchExpressionForColumns(ltObject.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression lteArray("{albatross: {$lte: []}}"); + auto splitUp = expression::splitMatchExpressionForColumns(lteArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression lteObject("{albatross: {$lte: {}}}"); + auto splitUp = expression::splitMatchExpressionForColumns(lteObject.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression gtArray("{albatross: {$gt: []}}"); + auto splitUp = expression::splitMatchExpressionForColumns(gtArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression gtObject("{albatross: {$gt: {}}}"); + auto splitUp = expression::splitMatchExpressionForColumns(gtObject.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression gteArray("{albatross: {$gte: []}}"); + auto splitUp = expression::splitMatchExpressionForColumns(gteArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression gteObject("{albatross: {$gte: {}}}"); + auto splitUp = expression::splitMatchExpressionForColumns(gteObject.get()); + ASSERT(!splitUp); + } +} + +// Tests that comparisons which only match values of a certain type are allowed. 
+TEST(SplitMatchExpressionForColumns, SupportsTypeSpecificPredicates) { + ParsedMatchExpression combinationPredicate( + "{" + " albatross: /oreo/," + " blackbird: {$mod: [2, 0]}," + " cowbird: {$bitsAllSet: 7}," + " duck: {$bitsAllClear: 24}," + " eagle: {$bitsAnySet: 7}," + " falcon: {$bitsAnyClear: 24}" + "}"); + auto splitUp = expression::splitMatchExpressionForColumns(combinationPredicate.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::REGEX) + << splitUp->at("albatross")->toString(); + ASSERT(splitUp->contains("blackbird")); + ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::MOD) + << splitUp->at("blackbird")->toString(); + ASSERT(splitUp->contains("cowbird")); + ASSERT(splitUp->at("cowbird")->matchType() == MatchExpression::BITS_ALL_SET) + << splitUp->at("cowbird")->toString(); + ASSERT(splitUp->contains("duck")); + ASSERT(splitUp->at("duck")->matchType() == MatchExpression::BITS_ALL_CLEAR) + << splitUp->at("duck")->toString(); + ASSERT(splitUp->contains("eagle")); + ASSERT(splitUp->at("eagle")->matchType() == MatchExpression::BITS_ANY_SET) + << splitUp->at("eagle")->toString(); + ASSERT(splitUp->contains("falcon")); + ASSERT(splitUp->at("falcon")->matchType() == MatchExpression::BITS_ANY_CLEAR) + << splitUp->at("falcon")->toString(); + ASSERT_EQ(splitUp->size(), 6) << splitUp->size(); +} + +TEST(SplitMatchExpressionForColumns, SupportsInWithRegexes) { + { + // First confirm a $in clause is supported without regexes. 
+ ParsedMatchExpression stringInClause("{albatross: {$in: ['big', 'ol', 'bird']}}"); + auto splitUp = expression::splitMatchExpressionForColumns(stringInClause.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + // Test that $in with regexes is also supported. + ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, /bird/]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(regexInClause.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + // Test that a mix of both is supported. + ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, 'bird']}}"); + auto splitUp = expression::splitMatchExpressionForColumns(regexInClause.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + // Test that it is still disallowed if there's a disqualifying equality such as a null. 
+ ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, null, 'bird']}}"); + auto splitUp = expression::splitMatchExpressionForColumns(regexInClause.get()); + ASSERT(!splitUp); + } +} + +TEST(SplitMatchExpressionForColumns, SupportsExistsTrue) { + ParsedMatchExpression existsPredicate("{albatross: {$exists: true}}"); + auto splitUp = expression::splitMatchExpressionForColumns(existsPredicate.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EXISTS) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); +} + +TEST(SplitMatchExpressionForColumns, DoesNotSupportExistsFalse) { + ParsedMatchExpression existsPredicate("{albatross: {$exists: false}}"); + auto splitUp = expression::splitMatchExpressionForColumns(existsPredicate.get()); + ASSERT(!splitUp); +} + +// $in constraints are similar to equality. Most of them should work, exceptions broken out in the +// next test. 
+TEST(SplitMatchExpressionForColumns, SupportsInPredicates) { + { + ParsedMatchExpression emptyIn("{albatross: {$in: []}}"); + auto splitUp = expression::splitMatchExpressionForColumns(emptyIn.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression singleElementIn("{albatross: {$in: [4]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(singleElementIn.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression inWithEmptyArray("{albatross: {$in: [[]]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(inWithEmptyArray.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression inWithEmptyObject("{albatross: {$in: [{}]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(inWithEmptyObject.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression mixedTypeIn("{albatross: {$in: [4, {}, [], 'string', /regex/]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(mixedTypeIn.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN) + << splitUp->at("albatross")->toString(); + 
ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } +} + +// We can't support compound types, just like for equality. +TEST(SplitMatchExpressionForColumns, DoesNotSupportCertainInEdgeCases) { + { + ParsedMatchExpression inWithArray("{albatross: {$in: [[2,3]]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(inWithArray.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression inWithObject("{albatross: {$in: [{wings: 2}]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(inWithObject.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression inWithNull("{albatross: {$in: [null]}}"); + auto splitUp = expression::splitMatchExpressionForColumns(inWithNull.get()); + ASSERT(!splitUp); + } + { + ParsedMatchExpression unsupporedMixedInWithSupported( + "{albatross: {$in: ['strings', 1, null, {x: 4}, [0, 0], 4]}}"); + auto splitUp = + expression::splitMatchExpressionForColumns(unsupporedMixedInWithSupported.get()); + ASSERT(!splitUp); + } +} + +TEST(SplitMatchExpressionForColumns, SupportsTypePredicates) { + { + ParsedMatchExpression intFilter("{albatross: {$type: 'int'}}"); + auto splitUp = expression::splitMatchExpressionForColumns(intFilter.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression numberFilter("{albatross: {$type: 'number'}}"); + auto splitUp = expression::splitMatchExpressionForColumns(numberFilter.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression stringFilter("{albatross: {$type: 'string'}}"); + auto splitUp = expression::splitMatchExpressionForColumns(stringFilter.get()); + 
ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } + { + ParsedMatchExpression nullFilter("{albatross: {$type: 'null'}}"); + auto splitUp = expression::splitMatchExpressionForColumns(nullFilter.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); + } +} + +TEST(SplitMatchExpressionForColumns, DoesNotSupportQueriesForTypeObject) { + ParsedMatchExpression objectFilter("{albatross: {$type: 'object'}}"); + auto splitUp = expression::splitMatchExpressionForColumns(objectFilter.get()); + ASSERT(!splitUp); +} + +// This may be workable. But until we can prove it we'll disallow {$type: "array"}. +TEST(SplitMatchExpressionForColumns, DoesNotSupportQueriesForTypeArray) { + ParsedMatchExpression objectFilter("{albatross: {$type: 'array'}}"); + auto splitUp = expression::splitMatchExpressionForColumns(objectFilter.get()); + ASSERT(!splitUp); +} + +TEST(SplitMatchExpressionForColumns, CanCombinePredicates) { + ParsedMatchExpression compoundFilter( + "{" + " albatross: {$gte: 100}," + " albatross: {$mod: [2, 0]}" + "}"); + auto splitUp = expression::splitMatchExpressionForColumns(compoundFilter.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::AND) + << splitUp->at("albatross")->toString(); + ASSERT_EQ(splitUp->at("albatross")->numChildren(), 2) << splitUp->at("albatross")->toString(); + // Don't care about the order. 
+ auto andExpr = splitUp->at("albatross").get(); + auto firstChild = andExpr->getChild(0); + if (firstChild->matchType() == MatchExpression::GTE) { + ASSERT(firstChild->matchType() == MatchExpression::GTE) << firstChild->toString(); + ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::MOD) << firstChild->toString(); + } else { + ASSERT(firstChild->matchType() == MatchExpression::MOD) << firstChild->toString(); + ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::GTE) << firstChild->toString(); + } + ASSERT_EQ(splitUp->size(), 1) << splitUp->size(); +} + +TEST(SplitMatchExpressionForColumns, SupportsDottedPaths) { + ParsedMatchExpression compoundFilter( + "{" + " albatross: /oreo/," + " \"blackbird.feet\": {$mod: [2, 0]}," + " \"blackbird.softwareUpdates\": {$bitsAllSet: 7}," + // Stress the path combination logic with some prefixes and suffixes to be sure. + " blackbird: {$ne: null}," + " bla: {$ne: null}," + " blackbirds: {$exists: true}," + " \"blackbird.feetsies\": {$ne: null}," + " \"cowbird.beakLength\": {$gte: 24, $lt: 40}," + " \"cowbird.eggSet\": {$bitsAnySet: 7}" + "}"); + auto splitUp = expression::splitMatchExpressionForColumns(compoundFilter.get()); + ASSERT(splitUp); + ASSERT(splitUp->contains("albatross")); + ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::REGEX) + << splitUp->at("albatross")->toString(); + ASSERT(splitUp->contains("blackbird.feet")); + ASSERT(splitUp->at("blackbird.feet")->matchType() == MatchExpression::MOD) + << splitUp->at("blackbird.feet")->toString(); + ASSERT(splitUp->contains("blackbird.softwareUpdates")); + ASSERT(splitUp->at("blackbird.softwareUpdates")->matchType() == MatchExpression::BITS_ALL_SET) + << splitUp->at("blackbird.softwareUpdates")->toString(); + ASSERT(splitUp->contains("blackbird")); + ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::NOT) + << splitUp->at("blackbird")->toString(); + ASSERT(splitUp->contains("bla")); + ASSERT(splitUp->contains("blackbirds")); 
+ ASSERT(splitUp->at("blackbirds")->matchType() == MatchExpression::EXISTS) + << splitUp->at("blackbirds")->toString(); + ASSERT(splitUp->contains("blackbird.feetsies")); + ASSERT(splitUp->at("cowbird.beakLength")->matchType() == MatchExpression::AND) + << splitUp->at("cowbird.beakLength")->toString(); + ASSERT_EQ(splitUp->at("cowbird.beakLength")->numChildren(), 2) + << splitUp->at("cowbird.beakLength")->toString(); + ASSERT(splitUp->at("cowbird.eggSet")->matchType() == MatchExpression::BITS_ANY_SET) + << splitUp->at("cowbird.eggSet")->toString(); + ASSERT(!splitUp->contains("cowbird")); + ASSERT_EQ(splitUp->size(), 9) << splitUp->size(); +} + +TEST(SplitMatchExpressionForColumns, LeavesOriginalMatchExpressionFunctional) { + ParsedMatchExpression combinationPredicate( + "{" + " albatross: {$lt: 100}," + " blackbird: {$gt: 0}," + " cowbird: {$gte: 0, $lte: 100}" + "}"); + auto splitUp = expression::splitMatchExpressionForColumns(combinationPredicate.get()); + ASSERT(splitUp); + // Won't bother asserting on the details here - done above. + ASSERT(combinationPredicate.get()->matchesBSON( + BSON("albatross" << 45 << "blackbird" << 1 << "cowbird" << 2))); +} + } // namespace mongo |