summaryrefslogtreecommitdiff
path: root/src/mongo/db/matcher
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2022-03-08 18:13:23 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-03-08 19:54:50 +0000
commit7a003039250c67c00b1db195a0d8fd8c3c770943 (patch)
treeb7a254e2f6359e33a3a6cf95c9c46933e444d758 /src/mongo/db/matcher
parent682f784e93b1602f0dcd74115e2105e1433857f5 (diff)
downloadmongo-7a003039250c67c00b1db195a0d8fd8c3c770943.tar.gz
SERVER-63124 Add support for pushing filters into column index scan
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp207
-rw-r--r--src/mongo/db/matcher/expression_algo.h17
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp586
3 files changed, 808 insertions, 2 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index c3d0e777c1f..2016f4b6c39 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -27,7 +27,6 @@
* it in the license file.
*/
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
#include "mongo/platform/basic.h"
@@ -40,11 +39,11 @@
#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/matcher/expression_tree.h"
+#include "mongo/db/matcher/expression_type.h"
#include "mongo/db/matcher/schema/expression_internal_schema_xor.h"
#include "mongo/db/pipeline/dependencies.h"
#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/query/collation/collator_interface.h"
-#include "mongo/logv2/log.h"
namespace mongo {
@@ -438,6 +437,191 @@ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchEx
}
}
+bool tryAddExprHelper(StringData path,
+ std::unique_ptr<MatchExpression> me,
+ StringMap<std::unique_ptr<MatchExpression>>& out) {
+ auto& entryForPath = out[path];
+ if (!entryForPath) {
+ // First predicate for this path, just put it in directly.
+ entryForPath = std::move(me);
+ } else {
+ // We have at least one predicate for this path already. Put all the predicates for the path
+ // into a giant $and clause. Note this might have to change once we start supporting $or
+ // predicates.
+ if (entryForPath->matchType() != MatchExpression::AND) {
+ // This is the second predicate, we need to make the $and and put in both predicates:
+ // {$and: [<existing>, 'me']}.
+ auto andME = std::make_unique<AndMatchExpression>();
+ andME->add(std::move(entryForPath));
+ entryForPath = std::move(andME);
+ }
+ auto andME = checked_cast<AndMatchExpression*>(entryForPath.get());
+ andME->add(std::move(me));
+ }
+ return true;
+}
+
+bool tryAddExpr(StringData path,
+ const MatchExpression* me,
+ StringMap<std::unique_ptr<MatchExpression>>& out) {
+ if (FieldRef(path).hasNumericPathComponents())
+ return false;
+
+ return tryAddExprHelper(path, me->shallowClone(), out);
+}
+
+bool splitMatchExpressionForColumns(const MatchExpression* me,
+ StringMap<std::unique_ptr<MatchExpression>>& out) {
+ auto canCompareWith = [](const BSONElement& elem, bool isEQ) {
+ // Here we check whether the comparison can work with the given value. Objects and arrays
+ // are generally not permitted. Objects can't work because the paths will be split apart in
+ // the columnar index. We could do arrays of scalars since we would have all that
+ // information in the index, but it proved complex to integrate due to the interface with
+ // the matcher. It expects to get a BSONElement for the whole Array but we'd like to avoid
+ // materializing that.
+ //
+ // One exception to the above: We can support EQ with empty objects and empty arrays since
+ // those are more obviously correct. We could maybe also support LT and LTE, but those don't
+ // seem as important so are left for future work.
+ if (elem.type() == BSONType::Array || elem.type() == BSONType::Object) {
+ return isEQ && elem.Obj().isEmpty();
+ }
+
+ // We support all other types except null, since {x: null} is equivalent to x==null || !exists(x).
+ return !elem.isNull();
+ };
+ switch (me->matchType()) {
+ // These are always safe since they will never match documents missing their field, or where
+ // the element is an object or array.
+ case MatchExpression::REGEX:
+ case MatchExpression::MOD:
+ case MatchExpression::BITS_ALL_SET:
+ case MatchExpression::BITS_ALL_CLEAR:
+ case MatchExpression::BITS_ANY_SET:
+ case MatchExpression::BITS_ANY_CLEAR:
+ case MatchExpression::EXISTS: {
+ auto sub = checked_cast<const PathMatchExpression*>(me);
+ return tryAddExpr(sub->path(), me, out);
+ }
+
+ case MatchExpression::LT:
+ case MatchExpression::GT:
+ case MatchExpression::EQ:
+ case MatchExpression::LTE:
+ case MatchExpression::GTE: {
+ auto sub = checked_cast<const ComparisonMatchExpressionBase*>(me);
+ if (!canCompareWith(sub->getData(), me->matchType() == MatchExpression::EQ))
+ return false;
+ return tryAddExpr(sub->path(), me, out);
+ }
+
+
+ case MatchExpression::MATCH_IN: {
+ auto sub = checked_cast<const InMatchExpression*>(me);
+ // Note that $in treats regexes specially and stores them separately from the rest of
+ // the 'equalities'. We actually don't need to look at them here since any regex should
+ // be OK. A regex could only match a string, symbol, or other regex, any of which would
+ // be present in the columnar storage.
+ for (auto&& elem : sub->getEqualities()) {
+ if (!canCompareWith(elem, true))
+ return false;
+ }
+ return tryAddExpr(sub->path(), me, out);
+ }
+
+ case MatchExpression::TYPE_OPERATOR: {
+ auto sub = checked_cast<const TypeMatchExpression*>(me);
+ if (sub->typeSet().hasType(BSONType::EOO) || sub->typeSet().hasType(BSONType::Object) ||
+ sub->typeSet().hasType(BSONType::Array))
+ return false;
+ return tryAddExpr(sub->path(), me, out);
+ }
+
+ case MatchExpression::AND: {
+ auto sub = checked_cast<const AndMatchExpression*>(me);
+ for (size_t i = 0, end = sub->numChildren(); i != end; i++) {
+ if (!splitMatchExpressionForColumns(sub->getChild(i), out)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+ case MatchExpression::NOT: {
+ // {$ne: null} pattern is known to be important in cases like those in SERVER-27646 and
+ // SERVER-36465.
+ auto notExpr = checked_cast<const NotMatchExpression*>(me);
+ auto withinNot = notExpr->getChild(0);
+
+ // Oddly, we parse {$ne: null} to a NOT -> EQ, but we parse {$not: {$eq: null}} into a
+ // more complex NOT -> AND -> EQ. Let's support both.
+ auto tryAddNENull = [&](const MatchExpression* negatedPred) {
+ if (negatedPred->matchType() != MatchExpression::EQ) {
+ return false;
+ }
+ auto eqPred = checked_cast<const EqualityMatchExpression*>(negatedPred);
+ if (eqPred->getData().isNull()) {
+ return tryAddExpr(eqPred->path(), me, out);
+ }
+ return false;
+ };
+ if (tryAddNENull(withinNot)) {
+ // {$ne: null}. We had equality just under NOT.
+ return true;
+ } else if (withinNot->matchType() == MatchExpression::AND &&
+ withinNot->numChildren() == 1 && tryAddNENull(withinNot->getChild(0))) {
+ // {$not: {$eq: null}}: NOT -> AND -> EQ.
+ return true;
+ }
+ // May be other cases, but left as future work.
+ return false;
+ }
+
+ // We don't currently handle any of these cases, but some may be possible in the future.
+ case MatchExpression::ALWAYS_FALSE:
+ case MatchExpression::ALWAYS_TRUE:
+ case MatchExpression::ELEM_MATCH_OBJECT:
+ case MatchExpression::ELEM_MATCH_VALUE: // This one should be feasible. May be valuable.
+ case MatchExpression::EXPRESSION:
+ case MatchExpression::GEO:
+ case MatchExpression::GEO_NEAR:
+ case MatchExpression::INTERNAL_2D_POINT_IN_ANNULUS:
+ case MatchExpression::INTERNAL_BUCKET_GEO_WITHIN:
+ case MatchExpression::INTERNAL_EXPR_EQ: // This one could be valuable for $lookup
+ case MatchExpression::INTERNAL_EXPR_GT:
+ case MatchExpression::INTERNAL_EXPR_GTE:
+ case MatchExpression::INTERNAL_EXPR_LT:
+ case MatchExpression::INTERNAL_EXPR_LTE:
+ case MatchExpression::INTERNAL_SCHEMA_ALLOWED_PROPERTIES:
+ case MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX:
+ case MatchExpression::INTERNAL_SCHEMA_BIN_DATA_ENCRYPTED_TYPE:
+ case MatchExpression::INTERNAL_SCHEMA_BIN_DATA_SUBTYPE:
+ case MatchExpression::INTERNAL_SCHEMA_COND:
+ case MatchExpression::INTERNAL_SCHEMA_EQ:
+ case MatchExpression::INTERNAL_SCHEMA_FMOD:
+ case MatchExpression::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX:
+ case MatchExpression::INTERNAL_SCHEMA_MAX_ITEMS:
+ case MatchExpression::INTERNAL_SCHEMA_MAX_LENGTH:
+ case MatchExpression::INTERNAL_SCHEMA_MAX_PROPERTIES:
+ case MatchExpression::INTERNAL_SCHEMA_MIN_ITEMS:
+ case MatchExpression::INTERNAL_SCHEMA_MIN_LENGTH:
+ case MatchExpression::INTERNAL_SCHEMA_MIN_PROPERTIES:
+ case MatchExpression::INTERNAL_SCHEMA_OBJECT_MATCH:
+ case MatchExpression::INTERNAL_SCHEMA_ROOT_DOC_EQ:
+ case MatchExpression::INTERNAL_SCHEMA_TYPE:
+ case MatchExpression::INTERNAL_SCHEMA_UNIQUE_ITEMS:
+ case MatchExpression::INTERNAL_SCHEMA_XOR:
+ case MatchExpression::NOR:
+ case MatchExpression::OR:
+ case MatchExpression::SIZE:
+ case MatchExpression::TEXT:
+ case MatchExpression::WHERE:
+ return false;
+ }
+ MONGO_UNREACHABLE;
+}
+
} // namespace
namespace expression {
@@ -719,5 +903,24 @@ bool bidirectionalPathPrefixOf(StringData first, StringData second) {
return first == second || expression::isPathPrefixOf(first, second) ||
expression::isPathPrefixOf(second, first);
}
+
+boost::optional<StringMap<std::unique_ptr<MatchExpression>>> splitMatchExpressionForColumns(
+ const MatchExpression* me) {
+ boost::optional<StringMap<std::unique_ptr<MatchExpression>>> out;
+ out.emplace();
+ if (!mongo::splitMatchExpressionForColumns(me, *out))
+ out = {};
+ return out;
+}
+
+std::string filterMapToString(const StringMap<std::unique_ptr<MatchExpression>>& filterMap) {
+ StringBuilder sb;
+ sb << "{";
+ for (auto&& [path, matchExpr] : filterMap) {
+ sb << path << ": " << matchExpr->toString() << ", ";
+ }
+ sb << "}";
+ return sb.str();
+}
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index 31bc8f9e227..0fdbac756a0 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -157,5 +157,22 @@ splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr,
* {new: {$gt: 3}}.
*/
void applyRenamesToExpression(MatchExpression* expr, const StringMap<std::string>& renames);
+
+/**
+ * Split a MatchExpression into subexpressions targeted to separate columns. A document will match
+ * the query if all of the subexpressions match. Returns an empty optional if the entire match
+ * cannot be handled by the column store.
+ *
+ * This API will need to change in order to support more complex queries, such as $or and
+ * $elemMatch.
+ */
+boost::optional<StringMap<std::unique_ptr<MatchExpression>>> splitMatchExpressionForColumns(
+ const MatchExpression* me);
+
+/**
+ * Serializes a path-to-filter map, such as one returned by splitMatchExpressionForColumns(), for debugging purposes.
+ */
+std::string filterMapToString(const StringMap<std::unique_ptr<MatchExpression>>&);
+
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index 257fbc7a734..8679c759cd5 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -1562,4 +1562,590 @@ TEST(HasExistencePredicateOnPath, ReturnsFalseWhenExistsOnSubpath) {
expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd));
}
+TEST(SplitMatchExpressionForColumns, PreservesEmptyPredicates) {
+ ParsedMatchExpression empty("{}");
+ auto splitUp = expression::splitMatchExpressionForColumns(empty.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->size() == 0);
+}
+
+TEST(SplitMatchExpressionForColumns, RejectsUnsupportedPredicates) {
+ {
+ // Future work.
+ ParsedMatchExpression orClause("{$or: [{a: 1}, {b: 2}]}");
+ auto splitUp = expression::splitMatchExpressionForColumns(orClause.get());
+ ASSERT(!splitUp);
+ }
+
+ {
+ // Would match missing values, not safe for a columnar index.
+ ParsedMatchExpression alwaysTrue("{$alwaysTrue: 1}");
+ auto splitUp = expression::splitMatchExpressionForColumns(alwaysTrue.get());
+ ASSERT(!splitUp);
+ }
+
+ {
+ // Future work.
+ ParsedMatchExpression exprClause("{$expr: {$eq: ['$x', 0]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(exprClause.get());
+ ASSERT(!splitUp);
+ }
+}
+
+// Test equality predicates that are safe to split (in contrast to next test).
+TEST(SplitMatchExpressionForColumns, SplitsSafeEqualities) {
+
+ {
+ ParsedMatchExpression singleEqualsNumber("{albatross: 1}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleEqualsNumber.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ)
+ << splitUp->at("albatross")->toString();
+ }
+
+ {
+ ParsedMatchExpression singleEqualsString("{albatross: 'flying'}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleEqualsString.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ)
+ << splitUp->at("albatross")->toString();
+ }
+
+ {
+ ParsedMatchExpression doubleEqualsNumber("{albatross: 1, blackbird: 2}");
+ auto splitUp = expression::splitMatchExpressionForColumns(doubleEqualsNumber.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 2) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ)
+ << splitUp->at("albatross")->toString();
+ ASSERT(splitUp->contains("blackbird"));
+ ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::EQ)
+ << splitUp->at("blackbird")->toString();
+ }
+
+ {
+ ParsedMatchExpression mixedEquals(
+ "{albatross: 1,"
+ " blackbird: 'flying',"
+ " cowbird: {$eq: /oreo/},"
+ " duck: NumberInt(2),"
+ " eagle: NumberLong(50),"
+ " grackle: ObjectId('000000000000000000000000'),"
+ " heron: true,"
+ " ibis: false,"
+ " jay: Timestamp(1, 0),"
+ " kiwi: NumberDecimal('22'),"
+ " 'loggerhead shrike': {$minKey: 1},"
+ " mallard: {$maxKey: 1}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(mixedEquals.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 12) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ)
+ << splitUp->at("albatross")->toString();
+ ASSERT(splitUp->contains("blackbird"));
+ ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::EQ)
+ << splitUp->at("blackbird")->toString();
+ }
+}
+
+
+TEST(SplitMatchExpressionForColumns, SupportsEqualityToEmptyObjects) {
+ {
+ ParsedMatchExpression equalsEmptyObj("{albatross: {}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(equalsEmptyObj.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ)
+ << splitUp->at("albatross")->toString();
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, SupportsEqualityToEmptyArray) {
+ {
+ ParsedMatchExpression equalsEmptyArray("{albatross: []}");
+ auto splitUp = expression::splitMatchExpressionForColumns(equalsEmptyArray.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EQ)
+ << splitUp->at("albatross")->toString();
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, DoesNotSupportEqualsNull) {
+ {
+ ParsedMatchExpression equalsNull("{a: null}");
+ auto splitUp = expression::splitMatchExpressionForColumns(equalsNull.get());
+ ASSERT(!splitUp);
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, DoesSupportNotEqualsNull) {
+ {
+ ParsedMatchExpression neNull("{a: {$ne: null}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(neNull.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("a"));
+ ASSERT(splitUp->at("a")->matchType() == MatchExpression::NOT)
+ << splitUp->at("a")->toString();
+ }
+ {
+ ParsedMatchExpression notEqualsNull("{a: {$not: {$eq: null}}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(notEqualsNull.get());
+ ASSERT(splitUp) << notEqualsNull.get()->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("a"));
+ ASSERT(splitUp->at("a")->matchType() == MatchExpression::NOT)
+ << splitUp->at("a")->toString();
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, DoesNotSupportCompoundEquals) {
+ {
+ ParsedMatchExpression implicitEqualsArray("{a: [1, 2]}");
+ auto splitUp = expression::splitMatchExpressionForColumns(implicitEqualsArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression explicitEqualsArray("{a: {$eq: [1, 2]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(explicitEqualsArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression implicitEqualsObject("{a: {boats: 1, planes: 2}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(implicitEqualsObject.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression explicitEqualsObject("{a: {$eq: {boats: 1, planes: 2}}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(explicitEqualsObject.get());
+ ASSERT(!splitUp);
+ }
+ // We should be able to do the dotted path version though, as a potential workaround.
+ {
+ ParsedMatchExpression equalsDotted("{'a.boats': 1, 'a.planes': 2}");
+ auto splitUp = expression::splitMatchExpressionForColumns(equalsDotted.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->size() == 2);
+ ASSERT(splitUp->contains("a.boats"));
+ ASSERT(splitUp->at("a.boats")->matchType() == MatchExpression::EQ)
+ << splitUp->at("a.boats")->toString();
+ ASSERT(splitUp->contains("a.planes"));
+ ASSERT(splitUp->at("a.planes")->matchType() == MatchExpression::EQ)
+ << splitUp->at("a.planes")->toString();
+ }
+}
+
+// Tests that comparisons (like $lt and $gte) have the same splitting rules as equality.
+TEST(SplitMatchExpressionForColumns, SupportsComparisonsLikeEqualities) {
+
+ {
+ ParsedMatchExpression singleLtNumber("{albatross: {$lt: 1}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleLtNumber.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::LT)
+ << splitUp->at("albatross")->toString();
+ }
+ {
+ ParsedMatchExpression singleLteNumber("{albatross: {$lte: 1}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleLteNumber.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::LTE)
+ << splitUp->at("albatross")->toString();
+ }
+ {
+ ParsedMatchExpression singleGtNumber("{albatross: {$gt: 1}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleGtNumber.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::GT)
+ << splitUp->at("albatross")->toString();
+ }
+ {
+ ParsedMatchExpression singleGteNumber("{albatross: {$gte: 1}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleGteNumber.get());
+ ASSERT(splitUp);
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::GTE)
+ << splitUp->at("albatross")->toString();
+ }
+ {
+ ParsedMatchExpression combinationPredicate(
+ "{"
+ " albatross: {$lt: 100},"
+ " blackbird: {$gt: 0},"
+ " cowbird: {$gte: 0, $lte: 100}"
+ "}");
+ auto splitUp = expression::splitMatchExpressionForColumns(combinationPredicate.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::LT)
+ << splitUp->at("albatross")->toString();
+ ASSERT(splitUp->contains("blackbird"));
+ ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::GT)
+ << splitUp->at("blackbird")->toString();
+ ASSERT(splitUp->contains("cowbird"));
+ ASSERT(splitUp->at("cowbird")->matchType() == MatchExpression::AND)
+ << splitUp->at("cowbird")->toString();
+ ASSERT_EQ(splitUp->size(), 3) << splitUp->size();
+ }
+}
+
+// While equality to [] or {} is OK, inequality is not so obvious. Left as future work.
+TEST(SplitMatchExpressionForColumns, DoesNotSupportInequalitiesToObjectsOrArrays) {
+ {
+ ParsedMatchExpression ltArray("{albatross: {$lt: []}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(ltArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression ltObject("{albatross: {$lt: {}}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(ltObject.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression lteArray("{albatross: {$lte: []}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(lteArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression lteObject("{albatross: {$lte: {}}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(lteObject.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression gtArray("{albatross: {$gt: []}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(gtArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression gtObject("{albatross: {$gt: {}}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(gtObject.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression gteArray("{albatross: {$gte: []}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(gteArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression gteObject("{albatross: {$gte: {}}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(gteObject.get());
+ ASSERT(!splitUp);
+ }
+}
+
+// Tests that comparisons which only match values of a certain type are allowed.
+TEST(SplitMatchExpressionForColumns, SupportsTypeSpecificPredicates) {
+ ParsedMatchExpression combinationPredicate(
+ "{"
+ " albatross: /oreo/,"
+ " blackbird: {$mod: [2, 0]},"
+ " cowbird: {$bitsAllSet: 7},"
+ " duck: {$bitsAllClear: 24},"
+ " eagle: {$bitsAnySet: 7},"
+ " falcon: {$bitsAnyClear: 24}"
+ "}");
+ auto splitUp = expression::splitMatchExpressionForColumns(combinationPredicate.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::REGEX)
+ << splitUp->at("albatross")->toString();
+ ASSERT(splitUp->contains("blackbird"));
+ ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::MOD)
+ << splitUp->at("blackbird")->toString();
+ ASSERT(splitUp->contains("cowbird"));
+ ASSERT(splitUp->at("cowbird")->matchType() == MatchExpression::BITS_ALL_SET)
+ << splitUp->at("cowbird")->toString();
+ ASSERT(splitUp->contains("duck"));
+ ASSERT(splitUp->at("duck")->matchType() == MatchExpression::BITS_ALL_CLEAR)
+ << splitUp->at("duck")->toString();
+ ASSERT(splitUp->contains("eagle"));
+ ASSERT(splitUp->at("eagle")->matchType() == MatchExpression::BITS_ANY_SET)
+ << splitUp->at("eagle")->toString();
+ ASSERT(splitUp->contains("falcon"));
+ ASSERT(splitUp->at("falcon")->matchType() == MatchExpression::BITS_ANY_CLEAR)
+ << splitUp->at("falcon")->toString();
+ ASSERT_EQ(splitUp->size(), 6) << splitUp->size();
+}
+
+TEST(SplitMatchExpressionForColumns, SupportsInWithRegexes) {
+ {
+ // First confirm a $in clause is supported without regexes.
+ ParsedMatchExpression stringInClause("{albatross: {$in: ['big', 'ol', 'bird']}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(stringInClause.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ // Test that $in with regexes also works.
+ ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, /bird/]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(regexInClause.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ // Test that a mix of both is supported.
+ ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, 'bird']}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(regexInClause.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ // Test that it is still disallowed if there's a disqualifying equality such as a null.
+ ParsedMatchExpression regexInClause("{albatross: {$in: [/big/, null, 'bird']}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(regexInClause.get());
+ ASSERT(!splitUp);
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, SupportsExistsTrue) {
+ ParsedMatchExpression existsPredicate("{albatross: {$exists: true}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(existsPredicate.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::EXISTS)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+}
+
+TEST(SplitMatchExpressionForColumns, DoesNotSupportExistsFalse) {
+ ParsedMatchExpression existsPredicate("{albatross: {$exists: false}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(existsPredicate.get());
+ ASSERT(!splitUp);
+}
+
+// $in constraints are similar to equality. Most of them should work, exceptions broken out in the
+// next test.
+TEST(SplitMatchExpressionForColumns, SupportsInPredicates) {
+ {
+ ParsedMatchExpression emptyIn("{albatross: {$in: []}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(emptyIn.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression singleElementIn("{albatross: {$in: [4]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(singleElementIn.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression inWithEmptyArray("{albatross: {$in: [[]]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(inWithEmptyArray.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression inWithEmptyObject("{albatross: {$in: [{}]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(inWithEmptyObject.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression mixedTypeIn("{albatross: {$in: [4, {}, [], 'string', /regex/]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(mixedTypeIn.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::MATCH_IN)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+}
+
+// We can't support compound types, just like for equality.
+TEST(SplitMatchExpressionForColumns, DoesNotSupportCertainInEdgeCases) {
+ {
+ ParsedMatchExpression inWithArray("{albatross: {$in: [[2,3]]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(inWithArray.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression inWithObject("{albatross: {$in: [{wings: 2}]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(inWithObject.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression inWithNull("{albatross: {$in: [null]}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(inWithNull.get());
+ ASSERT(!splitUp);
+ }
+ {
+ ParsedMatchExpression unsupporedMixedInWithSupported(
+ "{albatross: {$in: ['strings', 1, null, {x: 4}, [0, 0], 4]}}");
+ auto splitUp =
+ expression::splitMatchExpressionForColumns(unsupporedMixedInWithSupported.get());
+ ASSERT(!splitUp);
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, SupportsTypePredicates) {
+ {
+ ParsedMatchExpression intFilter("{albatross: {$type: 'int'}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(intFilter.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression numberFilter("{albatross: {$type: 'number'}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(numberFilter.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression stringFilter("{albatross: {$type: 'string'}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(stringFilter.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+ {
+ ParsedMatchExpression nullFilter("{albatross: {$type: 'null'}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(nullFilter.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::TYPE_OPERATOR)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+ }
+}
+
+TEST(SplitMatchExpressionForColumns, DoesNotSupportQueriesForTypeObject) {
+ ParsedMatchExpression objectFilter("{albatross: {$type: 'object'}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(objectFilter.get());
+ ASSERT(!splitUp);
+}
+
+// This may be workable. But until we can prove it we'll disallow {$type: "array"}.
+TEST(SplitMatchExpressionForColumns, DoesNotSupportQueriesForTypeArray) {
+ ParsedMatchExpression objectFilter("{albatross: {$type: 'array'}}");
+ auto splitUp = expression::splitMatchExpressionForColumns(objectFilter.get());
+ ASSERT(!splitUp);
+}
+
+TEST(SplitMatchExpressionForColumns, CanCombinePredicates) {
+ ParsedMatchExpression compoundFilter(
+ "{"
+ " albatross: {$gte: 100},"
+ " albatross: {$mod: [2, 0]}"
+ "}");
+ auto splitUp = expression::splitMatchExpressionForColumns(compoundFilter.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::AND)
+ << splitUp->at("albatross")->toString();
+ ASSERT_EQ(splitUp->at("albatross")->numChildren(), 2) << splitUp->at("albatross")->toString();
+ // Don't care about the order.
+ auto andExpr = splitUp->at("albatross").get();
+ auto firstChild = andExpr->getChild(0);
+ if (firstChild->matchType() == MatchExpression::GTE) {
+ ASSERT(firstChild->matchType() == MatchExpression::GTE) << firstChild->toString();
+ ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::MOD) << firstChild->toString();
+ } else {
+ ASSERT(firstChild->matchType() == MatchExpression::MOD) << firstChild->toString();
+ ASSERT(andExpr->getChild(1)->matchType() == MatchExpression::GTE) << firstChild->toString();
+ }
+ ASSERT_EQ(splitUp->size(), 1) << splitUp->size();
+}
+
+TEST(SplitMatchExpressionForColumns, SupportsDottedPaths) {
+ ParsedMatchExpression compoundFilter(
+ "{"
+ " albatross: /oreo/,"
+ " \"blackbird.feet\": {$mod: [2, 0]},"
+ " \"blackbird.softwareUpdates\": {$bitsAllSet: 7},"
+ // Stress the path combination logic with some prefixes and suffixes to be sure.
+ " blackbird: {$ne: null},"
+ " bla: {$ne: null},"
+ " blackbirds: {$exists: true},"
+ " \"blackbird.feetsies\": {$ne: null},"
+ " \"cowbird.beakLength\": {$gte: 24, $lt: 40},"
+ " \"cowbird.eggSet\": {$bitsAnySet: 7}"
+ "}");
+ auto splitUp = expression::splitMatchExpressionForColumns(compoundFilter.get());
+ ASSERT(splitUp);
+ ASSERT(splitUp->contains("albatross"));
+ ASSERT(splitUp->at("albatross")->matchType() == MatchExpression::REGEX)
+ << splitUp->at("albatross")->toString();
+ ASSERT(splitUp->contains("blackbird.feet"));
+ ASSERT(splitUp->at("blackbird.feet")->matchType() == MatchExpression::MOD)
+ << splitUp->at("blackbird.feet")->toString();
+ ASSERT(splitUp->contains("blackbird.softwareUpdates"));
+ ASSERT(splitUp->at("blackbird.softwareUpdates")->matchType() == MatchExpression::BITS_ALL_SET)
+ << splitUp->at("blackbird.softwareUpdates")->toString();
+ ASSERT(splitUp->contains("blackbird"));
+ ASSERT(splitUp->at("blackbird")->matchType() == MatchExpression::NOT)
+ << splitUp->at("blackbird")->toString();
+ ASSERT(splitUp->contains("bla"));
+ ASSERT(splitUp->contains("blackbirds"));
+ ASSERT(splitUp->at("blackbirds")->matchType() == MatchExpression::EXISTS)
+ << splitUp->at("blackbirds")->toString();
+ ASSERT(splitUp->contains("blackbird.feetsies"));
+ ASSERT(splitUp->at("cowbird.beakLength")->matchType() == MatchExpression::AND)
+ << splitUp->at("cowbird.beakLength")->toString();
+ ASSERT_EQ(splitUp->at("cowbird.beakLength")->numChildren(), 2)
+ << splitUp->at("cowbird.beakLength")->toString();
+ ASSERT(splitUp->at("cowbird.eggSet")->matchType() == MatchExpression::BITS_ANY_SET)
+ << splitUp->at("cowbird.eggSet")->toString();
+ ASSERT(!splitUp->contains("cowbird"));
+ ASSERT_EQ(splitUp->size(), 9) << splitUp->size();
+}
+
+TEST(SplitMatchExpressionForColumns, LeavesOriginalMatchExpressionFunctional) {
+ ParsedMatchExpression combinationPredicate(
+ "{"
+ " albatross: {$lt: 100},"
+ " blackbird: {$gt: 0},"
+ " cowbird: {$gte: 0, $lte: 100}"
+ "}");
+ auto splitUp = expression::splitMatchExpressionForColumns(combinationPredicate.get());
+ ASSERT(splitUp);
+ // Won't bother asserting on the details here - done above.
+ ASSERT(combinationPredicate.get()->matchesBSON(
+ BSON("albatross" << 45 << "blackbird" << 1 << "cowbird" << 2)));
+}
+
} // namespace mongo