From 0d2f72f5471f7c0f283ceea314c48d2e25d7d556 Mon Sep 17 00:00:00 2001 From: Tess Avitabile Date: Wed, 25 May 2016 15:26:24 -0400 Subject: SERVER-23618 Add collation support for partial indexes --- jstests/core/collation_shell_helpers.js | 21 +++++++++ src/mongo/db/catalog/index_catalog.cpp | 47 ++++++++++--------- src/mongo/db/catalog/index_catalog_entry.cpp | 23 +++++---- src/mongo/db/catalog/index_catalog_entry.h | 2 +- src/mongo/db/matcher/SConscript | 1 + src/mongo/db/matcher/expression_algo.cpp | 14 ++++-- src/mongo/db/matcher/expression_algo_test.cpp | 54 ++++++++++++++++++++-- src/mongo/db/query/query_planner.cpp | 6 --- .../db/query/query_planner_partialidx_test.cpp | 51 ++++++++++++++++++++ src/mongo/db/query/query_planner_test_fixture.cpp | 17 ++++++- src/mongo/db/query/query_planner_test_fixture.h | 7 ++- 11 files changed, 191 insertions(+), 52 deletions(-) diff --git a/jstests/core/collation_shell_helpers.js b/jstests/core/collation_shell_helpers.js index e9308960164..bbc7f928bc8 100644 --- a/jstests/core/collation_shell_helpers.js +++ b/jstests/core/collation_shell_helpers.js @@ -206,6 +206,27 @@ .hint({str: 1}) .itcount()); assert.commandWorked(coll.dropIndexes()); + + // With a partial index. + // {_id: 1, str: "foo"} will be indexed even though "foo" > "FOO", since the collation is + // case-insensitive. + assert.commandWorked(coll.ensureIndex({str: 1}, { + partialFilterExpression: {str: {$lte: "FOO"}}, + collation: {locale: "en_US", strength: 2} + })); + assert.eq(1, + coll.find({str: "foo"}) + .collation({locale: "en_US", strength: 2}) + .hint({str: 1}) + .itcount()); + assert.writeOK(coll.insert({_id: 3, str: "goo"})); + assert.eq(0, + coll.find({str: "goo"}) + .collation({locale: "en_US", strength: 2}) + .hint({str: 1}) + .itcount()); + assert.writeOK(coll.remove({_id: 3})); + assert.commandWorked(coll.dropIndexes()); } else { assert.throws(function() { coll.find().collation({locale: "fr"}).itcount(); diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp index 58efa054129..b1ed77750ed 100644 --- a/src/mongo/db/catalog/index_catalog.cpp +++ b/src/mongo/db/catalog/index_catalog.cpp @@ -542,6 +542,28 @@ Status IndexCatalog::_isSpecOk(OperationContext* txn, const BSONObj& spec) const << keyStatus.reason()); } + std::unique_ptr collator; + BSONElement collationElement = spec.getField("collation"); + if (collationElement) { + string pluginName = IndexNames::findPluginName(key); + if ((pluginName != IndexNames::BTREE) && (pluginName != IndexNames::GEO_2DSPHERE) && + (pluginName != IndexNames::HASHED)) { + return Status(ErrorCodes::CannotCreateIndex, + str::stream() << "\"collation\" not supported for index type " + << pluginName); + } + if (collationElement.type() != BSONType::Object) { + return Status(ErrorCodes::CannotCreateIndex, + "\"collation\" for an index must be a document"); + } + auto statusWithCollator = CollatorFactoryInterface::get(txn->getServiceContext()) + ->makeFromBSON(collationElement.Obj()); + if (!statusWithCollator.isOK()) { + return statusWithCollator.getStatus(); + } + collator = std::move(statusWithCollator.getValue()); + } + const bool isSparse = spec["sparse"].trueValue(); // Ensure if there is a filter, its valid. @@ -556,9 +578,10 @@ Status IndexCatalog::_isSpecOk(OperationContext* txn, const BSONObj& spec) const return Status(ErrorCodes::CannotCreateIndex, "\"partialFilterExpression\" for an index must be a document"); } - // TODO SERVER-23618: pass the appropriate CollatorInterface* instead of nullptr. + + // The collator must outlive the constructed MatchExpression. StatusWithMatchExpression statusWithMatcher = MatchExpressionParser::parse( - filterElement.Obj(), ExtensionsCallbackDisallowExtensions(), nullptr); + filterElement.Obj(), ExtensionsCallbackDisallowExtensions(), collator.get()); if (!statusWithMatcher.isOK()) { return statusWithMatcher.getStatus(); } @@ -570,26 +593,6 @@ Status IndexCatalog::_isSpecOk(OperationContext* txn, const BSONObj& spec) const } } - BSONElement collationElement = spec.getField("collation"); - if (collationElement) { - string pluginName = IndexNames::findPluginName(key); - if ((pluginName != IndexNames::BTREE) && (pluginName != IndexNames::GEO_2DSPHERE) && - (pluginName != IndexNames::HASHED)) { - return Status(ErrorCodes::CannotCreateIndex, - str::stream() << "\"collation\" not supported for index type " - << pluginName); - } - if (collationElement.type() != BSONType::Object) { - return Status(ErrorCodes::CannotCreateIndex, - "\"collation\" for an index must be a document"); - } - auto statusWithCollator = CollatorFactoryInterface::get(txn->getServiceContext()) - ->makeFromBSON(collationElement.Obj()); - if (!statusWithCollator.isOK()) { - return statusWithCollator.getStatus(); - } - } - if (IndexDescriptor::isIdIndexPattern(key)) { BSONElement uniqueElt = spec["unique"]; if (uniqueElt && !uniqueElt.trueValue()) { diff --git a/src/mongo/db/catalog/index_catalog_entry.cpp b/src/mongo/db/catalog/index_catalog_entry.cpp index ff52a47b089..0ac3ac5665e 100644 --- a/src/mongo/db/catalog/index_catalog_entry.cpp +++ b/src/mongo/db/catalog/index_catalog_entry.cpp @@ -95,27 +95,26 @@ IndexCatalogEntry::IndexCatalogEntry(OperationContext* txn, _indexTracksPathLevelMultikeyInfo = !_indexMultikeyPaths.empty(); } + if (BSONElement collationElement = _descriptor->getInfoElement("collation")) { + invariant(collationElement.isABSONObj()); + BSONObj collation = collationElement.Obj(); + auto statusWithCollator = + CollatorFactoryInterface::get(txn->getServiceContext())->makeFromBSON(collation); + invariantOK(statusWithCollator.getStatus()); + _collator = std::move(statusWithCollator.getValue()); + } + if (BSONElement filterElement = _descriptor->getInfoElement("partialFilterExpression")) { invariant(filterElement.isABSONObj()); BSONObj filter = filterElement.Obj(); - // TODO SERVER-23618: pass the appropriate CollatorInterface* instead of nullptr. - StatusWithMatchExpression statusWithMatcher = - MatchExpressionParser::parse(filter, ExtensionsCallbackDisallowExtensions(), nullptr); + StatusWithMatchExpression statusWithMatcher = MatchExpressionParser::parse( + filter, ExtensionsCallbackDisallowExtensions(), _collator.get()); // this should be checked in create, so can blow up here invariantOK(statusWithMatcher.getStatus()); _filterExpression = std::move(statusWithMatcher.getValue()); LOG(2) << "have filter expression for " << _ns << " " << _descriptor->indexName() << " " << filter; } - - if (BSONElement collationElement = _descriptor->getInfoElement("collation")) { - invariant(collationElement.isABSONObj()); - BSONObj collation = collationElement.Obj(); - auto statusWithCollator = - CollatorFactoryInterface::get(txn->getServiceContext())->makeFromBSON(collation); - invariantOK(statusWithCollator.getStatus()); - _collator = std::move(statusWithCollator.getValue()); - } } IndexCatalogEntry::~IndexCatalogEntry() { diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index eb86ea2905c..64f72822404 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -181,8 +181,8 @@ private: // Owned here. HeadManager* _headManager; - std::unique_ptr _filterExpression; std::unique_ptr _collator; + std::unique_ptr _filterExpression; // cached stuff diff --git a/src/mongo/db/matcher/SConscript b/src/mongo/db/matcher/SConscript index c22650d40ca..ff82d538cba 100644 --- a/src/mongo/db/matcher/SConscript +++ b/src/mongo/db/matcher/SConscript @@ -99,6 +99,7 @@ env.CppUnitTest( 'expression_algo_test.cpp', ], LIBDEPS=[ + '$BUILD_DIR/mongo/db/query/collation/collator_interface_mock', 'expression_algo', ], ) diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 02d25a5df9a..966c727501f 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -37,6 +37,7 @@ #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_tree.h" #include "mongo/db/pipeline/dependencies.h" +#include "mongo/db/query/collation/collator_interface.h" namespace mongo { @@ -94,7 +95,15 @@ bool _isSubsetOf(const ComparisonMatchExpression* lhs, const ComparisonMatchExpr return false; } - int cmp = compareElementValues(lhsData, rhsData); + // Either collator may be used by compareElementValues() here. If lhs (the query) contains + // string comparison, then _isSubsetOf will only be called if lhs and rhs have the same + // collator. Otherwise, the collator will not be used by compareElementValues(). + if (!CollatorInterface::collatorsMatch(lhs->getCollator(), rhs->getCollator())) { + // TODO SERVER-23172: Check that lhsData does not contain string comparison in nested + // objects or arrays. + invariant(lhsData.type() != BSONType::String); + } + int cmp = compareElementValues(lhsData, rhsData, rhs->getCollator()); // Check whether the two expressions are equivalent. if (lhs->matchType() == rhs->matchType() && cmp == 0) { @@ -155,8 +164,7 @@ bool _isSubsetOf(const MatchExpression* lhs, const ComparisonMatchExpression* rh } for (BSONElement elem : ime->getEqualities()) { // Each element in the $in-array represents an equality predicate. - // TODO SERVER-23618: pass the appropriate collator to EqualityMatchExpression(). - EqualityMatchExpression equality(nullptr); + EqualityMatchExpression equality(ime->getCollator()); equality.init(lhs->path(), elem); if (!_isSubsetOf(&equality, rhs)) { return false; diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index 95e7bcd62fc..ead421bc76e 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -39,6 +39,7 @@ #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/matcher/extensions_callback_disallow_extensions.h" #include "mongo/db/matcher/extensions_callback_noop.h" +#include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/platform/decimal128.h" namespace mongo { @@ -51,8 +52,8 @@ using std::unique_ptr; */ class ParsedMatchExpression { public: - ParsedMatchExpression(const std::string& str) : _obj(fromjson(str)) { - const CollatorInterface* collator = nullptr; + ParsedMatchExpression(const std::string& str, const CollatorInterface* collator = nullptr) + : _obj(fromjson(str)) { StatusWithMatchExpression result = MatchExpressionParser::parse(_obj, ExtensionsCallbackDisallowExtensions(), collator); ASSERT_OK(result.getStatus()); @@ -489,9 +490,6 @@ TEST(ExpressionAlgoIsSubsetOf, RegexAndIn) { ParsedMatchExpression inRegexAOrEq1("{x: {$in: [/a/, 1]}}"); ParsedMatchExpression inRegexAOrNull("{x: {$in: [/a/, null]}}"); - ASSERT_TRUE(expression::isSubsetOf(inRegexA.get(), inRegexA.get())); - ASSERT_FALSE(expression::isSubsetOf(inRegexAbc.get(), inRegexA.get())); - ASSERT_FALSE(expression::isSubsetOf(inRegexA.get(), inRegexAOrEq1.get())); ASSERT_FALSE(expression::isSubsetOf(inRegexAOrEq1.get(), eq1.get())); ASSERT_FALSE(expression::isSubsetOf(inRegexA.get(), eqA.get())); ASSERT_FALSE(expression::isSubsetOf(inRegexAOrNull.get(), eqA.get())); @@ -652,6 +650,52 @@ TEST(ExpressionAlgoIsSubsetOf, Compare_Exists_NE) { ASSERT_TRUE(expression::isSubsetOf(aNotEqualNull.get(), aExists.get())); } +TEST(ExpressionAlgoIsSubsetOf, CollationAwareStringComparison) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ParsedMatchExpression lhs("{a: {$gt: 'abc'}}", &collator); + ParsedMatchExpression rhs("{a: {$gt: 'cba'}}", &collator); + + ASSERT_TRUE(expression::isSubsetOf(lhs.get(), rhs.get())); + + ParsedMatchExpression lhsLT("{a: {$lt: 'abc'}}", &collator); + ParsedMatchExpression rhsLT("{a: {$lt: 'cba'}}", &collator); + + ASSERT_FALSE(expression::isSubsetOf(lhsLT.get(), rhsLT.get())); +} + +TEST(ExpressionAlgoIsSubsetOf, CollationAwareStringComparisonIn) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ParsedMatchExpression lhsAllGTcba("{a: {$in: ['abc', 'cbc']}}", &collator); + ParsedMatchExpression lhsSomeGTcba("{a: {$in: ['abc', 'aba']}}", &collator); + ParsedMatchExpression rhs("{a: {$gt: 'cba'}}", &collator); + + ASSERT_TRUE(expression::isSubsetOf(lhsAllGTcba.get(), rhs.get())); + ASSERT_FALSE(expression::isSubsetOf(lhsSomeGTcba.get(), rhs.get())); + + ParsedMatchExpression rhsLT("{a: {$lt: 'cba'}}", &collator); + + ASSERT_FALSE(expression::isSubsetOf(lhsAllGTcba.get(), rhsLT.get())); + ASSERT_FALSE(expression::isSubsetOf(lhsSomeGTcba.get(), rhsLT.get())); +} + +TEST(ExpressionAlgoIsSubsetOf, NonMatchingCollationsNoStringComparisonLHS) { + CollatorInterfaceMock collatorAlwaysEqual(CollatorInterfaceMock::MockType::kAlwaysEqual); + CollatorInterfaceMock collatorReverseString(CollatorInterfaceMock::MockType::kReverseString); + ParsedMatchExpression lhs("{a: {b: 1}}", &collatorAlwaysEqual); + ParsedMatchExpression rhs("{a: {$lt: {b: 'abc'}}}", &collatorReverseString); + + ASSERT_TRUE(expression::isSubsetOf(lhs.get(), rhs.get())); +} + +TEST(ExpressionAlgoIsSubsetOf, NonMatchingCollationsNoStringComparison) { + CollatorInterfaceMock collatorAlwaysEqual(CollatorInterfaceMock::MockType::kAlwaysEqual); + CollatorInterfaceMock collatorReverseString(CollatorInterfaceMock::MockType::kReverseString); + ParsedMatchExpression lhs("{a: 1}", &collatorAlwaysEqual); + ParsedMatchExpression rhs("{a: {$gt: 0}}", &collatorReverseString); + + ASSERT_TRUE(expression::isSubsetOf(lhs.get(), rhs.get())); +} + TEST(IsIndependent, AndIsIndependentOnlyIfChildrenAre) { BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}"); const CollatorInterface* collator = nullptr; diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 2b76ac930f9..faad7ca6aae 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -851,12 +851,6 @@ Status QueryPlanner::plan(const CanonicalQuery& query, continue; } - // If the index collation differs from the query collation, the index should not be - // used to provide a sort, because strings will be ordered incorrectly. - if (!CollatorInterface::collatorsMatch(index.collator, query.getCollator())) { - continue; - } - // Partial indexes can only be used to provide a sort only if the query predicate is // compatible. if (index.filterExpr && !expression::isSubsetOf(query.root(), index.filterExpr)) { diff --git a/src/mongo/db/query/query_planner_partialidx_test.cpp b/src/mongo/db/query/query_planner_partialidx_test.cpp index 6f27939b10a..476e09041e3 100644 --- a/src/mongo/db/query/query_planner_partialidx_test.cpp +++ b/src/mongo/db/query/query_planner_partialidx_test.cpp @@ -28,6 +28,7 @@ #include "mongo/platform/basic.h" +#include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_test_fixture.h" @@ -443,5 +444,55 @@ TEST_F(QueryPlannerTest, PartialIndexNor) { assertNumSolutions(0U); } +TEST_F(QueryPlannerTest, PartialIndexStringComparisonMatchingCollators) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + BSONObj filterObj(fromjson("{a: {$gt: 'cba'}}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + std::unique_ptr filterExpr = parseMatchExpression(filterObj, &collator); + addIndex(fromjson("{a: 1}"), filterExpr.get(), &collator); + + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: 'abc'}, collation: {locale: 'reverse'}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {a: 'abc'}, collation: {locale: 'reverse'}, node: {ixscan: " + "{filter: null, pattern: {a: 1}, " + "bounds: {a: [['cba', 'cba', true, true]]}}}}}"); + + runQueryAsCommand( + fromjson("{find: 'testns', filter: {a: 'zaa'}, collation: {locale: 'reverse'}}")); + assertNumSolutions(0U); +} + +TEST_F(QueryPlannerTest, PartialIndexFilterStringComparisonNonMatchingCollators) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + BSONObj filterObj(fromjson("{a: {$lt: {b: 'abc'}}}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + std::unique_ptr filterExpr = parseMatchExpression(filterObj, &collator); + addIndex(fromjson("{a: 1}"), filterExpr.get(), &collator); + + runQuery(fromjson("{a: {b: 1}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: " + "{filter: null, pattern: {a: 1}, " + "bounds: {a: [[{b: 1}, {b: 1}, true, true]]}}}}}"); +} + +TEST_F(QueryPlannerTest, PartialIndexNoStringComparisonNonMatchingCollators) { + params.options = QueryPlannerParams::NO_TABLE_SCAN; + BSONObj filterObj(fromjson("{a: {$gt: 0}}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kAlwaysEqual); + std::unique_ptr filterExpr = parseMatchExpression(filterObj, &collator); + addIndex(fromjson("{a: 1}"), filterExpr.get(), &collator); + + runQueryAsCommand(fromjson("{find: 'testns', filter: {a: 1}, collation: {locale: 'reverse'}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: null, node: {ixscan: " + "{filter: null, pattern: {a: 1}, " + "bounds: {a: [[1, 1, true, true]]}}}}}"); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/query/query_planner_test_fixture.cpp b/src/mongo/db/query/query_planner_test_fixture.cpp index aae4d95372f..c49df6ab106 100644 --- a/src/mongo/db/query/query_planner_test_fixture.cpp +++ b/src/mongo/db/query/query_planner_test_fixture.cpp @@ -139,6 +139,19 @@ void QueryPlannerTest::addIndex(BSONObj keyPattern, const CollatorInterface* col params.indices.push_back(entry); } +void QueryPlannerTest::addIndex(BSONObj keyPattern, + MatchExpression* filterExpr, + const CollatorInterface* collator) { + const bool sparse = false; + const bool unique = false; + const bool multikey = false; + const char name[] = "my_partial_index_with_collator"; + const BSONObj infoObj; + IndexEntry entry(keyPattern, multikey, sparse, unique, name, filterExpr, infoObj); + entry.collator = collator; + params.indices.push_back(entry); +} + void QueryPlannerTest::runQuery(BSONObj query) { runQuerySortProjSkipNToReturn(query, BSONObj(), BSONObj(), 0, 0); } @@ -399,8 +412,8 @@ void QueryPlannerTest::assertHasOneSolutionOf(const std::vector& so FAIL(ss); } -std::unique_ptr QueryPlannerTest::parseMatchExpression(const BSONObj& obj) { - const CollatorInterface* collator = nullptr; +std::unique_ptr QueryPlannerTest::parseMatchExpression( + const BSONObj& obj, const CollatorInterface* collator) { StatusWithMatchExpression status = MatchExpressionParser::parse(obj, ExtensionsCallbackDisallowExtensions(), collator); if (!status.isOK()) { diff --git a/src/mongo/db/query/query_planner_test_fixture.h b/src/mongo/db/query/query_planner_test_fixture.h index 2e274274fb1..6c649702c74 100644 --- a/src/mongo/db/query/query_planner_test_fixture.h +++ b/src/mongo/db/query/query_planner_test_fixture.h @@ -68,6 +68,10 @@ protected: void addIndex(BSONObj keyPattern, const CollatorInterface* collator); + void addIndex(BSONObj keyPattern, + MatchExpression* filterExpr, + const CollatorInterface* collator); + // // Execute planner. // @@ -192,7 +196,8 @@ protected: /** * Helper function to parse a MatchExpression. */ - static std::unique_ptr parseMatchExpression(const BSONObj& obj); + static std::unique_ptr parseMatchExpression( + const BSONObj& obj, const CollatorInterface* collator = nullptr); // // Data members. -- cgit v1.2.1