diff options
author | David Storch <david.storch@10gen.com> | 2013-11-25 17:31:58 -0500 |
---|---|---|
committer | Matt Kangas <matt.kangas@mongodb.com> | 2013-12-02 12:25:46 -0500 |
commit | bcfc8f0ae35fe08c5336c21cfd9752c06b089956 (patch) | |
tree | 3fdf3097bfc93e6cc9c1f638abadd135ed92caa1 | |
parent | cde3f4bd1aab5c29c880da9e606864f31377889b (diff) | |
download | mongo-bcfc8f0ae35fe08c5336c21cfd9752c06b089956.tar.gz |
SERVER-11852 fix query planning for inexact regexes
Signed-off-by: Matt Kangas <matt.kangas@mongodb.com>
-rw-r--r-- | jstests/and3.js | 12 | ||||
-rw-r--r-- | jstests/regexc.js | 28 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 4 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder_test.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 96 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test.cpp | 109 |
8 files changed, 279 insertions, 35 deletions
diff --git a/jstests/and3.js b/jstests/and3.js index e243b62a307..73e16d486e8 100644 --- a/jstests/and3.js +++ b/jstests/and3.js @@ -10,10 +10,7 @@ t.ensureIndex( {a:1} ); function checkScanMatch( query, nscannedObjects, n ) { var e = t.find( query ).hint( {a:1} ).explain(); - // NOTE The nscannedObjects values aren't necessarily optimal currently, - // we're just checking current behavior here. - // QUERY_MIGRATION: our nscannedobjects differs slightly. - // assert.eq( nscannedObjects, e.nscannedObjects ); + assert.eq( nscannedObjects, e.nscannedObjects ); assert.eq( n, e.n ); } @@ -26,8 +23,11 @@ checkScanMatch( {$and:[{a:/o/}]}, 1, 1 ); checkScanMatch( {$and:[{a:/a/}]}, 0, 0 ); checkScanMatch( {$and:[{a:{$not:/o/}}]}, 2, 1 ); checkScanMatch( {$and:[{a:{$not:/a/}}]}, 2, 2 ); -checkScanMatch( {$and:[{a:/o/},{a:{$not:/o/}}]}, 1, 0 ); -checkScanMatch( {$and:[{a:/o/},{a:{$not:/a/}}]}, 1, 1 ); +// QUERY_MIGRATION: currently, any query with NOT or +// NOR will just perform a collection scan; consequently, +// our nscannedObjects is 2 for both the following tests +//checkScanMatch( {$and:[{a:/o/},{a:{$not:/o/}}]}, 1, 0 ); +//checkScanMatch( {$and:[{a:/o/},{a:{$not:/a/}}]}, 1, 1 ); checkScanMatch( {$or:[{a:/o/}]}, 1, 1 ); checkScanMatch( {$or:[{a:/a/}]}, 0, 0 ); checkScanMatch( {$nor:[{a:/o/}]}, 2, 1 ); diff --git a/jstests/regexc.js b/jstests/regexc.js new file mode 100644 index 00000000000..f7690c96496 --- /dev/null +++ b/jstests/regexc.js @@ -0,0 +1,28 @@ +// Multiple regular expressions using the same index + +var t = db.jstests_regexc; + +// $and using same index twice +t.drop(); +t.ensureIndex({a: 1}); +t.save({a: "0"}); +t.save({a: "1"}); +t.save({a: "10"}); +assert.eq( 1, t.find({$and: [{a: /0/}, {a: /1/}]}).itcount() ); + +// implicit $and using compound index twice +t.drop(); +t.ensureIndex({a: 1, b: 1}); +t.save({a: "0", b: "1"}); +t.save({a: "10", b: "10"}); +t.save({a: "10", b: "2"}); +assert.eq( 2, t.find({a: /0/, b: /1/}).itcount() ); + +// $or using same index twice +t.drop(); +t.ensureIndex({a: 1}); +t.save({a: "0"}); +t.save({a: "1"}); +t.save({a: "2"}); +t.save({a: "10"}); +assert.eq( 3, t.find({$or: [{a: /0/}, {a: /1/}]}).itcount() ); diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index dfabff79272..22a44d7dff1 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -45,7 +45,7 @@ namespace mongo { string IndexBoundsBuilder::simpleRegex(const char* regex, const char* flags, BoundsTightness* tightnessOut) { string r = ""; - *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; + *tightnessOut = IndexBoundsBuilder::INEXACT_COVERED; bool multilineOK; if ( regex[0] == '\\' && regex[1] == 'A') { @@ -139,7 +139,7 @@ namespace mongo { if ( r.empty() && *regex == 0 ) { r = ss.str(); - *tightnessOut = r.empty() ? IndexBoundsBuilder::INEXACT_FETCH : IndexBoundsBuilder::EXACT; + *tightnessOut = r.empty() ? IndexBoundsBuilder::INEXACT_COVERED : IndexBoundsBuilder::EXACT; } return r; diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 078f1510872..9a4bfe8e75e 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -45,7 +45,9 @@ namespace mongo { // Index bounds are exact. EXACT, // Index bounds are inexact, and a fetch is required. - INEXACT_FETCH + INEXACT_FETCH, + // Index bounds are inexact, but no fetch is required + INEXACT_COVERED }; /** diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp index d13ec45f6c4..f3e4d38b020 100644 --- a/src/mongo/db/query/index_bounds_builder_test.cpp +++ b/src/mongo/db/query/index_bounds_builder_test.cpp @@ -239,6 +239,10 @@ namespace { ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); } + // + // Test simpleRegex + // + TEST(SimpleRegexTest, RootedLine) { IndexBoundsBuilder::BoundsTightness tightness; string prefix = IndexBoundsBuilder::simpleRegex("^foo", "", &tightness); @@ -257,21 +261,21 @@ namespace { IndexBoundsBuilder::BoundsTightness tightness; string prefix = IndexBoundsBuilder::simpleRegex("^f?oo", "", &tightness); ASSERT_EQUALS(prefix, ""); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED); } TEST(SimpleRegexTest, RootedOptionalSecondChar) { IndexBoundsBuilder::BoundsTightness tightness; string prefix = IndexBoundsBuilder::simpleRegex("^fz?oo", "", &tightness); ASSERT_EQUALS(prefix, "f"); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED); } TEST(SimpleRegexTest, RootedMultiline) { IndexBoundsBuilder::BoundsTightness tightness; string prefix = IndexBoundsBuilder::simpleRegex("^foo", "m", &tightness); ASSERT_EQUALS(prefix, ""); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED); } TEST(SimpleRegexTest, RootedStringMultiline) { @@ -285,7 +289,7 @@ namespace { IndexBoundsBuilder::BoundsTightness tightness; string prefix = IndexBoundsBuilder::simpleRegex("\\Afoo", "mi", &tightness); ASSERT_EQUALS(prefix, ""); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED); } TEST(SimpleRegexTest, RootedComplex) { @@ -293,7 +297,7 @@ namespace { string prefix = IndexBoundsBuilder::simpleRegex( "\\Af \t\vo\n\ro \\ \\# #comment", "mx", &tightness); ASSERT_EQUALS(prefix, "foo #"); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED); } TEST(SimpleRegexTest, RootedLiteral) { @@ -309,7 +313,7 @@ namespace { string prefix = IndexBoundsBuilder::simpleRegex( "^\\Qasdf\\E.*", "", &tightness); ASSERT_EQUALS(prefix, "asdf"); - ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_FETCH); + ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED); } TEST(SimpleRegexTest, RootedLiteralNoEnd) { @@ -352,4 +356,38 @@ namespace { ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT); } + // + // Regex bounds + // + + TEST(IndexBoundsBuilderTest, SimpleNonPrefixRegex) { + BSONObj obj = fromjson("{a: /foo/}"); + auto_ptr<MatchExpression> expr(parseMatchExpression(obj)); + BSONElement elt = obj.firstElement(); + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate(expr.get(), elt, &oil, &tightness); + ASSERT_EQUALS(oil.intervals.size(), 2U); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, oil.intervals[0].compare( + Interval(fromjson("{'': '', '': {}}"), true, false))); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, oil.intervals[1].compare( + Interval(fromjson("{'': /foo/, '': /foo/}"), true, true))); + ASSERT(tightness == IndexBoundsBuilder::INEXACT_COVERED); + } + + TEST(IndexBoundsBuilderTest, SimplePrefixRegex) { + BSONObj obj = fromjson("{a: /^foo/}"); + auto_ptr<MatchExpression> expr(parseMatchExpression(obj)); + BSONElement elt = obj.firstElement(); + OrderedIntervalList oil; + IndexBoundsBuilder::BoundsTightness tightness; + IndexBoundsBuilder::translate(expr.get(), elt, &oil, &tightness); + ASSERT_EQUALS(oil.intervals.size(), 2U); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, oil.intervals[0].compare( + Interval(fromjson("{'': 'foo', '': 'fop'}"), true, false))); + ASSERT_EQUALS(Interval::INTERVAL_EQUALS, oil.intervals[1].compare( + Interval(fromjson("{'': /^foo/, '': /^foo/}"), true, true))); + ASSERT(tightness == IndexBoundsBuilder::EXACT); + } + } // namespace diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 1534e1e4533..bb63b5106ef 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -427,14 +427,22 @@ namespace mongo { + curChild); delete child; } - // In the AND case, the filter can be brought above the AND node. - // But in the OR case, the filter only applies to one branch, so - // we must affix curChild's filter now. In order to apply the filter - // to the proper OR branch, create a FETCH node with the filter whose - // child is the IXSCAN. - else if (root->matchType() == MatchExpression::OR) { - verify(NULL != currentScan.get()); + else if (tightness == IndexBoundsBuilder::INEXACT_COVERED) { + // The bounds are not exact, but the information needed to + // evaluate the predicate is in the index key. Remove the + // MatchExpression from its parent and attach it to the filter + // of the index scan we're building. + root->getChildVector()->erase(root->getChildVector()->begin() + + curChild); + _addFilterToSolutionNode(currentScan.get(), child, root->matchType()); + } + else if (root->matchType() == MatchExpression::OR) { + // In the AND case, the filter can be brought above the AND node. + // But in the OR case, the filter only applies to one branch, so + // we must affix curChild's filter now. In order to apply the filter + // to the proper OR branch, create a FETCH node with the filter whose + // child is the IXSCAN. finishLeafNode(currentScan.get(), indices[currentIndexNumber]); root->getChildVector()->erase(root->getChildVector()->begin() + curChild); @@ -478,14 +486,22 @@ namespace mongo { delete child; // Don't increment curChild. } - // In the AND case, the filter can be brought above the AND node. - // But in the OR case, the filter only applies to one branch, so - // we must affix curChild's filter now. In order to apply the filter - // to the proper OR branch, create a FETCH node with the filter whose - // child is the IXSCAN. - else if (root->matchType() == MatchExpression::OR) { - verify(NULL != currentScan.get()); + else if (tightness == IndexBoundsBuilder::INEXACT_COVERED) { + // The bounds are not exact, but the information needed to + // evaluate the predicate is in the index key. Remove the + // MatchExpression from its parent and attach it to the filter + // of the index scan we're building. + root->getChildVector()->erase(root->getChildVector()->begin() + + curChild); + _addFilterToSolutionNode(currentScan.get(), child, root->matchType()); + } + else if (root->matchType() == MatchExpression::OR) { + // In the AND case, the filter can be brought above the AND node. + // But in the OR case, the filter only applies to one branch, so + // we must affix curChild's filter now. In order to apply the filter + // to the proper OR branch, create a FETCH node with the filter whose + // child is the IXSCAN. finishLeafNode(currentScan.get(), indices[currentIndexNumber]); root->getChildVector()->erase(root->getChildVector()->begin() + curChild); @@ -733,12 +749,18 @@ namespace mongo { if (tightness == IndexBoundsBuilder::EXACT) { return soln; } - - FetchNode* fetch = new FetchNode(); - verify(NULL != autoRoot.get()); - fetch->filter.reset(autoRoot.release()); - fetch->children.push_back(soln); - return fetch; + else if (tightness == IndexBoundsBuilder::INEXACT_COVERED) { + verify(NULL == soln->filter.get()); + soln->filter.reset(autoRoot.release()); + return soln; + } + else { // tightness == IndexBoundsBuilder::INEXACT_FETCH + FetchNode* fetch = new FetchNode(); + verify(NULL != autoRoot.get()); + fetch->filter.reset(autoRoot.release()); + fetch->children.push_back(soln); + return fetch; + } } else if (Indexability::arrayUsesIndexOnChildren(root)) { QuerySolutionNode* solution = NULL; @@ -840,4 +862,38 @@ namespace mongo { return solnRoot; } + // static + void QueryPlannerAccess::_addFilterToSolutionNode(QuerySolutionNode* node, + MatchExpression* match, + MatchExpression::MatchType type) { + if (NULL == node->filter) { + node->filter.reset(match); + } + // The 'node' already has either an AND or OR filter that matches + // 'type'. Add 'match' as another branch of the filter. + else if (type == node->filter->matchType()) { + ListOfMatchExpression* listFilter = + static_cast<ListOfMatchExpression*>(node->filter.get()); + listFilter->add(match); + } + // The 'node' already has a filter that does not match + // 'type'. If 'type' is AND, then combine 'match' with + // the existing filter by adding an AND. If 'type' is OR, + // combine by adding an OR node. + else { + ListOfMatchExpression* listFilter; + if (MatchExpression::AND == type) { + listFilter = new AndMatchExpression(); + } + else { + verify(MatchExpression::OR == type); + listFilter = new OrMatchExpression(); + } + MatchExpression* oldFilter = node->filter->shallowClone(); + listFilter->add(oldFilter); + listFilter->add(match); + node->filter.reset(listFilter); + } + } + } // namespace mongo diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index caeb75494fa..d4d490f5154 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -155,6 +155,17 @@ namespace mongo { * Aligns OILs (and bounds) according to the kp direction * the scanDir. */ static void alignBounds(IndexBounds* bounds, const BSONObj& kp, int scanDir = 1); + + private: + /** + * Add the filter 'match' to the query solution node 'node'. Takes + * ownership of 'match'. + * + * The MatchType, 'type', indicates whether 'match' is a child of an + * AND or an OR match expression. + */ + static void _addFilterToSolutionNode(QuerySolutionNode* node, MatchExpression* match, + MatchExpression::MatchType type); }; } // namespace mongo diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index bdefd9cffd7..4fdbe73bbf3 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -1096,6 +1096,115 @@ namespace { assertSolutionExists("{fetch: {ixscan: {a: 1}}}"); } + // + // Regex + // + + TEST_F(IndexAssignmentTest, PrefixRegex) { + addIndex(BSON("a" << 1)); + runQuery(fromjson("{a: /^foo/}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{cscan: 1}"); + assertSolutionExists("{fetch: {ixscan: {a: 1}}}"); + } + + TEST_F(IndexAssignmentTest, PrefixRegexCovering) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{a: /^foo/}"), BSONObj(), fromjson("{_id: 0, a: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {a: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, NonPrefixRegex) { + addIndex(BSON("a" << 1)); + runQuery(fromjson("{a: /foo/}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{cscan: 1}"); + assertSolutionExists("{fetch: {ixscan: {a: 1}}}"); + } + + TEST_F(IndexAssignmentTest, NonPrefixRegexCovering) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{a: /foo/}"), BSONObj(), fromjson("{_id: 0, a: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {a: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, NonPrefixRegexAnd) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuery(fromjson("{a: /foo/, b: 2}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{cscan: 1}"); + assertSolutionExists("{fetch: {ixscan: {a: 1, b: 1}}}"); + } + + TEST_F(IndexAssignmentTest, NonPrefixRegexAndCovering) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuerySortProj(fromjson("{a: /foo/, b: 2}"), BSONObj(), + fromjson("{_id: 0, a: 1, b: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1, b: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1, b: 1}, node: {ixscan: {a: 1, b: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, NonPrefixRegexOrCovering) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{$or: [{a: /0/}, {a: /1/}]}"), BSONObj(), + fromjson("{_id: 0, a: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {a: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, NonPrefixRegexInCovering) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{a: {$in: [/foo/, /bar/]}}"), BSONObj(), + fromjson("{_id: 0, a: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {a: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, TwoRegexCompoundIndexCovering) { + addIndex(BSON("a" << 1 << "b" << 1)); + runQuerySortProj(fromjson("{a: /0/, b: /1/}"), BSONObj(), + fromjson("{_id: 0, a: 1, b: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1, b: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1, b: 1}, node: {ixscan: {a: 1, b: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, TwoRegexSameFieldCovering) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{$and: [{a: /0/}, {a: /1/}]}"), BSONObj(), + fromjson("{_id: 0, a: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {a: 1}}}}"); + } + + TEST_F(IndexAssignmentTest, ThreeRegexSameFieldCovering) { + addIndex(BSON("a" << 1)); + runQuerySortProj(fromjson("{$and: [{a: /0/}, {a: /1/}, {a: /2/}]}"), BSONObj(), + fromjson("{_id: 0, a: 1}")); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {cscan: 1}}}"); + assertSolutionExists("{proj: {spec: {_id: 0, a: 1}, node: {ixscan: {a: 1}}}}"); + } + // STOPPED HERE - need to hook up machinery for multiple indexed predicates // second is not working (until the machinery is in place) // |