diff options
author | David Storch <david.storch@10gen.com> | 2014-05-30 16:59:32 -0400 |
---|---|---|
committer | Dan Pasette <dan@mongodb.com> | 2014-06-01 10:29:06 -0400 |
commit | 5900e7a03183f052bf5f2f17f177c65062ac75aa (patch) | |
tree | 58c851cd24340a43e60be201d761edbbd988cc84 | |
parent | 3f02f1a6a157eda94f8d10edbeb4148a11a4bdc1 (diff) | |
download | mongo-5900e7a03183f052bf5f2f17f177c65062ac75aa.tar.gz |
SERVER-13960 bug fixes for OR with inexact predicates
(cherry picked from commit 3242afb803a5cc523f16c2c63c3ee1dfc10a5671)
-rw-r--r-- | jstests/core/or_inexact.js | 227 | ||||
-rw-r--r-- | src/mongo/db/query/index_bounds_builder.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 23 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.h | 5 |
4 files changed, 255 insertions, 8 deletions
diff --git a/jstests/core/or_inexact.js b/jstests/core/or_inexact.js new file mode 100644 index 00000000000..722d47ee05a --- /dev/null +++ b/jstests/core/or_inexact.js @@ -0,0 +1,227 @@ +// Test $or with predicates that generate inexact bounds. The access planner +// has special logic for such queries. + +var t = db.jstests_or_inexact; +var cursor; + +// A predicate which uses an index falls into one of three categories: +// +// 1) EXACT +// The predicate can be fully evaluated by the index bounds. +// 2) INEXACT_COVERED +// The predicate cannot be fully evaluated by the index bounds. However, +// there is enough information in the index key to evaluate. Such predicates +// are answered by an index scan with an additional filter on the index key. +// 3) INEXACT_FETCH +// We must fetch the full documents in order to evaluate the predicate. + +// Case 1: An EXACT predicate and an INEXACT_COVERED +t.drop(); +t.ensureIndex({name: 1}); +t.insert({_id: 0, name: "thomas"}); +t.insert({_id: 1, name: "alexandra"}); +cursor = t.find({$or: [{name: "thomas"}, {name: /^alexand(er|ra)/}]}); +assert.eq(2, cursor.itcount(), "case 1"); + +// Case 2: Two INEXACT_COVERED predicates. +t.drop(); +t.ensureIndex({name: 1}); +t.insert({_id: 0, name: "thomas"}); +t.insert({_id: 1, name: "alexandra"}); +cursor = t.find({$or: [{name: /omas/}, {name: /^alexand(er|ra)/}]}); +assert.eq(2, cursor.itcount(), "case 2"); + +// Case 3: An EXACT, and INEXACT_COVERED, and an INEXACT_FETCH. +t.drop(); +t.ensureIndex({names: 1}); +t.insert({_id: 0, names: ["thomas", "alexandra"]}); +t.insert({_id: 1, names: "frank"}); +t.insert({_id: 2, names: "alice"}); +t.insert({_id: 3, names: ["dave"]}); +cursor = t.find({$or: [{names: "frank"}, {names: /^al(ice|ex)/}, + {names: {$elemMatch: {$eq: "thomas"}}}]}); +assert.eq(3, cursor.itcount(), "case 3"); + +// Case 4: Two INEXACT_FETCH. +t.drop(); +t.ensureIndex({names: 1}); +t.insert({_id: 0, names: ["thomas", "alexandra"]}); +t.insert({_id: 1, names: ["frank", "alice"]}); +t.insert({_id: 2, names: "frank"}); +cursor = t.find({$or: [{names: {$elemMatch: {$eq: "alexandra"}}}, + {names: {$elemMatch: {$eq: "frank"}}}]}); +assert.eq(2, cursor.itcount(), "case 4"); + +// Case 5: Two indices. One has EXACT and INEXACT_COVERED. The other +// has EXACT and INEXACT_FETCH. +t.drop(); +t.ensureIndex({first: 1}); +t.ensureIndex({last: 1}); +t.insert({_id: 0, first: "frank", last: "smith"}); +t.insert({_id: 1, first: "john", last: "doe"}); +t.insert({_id: 2, first: "dave", last: "st"}); +t.insert({_id: 3, first: ["dave", "david"], last: "pasette"}); +t.insert({_id: 4, first: "joanna", last: ["smith", "doe"]}); +cursor = t.find({$or: [{first: "frank"}, {last: {$elemMatch: {$eq: "doe"}}}, + {first: /david/}, {last: "st"}]}); +assert.eq(4, cursor.itcount(), "case 5"); + +// Case 6: Multikey with only EXACT predicates. +t.drop(); +t.ensureIndex({names: 1}); +t.insert({_id: 0, names: ["david", "dave"]}); +t.insert({_id: 1, names: ["joseph", "joe", "joey"]}); +cursor = t.find({$or: [{names: "dave"}, {names: "joe"}]}); +assert.eq(2, cursor.itcount(), "case 6"); + +// Case 7: Multikey with EXACT and INEXACT_COVERED. +t.drop(); +t.ensureIndex({names: 1}); +t.insert({_id: 0, names: ["david", "dave"]}); +t.insert({_id: 1, names: ["joseph", "joe", "joey"]}); +cursor = t.find({$or: [{names: "dave"}, {names: /joe/}]}); +assert.eq(2, cursor.itcount(), "case 7"); + +// Case 8: Text with EXACT. +t.drop(); +t.ensureIndex({pre: 1, names: "text"}); +t.ensureIndex({other: 1}); +t.insert({_id: 0, pre: 3, names: "david dave", other: 1}); +t.insert({_id: 1, pre: 4, names: "joseph joe joey", other: 2}); +cursor = t.find({$or: [{$text: {$search: "dave"}, pre: 3}, {other: 2}]}); +assert.eq(2, cursor.itcount(), "case 8"); + +// Case 9: Text with INEXACT_COVERED. +t.drop(); +t.ensureIndex({pre: 1, names: "text"}); +t.ensureIndex({other: 1}); +t.insert({_id: 0, pre: 3, names: "david dave", other: "foo"}); +t.insert({_id: 1, pre: 5, names: "david dave", other: "foo"}); +t.insert({_id: 2, pre: 4, names: "joseph joe joey", other: "bar"}); +cursor = t.find({$or: [{$text: {$search: "dave"}, pre: 3}, {other: /bar/}]}); +assert.eq(2, cursor.itcount(), "case 9"); + +// Case 10: Text requiring filter with INEXACT_COVERED. +t.drop(); +t.ensureIndex({pre: 1, names: "text"}); +t.ensureIndex({other: 1}); +t.insert({_id: 0, pre: 3, names: "david dave", other: "foo"}); +t.insert({_id: 1, pre: 3, names: "david dave", other: "foo"}); +t.insert({_id: 2, pre: 4, names: "joseph joe joey", other: "bar"}); +cursor = t.find({$or: [{$text: {$search: "dave"}, pre: 3, other: "foo"}, {other: /bar/}]}); +assert.eq(3, cursor.itcount(), "case 10"); + +// Case 11: GEO with non-geo, same index, 2dsphere. +t.drop(); +t.ensureIndex({pre: 1, loc: "2dsphere"}); +t.insert({_id: 0, pre: 3, loc: {type: "Point", coordinates: [40, 5]}}); +t.insert({_id: 1, pre: 4, loc: {type: "Point", coordinates: [0, 0]}}); +cursor = t.find({$or: [{pre: 3, loc: {$geoWithin: {$geometry: + {type: "Polygon", + coordinates: [[[39,4], [41,4], [41,6], [39,6], [39,4]]]}}}}, + {pre: 4, loc: {$geoWithin: {$geometry: + {type: "Polygon", + coordinates: [[[-1,-1], [1,-1], [1,1], [-1,1], [-1,-1]]]}}}}]}); +assert.eq(2, cursor.itcount(), "case 11"); + +// Case 12: GEO with non-geo, same index, 2d. +t.drop(); +t.ensureIndex({pre: 1, loc: "2d"}); +t.insert({_id: 0, pre: 3, loc: {type: "Point", coordinates: [40, 5]}}); +t.insert({_id: 1, pre: 4, loc: {type: "Point", coordinates: [0, 0]}}); +cursor = t.find({$or: [{pre: 3, loc: {$geoWithin: {$geometry: + {type: "Polygon", + coordinates: [[[39,4], [41,4], [41,6], [39,6], [39,4]]]}}}}, + {pre: 4, loc: {$geoWithin: {$geometry: + {type: "Polygon", + coordinates: [[[-1,-1], [1,-1], [1,1], [-1,1], [-1,-1]]]}}}}]}); +assert.eq(2, cursor.itcount(), "case 12"); + +// Case 13: $elemMatch object. +t.drop(); +t.ensureIndex({"a.b": 1}); +t.insert({_id: 0, a: [{b: 1}, {b: 2}]}); +t.insert({_id: 1, a: [{b: 3}, {b: 4}]}); +cursor = t.find({$or: [{a: {$elemMatch: {b: {$lte: 1}}}}, + {a: {$elemMatch: {b: {$gte: 4}}}}]}); +assert.eq(2, cursor.itcount(), "case 13"); + +// Case 14: $elemMatch object, below an AND. +t.drop(); +t.ensureIndex({"a.b": 1}); +t.insert({_id: 0, a: [{b: 1}, {b: 2}]}); +t.insert({_id: 1, a: [{b: 2}, {b: 4}]}); +cursor = t.find({"a.b": 2, $or: [{a: {$elemMatch: {b: {$lte: 1}}}}, + {a: {$elemMatch: {b: {$gte: 4}}}}]}); +assert.eq(2, cursor.itcount(), "case 14"); + +// Case 15: $or below $elemMatch. +t.drop(); +t.ensureIndex({"a.b": 1}); +t.insert({_id: 0, a: [{b: 1}, {b: 2}]}); +t.insert({_id: 1, a: [{b: 2}, {b: 4}]}); +t.insert({_id: 2, a: {b: 4}}); +cursor = t.find({a: {$elemMatch: {$or: [{b: 1}, {b: 4}]}}}); +assert.eq(2, cursor.itcount(), "case 15"); + +// Case 16: $or below $elemMatch with INEXACT_COVERED. +t.drop(); +t.ensureIndex({"a.b": 1}); +t.insert({_id: 0, a: [{b: "x"}, {b: "y"}]}); +t.insert({_id: 1, a: [{b: "y"}, {b: ["y", "z"]}]}); +t.insert({_id: 2, a: {b: ["y", "z"]}}); +cursor = t.find({a: {$elemMatch: {$or: [{b: "x"}, {b: /z/}]}}}); +assert.eq(2, cursor.itcount(), "case 16"); + +// Case 17: case from SERVER-14030. +t.drop(); +t.ensureIndex({number: 1}); +t.insert({number: null, user_id: 1}); +t.insert({number: 2, user_id: 1}); +t.insert({number: 1, user_id: 1}); +cursor = t.find({$or: [{number: null}, {number: {$lte: 2}}]}); +assert.eq(3, cursor.itcount(), "case 17"); + +// Case 18: $in with EXACT and INEXACT_COVERED. +t.drop(); +t.ensureIndex({name: 1}); +t.insert({_id: 0, name: "thomas"}); +t.insert({_id: 1, name: "alexandra"}); +cursor = t.find({name: {$in: ["thomas", /^alexand(er|ra)/]}}); +assert.eq(2, cursor.itcount(), "case 18"); + +// Case 19: $in with EXACT, INEXACT_COVERED, and INEXACT_FETCH. +t.drop(); +t.ensureIndex({name: 1}); +t.insert({_id: 0, name: "thomas"}); +t.insert({_id: 1, name: "alexandra"}); +t.insert({_id: 2}); +cursor = t.find({$or: [{name: {$in: ["thomas", /^alexand(er|ra)/]}}, + {name: {$exists: false}}]}); +assert.eq(3, cursor.itcount(), "case 19"); + +// Case 20: $in with EXACT, INEXACT_COVERED, and INEXACT_FETCH, two indices. +t.drop(); +t.ensureIndex({a: 1}); +t.ensureIndex({b: 1}); +t.insert({_id: 0, a: "x", b: "y"}); +t.insert({_id: 1, a: "z", b: "z"}); +t.insert({_id: 2}); +t.insert({_id: 3, a: "w", b: "x"}); +t.insert({_id: 4, a: "l", b: "p"}); +cursor = t.find({$or: [{a: {$in: [/z/, /x/]}}, {a: "w"}, + {b: {$exists: false}}, {b: {$in: ["p"]}}]}); +assert.eq(5, cursor.itcount(), "case 19"); + +// Case 21: two $geoWithin that collapse to a single GEO index scan. +t.drop(); +t.ensureIndex({loc: "2dsphere"}); +t.insert({_id: 0, loc: {type: "Point", coordinates: [40, 5]}}); +t.insert({_id: 1, loc: {type: "Point", coordinates: [0, 0]}}); +cursor = t.find({$or: [{loc: {$geoWithin: {$geometry: + {type: "Polygon", + coordinates: [[[39,4], [41,4], [41,6], [39,6], [39,4]]]}}}}, + {loc: {$geoWithin: {$geometry: + {type: "Polygon", + coordinates: [[[-1,-1], [1,-1], [1,1], [-1,1], [-1,-1]]]}}}}]}); +assert.eq(2, cursor.itcount(), "case 21"); diff --git a/src/mongo/db/query/index_bounds_builder.h b/src/mongo/db/query/index_bounds_builder.h index 56ac49d947d..1666ef4aad8 100644 --- a/src/mongo/db/query/index_bounds_builder.h +++ b/src/mongo/db/query/index_bounds_builder.h @@ -51,11 +51,11 @@ namespace mongo { * X have tighter or looser bounds than predicate Y?". */ enum BoundsTightness { - // Index bounds are inexact, but no fetch is required - INEXACT_COVERED = 0, - // Index bounds are inexact, and a fetch is required. - INEXACT_FETCH = 1, + INEXACT_FETCH = 0, + + // Index bounds are inexact, but no fetch is required + INEXACT_COVERED = 1, // Index bounds are exact. EXACT = 2 diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 3c2d1efdffe..ba8ed3644f7 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -422,16 +422,31 @@ namespace mongo { } // static + bool QueryPlannerAccess::orNeedsFetch(const ScanBuildingState* scanState) { + if (scanState->loosestBounds == IndexBoundsBuilder::EXACT) { + return false; + } + else if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_FETCH) { + return true; + } + else { + invariant(scanState->loosestBounds == IndexBoundsBuilder::INEXACT_COVERED); + const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; + return index.multikey; + } + } + + // static void QueryPlannerAccess::finishAndOutputLeaf(ScanBuildingState* scanState, vector<QuerySolutionNode*>* out) { finishLeafNode(scanState->currentScan.get(), scanState->indices[scanState->currentIndexNumber]); if (MatchExpression::OR == scanState->root->matchType()) { - if (scanState->loosestBounds == IndexBoundsBuilder::INEXACT_FETCH) { - // This is an $or where at least one predicate has inexact bounds. In this - // case we must add a fetch node above the index scan whose filter includes - // *all* of the predicates used to generate the ixscan. + if (orNeedsFetch(scanState)) { + // In order to correctly evaluate the predicates for this index, we have to + // fetch the full documents. Add a fetch node above the index scan whose filter + // includes *all* of the predicates used to generate the ixscan. FetchNode* fetch = new FetchNode(); // Takes ownership. fetch->filter.reset(scanState->curOr.release()); diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h index 177483ea5f6..cd3e56fb092 100644 --- a/src/mongo/db/query/planner_access.h +++ b/src/mongo/db/query/planner_access.h @@ -356,6 +356,11 @@ namespace mongo { static void finishAndOutputLeaf(ScanBuildingState* scanState, std::vector<QuerySolutionNode*>* out); + /** + * Returns true if the current scan in 'scanState' requires a FetchNode. + */ + static bool orNeedsFetch(const ScanBuildingState* scanState); + static void finishTextNode(QuerySolutionNode* node, const IndexEntry& index); /** |