diff options
author | David Storch <david.storch@10gen.com> | 2014-07-02 13:13:54 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2014-07-11 13:22:16 -0400 |
commit | f3ba3590ce1ca1bf8f7fbb13ad8311a52df9c176 (patch) | |
tree | c81fe778b2e2604ae97b83d22552d19c04b45691 /src | |
parent | 9dbdefb125b07db9d9801b80d525f9a954bec5f2 (diff) | |
download | mongo-f3ba3590ce1ca1bf8f7fbb13ad8311a52df9c176.tar.gz |
SERVER-14311 account for key skips in plan ranking by returning NEED_TIME in the index scan stage
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/index_scan.cpp | 19 | ||||
-rw-r--r-- | src/mongo/db/exec/index_scan.h | 15 | ||||
-rw-r--r-- | src/mongo/dbtests/plan_ranking.cpp | 32 | ||||
-rw-r--r-- | src/mongo/dbtests/query_stage_and.cpp | 4 |
4 files changed, 67 insertions, 3 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp index 30aff504f37..5e0bd22b899 100644 --- a/src/mongo/db/exec/index_scan.cpp +++ b/src/mongo/db/exec/index_scan.cpp @@ -56,6 +56,7 @@ namespace mongo { WorkingSet* workingSet, const MatchExpression* filter) : _txn(txn), + _checkEndKeys(0), _workingSet(workingSet), _hitEnd(false), _filter(filter), @@ -138,10 +139,21 @@ namespace mongo { // Adds the amount of time taken by work() to executionTimeMillis. ScopedTimer timer(&_commonStats.executionTimeMillis); + // If we examined multiple keys in a prior work cycle, make up for it here by returning + // NEED_TIME. This is done for plan ranking. Refer to the comment for '_checkEndKeys' + // in the .h for details. + if (_checkEndKeys > 0) { + --_checkEndKeys; + ++_commonStats.needTime; + return PlanStage::NEED_TIME; + } + if (NULL == _indexCursor.get()) { // First call to work(). Perform possibly heavy init. initIndexScan(); checkEnd(); + ++_commonStats.needTime; + return PlanStage::NEED_TIME; } else if (_yieldMovedCursor) { _yieldMovedCursor = false; @@ -218,6 +230,10 @@ namespace mongo { } } + if (_checkEndKeys != 0) { + return false; + } + return _hitEnd || _indexCursor->isEOF(); } @@ -340,8 +356,7 @@ namespace mongo { break; } - // TODO: Can we do too much scanning here? Old BtreeCursor stops scanning after a - // while and relies on a Matcher to make sure the result is ok. + ++_checkEndKeys; } } } diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h index 7df62304eef..50e2336a9f9 100644 --- a/src/mongo/db/exec/index_scan.h +++ b/src/mongo/db/exec/index_scan.h @@ -117,6 +117,21 @@ namespace mongo { // transactional context for read locks. Not owned by us OperationContext* _txn; + // The number of keys examined during a call to checkEnd() that have not yet been + // accounted for by returning a NEED_TIME. + // + // Good plan ranking requires that the index scan uses one work cycle per index key + // examined. Since checkEnd() may examine multiple keys, we keep track of them here + // and make up for it later by returning NEED_TIME. + // + // Example of how this is useful for plan ranking: + // Say you have indices {a: 1, b: 1} and {a: 1, x: 1, b: 1}, with predicates over + // fields 'a' and 'b'. It's cheaper to use index {a: 1, b: 1}. Why? Because for + // index {a: 1, x: 1, b: 1} you have to skip lots of keys due to the interceding + // 'x' field. This skipping is done inside checkEnd(), and we use '_checkEndKeys' + // to account for it. + size_t _checkEndKeys; + // The WorkingSet we annotate with results. Not owned by us. WorkingSet* _workingSet; diff --git a/src/mongo/dbtests/plan_ranking.cpp b/src/mongo/dbtests/plan_ranking.cpp index 4908eb180ef..29a01c306c8 100644 --- a/src/mongo/dbtests/plan_ranking.cpp +++ b/src/mongo/dbtests/plan_ranking.cpp @@ -723,6 +723,37 @@ namespace PlanRankingTests { } }; + /** + * Suppose we have two plans which are roughly equivalent, other than that + * one uses an index which involves doing a lot more skipping of index keys. + * Prefer the plan which does not have to do this index key skipping. + */ + class PlanRankingAccountForKeySkips : public PlanRankingTestBase { + public: + void run() { + for (int i = 0; i < 4; ++i) { + insert(BSON("a" << 1 << "b" << 1)); + } + + // Second index has intervening fields which leads to more + // skipping of keys. We should pick a plan that uses the first. + addIndex(BSON("a" << 1 << "b" << 1)); + addIndex(BSON("a" << 1 << "x" << 1 << "y" << 1 << "z" << 1 << "b" << 1)); + + CanonicalQuery* cq; + ASSERT(CanonicalQuery::canonicalize(ns, + fromjson("{a: 1, b: 1}"), + &cq).isOK()); + ASSERT(NULL != cq); + + // Expect to use index {a: 1, b: 1}. + QuerySolution* soln = pickBestPlan(cq); + ASSERT(QueryPlannerTestLib::solutionMatches( + "{fetch: {node: {ixscan: {pattern: {a: 1, b: 1}}}}}", + soln->root.get())); + } + }; + class All : public Suite { public: All() : Suite( "query_plan_ranking" ) {} @@ -744,6 +775,7 @@ namespace PlanRankingTests { // add<PlanRankingChooseBetweenIxisectPlans>(); add<PlanRankingAvoidBlockingSort>(); add<PlanRankingWorkPlansLongEnough>(); + add<PlanRankingAccountForKeySkips>(); } } planRankingAll; diff --git a/src/mongo/dbtests/query_stage_and.cpp b/src/mongo/dbtests/query_stage_and.cpp index a7b77876237..5ebad4fb6e8 100644 --- a/src/mongo/dbtests/query_stage_and.cpp +++ b/src/mongo/dbtests/query_stage_and.cpp @@ -802,7 +802,9 @@ namespace QueryStageAnd { // This isn't true in general if the collection is not dropped beforehand. WorkingSetID id = WorkingSet::INVALID_ID; - // Sorted AND looks at the first child, which is an index scan over foo==1. + // Sorted AND looks at the first child, which is an index scan over foo==1. The + // first work() just does initialization, so we have to call work() twice. + ah->work(&id); ah->work(&id); // The first thing that the index scan returns (due to increasing DiskLoc trick) is the |