summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2014-07-02 13:13:54 -0400
committerDavid Storch <david.storch@10gen.com>2014-07-11 13:22:16 -0400
commitf3ba3590ce1ca1bf8f7fbb13ad8311a52df9c176 (patch)
treec81fe778b2e2604ae97b83d22552d19c04b45691 /src
parent9dbdefb125b07db9d9801b80d525f9a954bec5f2 (diff)
downloadmongo-f3ba3590ce1ca1bf8f7fbb13ad8311a52df9c176.tar.gz
SERVER-14311 account for key skips in plan ranking by returning NEED_TIME in the index scan stage
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/exec/index_scan.cpp19
-rw-r--r--src/mongo/db/exec/index_scan.h15
-rw-r--r--src/mongo/dbtests/plan_ranking.cpp32
-rw-r--r--src/mongo/dbtests/query_stage_and.cpp4
4 files changed, 67 insertions, 3 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
index 30aff504f37..5e0bd22b899 100644
--- a/src/mongo/db/exec/index_scan.cpp
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -56,6 +56,7 @@ namespace mongo {
WorkingSet* workingSet,
const MatchExpression* filter)
: _txn(txn),
+ _checkEndKeys(0),
_workingSet(workingSet),
_hitEnd(false),
_filter(filter),
@@ -138,10 +139,21 @@ namespace mongo {
// Adds the amount of time taken by work() to executionTimeMillis.
ScopedTimer timer(&_commonStats.executionTimeMillis);
+ // If we examined multiple keys in a prior work cycle, make up for it here by returning
+ // NEED_TIME. This is done for plan ranking. Refer to the comment for '_checkEndKeys'
+ // in the .h for details.
+ if (_checkEndKeys > 0) {
+ --_checkEndKeys;
+ ++_commonStats.needTime;
+ return PlanStage::NEED_TIME;
+ }
+
if (NULL == _indexCursor.get()) {
// First call to work(). Perform possibly heavy init.
initIndexScan();
checkEnd();
+ ++_commonStats.needTime;
+ return PlanStage::NEED_TIME;
}
else if (_yieldMovedCursor) {
_yieldMovedCursor = false;
@@ -218,6 +230,10 @@ namespace mongo {
}
}
+ if (_checkEndKeys != 0) {
+ return false;
+ }
+
return _hitEnd || _indexCursor->isEOF();
}
@@ -340,8 +356,7 @@ namespace mongo {
break;
}
- // TODO: Can we do too much scanning here? Old BtreeCursor stops scanning after a
- // while and relies on a Matcher to make sure the result is ok.
+ ++_checkEndKeys;
}
}
}
diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h
index 7df62304eef..50e2336a9f9 100644
--- a/src/mongo/db/exec/index_scan.h
+++ b/src/mongo/db/exec/index_scan.h
@@ -117,6 +117,21 @@ namespace mongo {
// transactional context for read locks. Not owned by us
OperationContext* _txn;
+ // The number of keys examined during a call to checkEnd() that have not yet been
+ // accounted for by returning a NEED_TIME.
+ //
+ // Good plan ranking requires that the index scan uses one work cycle per index key
+ // examined. Since checkEnd() may examine multiple keys, we keep track of them here
+ // and make up for it later by returning NEED_TIME.
+ //
+ // Example of how this is useful for plan ranking:
+ // Say you have indices {a: 1, b: 1} and {a: 1, x: 1, b: 1}, with predicates over
+ // fields 'a' and 'b'. It's cheaper to use index {a: 1, b: 1}. Why? Because for
+ // index {a: 1, x: 1, b: 1} you have to skip lots of keys due to the interceding
+ // 'x' field. This skipping is done inside checkEnd(), and we use '_checkEndKeys'
+ // to account for it.
+ size_t _checkEndKeys;
+
// The WorkingSet we annotate with results. Not owned by us.
WorkingSet* _workingSet;
diff --git a/src/mongo/dbtests/plan_ranking.cpp b/src/mongo/dbtests/plan_ranking.cpp
index 4908eb180ef..29a01c306c8 100644
--- a/src/mongo/dbtests/plan_ranking.cpp
+++ b/src/mongo/dbtests/plan_ranking.cpp
@@ -723,6 +723,37 @@ namespace PlanRankingTests {
}
};
+ /**
+ * Suppose we have two plans which are roughly equivalent, other than that
+ * one uses an index which involves doing a lot more skipping of index keys.
+ * Prefer the plan which does not have to do this index key skipping.
+ */
+ class PlanRankingAccountForKeySkips : public PlanRankingTestBase {
+ public:
+ void run() {
+ for (int i = 0; i < 4; ++i) {
+ insert(BSON("a" << 1 << "b" << 1));
+ }
+
+ // Second index has intervening fields which leads to more
+ // skipping of keys. We should pick a plan that uses the first.
+ addIndex(BSON("a" << 1 << "b" << 1));
+ addIndex(BSON("a" << 1 << "x" << 1 << "y" << 1 << "z" << 1 << "b" << 1));
+
+ CanonicalQuery* cq;
+ ASSERT(CanonicalQuery::canonicalize(ns,
+ fromjson("{a: 1, b: 1}"),
+ &cq).isOK());
+ ASSERT(NULL != cq);
+
+ // Expect to use index {a: 1, b: 1}.
+ QuerySolution* soln = pickBestPlan(cq);
+ ASSERT(QueryPlannerTestLib::solutionMatches(
+ "{fetch: {node: {ixscan: {pattern: {a: 1, b: 1}}}}}",
+ soln->root.get()));
+ }
+ };
+
class All : public Suite {
public:
All() : Suite( "query_plan_ranking" ) {}
@@ -744,6 +775,7 @@ namespace PlanRankingTests {
// add<PlanRankingChooseBetweenIxisectPlans>();
add<PlanRankingAvoidBlockingSort>();
add<PlanRankingWorkPlansLongEnough>();
+ add<PlanRankingAccountForKeySkips>();
}
} planRankingAll;
diff --git a/src/mongo/dbtests/query_stage_and.cpp b/src/mongo/dbtests/query_stage_and.cpp
index a7b77876237..5ebad4fb6e8 100644
--- a/src/mongo/dbtests/query_stage_and.cpp
+++ b/src/mongo/dbtests/query_stage_and.cpp
@@ -802,7 +802,9 @@ namespace QueryStageAnd {
// This isn't true in general if the collection is not dropped beforehand.
WorkingSetID id = WorkingSet::INVALID_ID;
- // Sorted AND looks at the first child, which is an index scan over foo==1.
+ // Sorted AND looks at the first child, which is an index scan over foo==1. The
+ // first work() just does initialization, so we have to call work() twice.
+ ah->work(&id);
ah->work(&id);
// The first thing that the index scan returns (due to increasing DiskLoc trick) is the