summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2014-03-03 12:10:51 -0500
committerDavid Storch <david.storch@10gen.com>2014-03-04 17:20:59 -0500
commit33912523e40467e1a41bed60ed7e198200a040ad (patch)
tree8dc63fd042c36a10a6cfae4a928e5026f2280959
parentdb6f4c6227581996ecba987be21912ed0824592c (diff)
downloadmongo-33912523e40467e1a41bed60ed7e198200a040ad.tar.gz
SERVER-3071 penalize fetches during ranking
-rw-r--r--src/mongo/db/query/plan_ranker.cpp14
-rw-r--r--src/mongo/db/query/query_planner_params.h2
-rw-r--r--src/mongo/dbtests/plan_ranking.cpp278
3 files changed, 278 insertions, 16 deletions
diff --git a/src/mongo/db/query/plan_ranker.cpp b/src/mongo/db/query/plan_ranker.cpp
index cde50d47874..84884db9bdb 100644
--- a/src/mongo/db/query/plan_ranker.cpp
+++ b/src/mongo/db/query/plan_ranker.cpp
@@ -180,10 +180,14 @@ namespace mongo {
// be greater than that.
double baseScore = 1;
+ // How many "units of work" did the plan perform. Each call to work(...)
+ // counts as one unit, and each NEED_FETCH is penalized as an additional work unit.
+ size_t workUnits = stats->common.works + stats->common.needFetch;
+
// How much did a plan produce?
// Range: [0, 1]
double productivity = static_cast<double>(stats->common.advanced)
- / static_cast<double>(stats->common.works);
+ / static_cast<double>(workUnits);
// If we have to perform a fetch, that's not great.
//
@@ -215,7 +219,13 @@ namespace mongo {
double score = baseScore + productivity + noFetchBonus + noIxisectBonus;
QLOG() << "score (" << score << ") = baseScore(" << baseScore << ")"
- << " + productivity(" << productivity << ")"
+ << " + productivity((" << stats->common.advanced
+ << " advanced)/("
+ << stats->common.works
+ << " works + "
+ << stats->common.needFetch
+ << " needFetch) = "
+ << productivity << ")"
<< " + noFetchBonus(" << noFetchBonus << ")"
<< " + noIxisectBonus(" << noIxisectBonus << ")"
<< endl;
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 49557d8f34c..14a0d55707a 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -38,7 +38,7 @@ namespace mongo {
struct QueryPlannerParams {
// How many indexed solutions are we willing to output?
- static const size_t kDefaultMaxIndexedSolutions = 5;
+ static const size_t kDefaultMaxIndexedSolutions = 6;
QueryPlannerParams() : options(DEFAULT),
indexFiltersApplied(false),
diff --git a/src/mongo/dbtests/plan_ranking.cpp b/src/mongo/dbtests/plan_ranking.cpp
index ea9d16b3450..5d15a00fd45 100644
--- a/src/mongo/dbtests/plan_ranking.cpp
+++ b/src/mongo/dbtests/plan_ranking.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/query/query_planner_test_lib.h"
#include "mongo/db/query/stage_builder.h"
#include "mongo/dbtests/dbtests.h"
+#include "mongo/util/fail_point_service.h"
namespace mongo {
@@ -55,6 +56,9 @@ namespace PlanRankingTests {
static const char* ns = "unittests.PlanRankingTests";
+ // The name of the "fetch always required" failpoint.
+ static const char* kFetchFpName = "fetchInMemoryFail";
+
class PlanRankingTestBase {
public:
PlanRankingTestBase() : _forceIntersectionPlans(forceIntersectionPlans) {
@@ -92,18 +96,6 @@ namespace PlanRankingTests {
// Turn this off otherwise it pops up in some plans.
plannerParams.options &= ~QueryPlannerParams::KEEP_MUTATIONS;
- // Fill out the available indices.
- IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
- while (ii.more()) {
- const IndexDescriptor* desc = ii.next();
- plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
- desc->getAccessMethodName(),
- desc->isMultikey(),
- desc->isSparse(),
- desc->indexName(),
- desc->infoObj()));
- }
-
// Plan.
vector<QuerySolution*> solutions;
Status status = QueryPlanner::plan(*cq, plannerParams, &solutions);
@@ -139,6 +131,20 @@ namespace PlanRankingTests {
return _mpr->hasBackupPlan();
}
+ void turnOnAlwaysFetch() {
+ FailPointRegistry* registry = getGlobalFailPointRegistry();
+ FailPoint* failPoint = registry->getFailPoint(kFetchFpName);
+ ASSERT(NULL != failPoint);
+ failPoint->setMode(FailPoint::alwaysOn);
+ }
+
+ void turnOffAlwaysFetch() {
+ FailPointRegistry* registry = getGlobalFailPointRegistry();
+ FailPoint* failPoint = registry->getFailPoint(kFetchFpName);
+ ASSERT(NULL != failPoint);
+ failPoint->setMode(FailPoint::off);
+ }
+
private:
static DBDirectClient _client;
scoped_ptr<MultiPlanRunner> _mpr;
@@ -149,7 +155,7 @@ namespace PlanRankingTests {
DBDirectClient PlanRankingTestBase::_client;
- /**
+ /**
* Test that the "prefer ixisect" parameter works.
*/
class PlanRankingIntersectOverride : public PlanRankingTestBase {
@@ -453,6 +459,247 @@ namespace PlanRankingTests {
}
};
+ /**
+ * Index intersection solutions can be covered when single-index solutions
+ * are not. If the single-index solutions need to do a lot of fetching,
+ * then ixisect should win.
+ */
+ class PlanRankingIxisectCovered : public PlanRankingTestBase {
+ public:
+ void run() {
+ // Simulate needing lots of FETCH's.
+ turnOnAlwaysFetch();
+
+ static const int N = 10000;
+
+ // Neither 'a' nor 'b' is selective.
+ for (int i = 0; i < N; ++i) {
+ insert(BSON("a" << 1 << "b" << 1));
+ }
+
+ // Add indices on 'a' and 'b'.
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+
+ // Query {a:1, b:1}, and project out all fields other than 'a' and 'b'.
+ CanonicalQuery* cq;
+ ASSERT(CanonicalQuery::canonicalize(ns,
+ BSON("a" << 1 << "b" << 1),
+ BSONObj(),
+ BSON("_id" << 0 << "a" << 1 << "b" << 1),
+ &cq).isOK());
+ ASSERT(NULL != cq);
+
+ // We should choose an ixisect plan because it requires fewer fetches.
+ // Takes ownership of cq.
+ QuerySolution* soln = pickBestPlan(cq);
+ ASSERT(QueryPlannerTestLib::solutionMatches(
+ "{proj: {spec: {_id:0,a:1,b:1}, node: {andSorted: {nodes: ["
+ "{ixscan: {filter: null, pattern: {a:1}}},"
+ "{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
+ soln->root.get()));
+
+ turnOffAlwaysFetch();
+ }
+ };
+
+ /**
+ * Use the same data, same indices, and same query as the previous
+ * test case, except without the projection. The query is not covered
+ * by the index in this case, which means that there is no advantage
+ * to an index intersection solution.
+ */
+ class PlanRankingIxisectNonCovered : public PlanRankingTestBase {
+ public:
+ void run() {
+ // Simulate needing lots of FETCH's.
+ turnOnAlwaysFetch();
+
+ static const int N = 10000;
+
+ // Neither 'a' nor 'b' is selective.
+ for (int i = 0; i < N; ++i) {
+ insert(BSON("a" << 1 << "b" << 1));
+ }
+
+ // Add indices on 'a' and 'b'.
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+
+ // Query {a:1, b:1}.
+ CanonicalQuery* cq;
+ ASSERT(CanonicalQuery::canonicalize(ns,
+ BSON("a" << 1 << "b" << 1),
+ &cq).isOK());
+ ASSERT(NULL != cq);
+
+ // The intersection is large, and ixisect does not make the
+ // query covered. We should NOT choose an intersection plan.
+ QuerySolution* soln = pickBestPlan(cq);
+ bool bestIsScanOverA = QueryPlannerTestLib::solutionMatches(
+ "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}",
+ soln->root.get());
+ bool bestIsScanOverB = QueryPlannerTestLib::solutionMatches(
+ "{fetch: {node: {ixscan: {pattern: {b: 1}}}}}",
+ soln->root.get());
+ ASSERT(bestIsScanOverA || bestIsScanOverB);
+
+ turnOffAlwaysFetch();
+ }
+ };
+
+ /**
+ * Index intersection solutions may require fewer fetches even if it does not make the
+ * query covered. The ixisect plan will scan as many index keys as the union of the two
+ * single index plans, but only needs to retrieve full documents for the intersection
+ * of the two plans---this could mean fewer fetches!
+ */
+ class PlanRankingNonCoveredIxisectFetchesLess : public PlanRankingTestBase {
+ public:
+ void run() {
+ // Simulate needing lots of FETCH's.
+ turnOnAlwaysFetch();
+
+ static const int N = 10000;
+
+ // Set up data so that the following conditions hold:
+ // 1) Documents matching {a: 1} are of high cardinality.
+ // 2) Documents matching {b: 1} are of high cardinality.
+ // 3) Documents matching {a: 1, b: 1} are of low cardinality---
+ // the intersection is small.
+ // 4) At least one of the documents in the intersection is
+ // returned during the trial period.
+ insert(BSON("a" << 1 << "b" << 1));
+ for (int i = 0; i < N/2; ++i) {
+ insert(BSON("a" << 1 << "b" << 2));
+ }
+ for (int i = 0; i < N/2; ++i) {
+ insert(BSON("a" << 2 << "b" << 1));
+ }
+
+ // Add indices on 'a' and 'b'.
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+
+ // Neither the predicate on 'b' nor the predicate on 'a' is
+ // very selective: both retrieve about half the documents.
+ // However, the intersection is very small, which makes
+ // the intersection plan desirable.
+ CanonicalQuery* cq;
+ ASSERT(CanonicalQuery::canonicalize(ns,
+ fromjson("{a: 1, b: 1}"),
+ &cq).isOK());
+ ASSERT(NULL != cq);
+
+ QuerySolution* soln = pickBestPlan(cq);
+ ASSERT(QueryPlannerTestLib::solutionMatches(
+ "{fetch: {filter: null, node: {andSorted: {nodes: ["
+ "{ixscan: {filter: null, pattern: {a:1}}},"
+ "{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
+ soln->root.get()));
+
+ turnOffAlwaysFetch();
+ }
+ };
+
+ /**
+ * If the intersection is small, an AND_SORTED plan may be able to
+ * hit EOF before the single index plans.
+ */
+ class PlanRankingIxisectHitsEOFFirst : public PlanRankingTestBase {
+ public:
+ void run() {
+ // Simulate needing lots of FETCH's.
+ turnOnAlwaysFetch();
+
+ static const int N = 10000;
+
+ // Set up the data so that for the query {a: 1, b: 1}, the
+ // intersection is empty. The single index plans have to do
+ // more fetching from disk in order to determine that the result
+ // set is empty. As a result, the intersection plan hits EOF first.
+ for (int i = 0; i < 30; ++i) {
+ insert(BSON("a" << 1 << "b" << 2));
+ }
+ for (int i = 0; i < 30; ++i) {
+ insert(BSON("a" << 2 << "b" << 1));
+ }
+ for (int i = 0; i < N; ++i) {
+ insert(BSON("a" << 2 << "b" << 2));
+ }
+
+ // Add indices on 'a' and 'b'.
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+
+ CanonicalQuery* cq;
+ ASSERT(CanonicalQuery::canonicalize(ns,
+ fromjson("{a: 1, b: 1}"),
+ &cq).isOK());
+ ASSERT(NULL != cq);
+
+ // Choose the index intersection plan.
+ QuerySolution* soln = pickBestPlan(cq);
+ ASSERT(QueryPlannerTestLib::solutionMatches(
+ "{fetch: {filter: null, node: {andSorted: {nodes: ["
+ "{ixscan: {filter: null, pattern: {a:1}}},"
+ "{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
+ soln->root.get()));
+
+ turnOffAlwaysFetch();
+ }
+ };
+
+ /**
+ * If we query on 'a', 'b', and 'c' with indices on all three fields,
+ * then there are three possible size-2 index intersections to consider.
+ * Make sure we choose the right one.
+ */
+ class PlanRankingChooseBetweenIxisectPlans : public PlanRankingTestBase {
+ public:
+ void run() {
+ // Simulate needing lots of FETCH's.
+ turnOnAlwaysFetch();
+
+ static const int N = 10000;
+
+ // Set up the data so that for the query {a: 1, b: 1, c: 1}, the intersection
+ // between 'b' and 'c' is small, and the other intersections are larger.
+ for (int i = 0; i < 10; ++i) {
+ insert(BSON("a" << 1 << "b" << 1 << "c" << 1));
+ }
+ for (int i = 0; i < 10; ++i) {
+ insert(BSON("a" << 2 << "b" << 1 << "c" << 1));
+ }
+ for (int i = 0; i < N/2; ++i) {
+ insert(BSON("a" << 1 << "b" << 1 << "c" << 2));
+ insert(BSON("a" << 1 << "b" << 2 << "c" << 1));
+ }
+
+ // Add indices on 'a', 'b', and 'c'.
+ addIndex(BSON("a" << 1));
+ addIndex(BSON("b" << 1));
+ addIndex(BSON("c" << 1));
+
+ CanonicalQuery* cq;
+ ASSERT(CanonicalQuery::canonicalize(ns,
+ fromjson("{a: 1, b: 1, c: 1}"),
+ &cq).isOK());
+ ASSERT(NULL != cq);
+
+ // Intersection between 'b' and 'c' should hit EOF while the
+ // other plans are busy fetching documents.
+ QuerySolution* soln = pickBestPlan(cq);
+ ASSERT(QueryPlannerTestLib::solutionMatches(
+ "{fetch: {filter: {a:1}, node: {andSorted: {nodes: ["
+ "{ixscan: {filter: null, pattern: {b:1}}},"
+ "{ixscan: {filter: null, pattern: {c:1}}}]}}}}",
+ soln->root.get()));
+
+ turnOffAlwaysFetch();
+ }
+ };
+
class All : public Suite {
public:
All() : Suite( "query_plan_ranking" ) {}
@@ -466,6 +713,11 @@ namespace PlanRankingTests {
add<PlanRankingPreferImmediateEOF>();
add<PlanRankingNoCollscan>();
add<PlanRankingCollscan>();
+ add<PlanRankingIxisectCovered>();
+ add<PlanRankingIxisectNonCovered>();
+ add<PlanRankingNonCoveredIxisectFetchesLess>();
+ add<PlanRankingIxisectHitsEOFFirst>();
+ add<PlanRankingChooseBetweenIxisectPlans>();
}
} planRankingAll;