SERVER-3071 penalize fetches during ranking

author: David Storch <david.storch@10gen.com> 2014-03-03 12:10:51 -0500
committer: David Storch <david.storch@10gen.com> 2014-03-04 17:20:59 -0500
commit: 33912523e40467e1a41bed60ed7e198200a040ad (patch)
tree: 8dc63fd042c36a10a6cfae4a928e5026f2280959
parent: db6f4c6227581996ecba987be21912ed0824592c (diff)
download: mongo-33912523e40467e1a41bed60ed7e198200a040ad.tar.gz
3 files changed, 278 insertions, 16 deletions
diff --git a/src/mongo/db/query/plan_ranker.cpp b/src/mongo/db/query/plan_ranker.cpp
index cde50d47874..84884db9bdb 100644
--- a/src/mongo/db/query/plan_ranker.cpp
+++ b/src/mongo/db/query/plan_ranker.cpp
@@ -180,10 +180,14 @@ namespace mongo {
         // be greater than that.
         double baseScore = 1;
 
+        // How many "units of work" did the plan perform. Each call to work(...)
+        // counts as one unit, and each NEED_FETCH is penalized as an additional work unit.
+        size_t workUnits = stats->common.works + stats->common.needFetch;
+
         // How much did a plan produce?
         // Range: [0, 1]
         double productivity = static_cast<double>(stats->common.advanced)
-                            / static_cast<double>(stats->common.works);
+                            / static_cast<double>(workUnits);
 
         // If we have to perform a fetch, that's not great.
         //
@@ -215,7 +219,13 @@ namespace mongo {
         double score = baseScore + productivity + noFetchBonus + noIxisectBonus;
 
         QLOG() << "score (" << score << ") = baseScore(" << baseScore << ")"
-                                     <<  " + productivity(" << productivity << ")"
+                                     <<  " + productivity((" << stats->common.advanced
+                                                             << " advanced)/("
+                                                             << stats->common.works
+                                                             << " works + "
+                                                             << stats->common.needFetch
+                                                             << " needFetch) = "
+                                                             << productivity << ")"
                                      <<  " + noFetchBonus(" << noFetchBonus << ")"
                                      <<  " + noIxisectBonus(" << noIxisectBonus << ")"
                                      << endl;
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 49557d8f34c..14a0d55707a 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -38,7 +38,7 @@ namespace mongo {
     struct QueryPlannerParams {
 
         // How many indexed solutions are we willing to output?
-        static const size_t kDefaultMaxIndexedSolutions = 5;
+        static const size_t kDefaultMaxIndexedSolutions = 6;
 
         QueryPlannerParams() : options(DEFAULT),
                                indexFiltersApplied(false),
diff --git a/src/mongo/dbtests/plan_ranking.cpp b/src/mongo/dbtests/plan_ranking.cpp
index ea9d16b3450..5d15a00fd45 100644
--- a/src/mongo/dbtests/plan_ranking.cpp
+++ b/src/mongo/dbtests/plan_ranking.cpp
@@ -43,6 +43,7 @@
 #include "mongo/db/query/query_planner_test_lib.h"
 #include "mongo/db/query/stage_builder.h"
 #include "mongo/dbtests/dbtests.h"
+#include "mongo/util/fail_point_service.h"
 
 namespace mongo {
 
@@ -55,6 +56,9 @@ namespace PlanRankingTests {
 
     static const char* ns = "unittests.PlanRankingTests";
 
+    // The name of the "fetch always required" failpoint.
+    static const char* kFetchFpName = "fetchInMemoryFail";
+
     class PlanRankingTestBase {
     public:
         PlanRankingTestBase() : _forceIntersectionPlans(forceIntersectionPlans) {
@@ -92,18 +96,6 @@ namespace PlanRankingTests {
             // Turn this off otherwise it pops up in some plans.
             plannerParams.options &= ~QueryPlannerParams::KEEP_MUTATIONS;
 
-            // Fill out the available indices.
-            IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
-            while (ii.more()) {
-                const IndexDescriptor* desc = ii.next();
-                plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
-                                                           desc->getAccessMethodName(),
-                                                           desc->isMultikey(),
-                                                           desc->isSparse(),
-                                                           desc->indexName(),
-                                                           desc->infoObj()));
-            }
-
             // Plan.
             vector<QuerySolution*> solutions;
             Status status = QueryPlanner::plan(*cq, plannerParams, &solutions);
@@ -139,6 +131,20 @@ namespace PlanRankingTests {
             return _mpr->hasBackupPlan();
         }
 
+        void turnOnAlwaysFetch() {
+            FailPointRegistry* registry = getGlobalFailPointRegistry();
+            FailPoint* failPoint = registry->getFailPoint(kFetchFpName);
+            ASSERT(NULL != failPoint);
+            failPoint->setMode(FailPoint::alwaysOn);
+        }
+
+        void turnOffAlwaysFetch() {
+            FailPointRegistry* registry = getGlobalFailPointRegistry();
+            FailPoint* failPoint = registry->getFailPoint(kFetchFpName);
+            ASSERT(NULL != failPoint);
+            failPoint->setMode(FailPoint::off);
+        }
+
     private:
         static DBDirectClient _client;
         scoped_ptr<MultiPlanRunner> _mpr;
@@ -149,7 +155,7 @@ namespace PlanRankingTests {
 
     DBDirectClient PlanRankingTestBase::_client;
 
-    /** 
+    /**
      * Test that the "prefer ixisect" parameter works.
      */
     class PlanRankingIntersectOverride : public PlanRankingTestBase {
@@ -453,6 +459,247 @@ namespace PlanRankingTests {
         }
     };
 
+    /**
+     * Index intersection solutions can be covered when single-index solutions
+     * are not. If the single-index solutions need to do a lot of fetching,
+     * then ixisect should win.
+     */
+    class PlanRankingIxisectCovered : public PlanRankingTestBase {
+    public:
+        void run() {
+            // Simulate needing lots of FETCH's.
+            turnOnAlwaysFetch();
+
+            static const int N = 10000;
+
+            // Neither 'a' nor 'b' is selective.
+            for (int i = 0; i < N; ++i) {
+                insert(BSON("a" << 1 << "b" << 1));
+            }
+
+            // Add indices on 'a' and 'b'.
+            addIndex(BSON("a" << 1));
+            addIndex(BSON("b" << 1));
+
+            // Query {a:1, b:1}, and project out all fields other than 'a' and 'b'.
+            CanonicalQuery* cq;
+            ASSERT(CanonicalQuery::canonicalize(ns,
+                                                BSON("a" << 1 << "b" << 1),
+                                                BSONObj(),
+                                                BSON("_id" << 0 << "a" << 1 << "b" << 1),
+                                                &cq).isOK());
+            ASSERT(NULL != cq);
+
+            // We should choose an ixisect plan because it requires fewer fetches.
+            // Takes ownership of cq.
+            QuerySolution* soln = pickBestPlan(cq);
+            ASSERT(QueryPlannerTestLib::solutionMatches(
+                "{proj: {spec: {_id:0,a:1,b:1}, node: {andSorted: {nodes: ["
+                    "{ixscan: {filter: null, pattern: {a:1}}},"
+                    "{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
+                soln->root.get()));
+
+            turnOffAlwaysFetch();
+        }
+    };
+
+    /**
+     * Use the same data, same indices, and same query as the previous
+     * test case, except without the projection. The query is not covered
+     * by the index in this case, which means that there is no advantage
+     * to an index intersection solution.
+     */
+    class PlanRankingIxisectNonCovered : public PlanRankingTestBase {
+    public:
+        void run() {
+            // Simulate needing lots of FETCH's.
+            turnOnAlwaysFetch();
+
+            static const int N = 10000;
+
+            // Neither 'a' nor 'b' is selective.
+            for (int i = 0; i < N; ++i) {
+                insert(BSON("a" << 1 << "b" << 1));
+            }
+
+            // Add indices on 'a' and 'b'.
+            addIndex(BSON("a" << 1));
+            addIndex(BSON("b" << 1));
+
+            // Query {a:1, b:1}.
+            CanonicalQuery* cq;
+            ASSERT(CanonicalQuery::canonicalize(ns,
+                                                BSON("a" << 1 << "b" << 1),
+                                                &cq).isOK());
+            ASSERT(NULL != cq);
+
+            // The intersection is large, and ixisect does not make the
+            // query covered. We should NOT choose an intersection plan.
+            QuerySolution* soln = pickBestPlan(cq);
+            bool bestIsScanOverA = QueryPlannerTestLib::solutionMatches(
+                        "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}",
+                        soln->root.get());
+            bool bestIsScanOverB = QueryPlannerTestLib::solutionMatches(
+                        "{fetch: {node: {ixscan: {pattern: {b: 1}}}}}",
+                        soln->root.get());
+            ASSERT(bestIsScanOverA || bestIsScanOverB);
+
+            turnOffAlwaysFetch();
+        }
+    };
+
+    /**
+     * Index intersection solutions may require fewer fetches even if it does not make the
+     * query covered. The ixisect plan will scan as many index keys as the union of the two
+     * single index plans, but only needs to retrieve full documents for the intersection
+     * of the two plans---this could mean fewer fetches!
+     */
+    class PlanRankingNonCoveredIxisectFetchesLess : public PlanRankingTestBase {
+    public:
+        void run() {
+            // Simulate needing lots of FETCH's.
+            turnOnAlwaysFetch();
+
+            static const int N = 10000;
+
+            // Set up data so that the following conditions hold:
+            //  1) Documents matching {a: 1} are of high cardinality.
+            //  2) Documents matching {b: 1} are of high cardinality.
+            //  3) Documents matching {a: 1, b: 1} are of low cardinality---
+            //  the intersection is small.
+            //  4) At least one of the documents in the intersection is
+            //  returned during the trial period.
+            insert(BSON("a" << 1 << "b" << 1));
+            for (int i = 0; i < N/2; ++i) {
+                insert(BSON("a" << 1 << "b" << 2));
+            }
+            for (int i = 0; i < N/2; ++i) {
+                insert(BSON("a" << 2 << "b" << 1));
+            }
+
+            // Add indices on 'a' and 'b'.
+            addIndex(BSON("a" << 1));
+            addIndex(BSON("b" << 1));
+
+            // Neither the predicate on 'b' nor the predicate on 'a' is
+            // very selective: both retrieve about half the documents.
+            // However, the intersection is very small, which makes
+            // the intersection plan desirable.
+            CanonicalQuery* cq;
+            ASSERT(CanonicalQuery::canonicalize(ns,
+                                                fromjson("{a: 1, b: 1}"),
+                                                &cq).isOK());
+            ASSERT(NULL != cq);
+
+            QuerySolution* soln = pickBestPlan(cq);
+            ASSERT(QueryPlannerTestLib::solutionMatches(
+                "{fetch: {filter: null, node: {andSorted: {nodes: ["
+                    "{ixscan: {filter: null, pattern: {a:1}}},"
+                    "{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
+                soln->root.get()));
+
+            turnOffAlwaysFetch();
+        }
+    };
+
+    /**
+     * If the intersection is small, an AND_SORTED plan may be able to
+     * hit EOF before the single index plans.
+     */
+    class PlanRankingIxisectHitsEOFFirst : public PlanRankingTestBase {
+    public:
+        void run() {
+            // Simulate needing lots of FETCH's.
+            turnOnAlwaysFetch();
+
+            static const int N = 10000;
+
+            // Set up the data so that for the query {a: 1, b: 1}, the
+            // intersection is empty. The single index plans have to do
+            // more fetching from disk in order to determine that the result
+            // set is empty. As a result, the intersection plan hits EOF first.
+            for (int i = 0; i < 30; ++i) {
+                insert(BSON("a" << 1 << "b" << 2));
+            }
+            for (int i = 0; i < 30; ++i) {
+                insert(BSON("a" << 2 << "b" << 1));
+            }
+            for (int i = 0; i < N; ++i) {
+                insert(BSON("a" << 2 << "b" << 2));
+            }
+
+            // Add indices on 'a' and 'b'.
+            addIndex(BSON("a" << 1));
+            addIndex(BSON("b" << 1));
+
+            CanonicalQuery* cq;
+            ASSERT(CanonicalQuery::canonicalize(ns,
+                                                fromjson("{a: 1, b: 1}"),
+                                                &cq).isOK());
+            ASSERT(NULL != cq);
+
+            // Choose the index intersection plan.
+            QuerySolution* soln = pickBestPlan(cq);
+            ASSERT(QueryPlannerTestLib::solutionMatches(
+                "{fetch: {filter: null, node: {andSorted: {nodes: ["
+                    "{ixscan: {filter: null, pattern: {a:1}}},"
+                    "{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
+                soln->root.get()));
+
+            turnOffAlwaysFetch();
+        }
+    };
+
+    /**
+     * If we query on 'a', 'b', and 'c' with indices on all three fields,
+     * then there are three possible size-2 index intersections to consider.
+     * Make sure we choose the right one.
+     */
+    class PlanRankingChooseBetweenIxisectPlans : public PlanRankingTestBase {
+    public:
+        void run() {
+            // Simulate needing lots of FETCH's.
+            turnOnAlwaysFetch();
+
+            static const int N = 10000;
+
+            // Set up the data so that for the query {a: 1, b: 1, c: 1}, the intersection
+            // between 'b' and 'c' is small, and the other intersections are larger.
+            for (int i = 0; i < 10; ++i) {
+                insert(BSON("a" << 1 << "b" << 1 << "c" << 1));
+            }
+            for (int i = 0; i < 10; ++i) {
+                insert(BSON("a" << 2 << "b" << 1 << "c" << 1));
+            }
+            for (int i = 0; i < N/2; ++i) {
+                insert(BSON("a" << 1 << "b" << 1 << "c" << 2));
+                insert(BSON("a" << 1 << "b" << 2 << "c" << 1));
+            }
+
+            // Add indices on 'a', 'b', and 'c'.
+            addIndex(BSON("a" << 1));
+            addIndex(BSON("b" << 1));
+            addIndex(BSON("c" << 1));
+
+            CanonicalQuery* cq;
+            ASSERT(CanonicalQuery::canonicalize(ns,
+                                                fromjson("{a: 1, b: 1, c: 1}"),
+                                                &cq).isOK());
+            ASSERT(NULL != cq);
+
+            // Intersection between 'b' and 'c' should hit EOF while the
+            // other plans are busy fetching documents.
+            QuerySolution* soln = pickBestPlan(cq);
+            ASSERT(QueryPlannerTestLib::solutionMatches(
+                "{fetch: {filter: {a:1}, node: {andSorted: {nodes: ["
+                    "{ixscan: {filter: null, pattern: {b:1}}},"
+                    "{ixscan: {filter: null, pattern: {c:1}}}]}}}}",
+                soln->root.get()));
+
+            turnOffAlwaysFetch();
+        }
+    };
+
     class All : public Suite {
     public:
         All() : Suite( "query_plan_ranking" ) {}
@@ -466,6 +713,11 @@ namespace PlanRankingTests {
             add<PlanRankingPreferImmediateEOF>();
             add<PlanRankingNoCollscan>();
             add<PlanRankingCollscan>();
+            add<PlanRankingIxisectCovered>();
+            add<PlanRankingIxisectNonCovered>();
+            add<PlanRankingNonCoveredIxisectFetchesLess>();
+            add<PlanRankingIxisectHitsEOFFirst>();
+            add<PlanRankingChooseBetweenIxisectPlans>();
         }
     } planRankingAll;
author	David Storch <david.storch@10gen.com>	2014-03-03 12:10:51 -0500
committer	David Storch <david.storch@10gen.com>	2014-03-04 17:20:59 -0500
commit	33912523e40467e1a41bed60ed7e198200a040ad (patch)
tree	8dc63fd042c36a10a6cfae4a928e5026f2280959
parent	db6f4c6227581996ecba987be21912ed0824592c (diff)
download	mongo-33912523e40467e1a41bed60ed7e198200a040ad.tar.gz