/**
* Copyright (C) 2014 MongoDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
/**
* This file tests db/query/plan_ranker.cpp and db/query/multi_plan_runner.cpp.
*/
#include "mongo/platform/basic.h"
#include
#include "mongo/client/dbclientcursor.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/client.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/exec/multi_plan.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/json.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/query_knobs.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_test_lib.h"
#include "mongo/db/query/stage_builder.h"
#include "mongo/dbtests/dbtests.h"
#include "mongo/stdx/memory.h"
namespace mongo {
// How we access the external setParameter testing bool.
extern AtomicBool internalQueryForceIntersectionPlans;
extern AtomicBool internalQueryPlannerEnableHashIntersection;
} // namespace mongo
namespace PlanRankingTests {
using std::unique_ptr;
using std::vector;
static const NamespaceString nss("unittests.PlanRankingTests");
class PlanRankingTestBase {
public:
PlanRankingTestBase()
: _internalQueryForceIntersectionPlans(internalQueryForceIntersectionPlans.load()),
_enableHashIntersection(internalQueryPlannerEnableHashIntersection.load()),
_client(&_opCtx) {
// Run all tests with hash-based intersection enabled.
internalQueryPlannerEnableHashIntersection.store(true);
// Ensure N is significantly larger then internalQueryPlanEvaluationWorks.
ASSERT_GTE(N, internalQueryPlanEvaluationWorks.load() + 1000);
OldClientWriteContext ctx(&_opCtx, nss.ns());
_client.dropCollection(nss.ns());
}
virtual ~PlanRankingTestBase() {
// Restore external setParameter testing bools.
internalQueryForceIntersectionPlans.store(_internalQueryForceIntersectionPlans);
internalQueryPlannerEnableHashIntersection.store(_enableHashIntersection);
}
void insert(const BSONObj& obj) {
OldClientWriteContext ctx(&_opCtx, nss.ns());
_client.insert(nss.ns(), obj);
}
void addIndex(const BSONObj& obj) {
ASSERT_OK(dbtests::createIndex(&_opCtx, nss.ns(), obj));
}
/**
* Use the MultiPlanRunner to pick the best plan for the query 'cq'. Goes through
* normal planning to generate solutions and feeds them to the MPR.
*
* Does NOT take ownership of 'cq'. Caller DOES NOT own the returned QuerySolution*.
*/
QuerySolution* pickBestPlan(CanonicalQuery* cq) {
AutoGetCollectionForReadCommand ctx(&_opCtx, nss);
Collection* collection = ctx.getCollection();
QueryPlannerParams plannerParams;
fillOutPlannerParams(&_opCtx, collection, cq, &plannerParams);
// Turn this off otherwise it pops up in some plans.
plannerParams.options &= ~QueryPlannerParams::KEEP_MUTATIONS;
// Plan.
vector solutions;
Status status = QueryPlanner::plan(*cq, plannerParams, &solutions);
ASSERT(status.isOK());
ASSERT_GREATER_THAN_OR_EQUALS(solutions.size(), 1U);
// Fill out the MPR.
_mps.reset(new MultiPlanStage(&_opCtx, collection, cq));
unique_ptr ws(new WorkingSet());
// Put each solution from the planner into the MPR.
for (size_t i = 0; i < solutions.size(); ++i) {
PlanStage* root;
ASSERT(StageBuilder::build(&_opCtx, collection, *cq, *solutions[i], ws.get(), &root));
// Takes ownership of all (actually some) arguments.
_mps->addPlan(solutions[i], root, ws.get());
}
// This is what sets a backup plan, should we test for it.
PlanYieldPolicy yieldPolicy(PlanExecutor::NO_YIELD,
_opCtx.getServiceContext()->getFastClockSource());
_mps->pickBestPlan(&yieldPolicy).transitional_ignore();
ASSERT(_mps->bestPlanChosen());
size_t bestPlanIdx = _mps->bestPlanIdx();
ASSERT_LESS_THAN(bestPlanIdx, solutions.size());
// And return a pointer to the best solution.
return _mps->bestSolution();
}
/**
* Was a backup plan picked during the ranking process?
*/
bool hasBackupPlan() const {
ASSERT(NULL != _mps.get());
return _mps->hasBackupPlan();
}
OperationContext* opCtx() {
return &_opCtx;
}
protected:
// A large number, which must be larger than the number of times
// candidate plans are worked by the multi plan runner. Used for
// determining the number of documents in the tests below.
const int N = 12000;
const ServiceContext::UniqueOperationContext _txnPtr = cc().makeOperationContext();
OperationContext& _opCtx = *_txnPtr;
private:
// Holds the value of global "internalQueryForceIntersectionPlans" setParameter flag.
// Restored at end of test invocation regardless of test result.
bool _internalQueryForceIntersectionPlans;
// Holds the value of the global set parameter so it can be restored at the end
// of the test.
bool _enableHashIntersection;
unique_ptr _mps;
DBDirectClient _client;
};
/**
* Test that the "prefer ixisect" parameter works.
*/
class PlanRankingIntersectOverride : public PlanRankingTestBase {
public:
void run() {
// 'a' is very selective, 'b' is not.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << i << "b" << 1));
}
// Add indices on 'a' and 'b'.
addIndex(BSON("a" << 1));
addIndex(BSON("b" << 1));
unique_ptr cq;
// Run the query {a:4, b:1}.
{
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 100 << "b" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
verify(statusWithCQ.isOK());
cq = std::move(statusWithCQ.getValue());
ASSERT(cq.get());
}
// {a:100} is super selective so choose that.
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(QueryPlannerTestLib::solutionMatches(
"{fetch: {filter: {b:1}, node: {ixscan: {pattern: {a: 1}}}}}", soln->root.get()));
// Turn on the "force intersect" option.
// This will be reverted by PlanRankingTestBase's destructor when the test completes.
internalQueryForceIntersectionPlans.store(true);
// And run the same query again.
{
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 100 << "b" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
verify(statusWithCQ.isOK());
cq = std::move(statusWithCQ.getValue());
}
// With the "ranking picks ixisect always" option we pick an intersection plan that uses
// both the {a:1} and {b:1} indices even though it performs poorly.
soln = pickBestPlan(cq.get());
ASSERT(
QueryPlannerTestLib::solutionMatches("{fetch: {node: {andSorted: {nodes: ["
"{ixscan: {filter: null, pattern: {a:1}}},"
"{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
soln->root.get()));
}
};
/**
* Test that a hashed AND solution plan is picked along with a non-blocking backup solution.
*/
class PlanRankingIntersectWithBackup : public PlanRankingTestBase {
public:
void run() {
// 'a' is very selective, 'b' is not.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << i << "b" << 1));
}
// Add indices on 'a' and 'b'.
addIndex(BSON("a" << 1));
addIndex(BSON("b" << 1));
// Run the query {a:1, b:{$gt:1}.
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 1 << "b" << BSON("$gt" << 1)));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
verify(statusWithCQ.isOK());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
// Turn on the "force intersect" option.
// This will be reverted by PlanRankingTestBase's destructor when the test completes.
internalQueryForceIntersectionPlans.store(true);
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(
QueryPlannerTestLib::solutionMatches("{fetch: {node: {andHash: {nodes: ["
"{ixscan: {filter: null, pattern: {a:1}}},"
"{ixscan: {filter: null, pattern: {b:1}}}]}}}}",
soln->root.get()));
// Confirm that a backup plan is available.
ASSERT(hasBackupPlan());
}
};
/**
* Two plans hit EOF at the same time, but one is covered. Make sure that we prefer the covered
* plan.
*/
class PlanRankingPreferCovered : public PlanRankingTestBase {
public:
void run() {
// Insert data {a:i, b:i}. Index {a:1} and {a:1, b:1}, query on 'a', projection on 'a'
// and 'b'. Should prefer the second index as we can pull the 'b' data out.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << i << "b" << i));
}
addIndex(BSON("a" << 1));
addIndex(BSON("a" << 1 << "b" << 1));
// Query for a==27 with projection that wants 'a' and 'b'.
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 27));
qr->setProj(BSON("_id" << 0 << "a" << 1 << "b" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
QuerySolution* soln = pickBestPlan(cq.get());
// Prefer the fully covered plan.
ASSERT(QueryPlannerTestLib::solutionMatches(
"{proj: {spec: {_id:0, a:1, b:1}, node: {ixscan: {pattern: {a: 1, b:1}}}}}",
soln->root.get()));
}
};
/**
* No plan produces any results or hits EOF. In this case we should never choose an index
* intersection solution.
*/
class PlanRankingAvoidIntersectIfNoResults : public PlanRankingTestBase {
public:
void run() {
// We insert lots of copies of {a:1, b:1, c: 20}. We have the indices {a:1} and {b:1},
// and the query is {a:1, b:1, c: 999}. No data that matches the query but we won't
// know that during plan ranking. We don't want to choose an intersection plan here.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << 1 << "b" << 1 << "c" << 20));
}
addIndex(BSON("a" << 1));
addIndex(BSON("b" << 1));
// There is no data that matches this query but we don't know that until EOF.
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 1 << "b" << 1 << "c" << 99));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
QuerySolution* soln = pickBestPlan(cq.get());
// Anti-prefer the intersection plan.
bool bestIsScanOverA = QueryPlannerTestLib::solutionMatches(
"{fetch: {node: {ixscan: {pattern: {a: 1}}}}}", soln->root.get());
bool bestIsScanOverB = QueryPlannerTestLib::solutionMatches(
"{fetch: {node: {ixscan: {pattern: {b: 1}}}}}", soln->root.get());
ASSERT(bestIsScanOverA || bestIsScanOverB);
}
};
/**
* No plan produces any results or hits EOF. In this case we should prefer covered solutions to
* non-covered solutions.
*/
class PlanRankingPreferCoveredEvenIfNoResults : public PlanRankingTestBase {
public:
void run() {
// We insert lots of copies of {a:1, b:1}. We have the indices {a:1} and {a:1, b:1},
// the query is for a doc that doesn't exist, but there is a projection over 'a' and
// 'b'. We should prefer the index that provides a covered query.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << 1 << "b" << 1));
}
addIndex(BSON("a" << 1));
addIndex(BSON("a" << 1 << "b" << 1));
// There is no data that matches this query ({a:2}). Both scans will hit EOF before
// returning any data.
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 2));
qr->setProj(BSON("_id" << 0 << "a" << 1 << "b" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
QuerySolution* soln = pickBestPlan(cq.get());
// Prefer the fully covered plan.
ASSERT(QueryPlannerTestLib::solutionMatches(
"{proj: {spec: {_id:0, a:1, b:1}, node: {ixscan: {pattern: {a: 1, b:1}}}}}",
soln->root.get()));
}
};
/**
* We have an index on "a" which is somewhat selective and an index on "b" which is highly
* selective (will cause an immediate EOF). Make sure that a query with predicates on both "a"
* and "b" will use the index on "b".
*/
class PlanRankingPreferImmediateEOF : public PlanRankingTestBase {
public:
void run() {
// 'a' is very selective, 'b' is not.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << i << "b" << 1));
}
// Add indices on 'a' and 'b'.
addIndex(BSON("a" << 1));
addIndex(BSON("b" << 1));
// Run the query {a:N+1, b:1}. (No such document.)
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << N + 1 << "b" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
verify(statusWithCQ.isOK());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
// {a: 100} is super selective so choose that.
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(QueryPlannerTestLib::solutionMatches(
"{fetch: {filter: {b:1}, node: {ixscan: {pattern: {a: 1}}}}}", soln->root.get()));
}
};
/**
* Same as PlanRankingPreferImmediateEOF, but substitute a range predicate on "a" for the
* equality predicate on "a". The presence of the range predicate has an impact on the
* intersection plan that is raced against the single-index plans: since "a" no longer generates
* point interval bounds, the results of the index scan aren't guaranteed to be returned in
* RecordId order, and so the intersection plan uses the AND_HASHED stage instead of the
* AND_SORTED stage. It is still the case that the query should pick the plan that uses index
* "b", instead of the plan that uses index "a" or the (hashed) intersection plan.
*/
class PlanRankingPreferImmediateEOFAgainstHashed : public PlanRankingTestBase {
public:
void run() {
// 'a' is very selective, 'b' is not.
for (int i = 0; i < N; ++i) {
insert(BSON("a" << i << "b" << 1));
}
// Add indices on 'a' and 'b'.
addIndex(BSON("a" << 1));
addIndex(BSON("b" << 1));
// Run the query {a:N+1, b:1}. (No such document.)
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << BSON("$gte" << N + 1) << "b" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
verify(statusWithCQ.isOK());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
// {a: 100} is super selective so choose that.
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(QueryPlannerTestLib::solutionMatches(
"{fetch: {filter: {b:1}, node: {ixscan: {pattern: {a: 1}}}}}", soln->root.get()));
}
};
/**
* We have an index on _id and a query over _id with a sort. Ensure that we don't pick a
* collscan as the best plan even though the _id-scanning solution doesn't produce any results.
*/
class PlanRankingNoCollscan : public PlanRankingTestBase {
public:
void run() {
for (int i = 0; i < N; ++i) {
insert(BSON("_id" << i));
}
addIndex(BSON("_id" << 1));
// Run a query with a sort. The blocking sort won't produce any data during the
// evaluation period.
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("_id" << BSON("$gte" << 20 << "$lte" << 200)));
qr->setSort(BSON("c" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
QuerySolution* soln = pickBestPlan(cq.get());
// The best must not be a collscan.
ASSERT(QueryPlannerTestLib::solutionMatches(
"{sort: {pattern: {c: 1}, limit: 0, node: {sortKeyGen: {node:"
"{fetch: {filter: null, node: "
"{ixscan: {filter: null, pattern: {_id: 1}}}}}}}}}",
soln->root.get()));
}
};
/**
* No indices are available, output a collscan.
*/
class PlanRankingCollscan : public PlanRankingTestBase {
public:
void run() {
// Insert data for which we have no index.
for (int i = 0; i < N; ++i) {
insert(BSON("foo" << i));
}
// Look for A Space Odyssey.
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("foo" << 2001));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
verify(statusWithCQ.isOK());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
QuerySolution* soln = pickBestPlan(cq.get());
// The best must be a collscan.
ASSERT(QueryPlannerTestLib::solutionMatches("{cscan: {dir: 1, filter: {foo: 2001}}}",
soln->root.get()));
}
};
/**
* When no other information is available, prefer solutions without
* a blocking sort stage.
*/
class PlanRankingAvoidBlockingSort : public PlanRankingTestBase {
public:
void run() {
for (int i = 0; i < N; ++i) {
insert(BSON("a" << 1 << "d" << i));
}
// The index {d: 1, e: 1} provides the desired sort order,
// while index {a: 1, b: 1} can be used to answer the
// query predicate, but does not provide the sort.
addIndex(BSON("a" << 1 << "b" << 1));
addIndex(BSON("d" << 1 << "e" << 1));
// Query: find({a: 1}).sort({d: 1})
auto qr = stdx::make_unique(nss);
qr->setFilter(BSON("a" << 1));
qr->setSort(BSON("d" << 1));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
// No results will be returned during the trial period,
// so we expect to choose {d: 1, e: 1}, as it allows us
// to avoid the sort stage.
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(
QueryPlannerTestLib::solutionMatches("{fetch: {filter: {a:1}, node: "
"{ixscan: {filter: null, pattern: {d:1,e:1}}}}}",
soln->root.get()));
}
};
/**
* Make sure we run candidate plans for long enough when none of the
* plans are producing results.
*/
class PlanRankingWorkPlansLongEnough : public PlanRankingTestBase {
public:
void run() {
for (int i = 0; i < N; ++i) {
insert(BSON("a" << 1));
insert(BSON("a" << 1 << "b" << 1 << "c" << i));
}
// Indices on 'a' and 'b'.
addIndex(BSON("a" << 1));
addIndex(BSON("b" << 1));
// Solutions using either 'a' or 'b' will take a long time to start producing
// results. However, an index scan on 'b' will start producing results sooner
// than an index scan on 'a'.
auto qr = stdx::make_unique(nss);
qr->setFilter(fromjson("{a: 1, b: 1, c: {$gte: 5000}}"));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
// Use index on 'b'.
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(QueryPlannerTestLib::solutionMatches("{fetch: {node: {ixscan: {pattern: {b: 1}}}}}",
soln->root.get()));
}
};
/**
* Suppose we have two plans which are roughly equivalent, other than that
* one uses an index which involves doing a lot more skipping of index keys.
* Prefer the plan which does not have to do this index key skipping.
*/
class PlanRankingAccountForKeySkips : public PlanRankingTestBase {
public:
void run() {
for (int i = 0; i < 100; ++i) {
insert(BSON("a" << i << "b" << i << "c" << i));
}
// These indices look equivalent to the ranker for the query below unless we account
// for key skipping. We should pick index {a: 1} if we account for key skipping
// properly.
addIndex(BSON("b" << 1 << "c" << 1));
addIndex(BSON("a" << 1));
auto qr = stdx::make_unique(nss);
qr->setFilter(fromjson("{a: 9, b: {$ne: 10}, c: 9}"));
auto statusWithCQ = CanonicalQuery::canonicalize(opCtx(), std::move(qr));
ASSERT_OK(statusWithCQ.getStatus());
unique_ptr cq = std::move(statusWithCQ.getValue());
ASSERT(NULL != cq.get());
// Expect to use index {a: 1, b: 1}.
QuerySolution* soln = pickBestPlan(cq.get());
ASSERT(QueryPlannerTestLib::solutionMatches("{fetch: {node: {ixscan: {pattern: {a: 1}}}}}",
soln->root.get()));
}
};
class All : public Suite {
public:
All() : Suite("query_plan_ranking") {}
void setupTests() {
add();
add();
add();
add();
add();
add();
add();
add();
add();
add();
add();
add();
}
};
SuiteInstance planRankingAll;
} // namespace PlanRankingTest