/**
* Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/index_entry.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/stdx/unordered_set.h"
namespace mongo {
class CollatorInterface;
/**
* Methods for determining what fields and predicates can use indices.
*/
class QueryPlannerIXSelect {
public:
/**
* Return all the fields in the tree rooted at 'node' that we can use an index on
* in order to answer the query.
*
* The 'prefix' argument is a path prefix to be prepended to any fields mentioned in
* predicates encountered. Some array operators specify a path prefix.
*/
static void getFields(const MatchExpression* node,
std::string prefix,
stdx::unordered_set* out);
/**
* Find all indices prefixed by fields we have predicates over. Only these indices are
* useful in answering the query.
*/
static void findRelevantIndices(const stdx::unordered_set& fields,
const std::vector& indices,
std::vector* out);
/**
* Return true if the index key pattern field 'elt' (which belongs to 'index') can be used
* to answer the predicate 'node'.
*
* For example, {field: "hashed"} can only be used with sets of equalities.
* {field: "2d"} can only be used with some geo predicates.
* {field: "2dsphere"} can only be used with some other geo predicates.
*/
static bool compatible(const BSONElement& elt,
const IndexEntry& index,
MatchExpression* node,
const CollatorInterface* collator,
bool elemMatchChild = false);
/**
* Determine how useful all of our relevant 'indices' are to all predicates in the subtree
* rooted at 'node'. Affixes a RelevantTag to all predicate nodes which can use an index.
*
* 'prefix' is a path prefix that should be prepended to any path (certain array operators
* imply a path prefix).
*
* For an index to be useful to a predicate, the index must be compatible (see above).
*
* If an index is compound but not prefixed by a predicate's path, it's only useful if
* there exists another predicate that 1. will use that index and 2. is related to the
* original predicate by having an AND as a parent.
*/
static void rateIndices(MatchExpression* node,
std::string prefix,
const std::vector& indices,
const CollatorInterface* collator);
/**
* Amend the RelevantTag lists for all predicates in the subtree rooted at 'node' to remove
* invalid assignments to text and geo indices.
*
* See the body of this function and the specific stripInvalidAssignments functions for details.
*/
static void stripInvalidAssignments(MatchExpression* node,
const std::vector& indices);
/**
* In some special cases, we can strip most of the index assignments from the tree early
* on. Specifically, if we find an AND which has a child tagged for equality over a
* single-field unique index, then all other predicate-to-index assignments can be
* stripped off the subtree rooted at 'node'.
*
* This is used to ensure that we always favor key-value lookup plans over any
* more complex plan.
*
* Example:
* Suppose you have match expression OR (AND (a==1, b==2), AND (c==3, d==4)).
* There are indices on fields, 'a', 'b', 'c', and 'd'. The index on 'd' is
* the only unique index.
*
* This code will find that the subtree AND (c==3, d==4) can be answered by
* looking up the value of 'd' in the unique index. Since no better plan than
* a single key lookup is ever available, all assignments in this subtree
* are stripped, except for the assignment of d==4 to the unique 'd' index.
*
* Stripping the assignment for 'c' causes the planner to generate just two
* possible plans:
* 1) an OR of an index scan over 'a' and an index scan over 'd'
* 2) an OR of an index scan over 'b' and an index scan over 'd'
*/
static void stripUnneededAssignments(MatchExpression* node,
const std::vector& indices);
/**
* Returns true if the indexed field has any multikey components. Illegal to call unless
* 'indexedField' is present in the key pattern for 'index'.
*/
static bool indexedFieldHasMultikeyComponents(StringData indexedField, const IndexEntry& index);
private:
/**
* Amend the RelevantTag lists for all predicates in the subtree rooted at 'node' to remove
* invalid assignments to text indexes.
*
* A predicate on a field from a compound text index with a non-empty index prefix
* (e.g. pred {a: 1, b: 1} on index {a: 1, b: 1, c: "text"}) is only considered valid to
* assign to the text index if it is a direct child of an AND with the following properties:
* - it has a TEXT child
* - for every index prefix component, it has an EQ child on that component's path
*
* Note that compatible() enforces the precondition that only EQ nodes are considered
* relevant to text index prefixes.
* If there is a relevant compound text index with a non-empty "index prefix" (e.g. the
* prefix {a: 1, b: 1} for the index {a: 1, b: 1, c: "text"}), amend the RelevantTag(s)
* created above to remove assignments to the text index where the query does not have
* predicates over each indexed field of the prefix.
*
* This is necessary because text indices do not obey the normal rules of sparseness, in
* that they generate no index keys for documents without indexable text data in at least
* one text field (in fact, text indices ignore the sparse option entirely). For example,
* given the text index {a: 1, b: 1, c: "text"}:
*
* - Document {a: 1, b: 6, c: "hello world"} generates 2 index keys
* - Document {a: 1, b: 7, c: {d: 1}} generates 0 index keys
* - Document {a: 1, b: 8} generates 0 index keys
*
* As a result, the query {a: 1} *cannot* be satisfied by the text index {a: 1, b: 1, c:
* "text"}, since documents without indexed text data would not be returned by the query.
* rateIndices() above will eagerly annotate the pred {a: 1} as relevant to the text index;
* those annotations get removed here.
*/
static void stripInvalidAssignmentsToTextIndexes(MatchExpression* node,
const std::vector& indices);
/**
* For V1 2dsphere indices we ignore the sparse option. As such we can use an index
* like {nongeo: 1, geo: "2dsphere"} to answer queries only involving nongeo.
*
* For V2 2dsphere indices also ignore the sparse flag but indexing behavior as compared to
* V1 is different. If all of the geo fields are missing from the document we do not index
* it. As such we cannot use V2 sparse indices unless we have a predicate over a geo
* field.
*
* 2dsphere indices V2 are "geo-sparse." That is, if there aren't any geo-indexed fields in
* a document it won't be indexed. As such we can't use an index like {foo:1, geo:
* "2dsphere"} to answer a query on 'foo' if the index is V2 as it will not contain the
* document {foo:1}.
*
* We *can* use it to answer a query on 'foo' if the predicate on 'foo' is AND-related to a
* predicate on every geo field in the index.
*/
static void stripInvalidAssignmentsTo2dsphereIndices(MatchExpression* node,
const std::vector& indices);
/**
* This function strips RelevantTag assignments to partial indices, where the assignment is
* incompatible with the index's filter expression.
*
* For example, suppose there exists a partial index in 'indices' with key pattern {a: 1} and
* filter expression {f: {$exists: true}}. If 'node' is {a: 1}, this function would strip the
* EQ predicate's assignment to the partial index (because if it did not, plans that use this
* index would miss documents that don't satisfy the filter expression). On the other hand, if
* 'node' is {a: 1, f: 1}, then the partial index could be used, and so this function would not
* strip the assignment.
*
* Special note about OR clauses: if 'node' contains a leaf with an assignment to a partial
* index inside an OR, this function will look both inside and outside the OR clause in an
* attempt to find predicates that could satisfy the partial index, but these predicates must be
* wholly contained either inside or outside.
*
* To illustrate, given a partial index {a: 1} with filter expression {f: true, g: true}, the
* assignment of the "a" predicate would not be stripped for either of the following
* expressions:
* - {f: true, g: true, $or: [{a: 0}, {a: 1}]}
* - {$or: [{a: 1, f: true, g: true}, {_id: 1}]}
*
* However, the assignment of the "a" predicate would be stripped in the following expression:
* - {f: true, $or: [{a: 1, g: true}, {_id: 1}]}
*
* For the last case, the assignment is stripped is because the {f: true} predicate and the
* {g: true} predicate are both needed for the {a: 1} predicate to be compatible with the
* partial index, but the {f: true} predicate is outside the OR while the {g: true} predicate is
* contained within the OR.
*/
static void stripInvalidAssignmentsToPartialIndices(MatchExpression* node,
const std::vector& indices);
};
} // namespace mongo