summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIan Boros <ian.boros@10gen.com>2018-10-09 12:22:08 -0400
committerIan Boros <ian.boros@10gen.com>2018-11-06 17:45:50 -0500
commit50630d5c4ea5f731bdfd95b2dab5ab4e9620fd10 (patch)
treec5ff2b837dd7c6de8ccd606434363ad4454b0e3a /src
parent5738e29558432727846902dde7fd257aee36deec (diff)
downloadmongo-50630d5c4ea5f731bdfd95b2dab5ab4e9620fd10.tar.gz
SERVER-33303 Add stable plan cache key and use for index filters
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/commands/index_filter_commands.cpp4
-rw-r--r--src/mongo/db/commands/index_filter_commands_test.cpp3
-rw-r--r--src/mongo/db/commands/plan_cache_commands.cpp1
-rw-r--r--src/mongo/db/curop.cpp5
-rw-r--r--src/mongo/db/curop.h4
-rw-r--r--src/mongo/db/query/SConscript12
-rw-r--r--src/mongo/db/query/canonical_query.cpp5
-rw-r--r--src/mongo/db/query/canonical_query.h10
-rw-r--r--src/mongo/db/query/canonical_query_encoder.cpp525
-rw-r--r--src/mongo/db/query/canonical_query_encoder.h49
-rw-r--r--src/mongo/db/query/canonical_query_encoder_test.cpp286
-rw-r--r--src/mongo/db/query/collation/collation_spec.h20
-rw-r--r--src/mongo/db/query/explain.cpp14
-rw-r--r--src/mongo/db/query/get_executor.cpp15
-rw-r--r--src/mongo/db/query/get_executor_test.cpp4
-rw-r--r--src/mongo/db/query/lru_key_value.h4
-rw-r--r--src/mongo/db/query/plan_cache.cpp532
-rw-r--r--src/mongo/db/query/plan_cache.h84
-rw-r--r--src/mongo/db/query/plan_cache_test.cpp308
-rw-r--r--src/mongo/db/query/query_settings.cpp6
-rw-r--r--src/mongo/db/query/query_settings.h9
21 files changed, 1177 insertions, 723 deletions
diff --git a/src/mongo/db/commands/index_filter_commands.cpp b/src/mongo/db/commands/index_filter_commands.cpp
index c9aec6823f9..f23760a4631 100644
--- a/src/mongo/db/commands/index_filter_commands.cpp
+++ b/src/mongo/db/commands/index_filter_commands.cpp
@@ -262,7 +262,7 @@ Status ClearFilters::clear(OperationContext* opCtx,
}
unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
- querySettings->removeAllowedIndices(planCache->computeKey(*cq));
+ querySettings->removeAllowedIndices(cq->encodeKey());
// Remove entry from plan cache
planCache->remove(*cq).transitional_ignore();
@@ -395,7 +395,7 @@ Status SetFilter::set(OperationContext* opCtx,
unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());
// Add allowed indices to query settings, overriding any previous entries.
- querySettings->setAllowedIndices(*cq, planCache->computeKey(*cq), indexes, indexNames);
+ querySettings->setAllowedIndices(*cq, indexes, indexNames);
// Remove entry from plan cache.
planCache->remove(*cq).transitional_ignore();
diff --git a/src/mongo/db/commands/index_filter_commands_test.cpp b/src/mongo/db/commands/index_filter_commands_test.cpp
index 7634481e091..3427eb1b8f7 100644
--- a/src/mongo/db/commands/index_filter_commands_test.cpp
+++ b/src/mongo/db/commands/index_filter_commands_test.cpp
@@ -459,7 +459,8 @@ TEST(IndexFilterCommandsTest, SetAndClearFiltersCollation) {
ASSERT_EQUALS(StringData(filters[0].getObjectField("collation").getStringField("locale")),
"mock_reverse_string");
- // Plan cache should only contain entry for query without collation.
+ // Setting a filter will remove the cache entry associated with the query so now the plan cache
+ // should only contain the entry for the query without collation.
ASSERT_FALSE(planCacheContains(
opCtx.get(), planCache, "{a: 'foo'}", "{}", "{}", "{locale: 'mock_reverse_string'}"));
ASSERT_TRUE(planCacheContains(opCtx.get(), planCache, "{a: 'foo'}", "{}", "{}", "{}"));
diff --git a/src/mongo/db/commands/plan_cache_commands.cpp b/src/mongo/db/commands/plan_cache_commands.cpp
index ba189fa45bb..f2490831ca4 100644
--- a/src/mongo/db/commands/plan_cache_commands.cpp
+++ b/src/mongo/db/commands/plan_cache_commands.cpp
@@ -424,6 +424,7 @@ Status listPlansOriginalFormat(std::unique_ptr<CanonicalQuery> cq,
// Append the time the entry was inserted into the plan cache.
bob->append("timeOfCreation", entry->timeOfCreation);
bob->append("queryHash", unsignedIntToFixedLengthHex(entry->queryHash));
+ bob->append("planCacheKey", unsignedIntToFixedLengthHex(entry->planCacheKey));
// Append whether or not the entry is active.
bob->append("isActive", entry->isActive);
bob->append("works", static_cast<long long>(entry->works));
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index 06b19898927..c0332b2469b 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -634,7 +634,10 @@ string OpDebug::report(Client* client,
if (queryHash) {
s << " queryHash:" << unsignedIntToFixedLengthHex(*queryHash);
+ invariant(planCacheKey);
+ s << " planCacheKey:" << unsignedIntToFixedLengthHex(*planCacheKey);
}
+
if (!errInfo.isOK()) {
s << " ok:" << 0;
if (!errInfo.reason().empty()) {
@@ -719,6 +722,8 @@ void OpDebug::append(const CurOp& curop,
if (queryHash) {
b.append("queryHash", unsignedIntToFixedLengthHex(*queryHash));
+ invariant(planCacheKey);
+ b.append("planCacheKey", unsignedIntToFixedLengthHex(*planCacheKey));
}
{
diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h
index 3f3fe4b43a7..1f685438508 100644
--- a/src/mongo/db/curop.h
+++ b/src/mongo/db/curop.h
@@ -185,6 +185,10 @@ public:
BSONObj execStats; // Owned here.
+ // The hash of the PlanCache key for the query being run. This may change depending on what
+ // indexes are present.
+ boost::optional<uint32_t> planCacheKey;
+ // The hash of the query's "stable" key. This represents the query's shape.
boost::optional<uint32_t> queryHash;
// Details of any error (whether from an exception or a command returning failure).
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index 64018a41388..009006d125a 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -18,6 +18,7 @@ env.Library(
target='query_planner',
source=[
"canonical_query.cpp",
+ "canonical_query_encoder.cpp",
"index_tag.cpp",
"parsed_projection.cpp",
"plan_cache.cpp",
@@ -246,6 +247,17 @@ env.CppUnitTest(
)
env.CppUnitTest(
+ target="canonical_query_encoder_test",
+ source=[
+ "canonical_query_encoder_test.cpp"
+ ],
+ LIBDEPS=[
+ "query_planner",
+ "query_test_service_context",
+ ],
+)
+
+env.CppUnitTest(
target="index_bounds_test",
source=[
"index_bounds_builder_test.cpp",
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index 39c7ecdee16..d1196de88ba 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/matcher/expression_array.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/query/canonical_query_encoder.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/indexability.h"
#include "mongo/db/query/query_planner_common.h"
@@ -478,4 +479,8 @@ std::string CanonicalQuery::toStringShort() const {
return ss;
}
+CanonicalQuery::QueryShapeString CanonicalQuery::encodeKey() const {
+ return canonical_query_encoder::encode(*this);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/canonical_query.h b/src/mongo/db/query/canonical_query.h
index b94b310a00d..cdee7aef60d 100644
--- a/src/mongo/db/query/canonical_query.h
+++ b/src/mongo/db/query/canonical_query.h
@@ -46,6 +46,10 @@ class OperationContext;
class CanonicalQuery {
public:
+    // A type that encodes the notion of query shape. Essentially a query's match, projection and
+ // sort with the values taken out.
+ typedef std::string QueryShapeString;
+
/**
* If parsing succeeds, returns a std::unique_ptr<CanonicalQuery> representing the parsed
* query (which will never be NULL). If parsing fails, returns an error Status.
@@ -125,6 +129,12 @@ public:
}
/**
+ * Compute the "shape" of this query by encoding the match, projection and sort, and stripping
+ * out the appropriate values.
+ */
+ QueryShapeString encodeKey() const;
+
+ /**
* Sets this CanonicalQuery's collator, and sets the collator on this CanonicalQuery's match
* expression tree.
*
diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp
new file mode 100644
index 00000000000..4a4050409aa
--- /dev/null
+++ b/src/mongo/db/query/canonical_query_encoder.cpp
@@ -0,0 +1,525 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/query/canonical_query_encoder.h"
+
+#include <boost/iterator/transform_iterator.hpp>
+
+#include "mongo/base/simple_string_data_comparator.h"
+#include "mongo/db/matcher/expression_array.h"
+#include "mongo/db/matcher/expression_geo.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace {
+
+// Delimiters for cache key encoding.
+const char kEncodeChildrenBegin = '[';
+const char kEncodeChildrenEnd = ']';
+const char kEncodeChildrenSeparator = ',';
+const char kEncodeCollationSection = '#';
+const char kEncodeProjectionSection = '|';
+const char kEncodeRegexFlagsSeparator = '/';
+const char kEncodeSortSection = '~';
+
+/**
+ * Encode user-provided string. Cache key delimiters seen in the
+ * user string are escaped with a backslash.
+ */
+void encodeUserString(StringData s, StringBuilder* keyBuilder) {
+ for (size_t i = 0; i < s.size(); ++i) {
+ char c = s[i];
+ switch (c) {
+ case kEncodeChildrenBegin:
+ case kEncodeChildrenEnd:
+ case kEncodeChildrenSeparator:
+ case kEncodeCollationSection:
+ case kEncodeProjectionSection:
+ case kEncodeRegexFlagsSeparator:
+ case kEncodeSortSection:
+ case '\\':
+ *keyBuilder << '\\';
+ // Fall through to default case.
+ default:
+ *keyBuilder << c;
+ }
+ }
+}
+
+/**
+ * String encoding of MatchExpression::MatchType.
+ */
+const char* encodeMatchType(MatchExpression::MatchType mt) {
+ switch (mt) {
+ case MatchExpression::AND:
+ return "an";
+
+ case MatchExpression::OR:
+ return "or";
+
+ case MatchExpression::NOR:
+ return "nr";
+
+ case MatchExpression::NOT:
+ return "nt";
+
+ case MatchExpression::ELEM_MATCH_OBJECT:
+ return "eo";
+
+ case MatchExpression::ELEM_MATCH_VALUE:
+ return "ev";
+
+ case MatchExpression::SIZE:
+ return "sz";
+
+ case MatchExpression::LTE:
+ return "le";
+
+ case MatchExpression::LT:
+ return "lt";
+
+ case MatchExpression::EQ:
+ return "eq";
+
+ case MatchExpression::GT:
+ return "gt";
+
+ case MatchExpression::GTE:
+ return "ge";
+
+ case MatchExpression::REGEX:
+ return "re";
+
+ case MatchExpression::MOD:
+ return "mo";
+
+ case MatchExpression::EXISTS:
+ return "ex";
+
+ case MatchExpression::MATCH_IN:
+ return "in";
+
+ case MatchExpression::TYPE_OPERATOR:
+ return "ty";
+
+ case MatchExpression::GEO:
+ return "go";
+
+ case MatchExpression::WHERE:
+ return "wh";
+
+ case MatchExpression::ALWAYS_FALSE:
+ return "af";
+
+ case MatchExpression::ALWAYS_TRUE:
+ return "at";
+
+ case MatchExpression::GEO_NEAR:
+ return "gn";
+
+ case MatchExpression::TEXT:
+ return "te";
+
+ case MatchExpression::BITS_ALL_SET:
+ return "ls";
+
+ case MatchExpression::BITS_ALL_CLEAR:
+ return "lc";
+
+ case MatchExpression::BITS_ANY_SET:
+ return "ys";
+
+ case MatchExpression::BITS_ANY_CLEAR:
+ return "yc";
+
+ case MatchExpression::EXPRESSION:
+ return "xp";
+
+ case MatchExpression::INTERNAL_EXPR_EQ:
+ return "ee";
+
+ case MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX:
+ return "internalSchemaAllElemMatchFromIndex";
+
+ case MatchExpression::INTERNAL_SCHEMA_ALLOWED_PROPERTIES:
+ return "internalSchemaAllowedProperties";
+
+ case MatchExpression::INTERNAL_SCHEMA_COND:
+ return "internalSchemaCond";
+
+ case MatchExpression::INTERNAL_SCHEMA_EQ:
+ return "internalSchemaEq";
+
+ case MatchExpression::INTERNAL_SCHEMA_FMOD:
+ return "internalSchemaFmod";
+
+ case MatchExpression::INTERNAL_SCHEMA_MIN_ITEMS:
+ return "internalSchemaMinItems";
+
+ case MatchExpression::INTERNAL_SCHEMA_MAX_ITEMS:
+ return "internalSchemaMaxItems";
+
+ case MatchExpression::INTERNAL_SCHEMA_UNIQUE_ITEMS:
+ return "internalSchemaUniqueItems";
+
+ case MatchExpression::INTERNAL_SCHEMA_XOR:
+ return "internalSchemaXor";
+
+ case MatchExpression::INTERNAL_SCHEMA_OBJECT_MATCH:
+ return "internalSchemaObjectMatch";
+
+ case MatchExpression::INTERNAL_SCHEMA_ROOT_DOC_EQ:
+ return "internalSchemaRootDocEq";
+
+ case MatchExpression::INTERNAL_SCHEMA_MIN_LENGTH:
+ return "internalSchemaMinLength";
+
+ case MatchExpression::INTERNAL_SCHEMA_MAX_LENGTH:
+ return "internalSchemaMaxLength";
+
+ case MatchExpression::INTERNAL_SCHEMA_MIN_PROPERTIES:
+ return "internalSchemaMinProperties";
+
+ case MatchExpression::INTERNAL_SCHEMA_MAX_PROPERTIES:
+ return "internalSchemaMaxProperties";
+
+ case MatchExpression::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX:
+ return "internalSchemaMatchArrayIndex";
+
+ case MatchExpression::INTERNAL_SCHEMA_TYPE:
+ return "internalSchemaType";
+
+ default:
+ MONGO_UNREACHABLE;
+ }
+}
+
+/**
+ * Encodes GEO match expression.
+ * Encoding includes:
+ * - type of geo query (within/intersect/near)
+ * - geometry type
+ * - CRS (flat or spherical)
+ */
+void encodeGeoMatchExpression(const GeoMatchExpression* tree, StringBuilder* keyBuilder) {
+ const GeoExpression& geoQuery = tree->getGeoExpression();
+
+ // Type of geo query.
+ switch (geoQuery.getPred()) {
+ case GeoExpression::WITHIN:
+ *keyBuilder << "wi";
+ break;
+ case GeoExpression::INTERSECT:
+ *keyBuilder << "in";
+ break;
+ case GeoExpression::INVALID:
+ *keyBuilder << "id";
+ break;
+ }
+
+ // Geometry type.
+ // Only one of the shared_ptrs in GeoContainer may be non-NULL.
+ *keyBuilder << geoQuery.getGeometry().getDebugType();
+
+ // CRS (flat or spherical)
+ if (FLAT == geoQuery.getGeometry().getNativeCRS()) {
+ *keyBuilder << "fl";
+ } else if (SPHERE == geoQuery.getGeometry().getNativeCRS()) {
+ *keyBuilder << "sp";
+ } else if (STRICT_SPHERE == geoQuery.getGeometry().getNativeCRS()) {
+ *keyBuilder << "ss";
+ } else {
+ error() << "unknown CRS type " << (int)geoQuery.getGeometry().getNativeCRS()
+ << " in geometry of type " << geoQuery.getGeometry().getDebugType();
+ MONGO_UNREACHABLE;
+ }
+}
+
+/**
+ * Encodes GEO_NEAR match expression.
+ * Encode:
+ * - isNearSphere
+ * - CRS (flat or spherical)
+ */
+void encodeGeoNearMatchExpression(const GeoNearMatchExpression* tree, StringBuilder* keyBuilder) {
+ const GeoNearExpression& nearQuery = tree->getData();
+
+ // isNearSphere
+ *keyBuilder << (nearQuery.isNearSphere ? "ns" : "nr");
+
+ // CRS (flat or spherical or strict-winding spherical)
+ switch (nearQuery.centroid->crs) {
+ case FLAT:
+ *keyBuilder << "fl";
+ break;
+ case SPHERE:
+ *keyBuilder << "sp";
+ break;
+ case STRICT_SPHERE:
+ *keyBuilder << "ss";
+ break;
+ case UNSET:
+ error() << "unknown CRS type " << (int)nearQuery.centroid->crs
+ << " in point geometry for near query";
+ MONGO_UNREACHABLE;
+ break;
+ }
+}
+
+template <class T>
+char encodeEnum(T val) {
+ static_assert(static_cast<int>(T::kMax) <= 9,
+ "enum has too many values to encode as a value between '0' and '9'. You must "
+ "change the encoding scheme");
+ invariant(val <= T::kMax);
+
+ return static_cast<char>(val) + '0';
+}
+
+void encodeCollation(const CollatorInterface* collation, StringBuilder* keyBuilder) {
+ if (!collation) {
+ return;
+ }
+
+ const CollationSpec& spec = collation->getSpec();
+
+ *keyBuilder << kEncodeCollationSection;
+ *keyBuilder << spec.localeID;
+ *keyBuilder << spec.caseLevel;
+
+ // Ensure that we can encode this value with a single ascii byte '0' through '9'.
+ *keyBuilder << encodeEnum(spec.caseFirst);
+ *keyBuilder << encodeEnum(spec.strength);
+ *keyBuilder << spec.numericOrdering;
+
+ *keyBuilder << encodeEnum(spec.alternate);
+ *keyBuilder << encodeEnum(spec.maxVariable);
+ *keyBuilder << spec.normalization;
+ *keyBuilder << spec.backwards;
+
+ // We do not encode 'spec.version' because query shape strings are never persisted, and need
+ // not be stable between versions.
+}
+
+template <class RegexIterator>
+void encodeRegexFlagsForMatch(RegexIterator first, RegexIterator last, StringBuilder* keyBuilder) {
+ // We sort the flags, so that queries with the same regex flags in different orders will have
+ // the same shape. We then add them to a set, so that identical flags across multiple regexes
+ // will be deduplicated and the resulting set of unique flags will be ordered consistently.
+ // Regex flags are not validated at parse-time, so we also ensure that only valid flags
+ // contribute to the encoding.
+ static const auto maxValidFlags = RegexMatchExpression::kValidRegexFlags.size();
+ std::set<char> flags;
+ for (auto it = first; it != last && flags.size() < maxValidFlags; ++it) {
+ auto inserter = std::inserter(flags, flags.begin());
+ std::copy_if((*it)->getFlags().begin(), (*it)->getFlags().end(), inserter, [](auto flag) {
+ return RegexMatchExpression::kValidRegexFlags.count(flag);
+ });
+ }
+ if (!flags.empty()) {
+ *keyBuilder << kEncodeRegexFlagsSeparator;
+ for (const auto& flag : flags) {
+ invariant(RegexMatchExpression::kValidRegexFlags.count(flag));
+ encodeUserString(StringData(&flag, 1), keyBuilder);
+ }
+ *keyBuilder << kEncodeRegexFlagsSeparator;
+ }
+}
+
+// Helper overload to prepare a vector of unique_ptrs for the heavy-lifting function above.
+void encodeRegexFlagsForMatch(const std::vector<std::unique_ptr<RegexMatchExpression>>& regexes,
+ StringBuilder* keyBuilder) {
+ const auto transformFunc = [](const auto& regex) { return regex.get(); };
+ encodeRegexFlagsForMatch(boost::make_transform_iterator(regexes.begin(), transformFunc),
+ boost::make_transform_iterator(regexes.end(), transformFunc),
+ keyBuilder);
+}
+// Helper that passes a range covering the entire source set into the heavy-lifting function above.
+void encodeRegexFlagsForMatch(const std::vector<const RegexMatchExpression*>& regexes,
+ StringBuilder* keyBuilder) {
+ encodeRegexFlagsForMatch(regexes.begin(), regexes.end(), keyBuilder);
+}
+
+/**
+ * Traverses expression tree pre-order.
+ * Appends an encoding of each node's match type and path name
+ * to the output stream.
+ */
+void encodeKeyForMatch(const MatchExpression* tree, StringBuilder* keyBuilder) {
+ invariant(keyBuilder);
+
+ // Encode match type and path.
+ *keyBuilder << encodeMatchType(tree->matchType());
+
+ encodeUserString(tree->path(), keyBuilder);
+
+ // GEO and GEO_NEAR require additional encoding.
+ if (MatchExpression::GEO == tree->matchType()) {
+ encodeGeoMatchExpression(static_cast<const GeoMatchExpression*>(tree), keyBuilder);
+ } else if (MatchExpression::GEO_NEAR == tree->matchType()) {
+ encodeGeoNearMatchExpression(static_cast<const GeoNearMatchExpression*>(tree), keyBuilder);
+ }
+
+ // We encode regular expression flags such that different options produce different shapes.
+ if (MatchExpression::REGEX == tree->matchType()) {
+ encodeRegexFlagsForMatch({static_cast<const RegexMatchExpression*>(tree)}, keyBuilder);
+ } else if (MatchExpression::MATCH_IN == tree->matchType()) {
+ const auto* inMatch = static_cast<const InMatchExpression*>(tree);
+ if (!inMatch->getRegexes().empty()) {
+ // Append '_re' to distinguish an $in without regexes from an $in with regexes.
+ encodeUserString("_re"_sd, keyBuilder);
+ encodeRegexFlagsForMatch(inMatch->getRegexes(), keyBuilder);
+ }
+ }
+
+ // Traverse child nodes.
+ // Enclose children in [].
+ if (tree->numChildren() > 0) {
+ *keyBuilder << kEncodeChildrenBegin;
+ }
+ // Use comma to separate children encoding.
+ for (size_t i = 0; i < tree->numChildren(); ++i) {
+ if (i > 0) {
+ *keyBuilder << kEncodeChildrenSeparator;
+ }
+ encodeKeyForMatch(tree->getChild(i), keyBuilder);
+ }
+
+ if (tree->numChildren() > 0) {
+ *keyBuilder << kEncodeChildrenEnd;
+ }
+}
+
+/**
+* Encodes sort order into cache key.
+* Sort order is normalized because it is provided by
+* QueryRequest.
+*/
+void encodeKeyForSort(const BSONObj& sortObj, StringBuilder* keyBuilder) {
+ if (sortObj.isEmpty()) {
+ return;
+ }
+
+ *keyBuilder << kEncodeSortSection;
+
+ BSONObjIterator it(sortObj);
+ while (it.more()) {
+ BSONElement elt = it.next();
+ // $meta text score
+ if (QueryRequest::isTextScoreMeta(elt)) {
+ *keyBuilder << "t";
+ }
+ // Ascending
+ else if (elt.numberInt() == 1) {
+ *keyBuilder << "a";
+ }
+ // Descending
+ else {
+ *keyBuilder << "d";
+ }
+ encodeUserString(elt.fieldName(), keyBuilder);
+
+ // Sort argument separator
+ if (it.more()) {
+ *keyBuilder << ",";
+ }
+ }
+}
+
+/**
+* Encodes parsed projection into cache key.
+* Does a simple toString() on each projected field
+* in the BSON object.
+* Orders the encoded elements in the projection by field name.
+* This handles all the special projection types ($meta, $elemMatch, etc.)
+*/
+void encodeKeyForProj(const BSONObj& projObj, StringBuilder* keyBuilder) {
+ // Sorts the BSON elements by field name using a map.
+ std::map<StringData, BSONElement> elements;
+
+ BSONObjIterator it(projObj);
+ while (it.more()) {
+ BSONElement elt = it.next();
+ StringData fieldName = elt.fieldNameStringData();
+
+ // Internal callers may add $-prefixed fields to the projection. These are not part of a
+ // user query, and therefore are not considered part of the cache key.
+ if (fieldName[0] == '$') {
+ continue;
+ }
+
+ elements[fieldName] = elt;
+ }
+
+ if (!elements.empty()) {
+ *keyBuilder << kEncodeProjectionSection;
+ }
+
+ // Read elements in order of field name
+ for (std::map<StringData, BSONElement>::const_iterator i = elements.begin();
+ i != elements.end();
+ ++i) {
+ const BSONElement& elt = (*i).second;
+
+ if (elt.type() != BSONType::Object) {
+ // For inclusion/exclusion projections, we encode as "i" or "e".
+ *keyBuilder << (elt.trueValue() ? "i" : "e");
+ } else {
+ // For projection operators, we use the verbatim string encoding of the element.
+ encodeUserString(elt.toString(false, // includeFieldName
+ false), // full
+ keyBuilder);
+ }
+
+ encodeUserString(elt.fieldName(), keyBuilder);
+ }
+}
+} // namespace
+
+namespace canonical_query_encoder {
+
+CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq) {
+ StringBuilder keyBuilder;
+ encodeKeyForMatch(cq.root(), &keyBuilder);
+ encodeKeyForSort(cq.getQueryRequest().getSort(), &keyBuilder);
+ encodeKeyForProj(cq.getQueryRequest().getProj(), &keyBuilder);
+ encodeCollation(cq.getCollator(), &keyBuilder);
+
+ return keyBuilder.str();
+}
+
+uint32_t computeHash(StringData key) {
+ return SimpleStringDataComparator::kInstance.hash(key);
+}
+} // namespace canonical_query_encoder
+} // namespace mongo
diff --git a/src/mongo/db/query/canonical_query_encoder.h b/src/mongo/db/query/canonical_query_encoder.h
new file mode 100644
index 00000000000..d0019ba08c9
--- /dev/null
+++ b/src/mongo/db/query/canonical_query_encoder.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/query/plan_cache.h"
+
+namespace mongo {
+namespace canonical_query_encoder {
+/**
+ * Encode the given CanonicalQuery into a string representation which represents the shape of the
+ * query. This is done by encoding the match, projection and sort and stripping the values from the
+ * match. Two queries with the same shape may not necessarily be able to use the same plan, so the
+ * plan cache has to add information to discriminate between queries with the same shape.
+ */
+CanonicalQuery::QueryShapeString encode(const CanonicalQuery& cq);
+
+/**
+ * Returns a hash of the given key (produced from either a QueryShapeString or a PlanCacheKey).
+ */
+uint32_t computeHash(StringData key);
+}
+}
diff --git a/src/mongo/db/query/canonical_query_encoder_test.cpp b/src/mongo/db/query/canonical_query_encoder_test.cpp
new file mode 100644
index 00000000000..672336e090b
--- /dev/null
+++ b/src/mongo/db/query/canonical_query_encoder_test.cpp
@@ -0,0 +1,286 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/query/canonical_query_encoder.h"
+
+#include "mongo/db/jsobj.h"
+#include "mongo/db/json.h"
+#include "mongo/db/pipeline/expression_context_for_test.h"
+#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/query_test_service_context.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/assert_util.h"
+
+
+namespace mongo {
+namespace {
+
+using std::unique_ptr;
+
+static const NamespaceString nss("testdb.testcoll");
+
+/**
+ * Utility functions to create a CanonicalQuery
+ */
+unique_ptr<CanonicalQuery> canonicalize(BSONObj query,
+ BSONObj sort,
+ BSONObj proj,
+ BSONObj collation) {
+ QueryTestServiceContext serviceContext;
+ auto opCtx = serviceContext.makeOperationContext();
+
+ auto qr = stdx::make_unique<QueryRequest>(nss);
+ qr->setFilter(query);
+ qr->setSort(sort);
+ qr->setProj(proj);
+ qr->setCollation(collation);
+ const boost::intrusive_ptr<ExpressionContext> expCtx;
+ auto statusWithCQ =
+ CanonicalQuery::canonicalize(opCtx.get(),
+ std::move(qr),
+ expCtx,
+ ExtensionsCallbackNoop(),
+ MatchExpressionParser::kAllowAllSpecialFeatures);
+ ASSERT_OK(statusWithCQ.getStatus());
+ return std::move(statusWithCQ.getValue());
+}
+
+unique_ptr<CanonicalQuery> canonicalize(const char* queryStr) {
+ BSONObj queryObj = fromjson(queryStr);
+ return canonicalize(queryObj, {}, {}, {});
+}
+
+
+/**
+ * Test functions for computeKey, when no indexes are present. Cache keys are intentionally
+ * obfuscated and are meaningful only within the current lifetime of the server process. Users
+ * should treat plan cache keys as opaque.
+ */
+
+void testComputeKey(const CanonicalQuery& cq, const char* expectedStr) {
+ const auto key = cq.encodeKey();
+ StringData expectedKey(expectedStr);
+ if (key != expectedKey) {
+ str::stream ss;
+ ss << "Unexpected plan cache key. Expected: " << expectedKey << ". Actual: " << key
+ << ". Query: " << cq.toString();
+ FAIL(ss);
+ }
+}
+
+void testComputeKey(BSONObj query, BSONObj sort, BSONObj proj, const char* expectedStr) {
+ BSONObj collation;
+ unique_ptr<CanonicalQuery> cq(canonicalize(query, sort, proj, collation));
+ testComputeKey(*cq, expectedStr);
+}
+
+void testComputeKey(const char* queryStr,
+ const char* sortStr,
+ const char* projStr,
+ const char* expectedStr) {
+ testComputeKey(fromjson(queryStr), fromjson(sortStr), fromjson(projStr), expectedStr);
+}
+
+TEST(CanonicalQueryEncoderTest, ComputeKey) {
+ // Generated cache keys should be treated as opaque to the user.
+
+ // No sorts
+ testComputeKey("{}", "{}", "{}", "an");
+ testComputeKey("{$or: [{a: 1}, {b: 2}]}", "{}", "{}", "or[eqa,eqb]");
+ testComputeKey("{$or: [{a: 1}, {b: 1}, {c: 1}], d: 1}", "{}", "{}", "an[or[eqa,eqb,eqc],eqd]");
+ testComputeKey("{$or: [{a: 1}, {b: 1}], c: 1, d: 1}", "{}", "{}", "an[or[eqa,eqb],eqc,eqd]");
+ testComputeKey("{a: 1, b: 1, c: 1}", "{}", "{}", "an[eqa,eqb,eqc]");
+ testComputeKey("{a: 1, beqc: 1}", "{}", "{}", "an[eqa,eqbeqc]");
+ testComputeKey("{ap1a: 1}", "{}", "{}", "eqap1a");
+ testComputeKey("{aab: 1}", "{}", "{}", "eqaab");
+
+ // With sort
+ testComputeKey("{}", "{a: 1}", "{}", "an~aa");
+ testComputeKey("{}", "{a: -1}", "{}", "an~da");
+ testComputeKey("{}",
+ "{a: {$meta: 'textScore'}}",
+ "{a: {$meta: 'textScore'}}",
+ "an~ta|{ $meta: \"textScore\" }a");
+ testComputeKey("{a: 1}", "{b: 1}", "{}", "eqa~ab");
+
+ // With projection
+ testComputeKey("{}", "{}", "{a: 1}", "an|ia");
+ testComputeKey("{}", "{}", "{a: -1}", "an|ia");
+ testComputeKey("{}", "{}", "{a: -1.0}", "an|ia");
+ testComputeKey("{}", "{}", "{a: true}", "an|ia");
+ testComputeKey("{}", "{}", "{a: 0}", "an|ea");
+ testComputeKey("{}", "{}", "{a: false}", "an|ea");
+ testComputeKey("{}", "{}", "{a: 99}", "an|ia");
+ testComputeKey("{}", "{}", "{a: 'foo'}", "an|ia");
+ testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}}", "an|{ $slice: \\[ 3\\, 5 \\] }a");
+ testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}}", "an|{ $elemMatch: { x: 2 } }a");
+ testComputeKey("{}", "{}", "{a: ObjectId('507f191e810c19729de860ea')}", "an|ia");
+ testComputeKey("{a: 1}", "{}", "{'a.$': 1}", "eqa|ia.$");
+ testComputeKey("{a: 1}", "{}", "{a: 1}", "eqa|ia");
+
+ // Projection should be order-insensitive
+ testComputeKey("{}", "{}", "{a: 1, b: 1}", "an|iaib");
+ testComputeKey("{}", "{}", "{b: 1, a: 1}", "an|iaib");
+
+ // With or-elimination and projection
+ testComputeKey("{$or: [{a: 1}]}", "{}", "{_id: 0, a: 1}", "eqa|e_idia");
+ testComputeKey("{$or: [{a: 1}]}", "{}", "{'a.$': 1}", "eqa|ia.$");
+}
+
+// Delimiters found in user field names or non-standard projection field values
+// must be escaped.
+TEST(CanonicalQueryEncoderTest, ComputeKeyEscaped) {
+ // Field name in query.
+ testComputeKey("{'a,[]~|<>': 1}", "{}", "{}", "eqa\\,\\[\\]\\~\\|<>");
+
+ // Field name in sort.
+ testComputeKey("{}", "{'a,[]~|<>': 1}", "{}", "an~aa\\,\\[\\]\\~\\|<>");
+
+ // Field name in projection.
+ testComputeKey("{}", "{}", "{'a,[]~|<>': 1}", "an|ia\\,\\[\\]\\~\\|<>");
+
+ // Value in projection.
+ testComputeKey("{}", "{}", "{a: 'foo,[]~|<>'}", "an|ia");
+}
+
+// Cache keys for $geoWithin queries with legacy and GeoJSON coordinates should
+// not be the same.
+TEST(CanonicalQueryEncoderTest, ComputeKeyGeoWithin) {
+ PlanCache planCache;
+
+ // Legacy coordinates.
+ unique_ptr<CanonicalQuery> cqLegacy(
+ canonicalize("{a: {$geoWithin: "
+ "{$box: [[-180, -90], [180, 90]]}}}"));
+ // GeoJSON coordinates.
+ unique_ptr<CanonicalQuery> cqNew(
+ canonicalize("{a: {$geoWithin: "
+ "{$geometry: {type: 'Polygon', coordinates: "
+ "[[[0, 0], [0, 90], [90, 0], [0, 0]]]}}}}"));
+ ASSERT_NOT_EQUALS(planCache.computeKey(*cqLegacy), planCache.computeKey(*cqNew));
+}
+
+// GEO_NEAR cache keys should include information on geometry and CRS in addition
+// to the match type and field name.
+TEST(CanonicalQueryEncoderTest, ComputeKeyGeoNear) {
+ testComputeKey("{a: {$near: [0,0], $maxDistance:0.3 }}", "{}", "{}", "gnanrfl");
+ testComputeKey("{a: {$nearSphere: [0,0], $maxDistance: 0.31 }}", "{}", "{}", "gnanssp");
+ testComputeKey(
+ "{a: {$geoNear: {$geometry: {type: 'Point', coordinates: [0,0]},"
+ "$maxDistance:100}}}",
+ "{}",
+ "{}",
+ "gnanrsp");
+}
+
+TEST(CanonicalQueryEncoderTest, ComputeKeyRegexDependsOnFlags) {
+ testComputeKey("{a: {$regex: \"sometext\"}}", "{}", "{}", "rea");
+ testComputeKey("{a: {$regex: \"sometext\", $options: \"\"}}", "{}", "{}", "rea");
+
+ testComputeKey("{a: {$regex: \"sometext\", $options: \"s\"}}", "{}", "{}", "rea/s/");
+ testComputeKey("{a: {$regex: \"sometext\", $options: \"ms\"}}", "{}", "{}", "rea/ms/");
+
+ // Test that the ordering of $options doesn't matter.
+ testComputeKey("{a: {$regex: \"sometext\", $options: \"im\"}}", "{}", "{}", "rea/im/");
+ testComputeKey("{a: {$regex: \"sometext\", $options: \"mi\"}}", "{}", "{}", "rea/im/");
+
+ // Test that only the options affect the key. Two regex match expressions with the same options
+ // but different $regex values should have the same shape.
+ testComputeKey("{a: {$regex: \"abc\", $options: \"mi\"}}", "{}", "{}", "rea/im/");
+ testComputeKey("{a: {$regex: \"efg\", $options: \"mi\"}}", "{}", "{}", "rea/im/");
+
+ testComputeKey("{a: {$regex: \"\", $options: \"ms\"}}", "{}", "{}", "rea/ms/");
+ testComputeKey("{a: {$regex: \"___\", $options: \"ms\"}}", "{}", "{}", "rea/ms/");
+
+ // Test that only valid regex flags contribute to the plan cache key encoding.
+ testComputeKey(BSON("a" << BSON("$regex"
+ << "abc"
+ << "$options"
+ << "abcdefghijklmnopqrstuvwxyz")),
+ {},
+ {},
+ "rea/imsx/");
+ testComputeKey("{a: /abc/gim}", "{}", "{}", "rea/im/");
+}
+
+TEST(CanonicalQueryEncoderTest, ComputeKeyMatchInDependsOnPresenceOfRegexAndFlags) {
+ // Test that an $in containing a single regex is unwrapped to $regex.
+ testComputeKey("{a: {$in: [/foo/]}}", "{}", "{}", "rea");
+ testComputeKey("{a: {$in: [/foo/i]}}", "{}", "{}", "rea/i/");
+
+ // Test that an $in with no regexes does not include any regex information.
+ testComputeKey("{a: {$in: [1, 'foo']}}", "{}", "{}", "ina");
+
+ // Test that an $in with a regex encodes the presence of the regex.
+ testComputeKey("{a: {$in: [1, /foo/]}}", "{}", "{}", "ina_re");
+
+ // Test that an $in with a regex encodes the presence of the regex and its flags.
+ testComputeKey("{a: {$in: [1, /foo/is]}}", "{}", "{}", "ina_re/is/");
+
+ // Test that the computed key is invariant to the order of the flags within each regex.
+ testComputeKey("{a: {$in: [1, /foo/si]}}", "{}", "{}", "ina_re/is/");
+
+ // Test that an $in with multiple regexes encodes all unique flags.
+ testComputeKey("{a: {$in: [1, /foo/i, /bar/m, /baz/s]}}", "{}", "{}", "ina_re/ims/");
+
+ // Test that an $in with multiple regexes deduplicates identical flags.
+ testComputeKey(
+ "{a: {$in: [1, /foo/i, /bar/m, /baz/s, /qux/i, /quux/s]}}", "{}", "{}", "ina_re/ims/");
+
+ // Test that the computed key is invariant to the ordering of the flags across regexes.
+ testComputeKey("{a: {$in: [1, /foo/ism, /bar/msi, /baz/im, /qux/si, /quux/im]}}",
+ "{}",
+ "{}",
+ "ina_re/ims/");
+ testComputeKey("{a: {$in: [1, /foo/msi, /bar/ism, /baz/is, /qux/mi, /quux/im]}}",
+ "{}",
+ "{}",
+ "ina_re/ims/");
+
+ // Test that $not-$in-$regex similarly records the presence and flags of any regexes.
+ testComputeKey("{a: {$not: {$in: [1, 'foo']}}}", "{}", "{}", "nt[ina]");
+ testComputeKey("{a: {$not: {$in: [1, /foo/]}}}", "{}", "{}", "nt[ina_re]");
+ testComputeKey(
+ "{a: {$not: {$in: [1, /foo/i, /bar/i, /baz/msi]}}}", "{}", "{}", "nt[ina_re/ims/]");
+
+ // Test that a $not-$in containing a single regex is unwrapped to $not-$regex.
+ testComputeKey("{a: {$not: {$in: [/foo/]}}}", "{}", "{}", "nt[rea]");
+ testComputeKey("{a: {$not: {$in: [/foo/i]}}}", "{}", "{}", "nt[rea/i/]");
+}
+
+TEST(CanonicalQueryEncoderTest, CheckCollationIsEncoded) {
+
+ unique_ptr<CanonicalQuery> cq(canonicalize(
+ fromjson("{a: 1, b: 1}"), {}, {}, fromjson("{locale: 'mock_reverse_string'}")));
+
+ testComputeKey(*cq, "an[eqa,eqb]#mock_reverse_string02300000");
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/query/collation/collation_spec.h b/src/mongo/db/query/collation/collation_spec.h
index 69d01b40b19..6579dd4cc18 100644
--- a/src/mongo/db/query/collation/collation_spec.h
+++ b/src/mongo/db/query/collation/collation_spec.h
@@ -49,7 +49,10 @@ struct CollationSpec {
kLower,
// Use default sorting behavior for the strength.
- kOff
+ kOff,
+
+ // Update this if you add another value.
+ kMax = kOff,
};
// Controls the set of characteristics used to compare strings.
@@ -70,7 +73,10 @@ struct CollationSpec {
// Equal Unicode point values.
// E.g. Hebrew cantillation marks are only distinguished at this level.
- kIdentical = 5
+ kIdentical = 5,
+
+ // Update this if you add another value.
+ kMax = kIdentical,
};
// Controls whether spaces and punctuation are considered base characters.
@@ -80,7 +86,10 @@ struct CollationSpec {
// Spaces and punctuation are not considered base characters, and are only distinguished at
// strength > 3.
- kShifted
+ kShifted,
+
+ // Update this if you add another value.
+ kMax = kShifted,
};
// Controls which characters are affected by alternate=shifted.
@@ -89,7 +98,10 @@ struct CollationSpec {
kPunct,
// Only spaces are affected
- kSpace
+ kSpace,
+
+ // Update this if you add another value.
+ kMax = kSpace,
};
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index 95b07d91c68..92586ac1c47 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/exec/text.h"
#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/keypattern.h"
+#include "mongo/db/query/canonical_query_encoder.h"
#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/plan_summary_stats.h"
@@ -645,13 +646,17 @@ void Explain::generatePlannerInfo(PlanExecutor* exec,
// field will always be false in the case of EOF or idhack plans.
bool indexFilterSet = false;
boost::optional<uint32_t> queryHash;
+ boost::optional<uint32_t> planCacheKeyHash;
if (collection && exec->getCanonicalQuery()) {
const CollectionInfoCache* infoCache = collection->infoCache();
const QuerySettings* querySettings = infoCache->getQuerySettings();
PlanCacheKey planCacheKey =
infoCache->getPlanCache()->computeKey(*exec->getCanonicalQuery());
- queryHash = PlanCache::computeQueryHash(planCacheKey);
- if (auto allowedIndicesFilter = querySettings->getAllowedIndicesFilter(planCacheKey)) {
+ planCacheKeyHash = canonical_query_encoder::computeHash(planCacheKey.toString());
+ queryHash = canonical_query_encoder::computeHash(planCacheKey.getStableKeyStringData());
+
+ if (auto allowedIndicesFilter =
+ querySettings->getAllowedIndicesFilter(planCacheKey.getStableKey())) {
// Found an index filter set on the query shape.
indexFilterSet = true;
}
@@ -675,6 +680,10 @@ void Explain::generatePlannerInfo(PlanExecutor* exec,
plannerBob.append("queryHash", unsignedIntToFixedLengthHex(*queryHash));
}
+ if (planCacheKeyHash) {
+ plannerBob.append("planCacheKey", unsignedIntToFixedLengthHex(*planCacheKeyHash));
+ }
+
BSONObjBuilder winningPlanBob(plannerBob.subobjStart("winningPlan"));
const auto winnerStats = getWinningPlanStatsTree(exec);
statsToBSON(*winnerStats.get(), &winningPlanBob, ExplainOptions::Verbosity::kQueryPlanner);
@@ -999,6 +1008,7 @@ void Explain::planCacheEntryToBSON(const PlanCacheEntry& entry, BSONObjBuilder*
}
shapeBuilder.doneFast();
out->append("queryHash", unsignedIntToFixedLengthHex(entry.queryHash));
+ out->append("planCacheKey", unsignedIntToFixedLengthHex(entry.planCacheKey));
// Append whether or not the entry is active.
out->append("isActive", entry.isActive);
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 8db806ad93e..27adf721c57 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -60,6 +60,7 @@
#include "mongo/db/matcher/extensions_callback_real.h"
#include "mongo/db/ops/update_lifecycle.h"
#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/canonical_query_encoder.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/explain.h"
#include "mongo/db/query/index_bounds_builder.h"
@@ -198,13 +199,12 @@ void fillOutPlannerParams(OperationContext* opCtx,
// Ignore index filters when it is possible to use the id-hack.
if (!IDHackStage::supportsQuery(collection, *canonicalQuery)) {
QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
- PlanCacheKey planCacheKey =
- collection->infoCache()->getPlanCache()->computeKey(*canonicalQuery);
+ const auto key = canonicalQuery->encodeKey();
// Filter index catalog if index filters are specified for query.
// Also, signal to planner that application hint should be ignored.
if (boost::optional<AllowedIndicesFilter> allowedIndicesFilter =
- querySettings->getAllowedIndicesFilter(planCacheKey)) {
+ querySettings->getAllowedIndicesFilter(key)) {
filterAllowedIndexEntries(*allowedIndicesFilter, &plannerParams->indices);
plannerParams->indexFiltersApplied = true;
}
@@ -399,10 +399,13 @@ StatusWith<PrepareExecutionResult> prepareExecution(OperationContext* opCtx,
// Check that the query should be cached.
if (collection->infoCache()->getPlanCache()->shouldCacheQuery(*canonicalQuery)) {
- auto planCacheKey = collection->infoCache()->getPlanCache()->computeKey(*canonicalQuery);
-
// Fill in opDebug information.
- CurOp::get(opCtx)->debug().queryHash = PlanCache::computeQueryHash(planCacheKey);
+ const auto planCacheKey =
+ collection->infoCache()->getPlanCache()->computeKey(*canonicalQuery);
+ CurOp::get(opCtx)->debug().queryHash =
+ canonical_query_encoder::computeHash(planCacheKey.getStableKeyStringData());
+ CurOp::get(opCtx)->debug().planCacheKey =
+ canonical_query_encoder::computeHash(planCacheKey.toString());
// Try to look up a cached solution for the query.
if (auto cs =
diff --git a/src/mongo/db/query/get_executor_test.cpp b/src/mongo/db/query/get_executor_test.cpp
index 7522621e873..592cfaea3c9 100644
--- a/src/mongo/db/query/get_executor_test.cpp
+++ b/src/mongo/db/query/get_executor_test.cpp
@@ -97,10 +97,10 @@ void testAllowedIndices(std::vector<IndexEntry> indexes,
// getAllowedIndices should return false when query shape is not yet in query settings.
unique_ptr<CanonicalQuery> cq(canonicalize("{a: 1}", "{}", "{}"));
- PlanCacheKey key = planCache.computeKey(*cq);
+ const auto key = cq->encodeKey();
ASSERT_FALSE(querySettings.getAllowedIndicesFilter(key));
- querySettings.setAllowedIndices(*cq, key, keyPatterns, indexNames);
+ querySettings.setAllowedIndices(*cq, keyPatterns, indexNames);
// Index entry vector should contain 1 entry after filtering.
boost::optional<AllowedIndicesFilter> hasFilter = querySettings.getAllowedIndicesFilter(key);
ASSERT_TRUE(hasFilter);
diff --git a/src/mongo/db/query/lru_key_value.h b/src/mongo/db/query/lru_key_value.h
index 524b87c6fc0..e803fec336e 100644
--- a/src/mongo/db/query/lru_key_value.h
+++ b/src/mongo/db/query/lru_key_value.h
@@ -58,7 +58,7 @@ namespace mongo {
* TODO: We could move this into the util/ directory and do any cleanup necessary to make it
* fully general.
*/
-template <class K, class V>
+template <class K, class V, class KeyHasher = std::hash<K>>
class LRUKeyValue {
public:
LRUKeyValue(size_t maxSize) : _maxSize(maxSize), _currentSize(0){};
@@ -73,7 +73,7 @@ public:
typedef typename KVList::iterator KVListIt;
typedef typename KVList::const_iterator KVListConstIt;
- typedef stdx::unordered_map<K, KVListIt> KVMap;
+ typedef stdx::unordered_map<K, KVListIt, KeyHasher> KVMap;
typedef typename KVMap::const_iterator KVMapConstIt;
/**
diff --git a/src/mongo/db/query/plan_cache.cpp b/src/mongo/db/query/plan_cache.cpp
index 44c11138e98..0786f995c17 100644
--- a/src/mongo/db/query/plan_cache.cpp
+++ b/src/mongo/db/query/plan_cache.cpp
@@ -42,10 +42,10 @@
#include <vector>
#include "mongo/base/owned_pointer_vector.h"
-#include "mongo/base/simple_string_data_comparator.h"
#include "mongo/base/string_data_comparator_interface.h"
#include "mongo/db/matcher/expression_array.h"
#include "mongo/db/matcher/expression_geo.h"
+#include "mongo/db/query/canonical_query_encoder.h"
#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/query/plan_ranker.h"
#include "mongo/db/query/query_knobs.h"
@@ -59,261 +59,8 @@ namespace mongo {
namespace {
// Delimiters for cache key encoding.
-const char kEncodeChildrenBegin = '[';
-const char kEncodeChildrenEnd = ']';
-const char kEncodeChildrenSeparator = ',';
-const char kEncodeCollationSection = '#';
const char kEncodeDiscriminatorsBegin = '<';
const char kEncodeDiscriminatorsEnd = '>';
-const char kEncodeProjectionSection = '|';
-const char kEncodeRegexFlagsSeparator = '/';
-const char kEncodeSortSection = '~';
-
-/**
- * Encode user-provided string. Cache key delimiters seen in the
- * user string are escaped with a backslash.
- */
-void encodeUserString(StringData s, StringBuilder* keyBuilder) {
- for (size_t i = 0; i < s.size(); ++i) {
- char c = s[i];
- switch (c) {
- case kEncodeChildrenBegin:
- case kEncodeChildrenEnd:
- case kEncodeChildrenSeparator:
- case kEncodeCollationSection:
- case kEncodeDiscriminatorsBegin:
- case kEncodeDiscriminatorsEnd:
- case kEncodeProjectionSection:
- case kEncodeRegexFlagsSeparator:
- case kEncodeSortSection:
- case '\\':
- *keyBuilder << '\\';
- // Fall through to default case.
- default:
- *keyBuilder << c;
- }
- }
-}
-
-/**
- * String encoding of MatchExpression::MatchType.
- */
-const char* encodeMatchType(MatchExpression::MatchType mt) {
- switch (mt) {
- case MatchExpression::AND:
- return "an";
-
- case MatchExpression::OR:
- return "or";
-
- case MatchExpression::NOR:
- return "nr";
-
- case MatchExpression::NOT:
- return "nt";
-
- case MatchExpression::ELEM_MATCH_OBJECT:
- return "eo";
-
- case MatchExpression::ELEM_MATCH_VALUE:
- return "ev";
-
- case MatchExpression::SIZE:
- return "sz";
-
- case MatchExpression::LTE:
- return "le";
-
- case MatchExpression::LT:
- return "lt";
-
- case MatchExpression::EQ:
- return "eq";
-
- case MatchExpression::GT:
- return "gt";
-
- case MatchExpression::GTE:
- return "ge";
-
- case MatchExpression::REGEX:
- return "re";
-
- case MatchExpression::MOD:
- return "mo";
-
- case MatchExpression::EXISTS:
- return "ex";
-
- case MatchExpression::MATCH_IN:
- return "in";
-
- case MatchExpression::TYPE_OPERATOR:
- return "ty";
-
- case MatchExpression::GEO:
- return "go";
-
- case MatchExpression::WHERE:
- return "wh";
-
- case MatchExpression::ALWAYS_FALSE:
- return "af";
-
- case MatchExpression::ALWAYS_TRUE:
- return "at";
-
- case MatchExpression::GEO_NEAR:
- return "gn";
-
- case MatchExpression::TEXT:
- return "te";
-
- case MatchExpression::BITS_ALL_SET:
- return "ls";
-
- case MatchExpression::BITS_ALL_CLEAR:
- return "lc";
-
- case MatchExpression::BITS_ANY_SET:
- return "ys";
-
- case MatchExpression::BITS_ANY_CLEAR:
- return "yc";
-
- case MatchExpression::EXPRESSION:
- return "xp";
-
- case MatchExpression::INTERNAL_EXPR_EQ:
- return "ee";
-
- case MatchExpression::INTERNAL_SCHEMA_ALL_ELEM_MATCH_FROM_INDEX:
- return "internalSchemaAllElemMatchFromIndex";
-
- case MatchExpression::INTERNAL_SCHEMA_ALLOWED_PROPERTIES:
- return "internalSchemaAllowedProperties";
-
- case MatchExpression::INTERNAL_SCHEMA_COND:
- return "internalSchemaCond";
-
- case MatchExpression::INTERNAL_SCHEMA_EQ:
- return "internalSchemaEq";
-
- case MatchExpression::INTERNAL_SCHEMA_FMOD:
- return "internalSchemaFmod";
-
- case MatchExpression::INTERNAL_SCHEMA_MIN_ITEMS:
- return "internalSchemaMinItems";
-
- case MatchExpression::INTERNAL_SCHEMA_MAX_ITEMS:
- return "internalSchemaMaxItems";
-
- case MatchExpression::INTERNAL_SCHEMA_UNIQUE_ITEMS:
- return "internalSchemaUniqueItems";
-
- case MatchExpression::INTERNAL_SCHEMA_XOR:
- return "internalSchemaXor";
-
- case MatchExpression::INTERNAL_SCHEMA_OBJECT_MATCH:
- return "internalSchemaObjectMatch";
-
- case MatchExpression::INTERNAL_SCHEMA_ROOT_DOC_EQ:
- return "internalSchemaRootDocEq";
-
- case MatchExpression::INTERNAL_SCHEMA_MIN_LENGTH:
- return "internalSchemaMinLength";
-
- case MatchExpression::INTERNAL_SCHEMA_MAX_LENGTH:
- return "internalSchemaMaxLength";
-
- case MatchExpression::INTERNAL_SCHEMA_MIN_PROPERTIES:
- return "internalSchemaMinProperties";
-
- case MatchExpression::INTERNAL_SCHEMA_MAX_PROPERTIES:
- return "internalSchemaMaxProperties";
-
- case MatchExpression::INTERNAL_SCHEMA_MATCH_ARRAY_INDEX:
- return "internalSchemaMatchArrayIndex";
-
- case MatchExpression::INTERNAL_SCHEMA_TYPE:
- return "internalSchemaType";
-
- default:
- MONGO_UNREACHABLE;
- }
-}
-
-/**
- * Encodes GEO match expression.
- * Encoding includes:
- * - type of geo query (within/intersect/near)
- * - geometry type
- * - CRS (flat or spherical)
- */
-void encodeGeoMatchExpression(const GeoMatchExpression* tree, StringBuilder* keyBuilder) {
- const GeoExpression& geoQuery = tree->getGeoExpression();
-
- // Type of geo query.
- switch (geoQuery.getPred()) {
- case GeoExpression::WITHIN:
- *keyBuilder << "wi";
- break;
- case GeoExpression::INTERSECT:
- *keyBuilder << "in";
- break;
- case GeoExpression::INVALID:
- *keyBuilder << "id";
- break;
- }
-
- // Geometry type.
- // Only one of the shared_ptrs in GeoContainer may be non-NULL.
- *keyBuilder << geoQuery.getGeometry().getDebugType();
-
- // CRS (flat or spherical)
- if (FLAT == geoQuery.getGeometry().getNativeCRS()) {
- *keyBuilder << "fl";
- } else if (SPHERE == geoQuery.getGeometry().getNativeCRS()) {
- *keyBuilder << "sp";
- } else if (STRICT_SPHERE == geoQuery.getGeometry().getNativeCRS()) {
- *keyBuilder << "ss";
- } else {
- error() << "unknown CRS type " << (int)geoQuery.getGeometry().getNativeCRS()
- << " in geometry of type " << geoQuery.getGeometry().getDebugType();
- MONGO_UNREACHABLE;
- }
-}
-
-/**
- * Encodes GEO_NEAR match expression.
- * Encode:
- * - isNearSphere
- * - CRS (flat or spherical)
- */
-void encodeGeoNearMatchExpression(const GeoNearMatchExpression* tree, StringBuilder* keyBuilder) {
- const GeoNearExpression& nearQuery = tree->getData();
-
- // isNearSphere
- *keyBuilder << (nearQuery.isNearSphere ? "ns" : "nr");
-
- // CRS (flat or spherical or strict-winding spherical)
- switch (nearQuery.centroid->crs) {
- case FLAT:
- *keyBuilder << "fl";
- break;
- case SPHERE:
- *keyBuilder << "sp";
- break;
- case STRICT_SPHERE:
- *keyBuilder << "ss";
- break;
- case UNSET:
- error() << "unknown CRS type " << (int)nearQuery.centroid->crs
- << " in point geometry for near query";
- MONGO_UNREACHABLE;
- break;
- }
-}
void encodeIndexabilityForDiscriminators(const MatchExpression* tree,
const IndexToDiscriminatorMap& discriminators,
@@ -326,65 +73,37 @@ void encodeIndexabilityForDiscriminators(const MatchExpression* tree,
void encodeIndexability(const MatchExpression* tree,
const PlanCacheIndexabilityState& indexabilityState,
StringBuilder* keyBuilder) {
- if (tree->path().empty()) {
- return;
+ if (!tree->path().empty()) {
+ const IndexToDiscriminatorMap& discriminators =
+ indexabilityState.getDiscriminators(tree->path());
+ IndexToDiscriminatorMap wildcardDiscriminators =
+ indexabilityState.buildWildcardDiscriminators(tree->path());
+ if (!discriminators.empty() || !wildcardDiscriminators.empty()) {
+ *keyBuilder << kEncodeDiscriminatorsBegin;
+ // For each discriminator on this path, append the character '0' or '1'.
+ encodeIndexabilityForDiscriminators(tree, discriminators, keyBuilder);
+ encodeIndexabilityForDiscriminators(tree, wildcardDiscriminators, keyBuilder);
+
+ *keyBuilder << kEncodeDiscriminatorsEnd;
+ }
}
- const IndexToDiscriminatorMap& discriminators =
- indexabilityState.getDiscriminators(tree->path());
- IndexToDiscriminatorMap wildcardDiscriminators =
- indexabilityState.buildWildcardDiscriminators(tree->path());
- if (discriminators.empty() && wildcardDiscriminators.empty()) {
- return;
+ for (size_t i = 0; i < tree->numChildren(); ++i) {
+ encodeIndexability(tree->getChild(i), indexabilityState, keyBuilder);
}
-
- *keyBuilder << kEncodeDiscriminatorsBegin;
- // For each discriminator on this path, append the character '0' or '1'.
- encodeIndexabilityForDiscriminators(tree, discriminators, keyBuilder);
- encodeIndexabilityForDiscriminators(tree, wildcardDiscriminators, keyBuilder);
-
- *keyBuilder << kEncodeDiscriminatorsEnd;
}
-template <class RegexIterator>
-void encodeRegexFlagsForMatch(RegexIterator first, RegexIterator last, StringBuilder* keyBuilder) {
- // We sort the flags, so that queries with the same regex flags in different orders will have
- // the same shape. We then add them to a set, so that identical flags across multiple regexes
- // will be deduplicated and the resulting set of unique flags will be ordered consistently.
- // Regex flags are not validated at parse-time, so we also ensure that only valid flags
- // contribute to the encoding.
- static const auto maxValidFlags = RegexMatchExpression::kValidRegexFlags.size();
- std::set<char> flags;
- for (auto it = first; it != last && flags.size() < maxValidFlags; ++it) {
- auto inserter = std::inserter(flags, flags.begin());
- std::copy_if((*it)->getFlags().begin(), (*it)->getFlags().end(), inserter, [](auto flag) {
- return RegexMatchExpression::kValidRegexFlags.count(flag);
- });
- }
- if (!flags.empty()) {
- *keyBuilder << kEncodeRegexFlagsSeparator;
- for (const auto& flag : flags) {
- invariant(RegexMatchExpression::kValidRegexFlags.count(flag));
- encodeUserString(StringData(&flag, 1), keyBuilder);
- }
- *keyBuilder << kEncodeRegexFlagsSeparator;
- }
-}
+} // namespace
-// Helper overload to prepare a vector of unique_ptrs for the heavy-lifting function above.
-void encodeRegexFlagsForMatch(const std::vector<std::unique_ptr<RegexMatchExpression>>& regexes,
- StringBuilder* keyBuilder) {
- const auto transformFunc = [](const auto& regex) { return regex.get(); };
- encodeRegexFlagsForMatch(boost::make_transform_iterator(regexes.begin(), transformFunc),
- boost::make_transform_iterator(regexes.end(), transformFunc),
- keyBuilder);
+std::ostream& operator<<(std::ostream& stream, const PlanCacheKey& key) {
+ stream << key.stringData();
+ return stream;
}
-// Helper that passes a range covering the entire source set into the heavy-lifting function above.
-void encodeRegexFlagsForMatch(const std::vector<const RegexMatchExpression*>& regexes,
- StringBuilder* keyBuilder) {
- encodeRegexFlagsForMatch(regexes.begin(), regexes.end(), keyBuilder);
+
+StringBuilder& operator<<(StringBuilder& builder, const PlanCacheKey& key) {
+ builder << key.stringData();
+ return builder;
}
-} // namespace
//
// Cache-related functions for CanonicalQuery
@@ -469,8 +188,12 @@ CachedSolution::~CachedSolution() {
PlanCacheEntry::PlanCacheEntry(const std::vector<QuerySolution*>& solutions,
PlanRankingDecision* why,
- uint32_t queryHash)
- : plannerData(solutions.size()), queryHash(queryHash), decision(why) {
+ uint32_t queryHash,
+ uint32_t planCacheKey)
+ : plannerData(solutions.size()),
+ queryHash(queryHash),
+ planCacheKey(planCacheKey),
+ decision(why) {
invariant(why);
// The caller of this constructor is responsible for ensuring
@@ -497,8 +220,11 @@ PlanCacheEntry* PlanCacheEntry::clone() const {
qs->cacheData.reset(plannerData[i]->clone());
solutions.push_back(std::move(qs));
}
- PlanCacheEntry* entry = new PlanCacheEntry(
- transitional_tools_do_not_use::unspool_vector(solutions), decision->clone(), queryHash);
+ PlanCacheEntry* entry =
+ new PlanCacheEntry(transitional_tools_do_not_use::unspool_vector(solutions),
+ decision->clone(),
+ queryHash,
+ planCacheKey);
// Copy query shape.
entry->query = query.getOwned();
@@ -647,141 +373,6 @@ std::unique_ptr<CachedSolution> PlanCache::getCacheEntryIfActive(const PlanCache
}
/**
- * Traverses expression tree pre-order.
- * Appends an encoding of each node's match type and path name
- * to the output stream.
- */
-void PlanCache::encodeKeyForMatch(const MatchExpression* tree, StringBuilder* keyBuilder) const {
- // Encode match type and path.
- *keyBuilder << encodeMatchType(tree->matchType());
-
- encodeUserString(tree->path(), keyBuilder);
-
- // GEO and GEO_NEAR require additional encoding.
- if (MatchExpression::GEO == tree->matchType()) {
- encodeGeoMatchExpression(static_cast<const GeoMatchExpression*>(tree), keyBuilder);
- } else if (MatchExpression::GEO_NEAR == tree->matchType()) {
- encodeGeoNearMatchExpression(static_cast<const GeoNearMatchExpression*>(tree), keyBuilder);
- }
-
- // We encode regular expression flags such that different options produce different shapes.
- if (MatchExpression::REGEX == tree->matchType()) {
- encodeRegexFlagsForMatch({static_cast<const RegexMatchExpression*>(tree)}, keyBuilder);
- } else if (MatchExpression::MATCH_IN == tree->matchType()) {
- const auto* inMatch = static_cast<const InMatchExpression*>(tree);
- if (!inMatch->getRegexes().empty()) {
- // Append '_re' to distinguish an $in without regexes from an $in with regexes.
- encodeUserString("_re"_sd, keyBuilder);
- encodeRegexFlagsForMatch(inMatch->getRegexes(), keyBuilder);
- }
- }
-
- encodeIndexability(tree, _indexabilityState, keyBuilder);
-
- // Traverse child nodes.
- // Enclose children in [].
- if (tree->numChildren() > 0) {
- *keyBuilder << kEncodeChildrenBegin;
- }
- // Use comma to separate children encoding.
- for (size_t i = 0; i < tree->numChildren(); ++i) {
- if (i > 0) {
- *keyBuilder << kEncodeChildrenSeparator;
- }
- encodeKeyForMatch(tree->getChild(i), keyBuilder);
- }
-
- if (tree->numChildren() > 0) {
- *keyBuilder << kEncodeChildrenEnd;
- }
-}
-
-/**
- * Encodes sort order into cache key.
- * Sort order is normalized because it provided by
- * QueryRequest.
- */
-void PlanCache::encodeKeyForSort(const BSONObj& sortObj, StringBuilder* keyBuilder) const {
- if (sortObj.isEmpty()) {
- return;
- }
-
- *keyBuilder << kEncodeSortSection;
-
- BSONObjIterator it(sortObj);
- while (it.more()) {
- BSONElement elt = it.next();
- // $meta text score
- if (QueryRequest::isTextScoreMeta(elt)) {
- *keyBuilder << "t";
- }
- // Ascending
- else if (elt.numberInt() == 1) {
- *keyBuilder << "a";
- }
- // Descending
- else {
- *keyBuilder << "d";
- }
- encodeUserString(elt.fieldName(), keyBuilder);
-
- // Sort argument separator
- if (it.more()) {
- *keyBuilder << ",";
- }
- }
-}
-
-/**
- * Encodes parsed projection into cache key.
- * Does a simple toString() on each projected field
- * in the BSON object.
- * Orders the encoded elements in the projection by field name.
- * This handles all the special projection types ($meta, $elemMatch, etc.)
- */
-void PlanCache::encodeKeyForProj(const BSONObj& projObj, StringBuilder* keyBuilder) const {
- // Sorts the BSON elements by field name using a map.
- std::map<StringData, BSONElement> elements;
-
- BSONObjIterator it(projObj);
- while (it.more()) {
- BSONElement elt = it.next();
- StringData fieldName = elt.fieldNameStringData();
-
- // Internal callers may add $-prefixed fields to the projection. These are not part of a
- // user query, and therefore are not considered part of the cache key.
- if (fieldName[0] == '$') {
- continue;
- }
-
- elements[fieldName] = elt;
- }
-
- if (!elements.empty()) {
- *keyBuilder << kEncodeProjectionSection;
- }
-
- // Read elements in order of field name
- for (std::map<StringData, BSONElement>::const_iterator i = elements.begin();
- i != elements.end();
- ++i) {
- const BSONElement& elt = (*i).second;
-
- if (elt.type() != BSONType::Object) {
- // For inclusion/exclusion projections, we encode as "i" or "e".
- *keyBuilder << (elt.trueValue() ? "i" : "e");
- } else {
- // For projection operators, we use the verbatim string encoding of the element.
- encodeUserString(elt.toString(false, // includeFieldName
- false), // full
- keyBuilder);
- }
-
- encodeUserString(elt.fieldName(), keyBuilder);
- }
-}
-
-/**
* Given a query, and an (optional) current cache entry for its shape ('oldEntry'), determine
* whether:
* - We should create a new entry
@@ -789,14 +380,15 @@ void PlanCache::encodeKeyForProj(const BSONObj& projObj, StringBuilder* keyBuild
*/
PlanCache::NewEntryState PlanCache::getNewEntryState(const CanonicalQuery& query,
uint32_t queryHash,
+ uint32_t planCacheKey,
PlanCacheEntry* oldEntry,
size_t newWorks,
double growthCoefficient) {
NewEntryState res;
if (!oldEntry) {
LOG(1) << "Creating inactive cache entry for query shape " << redact(query.toStringShort())
- << " and queryHash " << unsignedIntToFixedLengthHex(queryHash)
- << " with works value " << newWorks;
+ << " queryHash " << unsignedIntToFixedLengthHex(queryHash) << " planCacheKey "
+ << unsignedIntToFixedLengthHex(planCacheKey) << " with works value " << newWorks;
res.shouldBeCreated = true;
res.shouldBeActive = false;
return res;
@@ -807,15 +399,17 @@ PlanCache::NewEntryState PlanCache::getNewEntryState(const CanonicalQuery& query
// occur if many MultiPlanners are run simultaneously.
LOG(1) << "Replacing active cache entry for query " << redact(query.toStringShort())
- << " and queryHash " << unsignedIntToFixedLengthHex(queryHash) << " with works "
- << oldEntry->works << " with a plan with works " << newWorks;
+ << " queryHash " << unsignedIntToFixedLengthHex(queryHash) << " planCacheKey "
+ << unsignedIntToFixedLengthHex(planCacheKey) << " with works " << oldEntry->works
+ << " with a plan with works " << newWorks;
res.shouldBeCreated = true;
res.shouldBeActive = true;
} else if (oldEntry->isActive) {
LOG(1) << "Attempt to write to the planCache for query " << redact(query.toStringShort())
- << " and queryHash " << unsignedIntToFixedLengthHex(queryHash)
- << " with a plan with works " << newWorks
- << " is a noop, since there's already a plan with works value " << oldEntry->works;
+ << " queryHash " << unsignedIntToFixedLengthHex(queryHash) << " planCacheKey "
+ << unsignedIntToFixedLengthHex(planCacheKey) << " with a plan with works "
+ << newWorks << " is a noop, since there's already a plan with works value "
+ << oldEntry->works;
// There is already an active cache entry with a higher works value.
// We do nothing.
res.shouldBeCreated = false;
@@ -832,8 +426,9 @@ PlanCache::NewEntryState PlanCache::getNewEntryState(const CanonicalQuery& query
oldEntry->works + 1u, static_cast<size_t>(oldEntry->works * growthCoefficient));
LOG(1) << "Increasing work value associated with cache entry for query "
- << redact(query.toStringShort()) << " and queryHash "
- << unsignedIntToFixedLengthHex(queryHash) << " from " << oldEntry->works << " to "
+ << redact(query.toStringShort()) << " queryHash "
+ << unsignedIntToFixedLengthHex(queryHash) << " planCacheKey "
+ << unsignedIntToFixedLengthHex(planCacheKey) << " from " << oldEntry->works << " to "
<< increasedWorks;
oldEntry->works = increasedWorks;
@@ -844,9 +439,9 @@ PlanCache::NewEntryState PlanCache::getNewEntryState(const CanonicalQuery& query
// inactive entry's works. We use this as an indicator that it's safe to
// cache (as an active entry) the plan this query used for the future.
LOG(1) << "Inactive cache entry for query " << redact(query.toStringShort())
- << " and queryHash " << unsignedIntToFixedLengthHex(queryHash) << " with works "
- << oldEntry->works << " is being promoted to active entry with works value "
- << newWorks;
+ << " queryHash " << unsignedIntToFixedLengthHex(queryHash) << " planCacheKey "
+ << unsignedIntToFixedLengthHex(planCacheKey) << " with works " << oldEntry->works
+ << " is being promoted to active entry with works value " << newWorks;
// We'll replace the old inactive entry with an active entry.
res.shouldBeCreated = true;
res.shouldBeActive = true;
@@ -885,23 +480,28 @@ Status PlanCache::set(const CanonicalQuery& query,
stdx::lock_guard<stdx::mutex> cacheLock(_cacheMutex);
bool isNewEntryActive = false;
uint32_t queryHash;
+ uint32_t planCacheKey;
if (internalQueryCacheDisableInactiveEntries.load()) {
// All entries are always active.
isNewEntryActive = true;
- queryHash = PlanCache::computeQueryHash(key);
+ planCacheKey = canonical_query_encoder::computeHash(key.stringData());
+ queryHash = canonical_query_encoder::computeHash(key.getStableKeyStringData());
} else {
PlanCacheEntry* oldEntry = nullptr;
Status cacheStatus = _cache.get(key, &oldEntry);
invariant(cacheStatus.isOK() || cacheStatus == ErrorCodes::NoSuchKey);
if (oldEntry) {
queryHash = oldEntry->queryHash;
+ planCacheKey = oldEntry->planCacheKey;
} else {
- queryHash = PlanCache::computeQueryHash(key);
+ planCacheKey = canonical_query_encoder::computeHash(key.stringData());
+ queryHash = canonical_query_encoder::computeHash(key.getStableKeyStringData());
}
- auto newState = getNewEntryState(
+ const auto newState = getNewEntryState(
query,
queryHash,
+ planCacheKey,
oldEntry,
newWorks,
worksGrowthCoefficient.get_value_or(internalQueryCacheWorksGrowthCoefficient));
@@ -912,7 +512,7 @@ Status PlanCache::set(const CanonicalQuery& query,
isNewEntryActive = newState.shouldBeActive;
}
- auto newEntry = std::make_unique<PlanCacheEntry>(solns, why.release(), queryHash);
+ auto newEntry = std::make_unique<PlanCacheEntry>(solns, why.release(), queryHash, planCacheKey);
const QueryRequest& qr = query.getQueryRequest();
newEntry->query = qr.getFilter().getOwned();
newEntry->sort = qr.getSort().getOwned();
@@ -1012,15 +612,11 @@ void PlanCache::clear() {
}
PlanCacheKey PlanCache::computeKey(const CanonicalQuery& cq) const {
- StringBuilder keyBuilder;
- encodeKeyForMatch(cq.root(), &keyBuilder);
- encodeKeyForSort(cq.getQueryRequest().getSort(), &keyBuilder);
- encodeKeyForProj(cq.getQueryRequest().getProj(), &keyBuilder);
- return keyBuilder.str();
-}
+ const auto shapeString = cq.encodeKey();
-uint32_t PlanCache::computeQueryHash(const PlanCacheKey& key) {
- return SimpleStringDataComparator::kInstance.hash(key);
+ StringBuilder indexabilityKeyBuilder;
+ encodeIndexability(cq.root(), _indexabilityState, &indexabilityKeyBuilder);
+ return PlanCacheKey(std::move(shapeString), indexabilityKeyBuilder.str());
}
StatusWith<std::unique_ptr<PlanCacheEntry>> PlanCache::getEntry(const CanonicalQuery& query) const {
diff --git a/src/mongo/db/query/plan_cache.h b/src/mongo/db/query/plan_cache.h
index d88e269bebd..99f88ed23a9 100644
--- a/src/mongo/db/query/plan_cache.h
+++ b/src/mongo/db/query/plan_cache.h
@@ -44,8 +44,69 @@
namespace mongo {
-// A PlanCacheKey is a string-ified version of a query's predicate/projection/sort.
-typedef std::string PlanCacheKey;
+/**
+ * Represents the "key" used in the PlanCache mapping from query shape -> query plan.
+ */
+class PlanCacheKey {
+public:
+ PlanCacheKey(CanonicalQuery::QueryShapeString shapeString, std::string indexabilityString) {
+ _lengthOfStablePart = shapeString.size();
+ _key = std::move(shapeString);
+ _key += indexabilityString;
+ }
+
+ CanonicalQuery::QueryShapeString getStableKey() const {
+ return std::string(_key, 0, _lengthOfStablePart);
+ }
+
+ StringData getStableKeyStringData() const {
+ return StringData(_key.c_str(), _lengthOfStablePart);
+ }
+
+ /**
+ * Return the "unstable" portion of the key, which may vary across catalog changes.
+ */
+ StringData getUnstablePart() const {
+ return StringData(_key.c_str() + _lengthOfStablePart, _key.size() - _lengthOfStablePart);
+ }
+
+ StringData stringData() const {
+ return _key;
+ }
+
+ const std::string& toString() const {
+ return _key;
+ }
+
+ bool operator==(const PlanCacheKey& other) const {
+ return other._key == _key && other._lengthOfStablePart == _lengthOfStablePart;
+ }
+
+ bool operator!=(const PlanCacheKey& other) const {
+ return !(*this == other);
+ }
+
+private:
+ // Key is broken into two parts:
+ // <stable key> | <indexability discriminators>
+ // Combined, the two parts make up the plan cache key. We store them in one std::string so that
+ // we can easily/cheaply extract the stable key.
+ std::string _key;
+
+ // How long the "stable key" is.
+ size_t _lengthOfStablePart;
+};
+
+std::ostream& operator<<(std::ostream& stream, const PlanCacheKey& key);
+StringBuilder& operator<<(StringBuilder& builder, const PlanCacheKey& key);
+
+class PlanCacheKeyHasher {
+public:
+ std::size_t operator()(const PlanCacheKey& k) const {
+ return std::hash<std::string>{}(k.toString());
+ }
+};
+
struct PlanRankingDecision;
struct QuerySolution;
@@ -225,7 +286,8 @@ public:
*/
PlanCacheEntry(const std::vector<QuerySolution*>& solutions,
PlanRankingDecision* why,
- uint32_t queryHash);
+ uint32_t queryHash,
+ uint32_t planCacheKey);
~PlanCacheEntry();
@@ -261,6 +323,9 @@ public:
// diagnostic output.
uint32_t queryHash;
+ // Hash of the "stable" PlanCacheKey, which is the same regardless of what indexes are around.
+ uint32_t planCacheKey;
+
//
// Performance stats
//
@@ -427,12 +492,6 @@ public:
PlanCacheKey computeKey(const CanonicalQuery&) const;
/**
- * Returns a hash of the plan cache key. This hash may not be stable between different versions
- * of the server.
- */
- static uint32_t computeQueryHash(const PlanCacheKey& key);
-
- /**
* Returns a copy of a cache entry.
* Used by planCacheListPlans to display plan details.
*
@@ -480,15 +539,12 @@ private:
NewEntryState getNewEntryState(const CanonicalQuery& query,
uint32_t queryHash,
+ uint32_t planCacheKey,
PlanCacheEntry* oldEntry,
size_t newWorks,
double growthCoefficient);
- void encodeKeyForMatch(const MatchExpression* tree, StringBuilder* keyBuilder) const;
- void encodeKeyForSort(const BSONObj& sortObj, StringBuilder* keyBuilder) const;
- void encodeKeyForProj(const BSONObj& projObj, StringBuilder* keyBuilder) const;
-
- LRUKeyValue<PlanCacheKey, PlanCacheEntry> _cache;
+ LRUKeyValue<PlanCacheKey, PlanCacheEntry, PlanCacheKeyHasher> _cache;
// Protects _cache.
mutable stdx::mutex _cacheMutex;
diff --git a/src/mongo/db/query/plan_cache_test.cpp b/src/mongo/db/query/plan_cache_test.cpp
index 1bca73d27cb..dbc5b13dec4 100644
--- a/src/mongo/db/query/plan_cache_test.cpp
+++ b/src/mongo/db/query/plan_cache_test.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/json.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
+#include "mongo/db/query/canonical_query_encoder.h"
#include "mongo/db/query/collation/collator_interface_mock.h"
#include "mongo/db/query/plan_ranker.h"
#include "mongo/db/query/query_knobs.h"
@@ -196,6 +197,17 @@ unique_ptr<CanonicalQuery> canonicalize(const char* queryStr,
}
/**
+ * Check that the stable keys of 'a' and 'b' are equal, but the unstable parts are not.
+ */
+void assertPlanCacheKeysUnequalDueToDiscriminators(const PlanCacheKey& a, const PlanCacheKey& b) {
+ ASSERT_EQ(a.getStableKeyStringData(), b.getStableKeyStringData());
+ ASSERT_EQ(a.getUnstablePart().size(), b.getUnstablePart().size());
+ // Should always have the begin and end delimiters.
+ ASSERT_NE(a.getUnstablePart(), b.getUnstablePart());
+ ASSERT_GTE(a.getUnstablePart().size(), 2u);
+}
+
+/**
* Utility function to create MatchExpression
*/
unique_ptr<MatchExpression> parseMatchExpression(const BSONObj& obj) {
@@ -1080,8 +1092,9 @@ protected:
std::vector<QuerySolution*> solutions;
solutions.push_back(&qs);
- uint32_t queryHash = PlanCache::computeQueryHash(ck);
- PlanCacheEntry entry(solutions, createDecision(1U).release(), queryHash);
+ uint32_t queryHash = canonical_query_encoder::computeHash(ck.stringData());
+ uint32_t planCacheKey = queryHash;
+ PlanCacheEntry entry(solutions, createDecision(1U).release(), queryHash, planCacheKey);
CachedSolution cachedSoln(ck, entry);
auto statusWithQs = QueryPlanner::planFromCache(*scopedCq, params, cachedSoln);
@@ -1176,7 +1189,8 @@ protected:
std::vector<std::unique_ptr<QuerySolution>> solns;
};
-const PlanCacheKey CachePlanSelectionTest::ck = "mock_cache_key";
+const std::string mockKey("mock_cache_key");
+const PlanCacheKey CachePlanSelectionTest::ck(mockKey, "");
//
// Equality
@@ -1745,200 +1759,6 @@ TEST_F(CachePlanSelectionTest, ContainedOrAndIntersection) {
"]}}}}");
}
-/**
- * Test functions for computeKey. Cache keys are intentionally obfuscated and are
- * meaningful only within the current lifetime of the server process. Users should treat plan
- * cache keys as opaque.
- */
-void testComputeKey(BSONObj query, BSONObj sort, BSONObj proj, const char* expectedStr) {
- PlanCache planCache;
- BSONObj collation;
- unique_ptr<CanonicalQuery> cq(canonicalize(query, sort, proj, collation));
- PlanCacheKey key = planCache.computeKey(*cq);
- PlanCacheKey expectedKey(expectedStr);
- if (key == expectedKey) {
- return;
- }
- str::stream ss;
- ss << "Unexpected plan cache key. Expected: " << expectedKey << ". Actual: " << key
- << ". Query: " << cq->toString();
- FAIL(ss);
-}
-
-void testComputeKey(const char* queryStr,
- const char* sortStr,
- const char* projStr,
- const char* expectedStr) {
- testComputeKey(fromjson(queryStr), fromjson(sortStr), fromjson(projStr), expectedStr);
-}
-
-TEST(PlanCacheTest, ComputeKey) {
- // Generated cache keys should be treated as opaque to the user.
-
- // No sorts
- testComputeKey("{}", "{}", "{}", "an");
- testComputeKey("{$or: [{a: 1}, {b: 2}]}", "{}", "{}", "or[eqa,eqb]");
- testComputeKey("{$or: [{a: 1}, {b: 1}, {c: 1}], d: 1}", "{}", "{}", "an[or[eqa,eqb,eqc],eqd]");
- testComputeKey("{$or: [{a: 1}, {b: 1}], c: 1, d: 1}", "{}", "{}", "an[or[eqa,eqb],eqc,eqd]");
- testComputeKey("{a: 1, b: 1, c: 1}", "{}", "{}", "an[eqa,eqb,eqc]");
- testComputeKey("{a: 1, beqc: 1}", "{}", "{}", "an[eqa,eqbeqc]");
- testComputeKey("{ap1a: 1}", "{}", "{}", "eqap1a");
- testComputeKey("{aab: 1}", "{}", "{}", "eqaab");
-
- // With sort
- testComputeKey("{}", "{a: 1}", "{}", "an~aa");
- testComputeKey("{}", "{a: -1}", "{}", "an~da");
- testComputeKey("{}",
- "{a: {$meta: 'textScore'}}",
- "{a: {$meta: 'textScore'}}",
- "an~ta|{ $meta: \"textScore\" }a");
- testComputeKey("{a: 1}", "{b: 1}", "{}", "eqa~ab");
-
- // With projection
- testComputeKey("{}", "{}", "{a: 1}", "an|ia");
- testComputeKey("{}", "{}", "{a: -1}", "an|ia");
- testComputeKey("{}", "{}", "{a: -1.0}", "an|ia");
- testComputeKey("{}", "{}", "{a: true}", "an|ia");
- testComputeKey("{}", "{}", "{a: 0}", "an|ea");
- testComputeKey("{}", "{}", "{a: false}", "an|ea");
- testComputeKey("{}", "{}", "{a: 99}", "an|ia");
- testComputeKey("{}", "{}", "{a: 'foo'}", "an|ia");
- testComputeKey("{}", "{}", "{a: {$slice: [3, 5]}}", "an|{ $slice: \\[ 3\\, 5 \\] }a");
- testComputeKey("{}", "{}", "{a: {$elemMatch: {x: 2}}}", "an|{ $elemMatch: { x: 2 } }a");
- testComputeKey("{}", "{}", "{a: ObjectId('507f191e810c19729de860ea')}", "an|ia");
- testComputeKey("{a: 1}", "{}", "{'a.$': 1}", "eqa|ia.$");
- testComputeKey("{a: 1}", "{}", "{a: 1}", "eqa|ia");
-
- // Projection should be order-insensitive
- testComputeKey("{}", "{}", "{a: 1, b: 1}", "an|iaib");
- testComputeKey("{}", "{}", "{b: 1, a: 1}", "an|iaib");
-
- // With or-elimination and projection
- testComputeKey("{$or: [{a: 1}]}", "{}", "{_id: 0, a: 1}", "eqa|e_idia");
- testComputeKey("{$or: [{a: 1}]}", "{}", "{'a.$': 1}", "eqa|ia.$");
-}
-
-// Delimiters found in user field names or non-standard projection field values
-// must be escaped.
-TEST(PlanCacheTest, ComputeKeyEscaped) {
- // Field name in query.
- testComputeKey("{'a,[]~|<>': 1}", "{}", "{}", "eqa\\,\\[\\]\\~\\|\\<\\>");
-
- // Field name in sort.
- testComputeKey("{}", "{'a,[]~|<>': 1}", "{}", "an~aa\\,\\[\\]\\~\\|\\<\\>");
-
- // Field name in projection.
- testComputeKey("{}", "{}", "{'a,[]~|<>': 1}", "an|ia\\,\\[\\]\\~\\|\\<\\>");
-
- // Value in projection.
- testComputeKey("{}", "{}", "{a: 'foo,[]~|<>'}", "an|ia");
-}
-
-// Cache keys for $geoWithin queries with legacy and GeoJSON coordinates should
-// not be the same.
-TEST(PlanCacheTest, ComputeKeyGeoWithin) {
- PlanCache planCache;
-
- // Legacy coordinates.
- unique_ptr<CanonicalQuery> cqLegacy(
- canonicalize("{a: {$geoWithin: "
- "{$box: [[-180, -90], [180, 90]]}}}"));
- // GeoJSON coordinates.
- unique_ptr<CanonicalQuery> cqNew(
- canonicalize("{a: {$geoWithin: "
- "{$geometry: {type: 'Polygon', coordinates: "
- "[[[0, 0], [0, 90], [90, 0], [0, 0]]]}}}}"));
- ASSERT_NOT_EQUALS(planCache.computeKey(*cqLegacy), planCache.computeKey(*cqNew));
-}
-
-// GEO_NEAR cache keys should include information on geometry and CRS in addition
-// to the match type and field name.
-TEST(PlanCacheTest, ComputeKeyGeoNear) {
- testComputeKey("{a: {$near: [0,0], $maxDistance:0.3 }}", "{}", "{}", "gnanrfl");
- testComputeKey("{a: {$nearSphere: [0,0], $maxDistance: 0.31 }}", "{}", "{}", "gnanssp");
- testComputeKey(
- "{a: {$geoNear: {$geometry: {type: 'Point', coordinates: [0,0]},"
- "$maxDistance:100}}}",
- "{}",
- "{}",
- "gnanrsp");
-}
-
-TEST(PlanCacheTest, ComputeKeyRegexDependsOnFlags) {
- testComputeKey("{a: {$regex: \"sometext\"}}", "{}", "{}", "rea");
- testComputeKey("{a: {$regex: \"sometext\", $options: \"\"}}", "{}", "{}", "rea");
-
- testComputeKey("{a: {$regex: \"sometext\", $options: \"s\"}}", "{}", "{}", "rea/s/");
- testComputeKey("{a: {$regex: \"sometext\", $options: \"ms\"}}", "{}", "{}", "rea/ms/");
-
- // Test that the ordering of $options doesn't matter.
- testComputeKey("{a: {$regex: \"sometext\", $options: \"im\"}}", "{}", "{}", "rea/im/");
- testComputeKey("{a: {$regex: \"sometext\", $options: \"mi\"}}", "{}", "{}", "rea/im/");
-
- // Test that only the options affect the key. Two regex match expressions with the same options
- // but different $regex values should have the same shape.
- testComputeKey("{a: {$regex: \"abc\", $options: \"mi\"}}", "{}", "{}", "rea/im/");
- testComputeKey("{a: {$regex: \"efg\", $options: \"mi\"}}", "{}", "{}", "rea/im/");
-
- testComputeKey("{a: {$regex: \"\", $options: \"ms\"}}", "{}", "{}", "rea/ms/");
- testComputeKey("{a: {$regex: \"___\", $options: \"ms\"}}", "{}", "{}", "rea/ms/");
-
- // Test that only valid regex flags contribute to the plan cache key encoding.
- testComputeKey(BSON("a" << BSON("$regex"
- << "abc"
- << "$options"
- << "abcdefghijklmnopqrstuvwxyz")),
- {},
- {},
- "rea/imsx/");
- testComputeKey("{a: /abc/gim}", "{}", "{}", "rea/im/");
-}
-
-TEST(PlanCacheTest, ComputeKeyMatchInDependsOnPresenceOfRegexAndFlags) {
- // Test that an $in containing a single regex is unwrapped to $regex.
- testComputeKey("{a: {$in: [/foo/]}}", "{}", "{}", "rea");
- testComputeKey("{a: {$in: [/foo/i]}}", "{}", "{}", "rea/i/");
-
- // Test that an $in with no regexes does not include any regex information.
- testComputeKey("{a: {$in: [1, 'foo']}}", "{}", "{}", "ina");
-
- // Test that an $in with a regex encodes the presence of the regex.
- testComputeKey("{a: {$in: [1, /foo/]}}", "{}", "{}", "ina_re");
-
- // Test that an $in with a regex encodes the presence of the regex and its flags.
- testComputeKey("{a: {$in: [1, /foo/is]}}", "{}", "{}", "ina_re/is/");
-
- // Test that the computed key is invariant to the order of the flags within each regex.
- testComputeKey("{a: {$in: [1, /foo/si]}}", "{}", "{}", "ina_re/is/");
-
- // Test that an $in with multiple regexes encodes all unique flags.
- testComputeKey("{a: {$in: [1, /foo/i, /bar/m, /baz/s]}}", "{}", "{}", "ina_re/ims/");
-
- // Test that an $in with multiple regexes deduplicates identical flags.
- testComputeKey(
- "{a: {$in: [1, /foo/i, /bar/m, /baz/s, /qux/i, /quux/s]}}", "{}", "{}", "ina_re/ims/");
-
- // Test that the computed key is invariant to the ordering of the flags across regexes.
- testComputeKey("{a: {$in: [1, /foo/ism, /bar/msi, /baz/im, /qux/si, /quux/im]}}",
- "{}",
- "{}",
- "ina_re/ims/");
- testComputeKey("{a: {$in: [1, /foo/msi, /bar/ism, /baz/is, /qux/mi, /quux/im]}}",
- "{}",
- "{}",
- "ina_re/ims/");
-
- // Test that $not-$in-$regex similarly records the presence and flags of any regexes.
- testComputeKey("{a: {$not: {$in: [1, 'foo']}}}", "{}", "{}", "nt[ina]");
- testComputeKey("{a: {$not: {$in: [1, /foo/]}}}", "{}", "{}", "nt[ina_re]");
- testComputeKey(
- "{a: {$not: {$in: [1, /foo/i, /bar/i, /baz/msi]}}}", "{}", "{}", "nt[ina_re/ims/]");
-
- // Test that a $not-$in containing a single regex is unwrapped to $not-$regex.
- testComputeKey("{a: {$not: {$in: [/foo/]}}}", "{}", "{}", "nt[rea]");
- testComputeKey("{a: {$not: {$in: [/foo/i]}}}", "{}", "{}", "nt[rea/i/]");
-}
-
// When a sparse index is present, computeKey() should generate different keys depending on
// whether or not the predicates in the given query can use the index.
TEST(PlanCacheTest, ComputeKeySparseIndex) {
@@ -1957,10 +1777,16 @@ TEST(PlanCacheTest, ComputeKeySparseIndex) {
// 'cqEqNumber' and 'cqEqString' get the same key, since both are compatible with this
// index.
- ASSERT_EQ(planCache.computeKey(*cqEqNumber), planCache.computeKey(*cqEqString));
+ const auto eqNumberKey = planCache.computeKey(*cqEqNumber);
+ const auto eqStringKey = planCache.computeKey(*cqEqString);
+ ASSERT_EQ(eqNumberKey, eqStringKey);
// 'cqEqNull' gets a different key, since it is not compatible with this index.
- ASSERT_NOT_EQUALS(planCache.computeKey(*cqEqNull), planCache.computeKey(*cqEqNumber));
+ const auto eqNullKey = planCache.computeKey(*cqEqNull);
+ ASSERT_NOT_EQUALS(eqNullKey, eqNumberKey);
+
+ assertPlanCacheKeysUnequalDueToDiscriminators(eqNullKey, eqNumberKey);
+ assertPlanCacheKeysUnequalDueToDiscriminators(eqNullKey, eqStringKey);
}
// When a partial index is present, computeKey() should generate different keys depending on
@@ -1987,7 +1813,8 @@ TEST(PlanCacheTest, ComputeKeyPartialIndex) {
ASSERT_EQ(planCache.computeKey(*cqGtZero), planCache.computeKey(*cqGtFive));
// 'cqGtNegativeFive' gets a different key, since it is not compatible with this index.
- ASSERT_NOT_EQUALS(planCache.computeKey(*cqGtNegativeFive), planCache.computeKey(*cqGtZero));
+ assertPlanCacheKeysUnequalDueToDiscriminators(planCache.computeKey(*cqGtNegativeFive),
+ planCache.computeKey(*cqGtZero));
}
// Query shapes should get the same plan cache key if they have the same collation indexability.
@@ -2018,11 +1845,20 @@ TEST(PlanCacheTest, ComputeKeyCollationIndex) {
ASSERT_EQ(planCache.computeKey(*containsString), planCache.computeKey(*containsArray));
// 'noStrings' gets a different key since it is compatible with the index.
- ASSERT_NOT_EQUALS(planCache.computeKey(*containsString), planCache.computeKey(*noStrings));
-
- // 'noStrings' and 'containsStringHasCollation' get the same key since they compatible with the
- // index.
- ASSERT_EQ(planCache.computeKey(*noStrings), planCache.computeKey(*containsStringHasCollation));
+ assertPlanCacheKeysUnequalDueToDiscriminators(planCache.computeKey(*containsString),
+ planCache.computeKey(*noStrings));
+ ASSERT_EQ(planCache.computeKey(*containsString).getUnstablePart(), "<0>");
+ ASSERT_EQ(planCache.computeKey(*noStrings).getUnstablePart(), "<1>");
+
+ // 'noStrings' and 'containsStringHasCollation' get different keys, since the collation
+ // specified in the query is considered part of its shape. However, they have the same index
+ // compatibility, so the unstable part of their PlanCacheKeys should be the same.
+ PlanCacheKey noStringKey = planCache.computeKey(*noStrings);
+ PlanCacheKey withStringAndCollationKey = planCache.computeKey(*containsStringHasCollation);
+ ASSERT_NE(noStringKey, withStringAndCollationKey);
+ ASSERT_EQ(noStringKey.getUnstablePart(), withStringAndCollationKey.getUnstablePart());
+ ASSERT_NE(noStringKey.getStableKeyStringData(),
+ withStringAndCollationKey.getStableKeyStringData());
unique_ptr<CanonicalQuery> inContainsString(canonicalize("{a: {$in: [1, 'abc', 2]}}"));
unique_ptr<CanonicalQuery> inContainsObject(canonicalize("{a: {$in: [1, {b: 'abc'}, 2]}}"));
@@ -2037,12 +1873,17 @@ TEST(PlanCacheTest, ComputeKeyCollationIndex) {
ASSERT_EQ(planCache.computeKey(*inContainsString), planCache.computeKey(*inContainsArray));
// 'inNoStrings' gets a different key since it is compatible with the index.
- ASSERT_NOT_EQUALS(planCache.computeKey(*inContainsString), planCache.computeKey(*inNoStrings));
+ assertPlanCacheKeysUnequalDueToDiscriminators(planCache.computeKey(*inContainsString),
+ planCache.computeKey(*inNoStrings));
+ ASSERT_EQ(planCache.computeKey(*inContainsString).getUnstablePart(), "<0>");
+ ASSERT_EQ(planCache.computeKey(*inNoStrings).getUnstablePart(), "<1>");
// 'inNoStrings' and 'inContainsStringHasCollation' get the same key since they compatible with
// the index.
- ASSERT_EQ(planCache.computeKey(*inNoStrings),
+ ASSERT_NE(planCache.computeKey(*inNoStrings),
planCache.computeKey(*inContainsStringHasCollation));
+ ASSERT_EQ(planCache.computeKey(*inNoStrings).getUnstablePart(),
+ planCache.computeKey(*inContainsStringHasCollation).getUnstablePart());
}
TEST(PlanCacheTest, ComputeKeyWildcardIndex) {
@@ -2073,7 +1914,10 @@ TEST(PlanCacheTest, ComputeKeyWildcardIndex) {
// different keys.
ASSERT_EQ(planCacheWithNoIndexes.computeKey(*usesPathWithScalar),
planCacheWithNoIndexes.computeKey(*usesPathWithObject));
- ASSERT_NE(planCache.computeKey(*usesPathWithScalar), planCache.computeKey(*usesPathWithObject));
+ assertPlanCacheKeysUnequalDueToDiscriminators(planCache.computeKey(*usesPathWithScalar),
+ planCache.computeKey(*usesPathWithObject));
+ ASSERT_EQ(planCache.computeKey(*usesPathWithScalar).getUnstablePart(), "<1>");
+ ASSERT_EQ(planCache.computeKey(*usesPathWithObject).getUnstablePart(), "<0>");
ASSERT_EQ(planCache.computeKey(*usesPathWithObject), planCache.computeKey(*usesPathWithArray));
ASSERT_EQ(planCache.computeKey(*usesPathWithObject),
@@ -2090,14 +1934,19 @@ TEST(PlanCacheTest, ComputeKeyWildcardIndex) {
// More complex queries with similar shapes. This is to ensure that plan cache key encoding
// correctly traverses the expression tree.
- auto orQueryAllowed = canonicalize("{$or: [{a: 3}, {a: {$gt: [1,2]}}]}");
+ auto orQueryWithOneBranchAllowed = canonicalize("{$or: [{a: 3}, {a: {$gt: [1,2]}}]}");
// Same shape except 'a' is compared to an object.
- auto orQueryNotAllowed = canonicalize("{$or: [{a: {someobject: 1}}, {a: {$gt: [1,2]}}]}");
+ auto orQueryWithNoBranchesAllowed =
+ canonicalize("{$or: [{a: {someobject: 1}}, {a: {$gt: [1,2]}}]}");
// The two queries should have the same shape when no indexes are present, but different shapes
// when a $** index is present.
- ASSERT_EQ(planCacheWithNoIndexes.computeKey(*orQueryAllowed),
- planCacheWithNoIndexes.computeKey(*orQueryNotAllowed));
- ASSERT_NE(planCache.computeKey(*orQueryAllowed), planCache.computeKey(*orQueryNotAllowed));
+ ASSERT_EQ(planCacheWithNoIndexes.computeKey(*orQueryWithOneBranchAllowed),
+ planCacheWithNoIndexes.computeKey(*orQueryWithNoBranchesAllowed));
+ assertPlanCacheKeysUnequalDueToDiscriminators(
+ planCache.computeKey(*orQueryWithOneBranchAllowed),
+ planCache.computeKey(*orQueryWithNoBranchesAllowed));
+ ASSERT_EQ(planCache.computeKey(*orQueryWithOneBranchAllowed).getUnstablePart(), "<1><0>");
+ ASSERT_EQ(planCache.computeKey(*orQueryWithNoBranchesAllowed).getUnstablePart(), "<0><0>");
}
TEST(PlanCacheTest, ComputeKeyWildcardIndexDiscriminatesEqualityToEmptyObj) {
@@ -2109,12 +1958,41 @@ TEST(PlanCacheTest, ComputeKeyWildcardIndexDiscriminatesEqualityToEmptyObj) {
// Equality to empty obj and equality to non-empty obj have different plan cache keys.
std::unique_ptr<CanonicalQuery> equalsEmptyObj(canonicalize("{a: {}}"));
std::unique_ptr<CanonicalQuery> equalsNonEmptyObj(canonicalize("{a: {b: 1}}"));
- ASSERT_NE(planCache.computeKey(*equalsEmptyObj), planCache.computeKey(*equalsNonEmptyObj));
+ assertPlanCacheKeysUnequalDueToDiscriminators(planCache.computeKey(*equalsEmptyObj),
+ planCache.computeKey(*equalsNonEmptyObj));
+ ASSERT_EQ(planCache.computeKey(*equalsNonEmptyObj).getUnstablePart(), "<0>");
+ ASSERT_EQ(planCache.computeKey(*equalsEmptyObj).getUnstablePart(), "<1>");
// $in with empty obj and $in with non-empty obj have different plan cache keys.
std::unique_ptr<CanonicalQuery> inWithEmptyObj(canonicalize("{a: {$in: [{}]}}"));
std::unique_ptr<CanonicalQuery> inWithNonEmptyObj(canonicalize("{a: {$in: [{b: 1}]}}"));
- ASSERT_NE(planCache.computeKey(*inWithEmptyObj), planCache.computeKey(*inWithNonEmptyObj));
+ assertPlanCacheKeysUnequalDueToDiscriminators(planCache.computeKey(*inWithEmptyObj),
+ planCache.computeKey(*inWithNonEmptyObj));
+ ASSERT_EQ(planCache.computeKey(*inWithNonEmptyObj).getUnstablePart(), "<0>");
+ ASSERT_EQ(planCache.computeKey(*inWithEmptyObj).getUnstablePart(), "<1>");
+}
+
+TEST(PlanCacheTest, StableKeyDoesNotChangeAcrossIndexCreation) {
+ PlanCache planCache;
+ unique_ptr<CanonicalQuery> cq(canonicalize("{a: 0}}"));
+ const PlanCacheKey preIndexKey = planCache.computeKey(*cq);
+ const auto preIndexStableKey = preIndexKey.getStableKey();
+ ASSERT_EQ(preIndexKey.getUnstablePart(), "");
+
+ // Create a sparse index (which requires a discriminator).
+ planCache.notifyOfIndexEntries({IndexEntry(BSON("a" << 1),
+ false, // multikey
+ true, // sparse
+ false, // unique
+ IndexEntry::Identifier{""}, // name
+ nullptr, // filterExpr
+ BSONObj())});
+
+ const PlanCacheKey postIndexKey = planCache.computeKey(*cq);
+ const auto postIndexStableKey = postIndexKey.getStableKey();
+ ASSERT_NE(preIndexKey, postIndexKey);
+ ASSERT_EQ(preIndexStableKey, postIndexStableKey);
+ ASSERT_EQ(postIndexKey.getUnstablePart(), "<1>");
}
} // namespace
diff --git a/src/mongo/db/query/query_settings.cpp b/src/mongo/db/query/query_settings.cpp
index 5dd51534581..cd3e52c2c49 100644
--- a/src/mongo/db/query/query_settings.cpp
+++ b/src/mongo/db/query/query_settings.cpp
@@ -78,7 +78,7 @@ AllowedIndexEntry::AllowedIndexEntry(const BSONObj& query,
//
boost::optional<AllowedIndicesFilter> QuerySettings::getAllowedIndicesFilter(
- const PlanCacheKey& key) const {
+ const CanonicalQuery::QueryShapeString& key) const {
stdx::lock_guard<stdx::mutex> cacheLock(_mutex);
AllowedIndexEntryMap::const_iterator cacheIter = _allowedIndexEntryMap.find(key);
@@ -100,13 +100,13 @@ std::vector<AllowedIndexEntry> QuerySettings::getAllAllowedIndices() const {
}
void QuerySettings::setAllowedIndices(const CanonicalQuery& canonicalQuery,
- const PlanCacheKey& key,
const BSONObjSet& indexKeyPatterns,
const stdx::unordered_set<std::string>& indexNames) {
const QueryRequest& qr = canonicalQuery.getQueryRequest();
const BSONObj& query = qr.getFilter();
const BSONObj& sort = qr.getSort();
const BSONObj& projection = qr.getProj();
+ const auto key = canonicalQuery.encodeKey();
const BSONObj collation =
canonicalQuery.getCollator() ? canonicalQuery.getCollator()->getSpec().toBSON() : BSONObj();
@@ -118,7 +118,7 @@ void QuerySettings::setAllowedIndices(const CanonicalQuery& canonicalQuery,
std::forward_as_tuple(query, sort, projection, collation, indexKeyPatterns, indexNames));
}
-void QuerySettings::removeAllowedIndices(const PlanCacheKey& key) {
+void QuerySettings::removeAllowedIndices(const CanonicalQuery::QueryShapeString& key) {
stdx::lock_guard<stdx::mutex> cacheLock(_mutex);
AllowedIndexEntryMap::iterator i = _allowedIndexEntryMap.find(key);
diff --git a/src/mongo/db/query/query_settings.h b/src/mongo/db/query/query_settings.h
index 3e69a2ca55f..48db5f7ed83 100644
--- a/src/mongo/db/query/query_settings.h
+++ b/src/mongo/db/query/query_settings.h
@@ -117,7 +117,8 @@ public:
* Returns AllowedIndicesFilter for the query if it is set in the query settings, or
* boost::none if it isn't.
*/
- boost::optional<AllowedIndicesFilter> getAllowedIndicesFilter(const PlanCacheKey& query) const;
+ boost::optional<AllowedIndicesFilter> getAllowedIndicesFilter(
+ const CanonicalQuery::QueryShapeString& query) const;
/**
* Returns copies of all overrides for the collection.
@@ -129,14 +130,13 @@ public:
* If existing entry is found for the same key, replaces it.
*/
void setAllowedIndices(const CanonicalQuery& canonicalQuery,
- const PlanCacheKey& key,
const BSONObjSet& indexKeyPatterns,
const stdx::unordered_set<std::string>& indexNames);
/**
* Removes single entry from query settings. No effect if query shape is not found.
*/
- void removeAllowedIndices(const PlanCacheKey& canonicalQuery);
+ void removeAllowedIndices(const CanonicalQuery::QueryShapeString& canonicalQuery);
/**
* Clears all allowed indices from query settings.
@@ -145,7 +145,8 @@ public:
private:
// Allowed index entries owned here.
- using AllowedIndexEntryMap = stdx::unordered_map<PlanCacheKey, AllowedIndexEntry>;
+ using AllowedIndexEntryMap =
+ stdx::unordered_map<CanonicalQuery::QueryShapeString, AllowedIndexEntry>;
AllowedIndexEntryMap _allowedIndexEntryMap;
/**