summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDrew Paroski <drew.paroski@mongodb.com>2020-07-14 11:19:06 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-07-15 04:44:54 +0000
commit86296e8ffdae40d0b006276895940c87926c88b0 (patch)
tree54038f2fa0174173149db1ef115c50a761198de0
parentf5926f7c0867bf2726313e8c27be1871c6c16ecb (diff)
downloadmongo-86296e8ffdae40d0b006276895940c87926c88b0.tar.gz
SERVER-48736 Fix dependencies between query_exec and index_access_methods
-rw-r--r--src/mongo/db/SConscript2
-rw-r--r--src/mongo/db/commands/haystack.cpp109
-rw-r--r--src/mongo/db/index/SConscript1
-rw-r--r--src/mongo/db/index/haystack_access_method.cpp90
-rw-r--r--src/mongo/db/index/haystack_access_method.h12
-rw-r--r--src/mongo/db/index/index_access_method.h23
-rw-r--r--src/mongo/db/index/wildcard_access_method.cpp197
-rw-r--r--src/mongo/db/index/wildcard_access_method.h30
-rw-r--r--src/mongo/db/query/get_executor.cpp9
-rw-r--r--src/mongo/db/query/wildcard_multikey_paths.cpp239
-rw-r--r--src/mongo/db/query/wildcard_multikey_paths.h70
-rw-r--r--src/mongo/dbtests/wildcard_access_method_test.cpp4
-rw-r--r--src/mongo/dbtests/wildcard_multikey_persistence_test.cpp5
13 files changed, 425 insertions, 366 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 700b5be4105..6c6b8ce2301 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1134,10 +1134,12 @@ env.Library(
'query/sbe_stage_builder_projection.cpp',
'query/sbe_sub_planner.cpp',
'query/stage_builder_util.cpp',
+ 'query/wildcard_multikey_paths.cpp',
'run_op_kill_cursors.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/index/index_access_methods',
'$BUILD_DIR/mongo/s/common_s',
'$BUILD_DIR/mongo/scripting/scripting',
'$BUILD_DIR/mongo/util/background_job',
diff --git a/src/mongo/db/commands/haystack.cpp b/src/mongo/db/commands/haystack.cpp
index 16098313de1..e4af2078f3a 100644
--- a/src/mongo/db/commands/haystack.cpp
+++ b/src/mongo/db/commands/haystack.cpp
@@ -42,16 +42,18 @@
#include "mongo/db/commands.h"
#include "mongo/db/curop.h"
#include "mongo/db/db_raii.h"
+#include "mongo/db/index/expression_keys_private.h"
#include "mongo/db/index/haystack_access_method.h"
+#include "mongo/db/index/haystack_access_method_internal.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index_names.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/query/find_common.h"
+#include "mongo/db/query/internal_plans.h"
#include "mongo/logv2/log.h"
-
/**
* Examines all documents in a given radius of a given point.
* Returns all documents that match a given search restriction.
@@ -106,6 +108,96 @@ public:
out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions));
}
+ void searchHaystack(const HaystackAccessMethod* ham,
+ OperationContext* opCtx,
+ Collection* collection,
+ const BSONObj& nearObj,
+ double maxDistance,
+ const BSONObj& search,
+ BSONObjBuilder* result,
+ unsigned limit) {
+ Timer t;
+
+ LOGV2_DEBUG(20680,
+ 1,
+ "SEARCH near:{nearObj} maxDistance:{maxDistance} search: {search}",
+ "nearObj"_attr = redact(nearObj),
+ "maxDistance"_attr = maxDistance,
+ "search"_attr = redact(search));
+ int x, y;
+ {
+ BSONObjIterator i(nearObj);
+ x = ExpressionKeysPrivate::hashHaystackElement(i.next(), ham->_bucketSize);
+ y = ExpressionKeysPrivate::hashHaystackElement(i.next(), ham->_bucketSize);
+ }
+ int scale = static_cast<int>(ceil(maxDistance / ham->_bucketSize));
+
+ GeoHaystackSearchHopper hopper(
+ opCtx, nearObj, maxDistance, limit, ham->_geoField, collection);
+
+ long long btreeMatches = 0;
+
+ for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) {
+ for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) {
+ BSONObjBuilder bb;
+ bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b));
+
+ for (unsigned i = 0; i < ham->_otherFields.size(); i++) {
+ // See if the non-geo field we're indexing on is in the provided search term.
+ BSONElement e = dps::extractElementAtPath(search, ham->_otherFields[i]);
+ if (e.eoo())
+ bb.appendNull("");
+ else
+ bb.appendAs(e, "");
+ }
+
+ BSONObj key = bb.obj();
+
+ stdx::unordered_set<RecordId, RecordId::Hasher> thisPass;
+
+
+ auto exec = InternalPlanner::indexScan(opCtx,
+ collection,
+ ham->_descriptor,
+ key,
+ key,
+ BoundInclusion::kIncludeBothStartAndEndKeys,
+ PlanYieldPolicy::YieldPolicy::NO_YIELD);
+ PlanExecutor::ExecState state;
+ BSONObj obj;
+ RecordId loc;
+ while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &loc))) {
+ if (hopper.limitReached()) {
+ break;
+ }
+ pair<stdx::unordered_set<RecordId, RecordId::Hasher>::iterator, bool> p =
+ thisPass.insert(loc);
+ // If a new element was inserted (haven't seen the RecordId before), p.second
+ // is true.
+ if (p.second) {
+ hopper.consider(loc);
+ btreeMatches++;
+ }
+ }
+
+ // Non-yielding collection scans from InternalPlanner will never error.
+ invariant(PlanExecutor::ADVANCED == state || PlanExecutor::IS_EOF == state);
+ }
+ }
+
+ BSONArrayBuilder arr(result->subarrayStart("results"));
+ int num = hopper.appendResultsTo(&arr);
+ arr.done();
+
+ {
+ BSONObjBuilder b(result->subobjStart("stats"));
+ b.append("time", t.millis());
+ b.appendNumber("btreeMatches", btreeMatches);
+ b.append("n", num);
+ b.done();
+ }
+ }
+
bool errmsgRun(OperationContext* opCtx,
const string& dbname,
const BSONObj& cmdObj,
@@ -173,13 +265,14 @@ public:
const IndexDescriptor* desc = idxs[0];
auto ham = static_cast<const HaystackAccessMethod*>(
collection->getIndexCatalog()->getEntry(desc)->accessMethod());
- ham->searchCommand(opCtx,
- collection,
- nearElt.Obj(),
- maxDistance.numberDouble(),
- search.Obj(),
- &result,
- limit);
+ searchHaystack(ham,
+ opCtx,
+ collection,
+ nearElt.Obj(),
+ maxDistance.numberDouble(),
+ search.Obj(),
+ &result,
+ limit);
return 1;
}
} nameSearchCommand;
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript
index 922242cb618..078bd212e1b 100644
--- a/src/mongo/db/index/SConscript
+++ b/src/mongo/db/index/SConscript
@@ -158,7 +158,6 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
- '$BUILD_DIR/mongo/db/query_exec',
'index_access_method',
'key_generator',
],
diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp
index 45a9b2a9443..7cd26382182 100644
--- a/src/mongo/db/index/haystack_access_method.cpp
+++ b/src/mongo/db/index/haystack_access_method.cpp
@@ -41,9 +41,7 @@
#include "mongo/db/geo/hash.h"
#include "mongo/db/index/expression_keys_private.h"
#include "mongo/db/index/expression_params.h"
-#include "mongo/db/index/haystack_access_method_internal.h"
#include "mongo/db/jsobj.h"
-#include "mongo/db/query/internal_plans.h"
#include "mongo/logv2/log.h"
namespace mongo {
@@ -82,92 +80,4 @@ void HaystackAccessMethod::doGetKeys(SharedBufferFragmentBuilder& pooledBufferBu
id);
}
-void HaystackAccessMethod::searchCommand(OperationContext* opCtx,
- Collection* collection,
- const BSONObj& nearObj,
- double maxDistance,
- const BSONObj& search,
- BSONObjBuilder* result,
- unsigned limit) const {
- Timer t;
-
- LOGV2_DEBUG(20680,
- 1,
- "SEARCH near:{nearObj} maxDistance:{maxDistance} search: {search}",
- "nearObj"_attr = redact(nearObj),
- "maxDistance"_attr = maxDistance,
- "search"_attr = redact(search));
- int x, y;
- {
- BSONObjIterator i(nearObj);
- x = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize);
- y = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize);
- }
- int scale = static_cast<int>(ceil(maxDistance / _bucketSize));
-
- GeoHaystackSearchHopper hopper(opCtx, nearObj, maxDistance, limit, _geoField, collection);
-
- long long btreeMatches = 0;
-
- for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) {
- for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) {
- BSONObjBuilder bb;
- bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b));
-
- for (unsigned i = 0; i < _otherFields.size(); i++) {
- // See if the non-geo field we're indexing on is in the provided search term.
- BSONElement e = dps::extractElementAtPath(search, _otherFields[i]);
- if (e.eoo())
- bb.appendNull("");
- else
- bb.appendAs(e, "");
- }
-
- BSONObj key = bb.obj();
-
- stdx::unordered_set<RecordId, RecordId::Hasher> thisPass;
-
-
- auto exec = InternalPlanner::indexScan(opCtx,
- collection,
- _descriptor,
- key,
- key,
- BoundInclusion::kIncludeBothStartAndEndKeys,
- PlanYieldPolicy::YieldPolicy::NO_YIELD);
- PlanExecutor::ExecState state;
- BSONObj obj;
- RecordId loc;
- while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &loc))) {
- if (hopper.limitReached()) {
- break;
- }
- pair<stdx::unordered_set<RecordId, RecordId::Hasher>::iterator, bool> p =
- thisPass.insert(loc);
- // If a new element was inserted (haven't seen the RecordId before), p.second
- // is true.
- if (p.second) {
- hopper.consider(loc);
- btreeMatches++;
- }
- }
-
- // Non-yielding collection scans from InternalPlanner will never error.
- invariant(PlanExecutor::ADVANCED == state || PlanExecutor::IS_EOF == state);
- }
- }
-
- BSONArrayBuilder arr(result->subarrayStart("results"));
- int num = hopper.appendResultsTo(&arr);
- arr.done();
-
- {
- BSONObjBuilder b(result->subobjStart("stats"));
- b.append("time", t.millis());
- b.appendNumber("btreeMatches", btreeMatches);
- b.append("n", num);
- b.done();
- }
-}
-
} // namespace mongo
diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h
index f7e92c0ceab..0507887132d 100644
--- a/src/mongo/db/index/haystack_access_method.h
+++ b/src/mongo/db/index/haystack_access_method.h
@@ -59,16 +59,6 @@ class HaystackAccessMethod : public AbstractIndexAccessMethod {
public:
HaystackAccessMethod(IndexCatalogEntry* btreeState, std::unique_ptr<SortedDataInterface> btree);
-protected:
- friend class GeoHaystackSearchCommand;
- void searchCommand(OperationContext* opCtx,
- Collection* collection,
- const BSONObj& nearObj,
- double maxDistance,
- const BSONObj& search,
- BSONObjBuilder* result,
- unsigned limit) const;
-
private:
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
@@ -87,6 +77,8 @@ private:
std::string _geoField;
std::vector<std::string> _otherFields;
double _bucketSize;
+
+ friend class GeoHaystackSearchCommand;
};
} // namespace mongo
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index fac28550154..b78e91e8e78 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -339,29 +339,6 @@ public:
const MultikeyPaths& multikeyPaths) const = 0;
/**
- * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the
- * index. Only index types which can store metadata describing an arbitrarily large set of
- * multikey paths need to override this method. Statistics reporting index seeks and keys
- * examined are written to 'stats'.
- */
- virtual std::set<FieldRef> getMultikeyPathSet(OperationContext*,
- const stdx::unordered_set<std::string>& fields,
- MultikeyMetadataAccessStats* stats) const {
- return {};
- }
-
- /**
- * Returns the set of all paths for which the index has multikey metadata keys. Only index types
- * which can store metadata describing an arbitrarily large set of multikey paths need to
- * override this method. Statistics reporting index seeks and keys examined are written to
- * 'stats'.
- */
- virtual std::set<FieldRef> getMultikeyPathSet(OperationContext* opCtx,
- MultikeyMetadataAccessStats* stats) const {
- return {};
- }
-
- /**
* Provides direct access to the SortedDataInterface. This should not be used to insert
* documents into an index, except for testing purposes.
*/
diff --git a/src/mongo/db/index/wildcard_access_method.cpp b/src/mongo/db/index/wildcard_access_method.cpp
index d03dd33f175..0d2771b5cef 100644
--- a/src/mongo/db/index/wildcard_access_method.cpp
+++ b/src/mongo/db/index/wildcard_access_method.cpp
@@ -32,7 +32,6 @@
#include "mongo/db/index/wildcard_access_method.h"
#include "mongo/db/catalog/index_catalog_entry.h"
-#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/query/index_bounds_builder.h"
namespace mongo {
@@ -61,200 +60,4 @@ void WildcardAccessMethod::doGetKeys(SharedBufferFragmentBuilder& pooledBufferBu
boost::optional<RecordId> id) const {
_keyGen.generateKeys(pooledBufferBuilder, obj, keys, multikeyMetadataKeys, id);
}
-
-FieldRef WildcardAccessMethod::extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry) {
- invariant(entry.loc.isReserved());
- invariant(entry.loc.repr() ==
- static_cast<int64_t>(RecordId::ReservedId::kWildcardMultikeyMetadataId));
-
- // Validate that the first piece of the key is the integer 1.
- BSONObjIterator iter(entry.key);
- invariant(iter.more());
- const auto firstElem = iter.next();
- invariant(firstElem.isNumber());
- invariant(firstElem.numberInt() == 1);
- invariant(iter.more());
-
- // Extract the path from the second piece of the key.
- const auto secondElem = iter.next();
- invariant(!iter.more());
- invariant(secondElem.type() == BSONType::String);
-
- return FieldRef(secondElem.valueStringData());
-}
-
-/**
- * Retrieves from the index the set of multikey path metadata keys bounded by 'indexBounds'. Returns
- * the set of multikey paths represented by the keys.
- */
-std::set<FieldRef> WildcardAccessMethod::_getMultikeyPathSet(
- OperationContext* opCtx,
- const IndexBounds& indexBounds,
- MultikeyMetadataAccessStats* stats) const {
- return writeConflictRetry(
- opCtx, "wildcard multikey path retrieval", "", [&]() -> std::set<FieldRef> {
- stats->numSeeks = 0;
- stats->keysExamined = 0;
- auto cursor = newCursor(opCtx);
-
- constexpr int kForward = 1;
- const auto keyPattern = BSON("" << 1 << "" << 1);
- IndexBoundsChecker checker(&indexBounds, keyPattern, kForward);
- IndexSeekPoint seekPoint;
- if (!checker.getStartSeekPoint(&seekPoint)) {
- return {};
- }
-
- std::set<FieldRef> multikeyPaths{};
- auto entry = cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek(
- seekPoint,
- getSortedDataInterface()->getKeyStringVersion(),
- getSortedDataInterface()->getOrdering(),
- kForward));
-
-
- ++stats->numSeeks;
- while (entry) {
- ++stats->keysExamined;
-
- switch (checker.checkKey(entry->key, &seekPoint)) {
- case IndexBoundsChecker::VALID:
- multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry));
- entry = cursor->next();
- break;
-
- case IndexBoundsChecker::MUST_ADVANCE:
- ++stats->numSeeks;
- entry =
- cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek(
- seekPoint,
- getSortedDataInterface()->getKeyStringVersion(),
- getSortedDataInterface()->getOrdering(),
- kForward));
-
- break;
-
- case IndexBoundsChecker::DONE:
- entry = boost::none;
- break;
-
- default:
- MONGO_UNREACHABLE;
- }
- }
-
- return multikeyPaths;
- });
-}
-
-
-std::vector<Interval> WildcardAccessMethod::getMultikeyPathIndexIntervalsForField(FieldRef field) {
- std::vector<Interval> intervals;
-
- size_t pointIntervalPrefixParts = field.numParts();
- const size_t skipFirstFieldElement = 1;
- const auto numericPathComponents = field.getNumericPathComponents(skipFirstFieldElement);
- const auto hasNumericPathComponent = !numericPathComponents.empty();
-
- if (hasNumericPathComponent) {
- pointIntervalPrefixParts = *numericPathComponents.begin();
- invariant(pointIntervalPrefixParts > 0);
- }
-
- constexpr bool inclusive = true;
- constexpr bool exclusive = false;
-
- // Add a point interval for each path up to the first numeric path component. Field "a.b.c"
- // would produce point intervals ["a", "a"], ["a.b", "a.b"] and ["a.b.c", "a.b.c"]. Field
- // "a.b.0.c" would produce point intervals ["a", "a"] and ["a.b", "a.b"].
- for (size_t i = 1; i <= pointIntervalPrefixParts; ++i) {
- auto multikeyPath = field.dottedSubstring(0, i);
- intervals.push_back(IndexBoundsBuilder::makePointInterval(multikeyPath));
- }
-
- // If "field" contains a numeric path component then we add a range covering all subfields
- // of the non-numeric prefix.
- if (hasNumericPathComponent) {
- auto rangeBase = field.dottedSubstring(0, pointIntervalPrefixParts);
- StringBuilder multikeyPathStart;
- multikeyPathStart << rangeBase << ".";
- StringBuilder multikeyPathEnd;
- multikeyPathEnd << rangeBase << static_cast<char>('.' + 1);
-
- intervals.emplace_back(BSON("" << multikeyPathStart.str() << "" << multikeyPathEnd.str()),
- inclusive,
- exclusive);
- }
-
- return intervals;
-}
-
-std::set<FieldRef> WildcardAccessMethod::getMultikeyPathSet(
- OperationContext* opCtx,
- const stdx::unordered_set<std::string>& fieldSet,
- MultikeyMetadataAccessStats* stats) const {
- invariant(stats);
- IndexBounds indexBounds;
-
- // Multikey metadata keys are stored with the number "1" in the first position of the index to
- // differentiate them from user-data keys, which contain a string representing the path.
- OrderedIntervalList multikeyPathFlagOil;
- multikeyPathFlagOil.intervals.push_back(IndexBoundsBuilder::makePointInterval(BSON("" << 1)));
- indexBounds.fields.push_back(std::move(multikeyPathFlagOil));
-
- OrderedIntervalList fieldNameOil;
-
- for (const auto& field : fieldSet) {
- auto intervals = getMultikeyPathIndexIntervalsForField(FieldRef(field));
- fieldNameOil.intervals.insert(fieldNameOil.intervals.end(),
- std::make_move_iterator(intervals.begin()),
- std::make_move_iterator(intervals.end()));
- }
-
- // IndexBoundsBuilder::unionize() sorts the OrderedIntervalList allowing for in order index
- // traversal.
- IndexBoundsBuilder::unionize(&fieldNameOil);
- indexBounds.fields.push_back(std::move(fieldNameOil));
-
- return _getMultikeyPathSet(opCtx, indexBounds, stats);
-}
-
-std::set<FieldRef> WildcardAccessMethod::getMultikeyPathSet(
- OperationContext* opCtx, MultikeyMetadataAccessStats* stats) const {
- return writeConflictRetry(opCtx, "wildcard multikey path retrieval", "", [&]() {
- invariant(stats);
- stats->numSeeks = 0;
- stats->keysExamined = 0;
-
- auto cursor = newCursor(opCtx);
-
- // All of the keys storing multikeyness metadata are prefixed by a value of 1. Establish
- // an index cursor which will scan this range.
- const BSONObj metadataKeyRangeBegin = BSON("" << 1 << "" << MINKEY);
- const BSONObj metadataKeyRangeEnd = BSON("" << 1 << "" << MAXKEY);
-
- constexpr bool inclusive = true;
- cursor->setEndPosition(metadataKeyRangeEnd, inclusive);
-
- auto keyStringForSeek = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek(
- metadataKeyRangeBegin,
- getSortedDataInterface()->getKeyStringVersion(),
- getSortedDataInterface()->getOrdering(),
- true, /* forward */
- inclusive);
- auto entry = cursor->seek(keyStringForSeek);
- ++stats->numSeeks;
-
- // Iterate the cursor, copying the multikey paths into an in-memory set.
- std::set<FieldRef> multikeyPaths{};
- while (entry) {
- ++stats->keysExamined;
- multikeyPaths.emplace(WildcardAccessMethod::extractMultikeyPathFromIndexKey(*entry));
-
- entry = cursor->next();
- }
-
- return multikeyPaths;
- });
-}
} // namespace mongo
diff --git a/src/mongo/db/index/wildcard_access_method.h b/src/mongo/db/index/wildcard_access_method.h
index fe49c3fcc42..b4d1bdbdb34 100644
--- a/src/mongo/db/index/wildcard_access_method.h
+++ b/src/mongo/db/index/wildcard_access_method.h
@@ -45,17 +45,6 @@ namespace mongo {
*/
class WildcardAccessMethod final : public AbstractIndexAccessMethod {
public:
- /**
- * Returns an exact set or super-set of the bounds required to fetch the multikey metadata keys
- * relevant to 'field'.
- */
- static std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field);
-
- /**
- * Extracts the multikey path from a metadata key stored within a wildcard index.
- */
- static FieldRef extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry);
-
WildcardAccessMethod(IndexCatalogEntry* wildcardState,
std::unique_ptr<SortedDataInterface> btree);
@@ -77,21 +66,6 @@ public:
return _keyGen.getWildcardProjection();
}
- /**
- * Returns the intersection of 'fieldSet' and the set of paths for which the $** has multikey
- * metadata keys.
- */
- std::set<FieldRef> getMultikeyPathSet(OperationContext*,
- const stdx::unordered_set<std::string>& fieldSet,
- MultikeyMetadataAccessStats* stats) const final;
-
-
- /**
- * Returns the entire set of paths for which the $** has multikey metadata keys.
- */
- std::set<FieldRef> getMultikeyPathSet(OperationContext* opCtx,
- MultikeyMetadataAccessStats* stats) const final;
-
private:
void doGetKeys(SharedBufferFragmentBuilder& pooledBufferBuilder,
const BSONObj& obj,
@@ -101,10 +75,6 @@ private:
MultikeyPaths* multikeyPaths,
boost::optional<RecordId> id) const final;
- std::set<FieldRef> _getMultikeyPathSet(OperationContext* opCtx,
- const IndexBounds& indexBounds,
- MultikeyMetadataAccessStats* stats) const;
-
const WildcardKeyGenerator _keyGen;
};
} // namespace mongo
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index f72ae1f898a..b5d0a2849ca 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -87,6 +87,7 @@
#include "mongo/db/query/sbe_sub_planner.h"
#include "mongo/db/query/stage_builder_util.h"
#include "mongo/db/query/util/make_data_structure.h"
+#include "mongo/db/query/wildcard_multikey_paths.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/collection_sharding_state.h"
@@ -197,8 +198,8 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx,
const WildcardProjection* wildcardProjection = nullptr;
std::set<FieldRef> multikeyPathSet;
if (desc->getIndexType() == IndexType::INDEX_WILDCARD) {
- wildcardProjection =
- static_cast<const WildcardAccessMethod*>(accessMethod)->getWildcardProjection();
+ auto wam = static_cast<const WildcardAccessMethod*>(accessMethod);
+ wildcardProjection = wam->getWildcardProjection();
if (isMultikey) {
MultikeyMetadataAccessStats mkAccessStats;
@@ -209,9 +210,9 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx,
wildcardProjection->exec(), fields);
multikeyPathSet =
- accessMethod->getMultikeyPathSet(opCtx, projectedFields, &mkAccessStats);
+ getWildcardMultikeyPathSet(wam, opCtx, projectedFields, &mkAccessStats);
} else {
- multikeyPathSet = accessMethod->getMultikeyPathSet(opCtx, &mkAccessStats);
+ multikeyPathSet = getWildcardMultikeyPathSet(wam, opCtx, &mkAccessStats);
}
LOGV2_DEBUG(20920,
diff --git a/src/mongo/db/query/wildcard_multikey_paths.cpp b/src/mongo/db/query/wildcard_multikey_paths.cpp
new file mode 100644
index 00000000000..746925cf7ff
--- /dev/null
+++ b/src/mongo/db/query/wildcard_multikey_paths.cpp
@@ -0,0 +1,239 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/query/wildcard_multikey_paths.h"
+
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/index/wildcard_access_method.h"
+#include "mongo/db/query/index_bounds_builder.h"
+
+namespace mongo {
+
+/**
+ * Extracts the multikey path from a metadata key stored within a wildcard index.
+ */
+static FieldRef extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry) {
+ invariant(entry.loc.isReserved());
+ invariant(entry.loc.repr() ==
+ static_cast<int64_t>(RecordId::ReservedId::kWildcardMultikeyMetadataId));
+
+ // Validate that the first piece of the key is the integer 1.
+ BSONObjIterator iter(entry.key);
+ invariant(iter.more());
+ const auto firstElem = iter.next();
+ invariant(firstElem.isNumber());
+ invariant(firstElem.numberInt() == 1);
+ invariant(iter.more());
+
+ // Extract the path from the second piece of the key.
+ const auto secondElem = iter.next();
+ invariant(!iter.more());
+ invariant(secondElem.type() == BSONType::String);
+
+ return FieldRef(secondElem.valueStringData());
+}
+
+/**
+ * Retrieves from the wildcard index the set of multikey path metadata keys bounded by
+ * 'indexBounds'. Returns the set of multikey paths represented by the keys.
+ */
+static std::set<FieldRef> getWildcardMultikeyPathSetHelper(const WildcardAccessMethod* wam,
+ OperationContext* opCtx,
+ const IndexBounds& indexBounds,
+ MultikeyMetadataAccessStats* stats) {
+ return writeConflictRetry(
+ opCtx, "wildcard multikey path retrieval", "", [&]() -> std::set<FieldRef> {
+ stats->numSeeks = 0;
+ stats->keysExamined = 0;
+ auto cursor = wam->newCursor(opCtx);
+
+ constexpr int kForward = 1;
+ const auto keyPattern = BSON("" << 1 << "" << 1);
+ IndexBoundsChecker checker(&indexBounds, keyPattern, kForward);
+ IndexSeekPoint seekPoint;
+ if (!checker.getStartSeekPoint(&seekPoint)) {
+ return {};
+ }
+
+ std::set<FieldRef> multikeyPaths{};
+ auto entry = cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek(
+ seekPoint,
+ wam->getSortedDataInterface()->getKeyStringVersion(),
+ wam->getSortedDataInterface()->getOrdering(),
+ kForward));
+
+
+ ++stats->numSeeks;
+ while (entry) {
+ ++stats->keysExamined;
+
+ switch (checker.checkKey(entry->key, &seekPoint)) {
+ case IndexBoundsChecker::VALID:
+ multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry));
+ entry = cursor->next();
+ break;
+
+ case IndexBoundsChecker::MUST_ADVANCE:
+ ++stats->numSeeks;
+ entry =
+ cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek(
+ seekPoint,
+ wam->getSortedDataInterface()->getKeyStringVersion(),
+ wam->getSortedDataInterface()->getOrdering(),
+ kForward));
+
+ break;
+
+ case IndexBoundsChecker::DONE:
+ entry = boost::none;
+ break;
+
+ default:
+ MONGO_UNREACHABLE;
+ }
+ }
+
+ return multikeyPaths;
+ });
+}
+
+std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field) {
+ std::vector<Interval> intervals;
+
+ size_t pointIntervalPrefixParts = field.numParts();
+ const size_t skipFirstFieldElement = 1;
+ const auto numericPathComponents = field.getNumericPathComponents(skipFirstFieldElement);
+ const auto hasNumericPathComponent = !numericPathComponents.empty();
+
+ if (hasNumericPathComponent) {
+ pointIntervalPrefixParts = *numericPathComponents.begin();
+ invariant(pointIntervalPrefixParts > 0);
+ }
+
+ constexpr bool inclusive = true;
+ constexpr bool exclusive = false;
+
+ // Add a point interval for each path up to the first numeric path component. Field "a.b.c"
+ // would produce point intervals ["a", "a"], ["a.b", "a.b"] and ["a.b.c", "a.b.c"]. Field
+ // "a.b.0.c" would produce point intervals ["a", "a"] and ["a.b", "a.b"].
+ for (size_t i = 1; i <= pointIntervalPrefixParts; ++i) {
+ auto multikeyPath = field.dottedSubstring(0, i);
+ intervals.push_back(IndexBoundsBuilder::makePointInterval(multikeyPath));
+ }
+
+ // If "field" contains a numeric path component then we add a range covering all subfields
+ // of the non-numeric prefix.
+ if (hasNumericPathComponent) {
+ auto rangeBase = field.dottedSubstring(0, pointIntervalPrefixParts);
+ StringBuilder multikeyPathStart;
+ multikeyPathStart << rangeBase << ".";
+ StringBuilder multikeyPathEnd;
+ multikeyPathEnd << rangeBase << static_cast<char>('.' + 1);
+
+ intervals.emplace_back(BSON("" << multikeyPathStart.str() << "" << multikeyPathEnd.str()),
+ inclusive,
+ exclusive);
+ }
+
+ return intervals;
+}
+
+std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam,
+ OperationContext* opCtx,
+ const stdx::unordered_set<std::string>& fieldSet,
+ MultikeyMetadataAccessStats* stats) {
+ invariant(stats);
+ IndexBounds indexBounds;
+
+ // Multikey metadata keys are stored with the number "1" in the first position of the index to
+ // differentiate them from user-data keys, which contain a string representing the path.
+ OrderedIntervalList multikeyPathFlagOil;
+ multikeyPathFlagOil.intervals.push_back(IndexBoundsBuilder::makePointInterval(BSON("" << 1)));
+ indexBounds.fields.push_back(std::move(multikeyPathFlagOil));
+
+ OrderedIntervalList fieldNameOil;
+
+ for (const auto& field : fieldSet) {
+ auto intervals = getMultikeyPathIndexIntervalsForField(FieldRef(field));
+ fieldNameOil.intervals.insert(fieldNameOil.intervals.end(),
+ std::make_move_iterator(intervals.begin()),
+ std::make_move_iterator(intervals.end()));
+ }
+
+ // IndexBoundsBuilder::unionize() sorts the OrderedIntervalList allowing for in order index
+ // traversal.
+ IndexBoundsBuilder::unionize(&fieldNameOil);
+ indexBounds.fields.push_back(std::move(fieldNameOil));
+
+ return getWildcardMultikeyPathSetHelper(wam, opCtx, indexBounds, stats);
+}
+
+std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam,
+ OperationContext* opCtx,
+ MultikeyMetadataAccessStats* stats) {
+ return writeConflictRetry(opCtx, "wildcard multikey path retrieval", "", [&]() {
+ invariant(stats);
+ stats->numSeeks = 0;
+ stats->keysExamined = 0;
+
+ auto cursor = wam->newCursor(opCtx);
+
+ // All of the keys storing multikeyness metadata are prefixed by a value of 1. Establish
+ // an index cursor which will scan this range.
+ const BSONObj metadataKeyRangeBegin = BSON("" << 1 << "" << MINKEY);
+ const BSONObj metadataKeyRangeEnd = BSON("" << 1 << "" << MAXKEY);
+
+ constexpr bool inclusive = true;
+ cursor->setEndPosition(metadataKeyRangeEnd, inclusive);
+
+ auto keyStringForSeek = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek(
+ metadataKeyRangeBegin,
+ wam->getSortedDataInterface()->getKeyStringVersion(),
+ wam->getSortedDataInterface()->getOrdering(),
+ true, /* forward */
+ inclusive);
+ auto entry = cursor->seek(keyStringForSeek);
+ ++stats->numSeeks;
+
+ // Iterate the cursor, copying the multikey paths into an in-memory set.
+ std::set<FieldRef> multikeyPaths{};
+ while (entry) {
+ ++stats->keysExamined;
+ multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry));
+
+ entry = cursor->next();
+ }
+
+ return multikeyPaths;
+ });
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/query/wildcard_multikey_paths.h b/src/mongo/db/query/wildcard_multikey_paths.h
new file mode 100644
index 00000000000..0bad9f9656f
--- /dev/null
+++ b/src/mongo/db/query/wildcard_multikey_paths.h
@@ -0,0 +1,70 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <set>
+#include <string>
+
+#include "mongo/db/field_ref.h"
+#include "mongo/stdx/unordered_set.h"
+
+namespace mongo {
+
+struct IndexBounds;
+struct IndexKeyEntry;
+struct Interval;
+struct MultikeyMetadataAccessStats;
+class OperationContext;
+class WildcardAccessMethod;
+
+/**
+ * Returns an exact set or super-set of the bounds required to fetch the multikey metadata keys
+ * relevant to 'field'.
+ */
+std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field);
+
+/**
+ * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the
+ * wildcard index. Statistics reporting index seeks and keys examined are written to 'stats'.
+ */
+std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam,
+ OperationContext* opCtx,
+ const stdx::unordered_set<std::string>& fieldSet,
+ MultikeyMetadataAccessStats* stats);
+
+/**
+ * Returns the set of all paths for which the wildcard index has multikey metadata keys.
+ * Statistics reporting index seeks and keys examined are written to 'stats'.
+ */
+std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam,
+ OperationContext* opCtx,
+ MultikeyMetadataAccessStats* stats);
+
+} // namespace mongo
diff --git a/src/mongo/dbtests/wildcard_access_method_test.cpp b/src/mongo/dbtests/wildcard_access_method_test.cpp
index 2f24954378a..4f12d060582 100644
--- a/src/mongo/dbtests/wildcard_access_method_test.cpp
+++ b/src/mongo/dbtests/wildcard_access_method_test.cpp
@@ -32,6 +32,7 @@
#include "mongo/bson/json.h"
#include "mongo/db/index/wildcard_access_method.h"
#include "mongo/db/query/query_planner_test_lib.h"
+#include "mongo/db/query/wildcard_multikey_paths.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -43,8 +44,7 @@ void assertCorrectMultikeyMetadataPathBoundsGenerated(StringData field,
OrderedIntervalList orderedIntervalList;
FieldRef fieldRef(field);
- orderedIntervalList.intervals =
- WildcardAccessMethod::getMultikeyPathIndexIntervalsForField(std::move(fieldRef));
+ orderedIntervalList.intervals = getMultikeyPathIndexIntervalsForField(std::move(fieldRef));
indexBounds.fields.push_back(std::move(orderedIntervalList));
diff --git a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
index 6b3ceac8ad2..69e45464355 100644
--- a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
+++ b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp
@@ -34,6 +34,7 @@
#include "mongo/db/catalog/multi_index_block.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/index/wildcard_access_method.h"
+#include "mongo/db/query/wildcard_multikey_paths.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/storage/sorted_data_interface.h"
#include "mongo/logv2/log.h"
@@ -147,7 +148,9 @@ protected:
auto collection = autoColl.getCollection();
auto indexAccessMethod = getIndex(collection, indexName);
MultikeyMetadataAccessStats stats;
- auto multikeyPathSet = indexAccessMethod->getMultikeyPathSet(opCtx(), &stats);
+ auto wam = dynamic_cast<const WildcardAccessMethod*>(indexAccessMethod);
+ ASSERT(wam != nullptr);
+ auto multikeyPathSet = getWildcardMultikeyPathSet(wam, opCtx(), &stats);
ASSERT(expectedFieldRefs == multikeyPathSet);
}