diff options
author | Drew Paroski <drew.paroski@mongodb.com> | 2020-07-14 11:19:06 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-15 04:44:54 +0000 |
commit | 86296e8ffdae40d0b006276895940c87926c88b0 (patch) | |
tree | 54038f2fa0174173149db1ef115c50a761198de0 | |
parent | f5926f7c0867bf2726313e8c27be1871c6c16ecb (diff) | |
download | mongo-86296e8ffdae40d0b006276895940c87926c88b0.tar.gz |
SERVER-48736 Fix dependencies between query_exec and index_access_methods
-rw-r--r-- | src/mongo/db/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/commands/haystack.cpp | 109 | ||||
-rw-r--r-- | src/mongo/db/index/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/index/haystack_access_method.cpp | 90 | ||||
-rw-r--r-- | src/mongo/db/index/haystack_access_method.h | 12 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.h | 23 | ||||
-rw-r--r-- | src/mongo/db/index/wildcard_access_method.cpp | 197 | ||||
-rw-r--r-- | src/mongo/db/index/wildcard_access_method.h | 30 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/wildcard_multikey_paths.cpp | 239 | ||||
-rw-r--r-- | src/mongo/db/query/wildcard_multikey_paths.h | 70 | ||||
-rw-r--r-- | src/mongo/dbtests/wildcard_access_method_test.cpp | 4 | ||||
-rw-r--r-- | src/mongo/dbtests/wildcard_multikey_persistence_test.cpp | 5 |
13 files changed, 425 insertions, 366 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 700b5be4105..6c6b8ce2301 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1134,10 +1134,12 @@ env.Library( 'query/sbe_stage_builder_projection.cpp', 'query/sbe_sub_planner.cpp', 'query/stage_builder_util.cpp', + 'query/wildcard_multikey_paths.cpp', 'run_op_kill_cursors.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/index/index_access_methods', '$BUILD_DIR/mongo/s/common_s', '$BUILD_DIR/mongo/scripting/scripting', '$BUILD_DIR/mongo/util/background_job', diff --git a/src/mongo/db/commands/haystack.cpp b/src/mongo/db/commands/haystack.cpp index 16098313de1..e4af2078f3a 100644 --- a/src/mongo/db/commands/haystack.cpp +++ b/src/mongo/db/commands/haystack.cpp @@ -42,16 +42,18 @@ #include "mongo/db/commands.h" #include "mongo/db/curop.h" #include "mongo/db/db_raii.h" +#include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/haystack_access_method.h" +#include "mongo/db/index/haystack_access_method_internal.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index_names.h" #include "mongo/db/jsobj.h" #include "mongo/db/namespace_string.h" #include "mongo/db/query/find_common.h" +#include "mongo/db/query/internal_plans.h" #include "mongo/logv2/log.h" - /** * Examines all documents in a given radius of a given point. * Returns all documents that match a given search restriction. @@ -106,6 +108,96 @@ public: out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } + void searchHaystack(const HaystackAccessMethod* ham, + OperationContext* opCtx, + Collection* collection, + const BSONObj& nearObj, + double maxDistance, + const BSONObj& search, + BSONObjBuilder* result, + unsigned limit) { + Timer t; + + LOGV2_DEBUG(20680, + 1, + "SEARCH near:{nearObj} maxDistance:{maxDistance} search: {search}", + "nearObj"_attr = redact(nearObj), + "maxDistance"_attr = maxDistance, + "search"_attr = redact(search)); + int x, y; + { + BSONObjIterator i(nearObj); + x = ExpressionKeysPrivate::hashHaystackElement(i.next(), ham->_bucketSize); + y = ExpressionKeysPrivate::hashHaystackElement(i.next(), ham->_bucketSize); + } + int scale = static_cast<int>(ceil(maxDistance / ham->_bucketSize)); + + GeoHaystackSearchHopper hopper( + opCtx, nearObj, maxDistance, limit, ham->_geoField, collection); + + long long btreeMatches = 0; + + for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { + for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { + BSONObjBuilder bb; + bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b)); + + for (unsigned i = 0; i < ham->_otherFields.size(); i++) { + // See if the non-geo field we're indexing on is in the provided search term. + BSONElement e = dps::extractElementAtPath(search, ham->_otherFields[i]); + if (e.eoo()) + bb.appendNull(""); + else + bb.appendAs(e, ""); + } + + BSONObj key = bb.obj(); + + stdx::unordered_set<RecordId, RecordId::Hasher> thisPass; + + + auto exec = InternalPlanner::indexScan(opCtx, + collection, + ham->_descriptor, + key, + key, + BoundInclusion::kIncludeBothStartAndEndKeys, + PlanYieldPolicy::YieldPolicy::NO_YIELD); + PlanExecutor::ExecState state; + BSONObj obj; + RecordId loc; + while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &loc))) { + if (hopper.limitReached()) { + break; + } + pair<stdx::unordered_set<RecordId, RecordId::Hasher>::iterator, bool> p = + thisPass.insert(loc); + // If a new element was inserted (haven't seen the RecordId before), p.second + // is true. + if (p.second) { + hopper.consider(loc); + btreeMatches++; + } + } + + // Non-yielding collection scans from InternalPlanner will never error. + invariant(PlanExecutor::ADVANCED == state || PlanExecutor::IS_EOF == state); + } + } + + BSONArrayBuilder arr(result->subarrayStart("results")); + int num = hopper.appendResultsTo(&arr); + arr.done(); + + { + BSONObjBuilder b(result->subobjStart("stats")); + b.append("time", t.millis()); + b.appendNumber("btreeMatches", btreeMatches); + b.append("n", num); + b.done(); + } + } + bool errmsgRun(OperationContext* opCtx, const string& dbname, const BSONObj& cmdObj, @@ -173,13 +265,14 @@ public: const IndexDescriptor* desc = idxs[0]; auto ham = static_cast<const HaystackAccessMethod*>( collection->getIndexCatalog()->getEntry(desc)->accessMethod()); - ham->searchCommand(opCtx, - collection, - nearElt.Obj(), - maxDistance.numberDouble(), - search.Obj(), - &result, - limit); + searchHaystack(ham, + opCtx, + collection, + nearElt.Obj(), + maxDistance.numberDouble(), + search.Obj(), + &result, + limit); return 1; } } nameSearchCommand; diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index 922242cb618..078bd212e1b 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -158,7 +158,6 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', - '$BUILD_DIR/mongo/db/query_exec', 'index_access_method', 'key_generator', ], diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 45a9b2a9443..7cd26382182 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -41,9 +41,7 @@ #include "mongo/db/geo/hash.h" #include "mongo/db/index/expression_keys_private.h" #include "mongo/db/index/expression_params.h" -#include "mongo/db/index/haystack_access_method_internal.h" #include "mongo/db/jsobj.h" -#include "mongo/db/query/internal_plans.h" #include "mongo/logv2/log.h" namespace mongo { @@ -82,92 +80,4 @@ void HaystackAccessMethod::doGetKeys(SharedBufferFragmentBuilder& pooledBufferBu id); } -void HaystackAccessMethod::searchCommand(OperationContext* opCtx, - Collection* collection, - const BSONObj& nearObj, - double maxDistance, - const BSONObj& search, - BSONObjBuilder* result, - unsigned limit) const { - Timer t; - - LOGV2_DEBUG(20680, - 1, - "SEARCH near:{nearObj} maxDistance:{maxDistance} search: {search}", - "nearObj"_attr = redact(nearObj), - "maxDistance"_attr = maxDistance, - "search"_attr = redact(search)); - int x, y; - { - BSONObjIterator i(nearObj); - x = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); - y = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); - } - int scale = static_cast<int>(ceil(maxDistance / _bucketSize)); - - GeoHaystackSearchHopper hopper(opCtx, nearObj, maxDistance, limit, _geoField, collection); - - long long btreeMatches = 0; - - for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { - for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { - BSONObjBuilder bb; - bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b)); - - for (unsigned i = 0; i < _otherFields.size(); i++) { - // See if the non-geo field we're indexing on is in the provided search term. - BSONElement e = dps::extractElementAtPath(search, _otherFields[i]); - if (e.eoo()) - bb.appendNull(""); - else - bb.appendAs(e, ""); - } - - BSONObj key = bb.obj(); - - stdx::unordered_set<RecordId, RecordId::Hasher> thisPass; - - - auto exec = InternalPlanner::indexScan(opCtx, - collection, - _descriptor, - key, - key, - BoundInclusion::kIncludeBothStartAndEndKeys, - PlanYieldPolicy::YieldPolicy::NO_YIELD); - PlanExecutor::ExecState state; - BSONObj obj; - RecordId loc; - while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &loc))) { - if (hopper.limitReached()) { - break; - } - pair<stdx::unordered_set<RecordId, RecordId::Hasher>::iterator, bool> p = - thisPass.insert(loc); - // If a new element was inserted (haven't seen the RecordId before), p.second - // is true. - if (p.second) { - hopper.consider(loc); - btreeMatches++; - } - } - - // Non-yielding collection scans from InternalPlanner will never error. - invariant(PlanExecutor::ADVANCED == state || PlanExecutor::IS_EOF == state); - } - } - - BSONArrayBuilder arr(result->subarrayStart("results")); - int num = hopper.appendResultsTo(&arr); - arr.done(); - - { - BSONObjBuilder b(result->subobjStart("stats")); - b.append("time", t.millis()); - b.appendNumber("btreeMatches", btreeMatches); - b.append("n", num); - b.done(); - } -} - } // namespace mongo diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index f7e92c0ceab..0507887132d 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -59,16 +59,6 @@ class HaystackAccessMethod : public AbstractIndexAccessMethod { public: HaystackAccessMethod(IndexCatalogEntry* btreeState, std::unique_ptr<SortedDataInterface> btree); -protected: - friend class GeoHaystackSearchCommand; - void searchCommand(OperationContext* opCtx, - Collection* collection, - const BSONObj& nearObj, - double maxDistance, - const BSONObj& search, - BSONObjBuilder* result, - unsigned limit) const; - private: /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. @@ -87,6 +77,8 @@ private: std::string _geoField; std::vector<std::string> _otherFields; double _bucketSize; + + friend class GeoHaystackSearchCommand; }; } // namespace mongo diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index fac28550154..b78e91e8e78 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -339,29 +339,6 @@ public: const MultikeyPaths& multikeyPaths) const = 0; /** - * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the - * index. Only index types which can store metadata describing an arbitrarily large set of - * multikey paths need to override this method. Statistics reporting index seeks and keys - * examined are written to 'stats'. - */ - virtual std::set<FieldRef> getMultikeyPathSet(OperationContext*, - const stdx::unordered_set<std::string>& fields, - MultikeyMetadataAccessStats* stats) const { - return {}; - } - - /** - * Returns the set of all paths for which the index has multikey metadata keys. Only index types - * which can store metadata describing an arbitrarily large set of multikey paths need to - * override this method. Statistics reporting index seeks and keys examined are written to - * 'stats'. - */ - virtual std::set<FieldRef> getMultikeyPathSet(OperationContext* opCtx, - MultikeyMetadataAccessStats* stats) const { - return {}; - } - - /** * Provides direct access to the SortedDataInterface. This should not be used to insert * documents into an index, except for testing purposes. */ diff --git a/src/mongo/db/index/wildcard_access_method.cpp b/src/mongo/db/index/wildcard_access_method.cpp index d03dd33f175..0d2771b5cef 100644 --- a/src/mongo/db/index/wildcard_access_method.cpp +++ b/src/mongo/db/index/wildcard_access_method.cpp @@ -32,7 +32,6 @@ #include "mongo/db/index/wildcard_access_method.h" #include "mongo/db/catalog/index_catalog_entry.h" -#include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/query/index_bounds_builder.h" namespace mongo { @@ -61,200 +60,4 @@ void WildcardAccessMethod::doGetKeys(SharedBufferFragmentBuilder& pooledBufferBu boost::optional<RecordId> id) const { _keyGen.generateKeys(pooledBufferBuilder, obj, keys, multikeyMetadataKeys, id); } - -FieldRef WildcardAccessMethod::extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry) { - invariant(entry.loc.isReserved()); - invariant(entry.loc.repr() == - static_cast<int64_t>(RecordId::ReservedId::kWildcardMultikeyMetadataId)); - - // Validate that the first piece of the key is the integer 1. - BSONObjIterator iter(entry.key); - invariant(iter.more()); - const auto firstElem = iter.next(); - invariant(firstElem.isNumber()); - invariant(firstElem.numberInt() == 1); - invariant(iter.more()); - - // Extract the path from the second piece of the key. - const auto secondElem = iter.next(); - invariant(!iter.more()); - invariant(secondElem.type() == BSONType::String); - - return FieldRef(secondElem.valueStringData()); -} - -/** - * Retrieves from the index the set of multikey path metadata keys bounded by 'indexBounds'. Returns - * the set of multikey paths represented by the keys. - */ -std::set<FieldRef> WildcardAccessMethod::_getMultikeyPathSet( - OperationContext* opCtx, - const IndexBounds& indexBounds, - MultikeyMetadataAccessStats* stats) const { - return writeConflictRetry( - opCtx, "wildcard multikey path retrieval", "", [&]() -> std::set<FieldRef> { - stats->numSeeks = 0; - stats->keysExamined = 0; - auto cursor = newCursor(opCtx); - - constexpr int kForward = 1; - const auto keyPattern = BSON("" << 1 << "" << 1); - IndexBoundsChecker checker(&indexBounds, keyPattern, kForward); - IndexSeekPoint seekPoint; - if (!checker.getStartSeekPoint(&seekPoint)) { - return {}; - } - - std::set<FieldRef> multikeyPaths{}; - auto entry = cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( - seekPoint, - getSortedDataInterface()->getKeyStringVersion(), - getSortedDataInterface()->getOrdering(), - kForward)); - - - ++stats->numSeeks; - while (entry) { - ++stats->keysExamined; - - switch (checker.checkKey(entry->key, &seekPoint)) { - case IndexBoundsChecker::VALID: - multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry)); - entry = cursor->next(); - break; - - case IndexBoundsChecker::MUST_ADVANCE: - ++stats->numSeeks; - entry = - cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( - seekPoint, - getSortedDataInterface()->getKeyStringVersion(), - getSortedDataInterface()->getOrdering(), - kForward)); - - break; - - case IndexBoundsChecker::DONE: - entry = boost::none; - break; - - default: - MONGO_UNREACHABLE; - } - } - - return multikeyPaths; - }); -} - - -std::vector<Interval> WildcardAccessMethod::getMultikeyPathIndexIntervalsForField(FieldRef field) { - std::vector<Interval> intervals; - - size_t pointIntervalPrefixParts = field.numParts(); - const size_t skipFirstFieldElement = 1; - const auto numericPathComponents = field.getNumericPathComponents(skipFirstFieldElement); - const auto hasNumericPathComponent = !numericPathComponents.empty(); - - if (hasNumericPathComponent) { - pointIntervalPrefixParts = *numericPathComponents.begin(); - invariant(pointIntervalPrefixParts > 0); - } - - constexpr bool inclusive = true; - constexpr bool exclusive = false; - - // Add a point interval for each path up to the first numeric path component. Field "a.b.c" - // would produce point intervals ["a", "a"], ["a.b", "a.b"] and ["a.b.c", "a.b.c"]. Field - // "a.b.0.c" would produce point intervals ["a", "a"] and ["a.b", "a.b"]. - for (size_t i = 1; i <= pointIntervalPrefixParts; ++i) { - auto multikeyPath = field.dottedSubstring(0, i); - intervals.push_back(IndexBoundsBuilder::makePointInterval(multikeyPath)); - } - - // If "field" contains a numeric path component then we add a range covering all subfields - // of the non-numeric prefix. - if (hasNumericPathComponent) { - auto rangeBase = field.dottedSubstring(0, pointIntervalPrefixParts); - StringBuilder multikeyPathStart; - multikeyPathStart << rangeBase << "."; - StringBuilder multikeyPathEnd; - multikeyPathEnd << rangeBase << static_cast<char>('.' + 1); - - intervals.emplace_back(BSON("" << multikeyPathStart.str() << "" << multikeyPathEnd.str()), - inclusive, - exclusive); - } - - return intervals; -} - -std::set<FieldRef> WildcardAccessMethod::getMultikeyPathSet( - OperationContext* opCtx, - const stdx::unordered_set<std::string>& fieldSet, - MultikeyMetadataAccessStats* stats) const { - invariant(stats); - IndexBounds indexBounds; - - // Multikey metadata keys are stored with the number "1" in the first position of the index to - // differentiate them from user-data keys, which contain a string representing the path. - OrderedIntervalList multikeyPathFlagOil; - multikeyPathFlagOil.intervals.push_back(IndexBoundsBuilder::makePointInterval(BSON("" << 1))); - indexBounds.fields.push_back(std::move(multikeyPathFlagOil)); - - OrderedIntervalList fieldNameOil; - - for (const auto& field : fieldSet) { - auto intervals = getMultikeyPathIndexIntervalsForField(FieldRef(field)); - fieldNameOil.intervals.insert(fieldNameOil.intervals.end(), - std::make_move_iterator(intervals.begin()), - std::make_move_iterator(intervals.end())); - } - - // IndexBoundsBuilder::unionize() sorts the OrderedIntervalList allowing for in order index - // traversal. - IndexBoundsBuilder::unionize(&fieldNameOil); - indexBounds.fields.push_back(std::move(fieldNameOil)); - - return _getMultikeyPathSet(opCtx, indexBounds, stats); -} - -std::set<FieldRef> WildcardAccessMethod::getMultikeyPathSet( - OperationContext* opCtx, MultikeyMetadataAccessStats* stats) const { - return writeConflictRetry(opCtx, "wildcard multikey path retrieval", "", [&]() { - invariant(stats); - stats->numSeeks = 0; - stats->keysExamined = 0; - - auto cursor = newCursor(opCtx); - - // All of the keys storing multikeyness metadata are prefixed by a value of 1. Establish - // an index cursor which will scan this range. - const BSONObj metadataKeyRangeBegin = BSON("" << 1 << "" << MINKEY); - const BSONObj metadataKeyRangeEnd = BSON("" << 1 << "" << MAXKEY); - - constexpr bool inclusive = true; - cursor->setEndPosition(metadataKeyRangeEnd, inclusive); - - auto keyStringForSeek = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek( - metadataKeyRangeBegin, - getSortedDataInterface()->getKeyStringVersion(), - getSortedDataInterface()->getOrdering(), - true, /* forward */ - inclusive); - auto entry = cursor->seek(keyStringForSeek); - ++stats->numSeeks; - - // Iterate the cursor, copying the multikey paths into an in-memory set. - std::set<FieldRef> multikeyPaths{}; - while (entry) { - ++stats->keysExamined; - multikeyPaths.emplace(WildcardAccessMethod::extractMultikeyPathFromIndexKey(*entry)); - - entry = cursor->next(); - } - - return multikeyPaths; - }); -} } // namespace mongo diff --git a/src/mongo/db/index/wildcard_access_method.h b/src/mongo/db/index/wildcard_access_method.h index fe49c3fcc42..b4d1bdbdb34 100644 --- a/src/mongo/db/index/wildcard_access_method.h +++ b/src/mongo/db/index/wildcard_access_method.h @@ -45,17 +45,6 @@ namespace mongo { */ class WildcardAccessMethod final : public AbstractIndexAccessMethod { public: - /** - * Returns an exact set or super-set of the bounds required to fetch the multikey metadata keys - * relevant to 'field'. - */ - static std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field); - - /** - * Extracts the multikey path from a metadata key stored within a wildcard index. - */ - static FieldRef extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry); - WildcardAccessMethod(IndexCatalogEntry* wildcardState, std::unique_ptr<SortedDataInterface> btree); @@ -77,21 +66,6 @@ public: return _keyGen.getWildcardProjection(); } - /** - * Returns the intersection of 'fieldSet' and the set of paths for which the $** has multikey - * metadata keys. - */ - std::set<FieldRef> getMultikeyPathSet(OperationContext*, - const stdx::unordered_set<std::string>& fieldSet, - MultikeyMetadataAccessStats* stats) const final; - - - /** - * Returns the entire set of paths for which the $** has multikey metadata keys. - */ - std::set<FieldRef> getMultikeyPathSet(OperationContext* opCtx, - MultikeyMetadataAccessStats* stats) const final; - private: void doGetKeys(SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONObj& obj, @@ -101,10 +75,6 @@ private: MultikeyPaths* multikeyPaths, boost::optional<RecordId> id) const final; - std::set<FieldRef> _getMultikeyPathSet(OperationContext* opCtx, - const IndexBounds& indexBounds, - MultikeyMetadataAccessStats* stats) const; - const WildcardKeyGenerator _keyGen; }; } // namespace mongo diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index f72ae1f898a..b5d0a2849ca 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -87,6 +87,7 @@ #include "mongo/db/query/sbe_sub_planner.h" #include "mongo/db/query/stage_builder_util.h" #include "mongo/db/query/util/make_data_structure.h" +#include "mongo/db/query/wildcard_multikey_paths.h" #include "mongo/db/repl/optime.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/collection_sharding_state.h" @@ -197,8 +198,8 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, const WildcardProjection* wildcardProjection = nullptr; std::set<FieldRef> multikeyPathSet; if (desc->getIndexType() == IndexType::INDEX_WILDCARD) { - wildcardProjection = - static_cast<const WildcardAccessMethod*>(accessMethod)->getWildcardProjection(); + auto wam = static_cast<const WildcardAccessMethod*>(accessMethod); + wildcardProjection = wam->getWildcardProjection(); if (isMultikey) { MultikeyMetadataAccessStats mkAccessStats; @@ -209,9 +210,9 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, wildcardProjection->exec(), fields); multikeyPathSet = - accessMethod->getMultikeyPathSet(opCtx, projectedFields, &mkAccessStats); + getWildcardMultikeyPathSet(wam, opCtx, projectedFields, &mkAccessStats); } else { - multikeyPathSet = accessMethod->getMultikeyPathSet(opCtx, &mkAccessStats); + multikeyPathSet = getWildcardMultikeyPathSet(wam, opCtx, &mkAccessStats); } LOGV2_DEBUG(20920, diff --git a/src/mongo/db/query/wildcard_multikey_paths.cpp b/src/mongo/db/query/wildcard_multikey_paths.cpp new file mode 100644 index 00000000000..746925cf7ff --- /dev/null +++ b/src/mongo/db/query/wildcard_multikey_paths.cpp @@ -0,0 +1,239 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/wildcard_multikey_paths.h" + +#include "mongo/db/concurrency/write_conflict_exception.h" +#include "mongo/db/index/wildcard_access_method.h" +#include "mongo/db/query/index_bounds_builder.h" + +namespace mongo { + +/** + * Extracts the multikey path from a metadata key stored within a wildcard index. + */ +static FieldRef extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry) { + invariant(entry.loc.isReserved()); + invariant(entry.loc.repr() == + static_cast<int64_t>(RecordId::ReservedId::kWildcardMultikeyMetadataId)); + + // Validate that the first piece of the key is the integer 1. + BSONObjIterator iter(entry.key); + invariant(iter.more()); + const auto firstElem = iter.next(); + invariant(firstElem.isNumber()); + invariant(firstElem.numberInt() == 1); + invariant(iter.more()); + + // Extract the path from the second piece of the key. + const auto secondElem = iter.next(); + invariant(!iter.more()); + invariant(secondElem.type() == BSONType::String); + + return FieldRef(secondElem.valueStringData()); +} + +/** + * Retrieves from the wildcard index the set of multikey path metadata keys bounded by + * 'indexBounds'. Returns the set of multikey paths represented by the keys. + */ +static std::set<FieldRef> getWildcardMultikeyPathSetHelper(const WildcardAccessMethod* wam, + OperationContext* opCtx, + const IndexBounds& indexBounds, + MultikeyMetadataAccessStats* stats) { + return writeConflictRetry( + opCtx, "wildcard multikey path retrieval", "", [&]() -> std::set<FieldRef> { + stats->numSeeks = 0; + stats->keysExamined = 0; + auto cursor = wam->newCursor(opCtx); + + constexpr int kForward = 1; + const auto keyPattern = BSON("" << 1 << "" << 1); + IndexBoundsChecker checker(&indexBounds, keyPattern, kForward); + IndexSeekPoint seekPoint; + if (!checker.getStartSeekPoint(&seekPoint)) { + return {}; + } + + std::set<FieldRef> multikeyPaths{}; + auto entry = cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( + seekPoint, + wam->getSortedDataInterface()->getKeyStringVersion(), + wam->getSortedDataInterface()->getOrdering(), + kForward)); + + + ++stats->numSeeks; + while (entry) { + ++stats->keysExamined; + + switch (checker.checkKey(entry->key, &seekPoint)) { + case IndexBoundsChecker::VALID: + multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry)); + entry = cursor->next(); + break; + + case IndexBoundsChecker::MUST_ADVANCE: + ++stats->numSeeks; + entry = + cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( + seekPoint, + wam->getSortedDataInterface()->getKeyStringVersion(), + wam->getSortedDataInterface()->getOrdering(), + kForward)); + + break; + + case IndexBoundsChecker::DONE: + entry = boost::none; + break; + + default: + MONGO_UNREACHABLE; + } + } + + return multikeyPaths; + }); +} + +std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field) { + std::vector<Interval> intervals; + + size_t pointIntervalPrefixParts = field.numParts(); + const size_t skipFirstFieldElement = 1; + const auto numericPathComponents = field.getNumericPathComponents(skipFirstFieldElement); + const auto hasNumericPathComponent = !numericPathComponents.empty(); + + if (hasNumericPathComponent) { + pointIntervalPrefixParts = *numericPathComponents.begin(); + invariant(pointIntervalPrefixParts > 0); + } + + constexpr bool inclusive = true; + constexpr bool exclusive = false; + + // Add a point interval for each path up to the first numeric path component. Field "a.b.c" + // would produce point intervals ["a", "a"], ["a.b", "a.b"] and ["a.b.c", "a.b.c"]. Field + // "a.b.0.c" would produce point intervals ["a", "a"] and ["a.b", "a.b"]. + for (size_t i = 1; i <= pointIntervalPrefixParts; ++i) { + auto multikeyPath = field.dottedSubstring(0, i); + intervals.push_back(IndexBoundsBuilder::makePointInterval(multikeyPath)); + } + + // If "field" contains a numeric path component then we add a range covering all subfields + // of the non-numeric prefix. + if (hasNumericPathComponent) { + auto rangeBase = field.dottedSubstring(0, pointIntervalPrefixParts); + StringBuilder multikeyPathStart; + multikeyPathStart << rangeBase << "."; + StringBuilder multikeyPathEnd; + multikeyPathEnd << rangeBase << static_cast<char>('.' + 1); + + intervals.emplace_back(BSON("" << multikeyPathStart.str() << "" << multikeyPathEnd.str()), + inclusive, + exclusive); + } + + return intervals; +} + +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + const stdx::unordered_set<std::string>& fieldSet, + MultikeyMetadataAccessStats* stats) { + invariant(stats); + IndexBounds indexBounds; + + // Multikey metadata keys are stored with the number "1" in the first position of the index to + // differentiate them from user-data keys, which contain a string representing the path. + OrderedIntervalList multikeyPathFlagOil; + multikeyPathFlagOil.intervals.push_back(IndexBoundsBuilder::makePointInterval(BSON("" << 1))); + indexBounds.fields.push_back(std::move(multikeyPathFlagOil)); + + OrderedIntervalList fieldNameOil; + + for (const auto& field : fieldSet) { + auto intervals = getMultikeyPathIndexIntervalsForField(FieldRef(field)); + fieldNameOil.intervals.insert(fieldNameOil.intervals.end(), + std::make_move_iterator(intervals.begin()), + std::make_move_iterator(intervals.end())); + } + + // IndexBoundsBuilder::unionize() sorts the OrderedIntervalList allowing for in order index + // traversal. + IndexBoundsBuilder::unionize(&fieldNameOil); + indexBounds.fields.push_back(std::move(fieldNameOil)); + + return getWildcardMultikeyPathSetHelper(wam, opCtx, indexBounds, stats); +} + +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + MultikeyMetadataAccessStats* stats) { + return writeConflictRetry(opCtx, "wildcard multikey path retrieval", "", [&]() { + invariant(stats); + stats->numSeeks = 0; + stats->keysExamined = 0; + + auto cursor = wam->newCursor(opCtx); + + // All of the keys storing multikeyness metadata are prefixed by a value of 1. Establish + // an index cursor which will scan this range. + const BSONObj metadataKeyRangeBegin = BSON("" << 1 << "" << MINKEY); + const BSONObj metadataKeyRangeEnd = BSON("" << 1 << "" << MAXKEY); + + constexpr bool inclusive = true; + cursor->setEndPosition(metadataKeyRangeEnd, inclusive); + + auto keyStringForSeek = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek( + metadataKeyRangeBegin, + wam->getSortedDataInterface()->getKeyStringVersion(), + wam->getSortedDataInterface()->getOrdering(), + true, /* forward */ + inclusive); + auto entry = cursor->seek(keyStringForSeek); + ++stats->numSeeks; + + // Iterate the cursor, copying the multikey paths into an in-memory set. + std::set<FieldRef> multikeyPaths{}; + while (entry) { + ++stats->keysExamined; + multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry)); + + entry = cursor->next(); + } + + return multikeyPaths; + }); +} + +} // namespace mongo diff --git a/src/mongo/db/query/wildcard_multikey_paths.h b/src/mongo/db/query/wildcard_multikey_paths.h new file mode 100644 index 00000000000..0bad9f9656f --- /dev/null +++ b/src/mongo/db/query/wildcard_multikey_paths.h @@ -0,0 +1,70 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <set> +#include <string> + +#include "mongo/db/field_ref.h" +#include "mongo/stdx/unordered_set.h" + +namespace mongo { + +struct IndexBounds; +struct IndexKeyEntry; +struct Interval; +struct MultikeyMetadataAccessStats; +class OperationContext; +class WildcardAccessMethod; + +/** + * Returns an exact set or super-set of the bounds required to fetch the multikey metadata keys + * relevant to 'field'. + */ +std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field); + +/** + * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the + * wildcard index. Statistics reporting index seeks and keys examined are written to 'stats'. + */ +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + const stdx::unordered_set<std::string>& fieldSet, + MultikeyMetadataAccessStats* stats); + +/** + * Returns the set of all paths for which the wildcard index has multikey metadata keys. + * Statistics reporting index seeks and keys examined are written to 'stats'. + */ +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + MultikeyMetadataAccessStats* stats); + +} // namespace mongo diff --git a/src/mongo/dbtests/wildcard_access_method_test.cpp b/src/mongo/dbtests/wildcard_access_method_test.cpp index 2f24954378a..4f12d060582 100644 --- a/src/mongo/dbtests/wildcard_access_method_test.cpp +++ b/src/mongo/dbtests/wildcard_access_method_test.cpp @@ -32,6 +32,7 @@ #include "mongo/bson/json.h" #include "mongo/db/index/wildcard_access_method.h" #include "mongo/db/query/query_planner_test_lib.h" +#include "mongo/db/query/wildcard_multikey_paths.h" #include "mongo/unittest/unittest.h" namespace mongo { @@ -43,8 +44,7 @@ void assertCorrectMultikeyMetadataPathBoundsGenerated(StringData field, OrderedIntervalList orderedIntervalList; FieldRef fieldRef(field); - orderedIntervalList.intervals = - WildcardAccessMethod::getMultikeyPathIndexIntervalsForField(std::move(fieldRef)); + orderedIntervalList.intervals = getMultikeyPathIndexIntervalsForField(std::move(fieldRef)); indexBounds.fields.push_back(std::move(orderedIntervalList)); diff --git a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp index 6b3ceac8ad2..69e45464355 100644 --- a/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp +++ b/src/mongo/dbtests/wildcard_multikey_persistence_test.cpp @@ -34,6 +34,7 @@ #include "mongo/db/catalog/multi_index_block.h" #include "mongo/db/db_raii.h" #include "mongo/db/index/wildcard_access_method.h" +#include "mongo/db/query/wildcard_multikey_paths.h" #include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/storage/sorted_data_interface.h" #include "mongo/logv2/log.h" @@ -147,7 +148,9 @@ protected: auto collection = autoColl.getCollection(); auto indexAccessMethod = getIndex(collection, indexName); MultikeyMetadataAccessStats stats; - auto multikeyPathSet = indexAccessMethod->getMultikeyPathSet(opCtx(), &stats); + auto wam = dynamic_cast<const WildcardAccessMethod*>(indexAccessMethod); + ASSERT(wam != nullptr); + auto multikeyPathSet = getWildcardMultikeyPathSet(wam, opCtx(), &stats); ASSERT(expectedFieldRefs == multikeyPathSet); } |