diff options
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/wildcard_multikey_paths.cpp | 239 | ||||
-rw-r--r-- | src/mongo/db/query/wildcard_multikey_paths.h | 70 |
3 files changed, 314 insertions, 4 deletions
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index f72ae1f898a..b5d0a2849ca 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -87,6 +87,7 @@ #include "mongo/db/query/sbe_sub_planner.h" #include "mongo/db/query/stage_builder_util.h" #include "mongo/db/query/util/make_data_structure.h" +#include "mongo/db/query/wildcard_multikey_paths.h" #include "mongo/db/repl/optime.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/collection_sharding_state.h" @@ -197,8 +198,8 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, const WildcardProjection* wildcardProjection = nullptr; std::set<FieldRef> multikeyPathSet; if (desc->getIndexType() == IndexType::INDEX_WILDCARD) { - wildcardProjection = - static_cast<const WildcardAccessMethod*>(accessMethod)->getWildcardProjection(); + auto wam = static_cast<const WildcardAccessMethod*>(accessMethod); + wildcardProjection = wam->getWildcardProjection(); if (isMultikey) { MultikeyMetadataAccessStats mkAccessStats; @@ -209,9 +210,9 @@ IndexEntry indexEntryFromIndexCatalogEntry(OperationContext* opCtx, wildcardProjection->exec(), fields); multikeyPathSet = - accessMethod->getMultikeyPathSet(opCtx, projectedFields, &mkAccessStats); + getWildcardMultikeyPathSet(wam, opCtx, projectedFields, &mkAccessStats); } else { - multikeyPathSet = accessMethod->getMultikeyPathSet(opCtx, &mkAccessStats); + multikeyPathSet = getWildcardMultikeyPathSet(wam, opCtx, &mkAccessStats); } LOGV2_DEBUG(20920, diff --git a/src/mongo/db/query/wildcard_multikey_paths.cpp b/src/mongo/db/query/wildcard_multikey_paths.cpp new file mode 100644 index 00000000000..746925cf7ff --- /dev/null +++ b/src/mongo/db/query/wildcard_multikey_paths.cpp @@ -0,0 +1,239 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/query/wildcard_multikey_paths.h" + +#include "mongo/db/concurrency/write_conflict_exception.h" +#include "mongo/db/index/wildcard_access_method.h" +#include "mongo/db/query/index_bounds_builder.h" + +namespace mongo { + +/** + * Extracts the multikey path from a metadata key stored within a wildcard index. + */ +static FieldRef extractMultikeyPathFromIndexKey(const IndexKeyEntry& entry) { + invariant(entry.loc.isReserved()); + invariant(entry.loc.repr() == + static_cast<int64_t>(RecordId::ReservedId::kWildcardMultikeyMetadataId)); + + // Validate that the first piece of the key is the integer 1. + BSONObjIterator iter(entry.key); + invariant(iter.more()); + const auto firstElem = iter.next(); + invariant(firstElem.isNumber()); + invariant(firstElem.numberInt() == 1); + invariant(iter.more()); + + // Extract the path from the second piece of the key. + const auto secondElem = iter.next(); + invariant(!iter.more()); + invariant(secondElem.type() == BSONType::String); + + return FieldRef(secondElem.valueStringData()); +} + +/** + * Retrieves from the wildcard index the set of multikey path metadata keys bounded by + * 'indexBounds'. Returns the set of multikey paths represented by the keys. + */ +static std::set<FieldRef> getWildcardMultikeyPathSetHelper(const WildcardAccessMethod* wam, + OperationContext* opCtx, + const IndexBounds& indexBounds, + MultikeyMetadataAccessStats* stats) { + return writeConflictRetry( + opCtx, "wildcard multikey path retrieval", "", [&]() -> std::set<FieldRef> { + stats->numSeeks = 0; + stats->keysExamined = 0; + auto cursor = wam->newCursor(opCtx); + + constexpr int kForward = 1; + const auto keyPattern = BSON("" << 1 << "" << 1); + IndexBoundsChecker checker(&indexBounds, keyPattern, kForward); + IndexSeekPoint seekPoint; + if (!checker.getStartSeekPoint(&seekPoint)) { + return {}; + } + + std::set<FieldRef> multikeyPaths{}; + auto entry = cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( + seekPoint, + wam->getSortedDataInterface()->getKeyStringVersion(), + wam->getSortedDataInterface()->getOrdering(), + kForward)); + + + ++stats->numSeeks; + while (entry) { + ++stats->keysExamined; + + switch (checker.checkKey(entry->key, &seekPoint)) { + case IndexBoundsChecker::VALID: + multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry)); + entry = cursor->next(); + break; + + case IndexBoundsChecker::MUST_ADVANCE: + ++stats->numSeeks; + entry = + cursor->seek(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( + seekPoint, + wam->getSortedDataInterface()->getKeyStringVersion(), + wam->getSortedDataInterface()->getOrdering(), + kForward)); + + break; + + case IndexBoundsChecker::DONE: + entry = boost::none; + break; + + default: + MONGO_UNREACHABLE; + } + } + + return multikeyPaths; + }); +} + +std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field) { + std::vector<Interval> intervals; + + size_t pointIntervalPrefixParts = field.numParts(); + const size_t skipFirstFieldElement = 1; + const auto numericPathComponents = field.getNumericPathComponents(skipFirstFieldElement); + const auto hasNumericPathComponent = !numericPathComponents.empty(); + + if (hasNumericPathComponent) { + pointIntervalPrefixParts = *numericPathComponents.begin(); + invariant(pointIntervalPrefixParts > 0); + } + + constexpr bool inclusive = true; + constexpr bool exclusive = false; + + // Add a point interval for each path up to the first numeric path component. Field "a.b.c" + // would produce point intervals ["a", "a"], ["a.b", "a.b"] and ["a.b.c", "a.b.c"]. Field + // "a.b.0.c" would produce point intervals ["a", "a"] and ["a.b", "a.b"]. + for (size_t i = 1; i <= pointIntervalPrefixParts; ++i) { + auto multikeyPath = field.dottedSubstring(0, i); + intervals.push_back(IndexBoundsBuilder::makePointInterval(multikeyPath)); + } + + // If "field" contains a numeric path component then we add a range covering all subfields + // of the non-numeric prefix. + if (hasNumericPathComponent) { + auto rangeBase = field.dottedSubstring(0, pointIntervalPrefixParts); + StringBuilder multikeyPathStart; + multikeyPathStart << rangeBase << "."; + StringBuilder multikeyPathEnd; + multikeyPathEnd << rangeBase << static_cast<char>('.' + 1); + + intervals.emplace_back(BSON("" << multikeyPathStart.str() << "" << multikeyPathEnd.str()), + inclusive, + exclusive); + } + + return intervals; +} + +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + const stdx::unordered_set<std::string>& fieldSet, + MultikeyMetadataAccessStats* stats) { + invariant(stats); + IndexBounds indexBounds; + + // Multikey metadata keys are stored with the number "1" in the first position of the index to + // differentiate them from user-data keys, which contain a string representing the path. + OrderedIntervalList multikeyPathFlagOil; + multikeyPathFlagOil.intervals.push_back(IndexBoundsBuilder::makePointInterval(BSON("" << 1))); + indexBounds.fields.push_back(std::move(multikeyPathFlagOil)); + + OrderedIntervalList fieldNameOil; + + for (const auto& field : fieldSet) { + auto intervals = getMultikeyPathIndexIntervalsForField(FieldRef(field)); + fieldNameOil.intervals.insert(fieldNameOil.intervals.end(), + std::make_move_iterator(intervals.begin()), + std::make_move_iterator(intervals.end())); + } + + // IndexBoundsBuilder::unionize() sorts the OrderedIntervalList allowing for in order index + // traversal. + IndexBoundsBuilder::unionize(&fieldNameOil); + indexBounds.fields.push_back(std::move(fieldNameOil)); + + return getWildcardMultikeyPathSetHelper(wam, opCtx, indexBounds, stats); +} + +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + MultikeyMetadataAccessStats* stats) { + return writeConflictRetry(opCtx, "wildcard multikey path retrieval", "", [&]() { + invariant(stats); + stats->numSeeks = 0; + stats->keysExamined = 0; + + auto cursor = wam->newCursor(opCtx); + + // All of the keys storing multikeyness metadata are prefixed by a value of 1. Establish + // an index cursor which will scan this range. + const BSONObj metadataKeyRangeBegin = BSON("" << 1 << "" << MINKEY); + const BSONObj metadataKeyRangeEnd = BSON("" << 1 << "" << MAXKEY); + + constexpr bool inclusive = true; + cursor->setEndPosition(metadataKeyRangeEnd, inclusive); + + auto keyStringForSeek = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek( + metadataKeyRangeBegin, + wam->getSortedDataInterface()->getKeyStringVersion(), + wam->getSortedDataInterface()->getOrdering(), + true, /* forward */ + inclusive); + auto entry = cursor->seek(keyStringForSeek); + ++stats->numSeeks; + + // Iterate the cursor, copying the multikey paths into an in-memory set. + std::set<FieldRef> multikeyPaths{}; + while (entry) { + ++stats->keysExamined; + multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry)); + + entry = cursor->next(); + } + + return multikeyPaths; + }); +} + +} // namespace mongo diff --git a/src/mongo/db/query/wildcard_multikey_paths.h b/src/mongo/db/query/wildcard_multikey_paths.h new file mode 100644 index 00000000000..0bad9f9656f --- /dev/null +++ b/src/mongo/db/query/wildcard_multikey_paths.h @@ -0,0 +1,70 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <set> +#include <string> + +#include "mongo/db/field_ref.h" +#include "mongo/stdx/unordered_set.h" + +namespace mongo { + +struct IndexBounds; +struct IndexKeyEntry; +struct Interval; +struct MultikeyMetadataAccessStats; +class OperationContext; +class WildcardAccessMethod; + +/** + * Returns an exact set or super-set of the bounds required to fetch the multikey metadata keys + * relevant to 'field'. + */ +std::vector<Interval> getMultikeyPathIndexIntervalsForField(FieldRef field); + +/** + * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the + * wildcard index. Statistics reporting index seeks and keys examined are written to 'stats'. + */ +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + const stdx::unordered_set<std::string>& fieldSet, + MultikeyMetadataAccessStats* stats); + +/** + * Returns the set of all paths for which the wildcard index has multikey metadata keys. + * Statistics reporting index seeks and keys examined are written to 'stats'. + */ +std::set<FieldRef> getWildcardMultikeyPathSet(const WildcardAccessMethod* wam, + OperationContext* opCtx, + MultikeyMetadataAccessStats* stats); + +} // namespace mongo |