diff options
author | Mohammad Dashti <mdashti@gmail.com> | 2021-06-17 00:04:04 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-17 00:41:40 +0000 |
commit | 1ebaae342dee93f57005792bb331bf90450ee275 (patch) | |
tree | 9bd109b5a6c816c424443a3e314382539d13feb3 | |
parent | 81e66de93a8cf6d08acf627e036b95b4e3ee64bc (diff) | |
download | mongo-1ebaae342dee93f57005792bb331bf90450ee275.tar.gz |
SERVER-54953 Improved performance of UpdateIndexData path matching code
Co-authored-by: Ian Boros <ian.boros@mongodb.com>
(cherry picked from commit d4418cfb08842d04c910f2cabfcdf6e56e5b5f00)
-rw-r--r-- | src/mongo/db/update/document_diff_serialization.cpp | 30 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization.h | 8 | ||||
-rw-r--r-- | src/mongo/db/update_index_data.cpp | 99 |
3 files changed, 109 insertions, 28 deletions
diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp index 9fac816d460..285da519c48 100644 --- a/src/mongo/db/update/document_diff_serialization.cpp +++ b/src/mongo/db/update/document_diff_serialization.cpp @@ -31,6 +31,7 @@ #include "mongo/db/update/document_diff_serialization.h" +#include <fmt/format.h> #include <stack> #include "mongo/util/str.h" @@ -277,21 +278,38 @@ public: } } + // +1 for leading char. +1 to round up from digits10. + static constexpr size_t fieldNameSize = std::numeric_limits<std::uint64_t>::digits10 + 2; + char fieldNameStorage[fieldNameSize]; + + auto formatFieldName = [&](char pre, size_t idx) { + const char* fieldNameStorageEnd = + fmt::format_to(fieldNameStorage, FMT_STRING("{}{}"), pre, idx); + return StringData(static_cast<const char*>(fieldNameStorage), fieldNameStorageEnd); + }; + + // Make sure that 'doc_diff::kUpdateSectionFieldName' is a single character. + static_assert(doc_diff::kUpdateSectionFieldName.size() == 1, + "doc_diff::kUpdateSectionFieldName should be a single character."); + for (; _childIt != _node.getChildren().end(); ++_childIt) { auto&& [idx, child] = *_childIt; - auto idxAsStr = std::to_string(idx); switch (child->type()) { case (NodeType::kUpdate): { const auto& valueNode = checked_cast<const UpdateNode&>(*child); appendElementToBuilder( - valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob); + valueNode.elt, + formatFieldName(doc_diff::kUpdateSectionFieldName[0], idx), + &_bob); break; } case (NodeType::kInsert): { const auto& valueNode = checked_cast<const InsertNode&>(*child); appendElementToBuilder( - valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob); + valueNode.elt, + formatFieldName(doc_diff::kUpdateSectionFieldName[0], idx), + &_bob); break; } case (NodeType::kDocumentInsert): { @@ -303,14 +321,14 @@ public: ++_childIt; return std::make_unique<DocumentInsertFrame>( *checked_cast<DocumentInsertionNode*>(child.get()), - BSONObjBuilder( - _bob.subobjStart(doc_diff::kUpdateSectionFieldName + idxAsStr))); + BSONObjBuilder(_bob.subobjStart( + formatFieldName(doc_diff::kUpdateSectionFieldName[0], idx)))); } case (NodeType::kDocumentSubDiff): case (NodeType::kArray): { InternalNode* subNode = checked_cast<InternalNode*>(child.get()); BSONObjBuilder childBuilder = _bob.subobjStart( - std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + idxAsStr); + formatFieldName(doc_diff::kSubDiffSectionFieldPrefix, idx)); ++_childIt; return makeSubNodeFrameHelper(subNode, std::move(childBuilder)); diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h index 5a1697169e8..a94a481074e 100644 --- a/src/mongo/db/update/document_diff_serialization.h +++ b/src/mongo/db/update/document_diff_serialization.h @@ -34,6 +34,7 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/mutable/document.h" #include "mongo/stdx/variant.h" +#include "mongo/util/itoa.h" #include "mongo/util/string_map.h" #include "mongo/util/visit_helper.h" @@ -342,10 +343,9 @@ public: ArrayNode() : InternalNode(doc_diff::kSizeOfEmptyArrayDiffBuilder){}; Node* addChild(size_t idx, std::unique_ptr<Node> node) { - sizeTracker.addEntry( - 1 /* modification type */ + 1 + - (idx ? static_cast<int>(std::log10(idx)) : 0) /* Count the number of digits */, - node.get()); + sizeTracker.addEntry(1 /* modification type */ + + StringData(ItoA(idx)).size() /* Count the number of digits */, + node.get()); auto itr = children.insert({idx, std::move(node)}); invariant(itr.second); return itr.first->second.get(); diff --git a/src/mongo/db/update_index_data.cpp b/src/mongo/db/update_index_data.cpp index a4fd5552470..5d1ce33fc34 100644 --- a/src/mongo/db/update_index_data.cpp +++ b/src/mongo/db/update_index_data.cpp @@ -32,6 +32,80 @@ namespace mongo { +namespace { +enum class NumericPathComponentResult { + kNumericOrDollar, + kConsecutiveNumbers, + kNonNumericOrDollar +}; + +NumericPathComponentResult checkNumericOrDollarPathComponent(const FieldRef& path, + size_t pathIdx, + const StringData& pathComponent) { + if (pathComponent == "$"_sd) { + return NumericPathComponentResult::kNumericOrDollar; + } + + if (FieldRef::isNumericPathComponentLenient(pathComponent)) { + // Peek ahead to see if the next component is also all digits. This implies that the + // update is attempting to create a numeric field name which would violate the + // "ambiguous field name in array" constraint for multi-key indexes. Break early in this + // case and conservatively return that this path affects the prefix of the consecutive + // numerical path components. For instance, an input such as 'a.0.1.b.c' would return + // the canonical index path of 'a'. + if ((pathIdx + 1) < path.numParts() && + FieldRef::isNumericPathComponentLenient(path.getPart(pathIdx + 1))) { + return NumericPathComponentResult::kConsecutiveNumbers; + } + return NumericPathComponentResult::kNumericOrDollar; + } + + return NumericPathComponentResult::kNonNumericOrDollar; +} + +/** + * Checks whether a given Document path overlaps with an indexed path + */ +bool pathOverlaps(const FieldRef& path, const FieldRef& indexedPath) { + size_t pathIdx = 0; + size_t indexedPathIdx = 0; + + while (pathIdx < path.numParts() && indexedPathIdx < indexedPath.numParts()) { + auto pathComponent = path.getPart(pathIdx); + + // The first part of the path must always be a valid field name, since it's not possible to + // store a top-level array or '$' field name in a document. + if (pathIdx > 0) { + NumericPathComponentResult res = + checkNumericOrDollarPathComponent(path, pathIdx, pathComponent); + if (res == NumericPathComponentResult::kNumericOrDollar) { + ++pathIdx; + continue; + } + if (res == NumericPathComponentResult::kConsecutiveNumbers) { + // This case implies that the update is attempting to create a numeric field name + // which would violate the "ambiguous field name in array" constraint for multi-key + // indexes. Break early in this case and conservatively return that this path + // affects the prefix of the consecutive numerical path components. For instance, an + // input path such as 'a.0.1.b.c' would match indexed path of 'a.d'. + return true; + } + } + + StringData indexedPathComponent = indexedPath.getPart(indexedPathIdx); + if (pathComponent != indexedPathComponent) { + return false; + } + + ++pathIdx; + ++indexedPathIdx; + } + + return true; +} +} // namespace + + UpdateIndexData::UpdateIndexData() : _allPathsIndexed(false) {} void UpdateIndexData::addPath(const FieldRef& path) { @@ -57,11 +131,10 @@ bool UpdateIndexData::mightBeIndexed(const FieldRef& path) const { return true; } - FieldRef canonicalPath = getCanonicalIndexField(path); - - for (const auto& idx : _canonicalPaths) { - if (_startsWith(canonicalPath, idx) || _startsWith(idx, canonicalPath)) + for (const auto& indexedPath : _canonicalPaths) { + if (pathOverlaps(path, indexedPath)) { return true; + } } for (const auto& pathComponent : _pathComponents) { @@ -89,22 +162,12 @@ FieldRef UpdateIndexData::getCanonicalIndexField(const FieldRef& path) { for (size_t i = 1; i < path.numParts(); ++i) { auto pathComponent = path.getPart(i); - if (pathComponent == "$"_sd) { + NumericPathComponentResult res = checkNumericOrDollarPathComponent(path, i, pathComponent); + if (res == NumericPathComponentResult::kNumericOrDollar) { continue; } - - if (FieldRef::isNumericPathComponentLenient(pathComponent)) { - // Peek ahead to see if the next component is also all digits. This implies that the - // update is attempting to create a numeric field name which would violate the - // "ambiguous field name in array" constraint for multi-key indexes. Break early in this - // case and conservatively return that this path affects the prefix of the consecutive - // numerical path components. For instance, an input such as 'a.0.1.b.c' would return - // the canonical index path of 'a'. - if ((i + 1) < path.numParts() && - FieldRef::isNumericPathComponentLenient(path.getPart(i + 1))) { - break; - } - continue; + if (res == NumericPathComponentResult::kConsecutiveNumbers) { + break; } buf.appendPart(pathComponent); |