summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMohammad Dashti <mdashti@gmail.com>2021-06-17 00:04:04 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-17 00:41:40 +0000
commit1ebaae342dee93f57005792bb331bf90450ee275 (patch)
tree9bd109b5a6c816c424443a3e314382539d13feb3
parent81e66de93a8cf6d08acf627e036b95b4e3ee64bc (diff)
downloadmongo-1ebaae342dee93f57005792bb331bf90450ee275.tar.gz
SERVER-54953 Improved performance of UpdateIndexData path matching code
Co-authored-by: Ian Boros <ian.boros@mongodb.com> (cherry picked from commit d4418cfb08842d04c910f2cabfcdf6e56e5b5f00)
-rw-r--r--src/mongo/db/update/document_diff_serialization.cpp30
-rw-r--r--src/mongo/db/update/document_diff_serialization.h8
-rw-r--r--src/mongo/db/update_index_data.cpp99
3 files changed, 109 insertions, 28 deletions
diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp
index 9fac816d460..285da519c48 100644
--- a/src/mongo/db/update/document_diff_serialization.cpp
+++ b/src/mongo/db/update/document_diff_serialization.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/update/document_diff_serialization.h"
+#include <fmt/format.h>
#include <stack>
#include "mongo/util/str.h"
@@ -277,21 +278,38 @@ public:
}
}
+ // +1 for leading char. +1 to round up from digits10.
+ static constexpr size_t fieldNameSize = std::numeric_limits<std::uint64_t>::digits10 + 2;
+ char fieldNameStorage[fieldNameSize];
+
+ auto formatFieldName = [&](char pre, size_t idx) {
+ const char* fieldNameStorageEnd =
+ fmt::format_to(fieldNameStorage, FMT_STRING("{}{}"), pre, idx);
+ return StringData(static_cast<const char*>(fieldNameStorage), fieldNameStorageEnd);
+ };
+
+ // Make sure that 'doc_diff::kUpdateSectionFieldName' is a single character.
+ static_assert(doc_diff::kUpdateSectionFieldName.size() == 1,
+ "doc_diff::kUpdateSectionFieldName should be a single character.");
+
for (; _childIt != _node.getChildren().end(); ++_childIt) {
auto&& [idx, child] = *_childIt;
- auto idxAsStr = std::to_string(idx);
switch (child->type()) {
case (NodeType::kUpdate): {
const auto& valueNode = checked_cast<const UpdateNode&>(*child);
appendElementToBuilder(
- valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob);
+ valueNode.elt,
+ formatFieldName(doc_diff::kUpdateSectionFieldName[0], idx),
+ &_bob);
break;
}
case (NodeType::kInsert): {
const auto& valueNode = checked_cast<const InsertNode&>(*child);
appendElementToBuilder(
- valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob);
+ valueNode.elt,
+ formatFieldName(doc_diff::kUpdateSectionFieldName[0], idx),
+ &_bob);
break;
}
case (NodeType::kDocumentInsert): {
@@ -303,14 +321,14 @@ public:
++_childIt;
return std::make_unique<DocumentInsertFrame>(
*checked_cast<DocumentInsertionNode*>(child.get()),
- BSONObjBuilder(
- _bob.subobjStart(doc_diff::kUpdateSectionFieldName + idxAsStr)));
+ BSONObjBuilder(_bob.subobjStart(
+ formatFieldName(doc_diff::kUpdateSectionFieldName[0], idx))));
}
case (NodeType::kDocumentSubDiff):
case (NodeType::kArray): {
InternalNode* subNode = checked_cast<InternalNode*>(child.get());
BSONObjBuilder childBuilder = _bob.subobjStart(
- std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + idxAsStr);
+ formatFieldName(doc_diff::kSubDiffSectionFieldPrefix, idx));
++_childIt;
return makeSubNodeFrameHelper(subNode, std::move(childBuilder));
diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h
index 5a1697169e8..a94a481074e 100644
--- a/src/mongo/db/update/document_diff_serialization.h
+++ b/src/mongo/db/update/document_diff_serialization.h
@@ -34,6 +34,7 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/mutable/document.h"
#include "mongo/stdx/variant.h"
+#include "mongo/util/itoa.h"
#include "mongo/util/string_map.h"
#include "mongo/util/visit_helper.h"
@@ -342,10 +343,9 @@ public:
ArrayNode() : InternalNode(doc_diff::kSizeOfEmptyArrayDiffBuilder){};
Node* addChild(size_t idx, std::unique_ptr<Node> node) {
- sizeTracker.addEntry(
- 1 /* modification type */ + 1 +
- (idx ? static_cast<int>(std::log10(idx)) : 0) /* Count the number of digits */,
- node.get());
+ sizeTracker.addEntry(1 /* modification type */ +
+ StringData(ItoA(idx)).size() /* Count the number of digits */,
+ node.get());
auto itr = children.insert({idx, std::move(node)});
invariant(itr.second);
return itr.first->second.get();
diff --git a/src/mongo/db/update_index_data.cpp b/src/mongo/db/update_index_data.cpp
index a4fd5552470..5d1ce33fc34 100644
--- a/src/mongo/db/update_index_data.cpp
+++ b/src/mongo/db/update_index_data.cpp
@@ -32,6 +32,80 @@
namespace mongo {
+namespace {
+enum class NumericPathComponentResult {
+ kNumericOrDollar,
+ kConsecutiveNumbers,
+ kNonNumericOrDollar
+};
+
+NumericPathComponentResult checkNumericOrDollarPathComponent(const FieldRef& path,
+ size_t pathIdx,
+ const StringData& pathComponent) {
+ if (pathComponent == "$"_sd) {
+ return NumericPathComponentResult::kNumericOrDollar;
+ }
+
+ if (FieldRef::isNumericPathComponentLenient(pathComponent)) {
+ // Peek ahead to see if the next component is also all digits. This implies that the
+ // update is attempting to create a numeric field name which would violate the
+ // "ambiguous field name in array" constraint for multi-key indexes. Break early in this
+ // case and conservatively return that this path affects the prefix of the consecutive
+ // numerical path components. For instance, an input such as 'a.0.1.b.c' would return
+ // the canonical index path of 'a'.
+ if ((pathIdx + 1) < path.numParts() &&
+ FieldRef::isNumericPathComponentLenient(path.getPart(pathIdx + 1))) {
+ return NumericPathComponentResult::kConsecutiveNumbers;
+ }
+ return NumericPathComponentResult::kNumericOrDollar;
+ }
+
+ return NumericPathComponentResult::kNonNumericOrDollar;
+}
+
+/**
+ * Checks whether a given Document path overlaps with an indexed path
+ */
+bool pathOverlaps(const FieldRef& path, const FieldRef& indexedPath) {
+ size_t pathIdx = 0;
+ size_t indexedPathIdx = 0;
+
+ while (pathIdx < path.numParts() && indexedPathIdx < indexedPath.numParts()) {
+ auto pathComponent = path.getPart(pathIdx);
+
+ // The first part of the path must always be a valid field name, since it's not possible to
+ // store a top-level array or '$' field name in a document.
+ if (pathIdx > 0) {
+ NumericPathComponentResult res =
+ checkNumericOrDollarPathComponent(path, pathIdx, pathComponent);
+ if (res == NumericPathComponentResult::kNumericOrDollar) {
+ ++pathIdx;
+ continue;
+ }
+ if (res == NumericPathComponentResult::kConsecutiveNumbers) {
+ // This case implies that the update is attempting to create a numeric field name
+ // which would violate the "ambiguous field name in array" constraint for multi-key
+ // indexes. Break early in this case and conservatively return that this path
+ // affects the prefix of the consecutive numerical path components. For instance, an
+ // input path such as 'a.0.1.b.c' would match indexed path of 'a.d'.
+ return true;
+ }
+ }
+
+ StringData indexedPathComponent = indexedPath.getPart(indexedPathIdx);
+ if (pathComponent != indexedPathComponent) {
+ return false;
+ }
+
+ ++pathIdx;
+ ++indexedPathIdx;
+ }
+
+ return true;
+}
+} // namespace
+
+
UpdateIndexData::UpdateIndexData() : _allPathsIndexed(false) {}
void UpdateIndexData::addPath(const FieldRef& path) {
@@ -57,11 +131,10 @@ bool UpdateIndexData::mightBeIndexed(const FieldRef& path) const {
return true;
}
- FieldRef canonicalPath = getCanonicalIndexField(path);
-
- for (const auto& idx : _canonicalPaths) {
- if (_startsWith(canonicalPath, idx) || _startsWith(idx, canonicalPath))
+ for (const auto& indexedPath : _canonicalPaths) {
+ if (pathOverlaps(path, indexedPath)) {
return true;
+ }
}
for (const auto& pathComponent : _pathComponents) {
@@ -89,22 +162,12 @@ FieldRef UpdateIndexData::getCanonicalIndexField(const FieldRef& path) {
for (size_t i = 1; i < path.numParts(); ++i) {
auto pathComponent = path.getPart(i);
- if (pathComponent == "$"_sd) {
+ NumericPathComponentResult res = checkNumericOrDollarPathComponent(path, i, pathComponent);
+ if (res == NumericPathComponentResult::kNumericOrDollar) {
continue;
}
-
- if (FieldRef::isNumericPathComponentLenient(pathComponent)) {
- // Peek ahead to see if the next component is also all digits. This implies that the
- // update is attempting to create a numeric field name which would violate the
- // "ambiguous field name in array" constraint for multi-key indexes. Break early in this
- // case and conservatively return that this path affects the prefix of the consecutive
- // numerical path components. For instance, an input such as 'a.0.1.b.c' would return
- // the canonical index path of 'a'.
- if ((i + 1) < path.numParts() &&
- FieldRef::isNumericPathComponentLenient(path.getPart(i + 1))) {
- break;
- }
- continue;
+ if (res == NumericPathComponentResult::kConsecutiveNumbers) {
+ break;
}
buf.appendPart(pathComponent);