From 188f24f0d61b62b9b0601f5b890b4820d7570719 Mon Sep 17 00:00:00 2001 From: Irina Yatsenko Date: Wed, 2 Nov 2022 15:16:52 +0000 Subject: SERVER-70944 precompute field hashes for ExpressionFieldPath --- src/mongo/db/pipeline/expression.cpp | 2 +- src/mongo/db/pipeline/field_path.cpp | 14 +++++++------- src/mongo/db/pipeline/field_path.h | 23 ++++++++++------------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index e87a39f85ac..080faffe5c3 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -2414,7 +2414,7 @@ intrusive_ptr ExpressionFieldPath::createVarFromString( ExpressionFieldPath::ExpressionFieldPath(ExpressionContext* const expCtx, const string& theFieldPath, Variables::Id variable) - : Expression(expCtx), _fieldPath(theFieldPath), _variable(variable) { + : Expression(expCtx), _fieldPath(theFieldPath, true /*precomputeHashes*/), _variable(variable) { const auto varName = theFieldPath.substr(0, theFieldPath.find('.')); tassert(5943201, std::string{"Variable with $$ROOT's id is not $$CURRENT or $$ROOT as expected, " diff --git a/src/mongo/db/pipeline/field_path.cpp b/src/mongo/db/pipeline/field_path.cpp index c860e863512..bdbbbabfee0 100644 --- a/src/mongo/db/pipeline/field_path.cpp +++ b/src/mongo/db/pipeline/field_path.cpp @@ -69,10 +69,8 @@ string FieldPath::getFullyQualifiedPath(StringData prefix, StringData suffix) { return str::stream() << prefix << "." << suffix; } -FieldPath::FieldPath(std::string inputPath) - : _fieldPath(std::move(inputPath)), - _fieldPathDotPosition{string::npos}, - _fieldHash{kHashUninitialized} { +FieldPath::FieldPath(std::string inputPath, bool precomputeHashes) + : _fieldPath(std::move(inputPath)), _fieldPathDotPosition{string::npos} { uassert(40352, "FieldPath cannot be constructed with empty string", !_fieldPath.empty()); uassert(40353, "FieldPath must not end with a '.'.", _fieldPath[_fieldPath.size() - 1] != '.'); @@ -81,19 +79,21 @@ FieldPath::FieldPath(std::string inputPath) size_t startPos = 0; while (string::npos != (dotPos = _fieldPath.find('.', startPos))) { _fieldPathDotPosition.push_back(dotPos); - _fieldHash.push_back(kHashUninitialized); startPos = dotPos + 1; } _fieldPathDotPosition.push_back(_fieldPath.size()); - // Validate the path length and the fields. + // Validate the path length and the fields, and precompute their hashes if requested. const auto pathLength = getPathLength(); uassert(ErrorCodes::Overflow, "FieldPath is too long", pathLength <= BSONDepth::getMaxAllowableDepth()); + _fieldHash.reserve(pathLength); for (size_t i = 0; i < pathLength; ++i) { - uassertValidFieldName(getFieldName(i)); + const auto& fieldName = getFieldName(i); + uassertValidFieldName(fieldName); + _fieldHash.push_back(precomputeHashes ? FieldNameHasher()(fieldName) : kHashUninitialized); } } diff --git a/src/mongo/db/pipeline/field_path.h b/src/mongo/db/pipeline/field_path.h index d2ee93734e7..c216bdbc0a4 100644 --- a/src/mongo/db/pipeline/field_path.h +++ b/src/mongo/db/pipeline/field_path.h @@ -69,9 +69,11 @@ public: * * Field names are validated using uassertValidFieldName(). */ - /* implicit */ FieldPath(std::string inputPath); - /* implicit */ FieldPath(StringData inputPath) : FieldPath(inputPath.toString()) {} - /* implicit */ FieldPath(const char* inputPath) : FieldPath(std::string(inputPath)) {} + /* implicit */ FieldPath(std::string inputPath, bool precomputeHashes = false); + /* implicit */ FieldPath(StringData inputPath, bool precomputeHashes = false) + : FieldPath(inputPath.toString(), precomputeHashes) {} + /* implicit */ FieldPath(const char* inputPath, bool precomputeHashes = false) + : FieldPath(std::string(inputPath), precomputeHashes) {} /** * Returns the number of path elements in the field path. @@ -117,13 +119,8 @@ public: */ HashedFieldName getFieldNameHashed(size_t i) const { dassert(i < getPathLength()); - const auto begin = _fieldPathDotPosition[i] + 1; - const auto end = _fieldPathDotPosition[i + 1]; - StringData fieldName{&_fieldPath[begin], end - begin}; - if (_fieldHash[i] == kHashUninitialized) { - _fieldHash[i] = FieldNameHasher()(fieldName); - } - return HashedFieldName{fieldName, _fieldHash[i]}; + invariant(_fieldHash[i] != kHashUninitialized); + return HashedFieldName{getFieldName(i), _fieldHash[i]}; } /** @@ -177,9 +174,9 @@ private: // lookup. std::vector _fieldPathDotPosition; - // Contains the cached hash value for the field. Will initially be set to 'kHashUninitialized', - // and only generated when it is first retrieved via 'getFieldNameHashed'. - mutable std::vector _fieldHash; + // Contains the hash value for the field names if it was requested when creating this path. + // Otherwise all elements are set to 'kHashUninitialized'. + std::vector _fieldHash; static constexpr std::size_t kHashUninitialized = std::numeric_limits::max(); }; -- cgit v1.2.1