diff options
author | Ian Boros <ian.boros@mongodb.com> | 2020-03-17 14:21:45 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-01 01:29:42 +0000 |
commit | 5557279e9ca9c78fbeb1026fc1a48a5c7c334b71 (patch) | |
tree | 786f50500c09991caed8ea9a2b72538abd4c6a77 | |
parent | 45da9589d4ea5929404e9674718927a6be352b4d (diff) | |
download | mongo-5557279e9ca9c78fbeb1026fc1a48a5c7c334b71.tar.gz |
SERVER-45763 optimize sort key generation
-rw-r--r-- | src/mongo/db/exec/document_value/document.cpp | 62 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document.h | 19 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document_internal.h | 21 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document_value_test.cpp | 179 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.cpp | 33 | ||||
-rw-r--r-- | src/mongo/util/visit_helper.h | 56 |
6 files changed, 368 insertions, 2 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp index a59c755d373..b2f502f378d 100644 --- a/src/mongo/db/exec/document_value/document.cpp +++ b/src/mongo/db/exec/document_value/document.cpp @@ -551,6 +551,68 @@ MutableValue MutableDocument::getNestedField(const vector<Position>& positions) return getNestedFieldHelper(positions, 0); } +namespace { +stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate> +getNestedFieldHelperBSON(BSONElement elt, const FieldPath& fp, size_t level) { + if (level == fp.getPathLength()) { + if (elt.ok()) { + return elt; + } else { + return stdx::monostate{}; + } + } + + if (elt.type() == BSONType::Array) { + return Document::TraversesArrayTag{}; + } else if (elt.type() == BSONType::Object) { + auto subFieldElt = elt.embeddedObject()[fp.getFieldName(level)]; + return getNestedFieldHelperBSON(subFieldElt, fp, level + 1); + } + + // The path continues "past" a scalar, and therefore does not exist. + return stdx::monostate{}; +} +} // namespace + +stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate> +Document::getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const { + if (!_storage) { + return stdx::monostate{}; + } + + StringData fieldName = dottedField.getFieldName(level); + auto bsonEltOrValue = _storage->getFieldNonCaching(fieldName); + + if (stdx::holds_alternative<BSONElement>(bsonEltOrValue)) { + return getNestedFieldHelperBSON( + stdx::get<BSONElement>(bsonEltOrValue), dottedField, level + 1); + } + + const Value& val = stdx::get<Value>(bsonEltOrValue); + + if (level + 1 == dottedField.getPathLength()) { + if (val.missing()) { + return stdx::monostate{}; + } else { + return val; + } + } + + if (val.getType() == BSONType::Array) { + return Document::TraversesArrayTag{}; + } else if (val.getType() == BSONType::Object) { + return val.getDocument().getNestedFieldNonCachingHelper(dottedField, level + 1); + } + + // The path extends beyond a scalar, so it does not exist. + return stdx::monostate{}; +} + +stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate> +Document::getNestedFieldNonCaching(const FieldPath& dottedField) const { + return getNestedFieldNonCachingHelper(dottedField, 0); +} + static Value getNestedFieldHelper(const Document& doc, const FieldPath& fieldNames, vector<Position>* positions, diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h index 7719958330c..7675fa6ba6d 100644 --- a/src/mongo/db/exec/document_value/document.h +++ b/src/mongo/db/exec/document_value/document.h @@ -152,6 +152,21 @@ public: const Value getNestedField(const FieldPath& path, std::vector<Position>* positions = nullptr) const; + /** + * Returns field at given path as either BSONElement or Value, depending on how it is + * stored. If an array is encountered in the middle of the path the TraversesArrayTag is + * returned. + * + * It is possible, however, for the returned BSONElement/Value to be an array if the given path + * ends with an array. For example, the document {a: {b:[1,2]}} and the path "a.b" will return + * a BSONElement or Value for the array [1, 2]. + * + * If the field is not found, stdx::monostate is returned. + */ + struct TraversesArrayTag {}; + stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate> getNestedFieldNonCaching( + const FieldPath& dottedField) const; + // Number of fields in this document. Exp. runtime O(n). size_t computeSize() const { return storage().computeSize(); @@ -331,6 +346,10 @@ private: const DocumentStorage& storage() const { return (_storage ? *_storage : DocumentStorage::emptyDoc()); } + + stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate> + getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const; + boost::intrusive_ptr<const DocumentStorage> _storage; }; diff --git a/src/mongo/db/exec/document_value/document_internal.h b/src/mongo/db/exec/document_value/document_internal.h index 5cd5d4692c4..d77ac6b5014 100644 --- a/src/mongo/db/exec/document_value/document_internal.h +++ b/src/mongo/db/exec/document_value/document_internal.h @@ -37,6 +37,7 @@ #include "mongo/base/static_assert.h" #include "mongo/db/exec/document_value/document_metadata_fields.h" #include "mongo/db/exec/document_value/value.h" +#include "mongo/stdx/variant.h" #include "mongo/util/intrusive_counter.h" namespace mongo { @@ -352,6 +353,26 @@ public: return getField(pos).val; } + /** + * Given a field name either return a Value if the field resides in the cache, or a BSONElement + * if the field resides in the backing BSON. + */ + stdx::variant<BSONElement, Value> getFieldNonCaching(StringData name) const { + Position pos = findField(name, LookupPolicy::kCacheOnly); + if (pos.found()) { + return {getField(pos).val}; + } + + for (auto&& bsonElement : _bson) { + if (name == bsonElement.fieldNameStringData()) { + return {bsonElement}; + } + } + + // Field not found. Return EOO Value. + return {Value()}; + } + /// Adds a new field with missing Value at the end of the document Value& appendField(StringData name, ValueElement::Kind kind); diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp index 2eb26618e24..9c365062625 100644 --- a/src/mongo/db/exec/document_value/document_value_test.cpp +++ b/src/mongo/db/exec/document_value/document_value_test.cpp @@ -175,6 +175,185 @@ TEST(DocumentSerialization, CannotSerializeDocumentThatExceedsDepthLimit) { throwaway.abandon(); } +TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) { + BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true); + Document document = fromBson(bson); + + // Should be able to get all top level fields without caching. + for (auto&& elt : bson) { + auto valueVariant = document.getNestedFieldNonCaching(elt.fieldNameStringData()); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant)); + ASSERT_TRUE(stdx::get<BSONElement>(valueVariant).binaryEqual(elt)); + + // Get the field again to be sure it wasn't cached during the access above. + auto valueVariantAfterAccess = document.getNestedFieldNonCaching(elt.fieldNameStringData()); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariantAfterAccess)); + } + + // Try to get a top level field which doesn't exist. + auto nonExistentFieldVariant = document.getNestedFieldNonCaching("doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentFieldVariant)); + + assertRoundTrips(document); +} + +TEST(DocumentGetFieldNonCaching, CachedTopLevelFields) { + BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true); + Document document = fromBson(bson); + + // Force 'scalar2' to be cached. + ASSERT_FALSE(document["scalar2"].missing()); + + // Attempt to access scalar2 with the non caching accessor. It should be cached already. + { + auto valueVariant = document.getNestedFieldNonCaching("scalar2"); + ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant)); + ASSERT_EQ(stdx::get<Value>(valueVariant).getBool(), true); + } + + // Attempt to access 'array' with the non caching accessor. It should not be cached. + { + auto valueVariant = document.getNestedFieldNonCaching("array"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant)); + auto elt = stdx::get<BSONElement>(valueVariant); + ASSERT_TRUE(bson["array"].binaryEqual(elt)); + } + + // Force 'array' to be cached. + ASSERT_FALSE(document["array"].missing()); + + // Check that 'array' is cached and that we can read it with the non caching accessor. + { + auto valueVariant = document.getNestedFieldNonCaching("array"); + ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant)); + ASSERT_EQ(stdx::get<Value>(valueVariant).getArrayLength(), 3); + } +} + +TEST(DocumentGetFieldNonCaching, NonArrayDottedPaths) { + BSONObj bson = BSON("address" << BSON("zip" << 123 << "street" + << "foo")); + Document document = fromBson(bson); + + // With no fields cached. + { + // Get a nested field without caching. + auto zipVariant = document.getNestedFieldNonCaching("address.zip"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariant)); + ASSERT_EQ(stdx::get<BSONElement>(zipVariant).numberInt(), 123); + + // Check that it was not cached. + auto zipVariantAfterAccess = document.getNestedFieldNonCaching("address.zip"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariantAfterAccess)); + + // Check that the top level field isn't cached after accessing one of its children. + auto addressVariant = document.getNestedFieldNonCaching("address"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant)); + + // Get a dotted field which does not exist. + auto nonExistentVariant = document.getNestedFieldNonCaching("address.doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentVariant)); + + // Check that the top level field isn't cached after a failed attempt to access one of its + // children. + addressVariant = document.getNestedFieldNonCaching("address"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant)); + + // Get a dotted field which extends past a scalar. + auto pathPastScalarVariant = document.getNestedFieldNonCaching("address.zip.doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(pathPastScalarVariant)); + } + + // Now force 'address.street' to be cached. + ASSERT_FALSE(document.getNestedField("address.street").missing()); + + // Check that we get the right value when accessing it with the non caching accessor. + { + // The top level field should be cached. + auto topLevelVariant = document.getNestedFieldNonCaching("address"); + ASSERT_TRUE(stdx::holds_alternative<Value>(topLevelVariant)); + + auto variant = document.getNestedFieldNonCaching("address.street"); + ASSERT_TRUE(stdx::holds_alternative<Value>(variant)); + ASSERT_EQ(stdx::get<Value>(variant).getString(), "foo"); + } + + // Check that the other subfield, 'zip' is not cached. + { + auto variant = document.getNestedFieldNonCaching("address.zip"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant)); + } + + // Check that attempting to get a subfield of 'address.street' returns monostate. + { + auto variant = document.getNestedFieldNonCaching("address.street.doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(variant)); + } +} + +TEST(DocumentGetFieldNonCaching, NonArrayLongDottedPath) { + BSONObj bson = BSON("a" << BSON("b" << BSON("c" << BSON("d" << BSON("e" << 1))))); + Document document = fromBson(bson); + + // Not cached. + { + auto variant = document.getNestedFieldNonCaching("a.b.c.d.e"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant)); + ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1); + } + + // Force part of the path to get cached. + ASSERT_FALSE(document.getNestedField("a.b.c").missing()); + + // The function should be able to traverse a path which is part Value and part BSONElement. + { + auto variant = document.getNestedFieldNonCaching("a.b.c.d.e"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant)); + ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1); + } + + // Force the entire path to be cached. + ASSERT_FALSE(document.getNestedField("a.b.c.d.e").missing()); + + { + auto variant = document.getNestedFieldNonCaching("a.b.c.d.e"); + ASSERT_TRUE(stdx::holds_alternative<Value>(variant)); + ASSERT_EQ(stdx::get<Value>(variant).getInt(), 1); + } +} + +TEST(DocumentGetFieldNonCaching, TraverseArray) { + BSONObj bson = + BSON("topLevelArray" << BSON_ARRAY(BSON("foo" << 1) << BSON("foo" << 2) << BSON("foo" << 3)) + << "subObj" << BSON("subArray" << BSON_ARRAY(1 << 2))); + Document document = fromBson(bson); + + auto checkArrayTagIsReturned = [&document]() { + auto topLevelArrayVariant = document.getNestedFieldNonCaching("topLevelArray.foo"); + ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(topLevelArrayVariant)); + + // Attempting to traverse an uncached nested array results in TraversesArrayTag being + // returned. + auto subArrayVariant = document.getNestedFieldNonCaching("subObj.subArray.foobar"); + ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(subArrayVariant)); + }; + + // Check with no fields cached. + checkArrayTagIsReturned(); + + // Force the top level array to be cached. + ASSERT_FALSE(document["topLevelArray"].missing()); + + // Check with one field cached. + checkArrayTagIsReturned(); + + // Force the array that's in a sub object to be cached. + ASSERT_FALSE(document.getNestedField("subObj.subArray").missing()); + + // Check it works with both fields (and the full sub object) cached. + checkArrayTagIsReturned(); +} + /** Add Document fields. */ class AddField { public: diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp index 1e9ea326255..8799c58b5f9 100644 --- a/src/mongo/db/index/sort_key_generator.cpp +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -33,6 +33,7 @@ #include "mongo/bson/bsonobj_comparator.h" #include "mongo/db/query/collation/collation_index_key.h" +#include "mongo/util/visit_helper.h" namespace mongo { @@ -219,8 +220,36 @@ StatusWith<Value> SortKeyGenerator::extractKeyPart( Value plainKey; if (patternPart.fieldPath) { invariant(!patternPart.expression); - auto key = - document_path_support::extractElementAlongNonArrayPath(doc, *patternPart.fieldPath); + auto keyVariant = doc.getNestedFieldNonCaching(*patternPart.fieldPath); + + const Status arrayError{ErrorCodes::InternalError, "array along path"}; + auto key = stdx::visit( + visit_helper::Overloaded{ + // In this case, the document has an array along the path given. This means the + // document is ineligible for taking the fast path for index key generation. + [&arrayError](Document::TraversesArrayTag) -> StatusWith<Value> { + return arrayError; + }, + // In this case the field was already in the cache (or may not have existed). + [&arrayError](const Value& val) -> StatusWith<Value> { + // The document may have an array at the given path. + if (val.getType() == BSONType::Array) { + return arrayError; + } + return val; + }, + // In this case the field was in the backing BSON, and not in the cache. + [&arrayError](BSONElement elt) -> StatusWith<Value> { + // The document may have an array at the given path. + if (elt.type() == BSONType::Array) { + return arrayError; + } + return Value(elt); + }, + [](stdx::monostate none) -> StatusWith<Value> { return Value(); }, + }, + keyVariant); + if (!key.isOK()) { return key; } diff --git a/src/mongo/util/visit_helper.h b/src/mongo/util/visit_helper.h new file mode 100644 index 00000000000..435e05ee36f --- /dev/null +++ b/src/mongo/util/visit_helper.h @@ -0,0 +1,56 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +namespace mongo { +namespace visit_helper { + +/** + * This is the "overload pattern" designed to be used with std::visit. Example usage: + * + * auto result = std::visit( + * visit_helper::Overloaded{ + * [](int a) { + * ... + * }, + * [](StringData b) { + * ... + * }, + * }, + * someVariant); + */ +template <class... Ts> +struct Overloaded : Ts... { + using Ts::operator()...; +}; +template <class... Ts> +Overloaded(Ts...)->Overloaded<Ts...>; +} // namespace visit_helper +} // namespace mongo |