diff options
author | Ian Boros <ian.boros@mongodb.com> | 2020-03-17 14:21:45 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-01 01:29:42 +0000 |
commit | 5557279e9ca9c78fbeb1026fc1a48a5c7c334b71 (patch) | |
tree | 786f50500c09991caed8ea9a2b72538abd4c6a77 /src/mongo/db/exec | |
parent | 45da9589d4ea5929404e9674718927a6be352b4d (diff) | |
download | mongo-5557279e9ca9c78fbeb1026fc1a48a5c7c334b71.tar.gz |
SERVER-45763 optimize sort key generation
Diffstat (limited to 'src/mongo/db/exec')
-rw-r--r-- | src/mongo/db/exec/document_value/document.cpp | 62 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document.h | 19 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document_internal.h | 21 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document_value_test.cpp | 179 |
4 files changed, 281 insertions, 0 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp index a59c755d373..b2f502f378d 100644 --- a/src/mongo/db/exec/document_value/document.cpp +++ b/src/mongo/db/exec/document_value/document.cpp @@ -551,6 +551,68 @@ MutableValue MutableDocument::getNestedField(const vector<Position>& positions) return getNestedFieldHelper(positions, 0); } +namespace { +stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate> +getNestedFieldHelperBSON(BSONElement elt, const FieldPath& fp, size_t level) { + if (level == fp.getPathLength()) { + if (elt.ok()) { + return elt; + } else { + return stdx::monostate{}; + } + } + + if (elt.type() == BSONType::Array) { + return Document::TraversesArrayTag{}; + } else if (elt.type() == BSONType::Object) { + auto subFieldElt = elt.embeddedObject()[fp.getFieldName(level)]; + return getNestedFieldHelperBSON(subFieldElt, fp, level + 1); + } + + // The path continues "past" a scalar, and therefore does not exist. + return stdx::monostate{}; +} +} // namespace + +stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate> +Document::getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const { + if (!_storage) { + return stdx::monostate{}; + } + + StringData fieldName = dottedField.getFieldName(level); + auto bsonEltOrValue = _storage->getFieldNonCaching(fieldName); + + if (stdx::holds_alternative<BSONElement>(bsonEltOrValue)) { + return getNestedFieldHelperBSON( + stdx::get<BSONElement>(bsonEltOrValue), dottedField, level + 1); + } + + const Value& val = stdx::get<Value>(bsonEltOrValue); + + if (level + 1 == dottedField.getPathLength()) { + if (val.missing()) { + return stdx::monostate{}; + } else { + return val; + } + } + + if (val.getType() == BSONType::Array) { + return Document::TraversesArrayTag{}; + } else if (val.getType() == BSONType::Object) { + return val.getDocument().getNestedFieldNonCachingHelper(dottedField, level + 1); + } + + // The path extends beyond a scalar, so it does not exist. + return stdx::monostate{}; +} + +stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate> +Document::getNestedFieldNonCaching(const FieldPath& dottedField) const { + return getNestedFieldNonCachingHelper(dottedField, 0); +} + static Value getNestedFieldHelper(const Document& doc, const FieldPath& fieldNames, vector<Position>* positions, diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h index 7719958330c..7675fa6ba6d 100644 --- a/src/mongo/db/exec/document_value/document.h +++ b/src/mongo/db/exec/document_value/document.h @@ -152,6 +152,21 @@ public: const Value getNestedField(const FieldPath& path, std::vector<Position>* positions = nullptr) const; + /** + * Returns field at given path as either BSONElement or Value, depending on how it is + * stored. If an array is encountered in the middle of the path the TraversesArrayTag is + * returned. + * + * It is possible, however, for the returned BSONElement/Value to be an array if the given path + * ends with an array. For example, the document {a: {b:[1,2]}} and the path "a.b" will return + * a BSONElement or Value for the array [1, 2]. + * + * If the field is not found, stdx::monostate is returned. + */ + struct TraversesArrayTag {}; + stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate> getNestedFieldNonCaching( + const FieldPath& dottedField) const; + // Number of fields in this document. Exp. runtime O(n). size_t computeSize() const { return storage().computeSize(); @@ -331,6 +346,10 @@ private: const DocumentStorage& storage() const { return (_storage ? *_storage : DocumentStorage::emptyDoc()); } + + stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate> + getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const; + boost::intrusive_ptr<const DocumentStorage> _storage; }; diff --git a/src/mongo/db/exec/document_value/document_internal.h b/src/mongo/db/exec/document_value/document_internal.h index 5cd5d4692c4..d77ac6b5014 100644 --- a/src/mongo/db/exec/document_value/document_internal.h +++ b/src/mongo/db/exec/document_value/document_internal.h @@ -37,6 +37,7 @@ #include "mongo/base/static_assert.h" #include "mongo/db/exec/document_value/document_metadata_fields.h" #include "mongo/db/exec/document_value/value.h" +#include "mongo/stdx/variant.h" #include "mongo/util/intrusive_counter.h" namespace mongo { @@ -352,6 +353,26 @@ public: return getField(pos).val; } + /** + * Given a field name either return a Value if the field resides in the cache, or a BSONElement + * if the field resides in the backing BSON. + */ + stdx::variant<BSONElement, Value> getFieldNonCaching(StringData name) const { + Position pos = findField(name, LookupPolicy::kCacheOnly); + if (pos.found()) { + return {getField(pos).val}; + } + + for (auto&& bsonElement : _bson) { + if (name == bsonElement.fieldNameStringData()) { + return {bsonElement}; + } + } + + // Field not found. Return EOO Value. + return {Value()}; + } + /// Adds a new field with missing Value at the end of the document Value& appendField(StringData name, ValueElement::Kind kind); diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp index 2eb26618e24..9c365062625 100644 --- a/src/mongo/db/exec/document_value/document_value_test.cpp +++ b/src/mongo/db/exec/document_value/document_value_test.cpp @@ -175,6 +175,185 @@ TEST(DocumentSerialization, CannotSerializeDocumentThatExceedsDepthLimit) { throwaway.abandon(); } +TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) { + BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true); + Document document = fromBson(bson); + + // Should be able to get all top level fields without caching. + for (auto&& elt : bson) { + auto valueVariant = document.getNestedFieldNonCaching(elt.fieldNameStringData()); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant)); + ASSERT_TRUE(stdx::get<BSONElement>(valueVariant).binaryEqual(elt)); + + // Get the field again to be sure it wasn't cached during the access above. + auto valueVariantAfterAccess = document.getNestedFieldNonCaching(elt.fieldNameStringData()); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariantAfterAccess)); + } + + // Try to get a top level field which doesn't exist. + auto nonExistentFieldVariant = document.getNestedFieldNonCaching("doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentFieldVariant)); + + assertRoundTrips(document); +} + +TEST(DocumentGetFieldNonCaching, CachedTopLevelFields) { + BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true); + Document document = fromBson(bson); + + // Force 'scalar2' to be cached. + ASSERT_FALSE(document["scalar2"].missing()); + + // Attempt to access scalar2 with the non caching accessor. It should be cached already. + { + auto valueVariant = document.getNestedFieldNonCaching("scalar2"); + ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant)); + ASSERT_EQ(stdx::get<Value>(valueVariant).getBool(), true); + } + + // Attempt to access 'array' with the non caching accessor. It should not be cached. + { + auto valueVariant = document.getNestedFieldNonCaching("array"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant)); + auto elt = stdx::get<BSONElement>(valueVariant); + ASSERT_TRUE(bson["array"].binaryEqual(elt)); + } + + // Force 'array' to be cached. + ASSERT_FALSE(document["array"].missing()); + + // Check that 'array' is cached and that we can read it with the non caching accessor. + { + auto valueVariant = document.getNestedFieldNonCaching("array"); + ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant)); + ASSERT_EQ(stdx::get<Value>(valueVariant).getArrayLength(), 3); + } +} + +TEST(DocumentGetFieldNonCaching, NonArrayDottedPaths) { + BSONObj bson = BSON("address" << BSON("zip" << 123 << "street" + << "foo")); + Document document = fromBson(bson); + + // With no fields cached. + { + // Get a nested field without caching. + auto zipVariant = document.getNestedFieldNonCaching("address.zip"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariant)); + ASSERT_EQ(stdx::get<BSONElement>(zipVariant).numberInt(), 123); + + // Check that it was not cached. + auto zipVariantAfterAccess = document.getNestedFieldNonCaching("address.zip"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariantAfterAccess)); + + // Check that the top level field isn't cached after accessing one of its children. + auto addressVariant = document.getNestedFieldNonCaching("address"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant)); + + // Get a dotted field which does not exist. + auto nonExistentVariant = document.getNestedFieldNonCaching("address.doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentVariant)); + + // Check that the top level field isn't cached after a failed attempt to access one of its + // children. + addressVariant = document.getNestedFieldNonCaching("address"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant)); + + // Get a dotted field which extends past a scalar. + auto pathPastScalarVariant = document.getNestedFieldNonCaching("address.zip.doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(pathPastScalarVariant)); + } + + // Now force 'address.street' to be cached. + ASSERT_FALSE(document.getNestedField("address.street").missing()); + + // Check that we get the right value when accessing it with the non caching accessor. + { + // The top level field should be cached. + auto topLevelVariant = document.getNestedFieldNonCaching("address"); + ASSERT_TRUE(stdx::holds_alternative<Value>(topLevelVariant)); + + auto variant = document.getNestedFieldNonCaching("address.street"); + ASSERT_TRUE(stdx::holds_alternative<Value>(variant)); + ASSERT_EQ(stdx::get<Value>(variant).getString(), "foo"); + } + + // Check that the other subfield, 'zip' is not cached. + { + auto variant = document.getNestedFieldNonCaching("address.zip"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant)); + } + + // Check that attempting to get a subfield of 'address.street' returns monostate. + { + auto variant = document.getNestedFieldNonCaching("address.street.doesnotexist"); + ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(variant)); + } +} + +TEST(DocumentGetFieldNonCaching, NonArrayLongDottedPath) { + BSONObj bson = BSON("a" << BSON("b" << BSON("c" << BSON("d" << BSON("e" << 1))))); + Document document = fromBson(bson); + + // Not cached. + { + auto variant = document.getNestedFieldNonCaching("a.b.c.d.e"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant)); + ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1); + } + + // Force part of the path to get cached. + ASSERT_FALSE(document.getNestedField("a.b.c").missing()); + + // The function should be able to traverse a path which is part Value and part BSONElement. + { + auto variant = document.getNestedFieldNonCaching("a.b.c.d.e"); + ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant)); + ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1); + } + + // Force the entire path to be cached. + ASSERT_FALSE(document.getNestedField("a.b.c.d.e").missing()); + + { + auto variant = document.getNestedFieldNonCaching("a.b.c.d.e"); + ASSERT_TRUE(stdx::holds_alternative<Value>(variant)); + ASSERT_EQ(stdx::get<Value>(variant).getInt(), 1); + } +} + +TEST(DocumentGetFieldNonCaching, TraverseArray) { + BSONObj bson = + BSON("topLevelArray" << BSON_ARRAY(BSON("foo" << 1) << BSON("foo" << 2) << BSON("foo" << 3)) + << "subObj" << BSON("subArray" << BSON_ARRAY(1 << 2))); + Document document = fromBson(bson); + + auto checkArrayTagIsReturned = [&document]() { + auto topLevelArrayVariant = document.getNestedFieldNonCaching("topLevelArray.foo"); + ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(topLevelArrayVariant)); + + // Attempting to traverse an uncached nested array results in TraversesArrayTag being + // returned. + auto subArrayVariant = document.getNestedFieldNonCaching("subObj.subArray.foobar"); + ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(subArrayVariant)); + }; + + // Check with no fields cached. + checkArrayTagIsReturned(); + + // Force the top level array to be cached. + ASSERT_FALSE(document["topLevelArray"].missing()); + + // Check with one field cached. + checkArrayTagIsReturned(); + + // Force the array that's in a sub object to be cached. + ASSERT_FALSE(document.getNestedField("subObj.subArray").missing()); + + // Check it works with both fields (and the full sub object) cached. + checkArrayTagIsReturned(); +} + /** Add Document fields. */ class AddField { public: |