summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Boros <ian.boros@mongodb.com>2020-03-17 14:21:45 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-01 01:29:42 +0000
commit5557279e9ca9c78fbeb1026fc1a48a5c7c334b71 (patch)
tree786f50500c09991caed8ea9a2b72538abd4c6a77
parent45da9589d4ea5929404e9674718927a6be352b4d (diff)
downloadmongo-5557279e9ca9c78fbeb1026fc1a48a5c7c334b71.tar.gz
SERVER-45763 optimize sort key generation
-rw-r--r--src/mongo/db/exec/document_value/document.cpp62
-rw-r--r--src/mongo/db/exec/document_value/document.h19
-rw-r--r--src/mongo/db/exec/document_value/document_internal.h21
-rw-r--r--src/mongo/db/exec/document_value/document_value_test.cpp179
-rw-r--r--src/mongo/db/index/sort_key_generator.cpp33
-rw-r--r--src/mongo/util/visit_helper.h56
6 files changed, 368 insertions, 2 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp
index a59c755d373..b2f502f378d 100644
--- a/src/mongo/db/exec/document_value/document.cpp
+++ b/src/mongo/db/exec/document_value/document.cpp
@@ -551,6 +551,68 @@ MutableValue MutableDocument::getNestedField(const vector<Position>& positions)
return getNestedFieldHelper(positions, 0);
}
+namespace {
+stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate>
+getNestedFieldHelperBSON(BSONElement elt, const FieldPath& fp, size_t level) {
+ if (level == fp.getPathLength()) {
+ if (elt.ok()) {
+ return elt;
+ } else {
+ return stdx::monostate{};
+ }
+ }
+
+ if (elt.type() == BSONType::Array) {
+ return Document::TraversesArrayTag{};
+ } else if (elt.type() == BSONType::Object) {
+ auto subFieldElt = elt.embeddedObject()[fp.getFieldName(level)];
+ return getNestedFieldHelperBSON(subFieldElt, fp, level + 1);
+ }
+
+ // The path continues "past" a scalar, and therefore does not exist.
+ return stdx::monostate{};
+}
+} // namespace
+
+stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate>
+Document::getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const {
+ if (!_storage) {
+ return stdx::monostate{};
+ }
+
+ StringData fieldName = dottedField.getFieldName(level);
+ auto bsonEltOrValue = _storage->getFieldNonCaching(fieldName);
+
+ if (stdx::holds_alternative<BSONElement>(bsonEltOrValue)) {
+ return getNestedFieldHelperBSON(
+ stdx::get<BSONElement>(bsonEltOrValue), dottedField, level + 1);
+ }
+
+ const Value& val = stdx::get<Value>(bsonEltOrValue);
+
+ if (level + 1 == dottedField.getPathLength()) {
+ if (val.missing()) {
+ return stdx::monostate{};
+ } else {
+ return val;
+ }
+ }
+
+ if (val.getType() == BSONType::Array) {
+ return Document::TraversesArrayTag{};
+ } else if (val.getType() == BSONType::Object) {
+ return val.getDocument().getNestedFieldNonCachingHelper(dottedField, level + 1);
+ }
+
+ // The path extends beyond a scalar, so it does not exist.
+ return stdx::monostate{};
+}
+
+stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate>
+Document::getNestedFieldNonCaching(const FieldPath& dottedField) const {
+ return getNestedFieldNonCachingHelper(dottedField, 0);
+}
+
static Value getNestedFieldHelper(const Document& doc,
const FieldPath& fieldNames,
vector<Position>* positions,
diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h
index 7719958330c..7675fa6ba6d 100644
--- a/src/mongo/db/exec/document_value/document.h
+++ b/src/mongo/db/exec/document_value/document.h
@@ -152,6 +152,21 @@ public:
const Value getNestedField(const FieldPath& path,
std::vector<Position>* positions = nullptr) const;
+ /**
+ * Returns field at given path as either BSONElement or Value, depending on how it is
+ * stored. If an array is encountered in the middle of the path the TraversesArrayTag is
+ * returned.
+ *
+ * It is possible, however, for the returned BSONElement/Value to be an array if the given path
+ * ends with an array. For example, the document {a: {b:[1,2]}} and the path "a.b" will return
+ * a BSONElement or Value for the array [1, 2].
+ *
+ * If the field is not found, stdx::monostate is returned.
+ */
+ struct TraversesArrayTag {};
+ stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate> getNestedFieldNonCaching(
+ const FieldPath& dottedField) const;
+
// Number of fields in this document. Exp. runtime O(n).
size_t computeSize() const {
return storage().computeSize();
@@ -331,6 +346,10 @@ private:
const DocumentStorage& storage() const {
return (_storage ? *_storage : DocumentStorage::emptyDoc());
}
+
+ stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate>
+ getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const;
+
boost::intrusive_ptr<const DocumentStorage> _storage;
};
diff --git a/src/mongo/db/exec/document_value/document_internal.h b/src/mongo/db/exec/document_value/document_internal.h
index 5cd5d4692c4..d77ac6b5014 100644
--- a/src/mongo/db/exec/document_value/document_internal.h
+++ b/src/mongo/db/exec/document_value/document_internal.h
@@ -37,6 +37,7 @@
#include "mongo/base/static_assert.h"
#include "mongo/db/exec/document_value/document_metadata_fields.h"
#include "mongo/db/exec/document_value/value.h"
+#include "mongo/stdx/variant.h"
#include "mongo/util/intrusive_counter.h"
namespace mongo {
@@ -352,6 +353,26 @@ public:
return getField(pos).val;
}
+ /**
+ * Given a field name either return a Value if the field resides in the cache, or a BSONElement
+ * if the field resides in the backing BSON.
+ */
+ stdx::variant<BSONElement, Value> getFieldNonCaching(StringData name) const {
+ Position pos = findField(name, LookupPolicy::kCacheOnly);
+ if (pos.found()) {
+ return {getField(pos).val};
+ }
+
+ for (auto&& bsonElement : _bson) {
+ if (name == bsonElement.fieldNameStringData()) {
+ return {bsonElement};
+ }
+ }
+
+ // Field not found. Return EOO Value.
+ return {Value()};
+ }
+
/// Adds a new field with missing Value at the end of the document
Value& appendField(StringData name, ValueElement::Kind kind);
diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp
index 2eb26618e24..9c365062625 100644
--- a/src/mongo/db/exec/document_value/document_value_test.cpp
+++ b/src/mongo/db/exec/document_value/document_value_test.cpp
@@ -175,6 +175,185 @@ TEST(DocumentSerialization, CannotSerializeDocumentThatExceedsDepthLimit) {
throwaway.abandon();
}
+TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) {
+ BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true);
+ Document document = fromBson(bson);
+
+ // Should be able to get all top level fields without caching.
+ for (auto&& elt : bson) {
+ auto valueVariant = document.getNestedFieldNonCaching(elt.fieldNameStringData());
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant));
+ ASSERT_TRUE(stdx::get<BSONElement>(valueVariant).binaryEqual(elt));
+
+ // Get the field again to be sure it wasn't cached during the access above.
+ auto valueVariantAfterAccess = document.getNestedFieldNonCaching(elt.fieldNameStringData());
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariantAfterAccess));
+ }
+
+ // Try to get a top level field which doesn't exist.
+ auto nonExistentFieldVariant = document.getNestedFieldNonCaching("doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentFieldVariant));
+
+ assertRoundTrips(document);
+}
+
+TEST(DocumentGetFieldNonCaching, CachedTopLevelFields) {
+ BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true);
+ Document document = fromBson(bson);
+
+ // Force 'scalar2' to be cached.
+ ASSERT_FALSE(document["scalar2"].missing());
+
+ // Attempt to access scalar2 with the non caching accessor. It should be cached already.
+ {
+ auto valueVariant = document.getNestedFieldNonCaching("scalar2");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant));
+ ASSERT_EQ(stdx::get<Value>(valueVariant).getBool(), true);
+ }
+
+ // Attempt to access 'array' with the non caching accessor. It should not be cached.
+ {
+ auto valueVariant = document.getNestedFieldNonCaching("array");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant));
+ auto elt = stdx::get<BSONElement>(valueVariant);
+ ASSERT_TRUE(bson["array"].binaryEqual(elt));
+ }
+
+ // Force 'array' to be cached.
+ ASSERT_FALSE(document["array"].missing());
+
+ // Check that 'array' is cached and that we can read it with the non caching accessor.
+ {
+ auto valueVariant = document.getNestedFieldNonCaching("array");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant));
+ ASSERT_EQ(stdx::get<Value>(valueVariant).getArrayLength(), 3);
+ }
+}
+
+TEST(DocumentGetFieldNonCaching, NonArrayDottedPaths) {
+ BSONObj bson = BSON("address" << BSON("zip" << 123 << "street"
+ << "foo"));
+ Document document = fromBson(bson);
+
+ // With no fields cached.
+ {
+ // Get a nested field without caching.
+ auto zipVariant = document.getNestedFieldNonCaching("address.zip");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariant));
+ ASSERT_EQ(stdx::get<BSONElement>(zipVariant).numberInt(), 123);
+
+ // Check that it was not cached.
+ auto zipVariantAfterAccess = document.getNestedFieldNonCaching("address.zip");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariantAfterAccess));
+
+ // Check that the top level field isn't cached after accessing one of its children.
+ auto addressVariant = document.getNestedFieldNonCaching("address");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant));
+
+ // Get a dotted field which does not exist.
+ auto nonExistentVariant = document.getNestedFieldNonCaching("address.doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentVariant));
+
+ // Check that the top level field isn't cached after a failed attempt to access one of its
+ // children.
+ addressVariant = document.getNestedFieldNonCaching("address");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant));
+
+ // Get a dotted field which extends past a scalar.
+ auto pathPastScalarVariant = document.getNestedFieldNonCaching("address.zip.doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(pathPastScalarVariant));
+ }
+
+ // Now force 'address.street' to be cached.
+ ASSERT_FALSE(document.getNestedField("address.street").missing());
+
+ // Check that we get the right value when accessing it with the non caching accessor.
+ {
+ // The top level field should be cached.
+ auto topLevelVariant = document.getNestedFieldNonCaching("address");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(topLevelVariant));
+
+ auto variant = document.getNestedFieldNonCaching("address.street");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(variant));
+ ASSERT_EQ(stdx::get<Value>(variant).getString(), "foo");
+ }
+
+ // Check that the other subfield, 'zip' is not cached.
+ {
+ auto variant = document.getNestedFieldNonCaching("address.zip");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant));
+ }
+
+ // Check that attempting to get a subfield of 'address.street' returns monostate.
+ {
+ auto variant = document.getNestedFieldNonCaching("address.street.doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(variant));
+ }
+}
+
+TEST(DocumentGetFieldNonCaching, NonArrayLongDottedPath) {
+ BSONObj bson = BSON("a" << BSON("b" << BSON("c" << BSON("d" << BSON("e" << 1)))));
+ Document document = fromBson(bson);
+
+ // Not cached.
+ {
+ auto variant = document.getNestedFieldNonCaching("a.b.c.d.e");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant));
+ ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1);
+ }
+
+ // Force part of the path to get cached.
+ ASSERT_FALSE(document.getNestedField("a.b.c").missing());
+
+ // The function should be able to traverse a path which is part Value and part BSONElement.
+ {
+ auto variant = document.getNestedFieldNonCaching("a.b.c.d.e");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant));
+ ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1);
+ }
+
+ // Force the entire path to be cached.
+ ASSERT_FALSE(document.getNestedField("a.b.c.d.e").missing());
+
+ {
+ auto variant = document.getNestedFieldNonCaching("a.b.c.d.e");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(variant));
+ ASSERT_EQ(stdx::get<Value>(variant).getInt(), 1);
+ }
+}
+
+TEST(DocumentGetFieldNonCaching, TraverseArray) {
+ BSONObj bson =
+ BSON("topLevelArray" << BSON_ARRAY(BSON("foo" << 1) << BSON("foo" << 2) << BSON("foo" << 3))
+ << "subObj" << BSON("subArray" << BSON_ARRAY(1 << 2)));
+ Document document = fromBson(bson);
+
+ auto checkArrayTagIsReturned = [&document]() {
+ auto topLevelArrayVariant = document.getNestedFieldNonCaching("topLevelArray.foo");
+ ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(topLevelArrayVariant));
+
+ // Attempting to traverse an uncached nested array results in TraversesArrayTag being
+ // returned.
+ auto subArrayVariant = document.getNestedFieldNonCaching("subObj.subArray.foobar");
+ ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(subArrayVariant));
+ };
+
+ // Check with no fields cached.
+ checkArrayTagIsReturned();
+
+ // Force the top level array to be cached.
+ ASSERT_FALSE(document["topLevelArray"].missing());
+
+ // Check with one field cached.
+ checkArrayTagIsReturned();
+
+ // Force the array that's in a sub object to be cached.
+ ASSERT_FALSE(document.getNestedField("subObj.subArray").missing());
+
+ // Check it works with both fields (and the full sub object) cached.
+ checkArrayTagIsReturned();
+}
+
/** Add Document fields. */
class AddField {
public:
diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp
index 1e9ea326255..8799c58b5f9 100644
--- a/src/mongo/db/index/sort_key_generator.cpp
+++ b/src/mongo/db/index/sort_key_generator.cpp
@@ -33,6 +33,7 @@
#include "mongo/bson/bsonobj_comparator.h"
#include "mongo/db/query/collation/collation_index_key.h"
+#include "mongo/util/visit_helper.h"
namespace mongo {
@@ -219,8 +220,36 @@ StatusWith<Value> SortKeyGenerator::extractKeyPart(
Value plainKey;
if (patternPart.fieldPath) {
invariant(!patternPart.expression);
- auto key =
- document_path_support::extractElementAlongNonArrayPath(doc, *patternPart.fieldPath);
+ auto keyVariant = doc.getNestedFieldNonCaching(*patternPart.fieldPath);
+
+ const Status arrayError{ErrorCodes::InternalError, "array along path"};
+ auto key = stdx::visit(
+ visit_helper::Overloaded{
+ // In this case, the document has an array along the path given. This means the
+ // document is ineligible for taking the fast path for index key generation.
+ [&arrayError](Document::TraversesArrayTag) -> StatusWith<Value> {
+ return arrayError;
+ },
+ // In this case the field was already in the cache (or may not have existed).
+ [&arrayError](const Value& val) -> StatusWith<Value> {
+ // The document may have an array at the given path.
+ if (val.getType() == BSONType::Array) {
+ return arrayError;
+ }
+ return val;
+ },
+ // In this case the field was in the backing BSON, and not in the cache.
+ [&arrayError](BSONElement elt) -> StatusWith<Value> {
+ // The document may have an array at the given path.
+ if (elt.type() == BSONType::Array) {
+ return arrayError;
+ }
+ return Value(elt);
+ },
+ [](stdx::monostate none) -> StatusWith<Value> { return Value(); },
+ },
+ keyVariant);
+
if (!key.isOK()) {
return key;
}
diff --git a/src/mongo/util/visit_helper.h b/src/mongo/util/visit_helper.h
new file mode 100644
index 00000000000..435e05ee36f
--- /dev/null
+++ b/src/mongo/util/visit_helper.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+namespace mongo {
+namespace visit_helper {
+
+/**
+ * This is the "overload pattern" designed to be used with std::visit. Example usage:
+ *
+ * auto result = std::visit(
+ * visit_helper::Overloaded{
+ * [](int a) {
+ * ...
+ * },
+ * [](StringData b) {
+ * ...
+ * },
+ * },
+ * someVariant);
+ */
+template <class... Ts>
+struct Overloaded : Ts... {
+ using Ts::operator()...;
+};
+template <class... Ts>
+Overloaded(Ts...)->Overloaded<Ts...>;
+} // namespace visit_helper
+} // namespace mongo