summaryrefslogtreecommitdiff
path: root/src/mongo/db/exec
diff options
context:
space:
mode:
authorIan Boros <ian.boros@mongodb.com>2020-03-17 14:21:45 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-01 01:29:42 +0000
commit5557279e9ca9c78fbeb1026fc1a48a5c7c334b71 (patch)
tree786f50500c09991caed8ea9a2b72538abd4c6a77 /src/mongo/db/exec
parent45da9589d4ea5929404e9674718927a6be352b4d (diff)
downloadmongo-5557279e9ca9c78fbeb1026fc1a48a5c7c334b71.tar.gz
SERVER-45763 optimize sort key generation
Diffstat (limited to 'src/mongo/db/exec')
-rw-r--r--src/mongo/db/exec/document_value/document.cpp62
-rw-r--r--src/mongo/db/exec/document_value/document.h19
-rw-r--r--src/mongo/db/exec/document_value/document_internal.h21
-rw-r--r--src/mongo/db/exec/document_value/document_value_test.cpp179
4 files changed, 281 insertions, 0 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp
index a59c755d373..b2f502f378d 100644
--- a/src/mongo/db/exec/document_value/document.cpp
+++ b/src/mongo/db/exec/document_value/document.cpp
@@ -551,6 +551,68 @@ MutableValue MutableDocument::getNestedField(const vector<Position>& positions)
return getNestedFieldHelper(positions, 0);
}
+namespace {
+stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate>
+getNestedFieldHelperBSON(BSONElement elt, const FieldPath& fp, size_t level) {
+ if (level == fp.getPathLength()) {
+ if (elt.ok()) {
+ return elt;
+ } else {
+ return stdx::monostate{};
+ }
+ }
+
+ if (elt.type() == BSONType::Array) {
+ return Document::TraversesArrayTag{};
+ } else if (elt.type() == BSONType::Object) {
+ auto subFieldElt = elt.embeddedObject()[fp.getFieldName(level)];
+ return getNestedFieldHelperBSON(subFieldElt, fp, level + 1);
+ }
+
+ // The path continues "past" a scalar, and therefore does not exist.
+ return stdx::monostate{};
+}
+} // namespace
+
+stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate>
+Document::getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const {
+ if (!_storage) {
+ return stdx::monostate{};
+ }
+
+ StringData fieldName = dottedField.getFieldName(level);
+ auto bsonEltOrValue = _storage->getFieldNonCaching(fieldName);
+
+ if (stdx::holds_alternative<BSONElement>(bsonEltOrValue)) {
+ return getNestedFieldHelperBSON(
+ stdx::get<BSONElement>(bsonEltOrValue), dottedField, level + 1);
+ }
+
+ const Value& val = stdx::get<Value>(bsonEltOrValue);
+
+ if (level + 1 == dottedField.getPathLength()) {
+ if (val.missing()) {
+ return stdx::monostate{};
+ } else {
+ return val;
+ }
+ }
+
+ if (val.getType() == BSONType::Array) {
+ return Document::TraversesArrayTag{};
+ } else if (val.getType() == BSONType::Object) {
+ return val.getDocument().getNestedFieldNonCachingHelper(dottedField, level + 1);
+ }
+
+ // The path extends beyond a scalar, so it does not exist.
+ return stdx::monostate{};
+}
+
+stdx::variant<BSONElement, Value, Document::TraversesArrayTag, stdx::monostate>
+Document::getNestedFieldNonCaching(const FieldPath& dottedField) const {
+ return getNestedFieldNonCachingHelper(dottedField, 0);
+}
+
static Value getNestedFieldHelper(const Document& doc,
const FieldPath& fieldNames,
vector<Position>* positions,
diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h
index 7719958330c..7675fa6ba6d 100644
--- a/src/mongo/db/exec/document_value/document.h
+++ b/src/mongo/db/exec/document_value/document.h
@@ -152,6 +152,21 @@ public:
const Value getNestedField(const FieldPath& path,
std::vector<Position>* positions = nullptr) const;
+ /**
+ * Returns field at given path as either BSONElement or Value, depending on how it is
+ * stored. If an array is encountered in the middle of the path the TraversesArrayTag is
+ * returned.
+ *
+ * It is possible, however, for the returned BSONElement/Value to be an array if the given path
+ * ends with an array. For example, the document {a: {b:[1,2]}} and the path "a.b" will return
+ * a BSONElement or Value for the array [1, 2].
+ *
+ * If the field is not found, stdx::monostate is returned.
+ */
+ struct TraversesArrayTag {};
+ stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate> getNestedFieldNonCaching(
+ const FieldPath& dottedField) const;
+
// Number of fields in this document. Exp. runtime O(n).
size_t computeSize() const {
return storage().computeSize();
@@ -331,6 +346,10 @@ private:
const DocumentStorage& storage() const {
return (_storage ? *_storage : DocumentStorage::emptyDoc());
}
+
+ stdx::variant<BSONElement, Value, TraversesArrayTag, stdx::monostate>
+ getNestedFieldNonCachingHelper(const FieldPath& dottedField, size_t level) const;
+
boost::intrusive_ptr<const DocumentStorage> _storage;
};
diff --git a/src/mongo/db/exec/document_value/document_internal.h b/src/mongo/db/exec/document_value/document_internal.h
index 5cd5d4692c4..d77ac6b5014 100644
--- a/src/mongo/db/exec/document_value/document_internal.h
+++ b/src/mongo/db/exec/document_value/document_internal.h
@@ -37,6 +37,7 @@
#include "mongo/base/static_assert.h"
#include "mongo/db/exec/document_value/document_metadata_fields.h"
#include "mongo/db/exec/document_value/value.h"
+#include "mongo/stdx/variant.h"
#include "mongo/util/intrusive_counter.h"
namespace mongo {
@@ -352,6 +353,26 @@ public:
return getField(pos).val;
}
+ /**
+ * Given a field name either return a Value if the field resides in the cache, or a BSONElement
+ * if the field resides in the backing BSON.
+ */
+ stdx::variant<BSONElement, Value> getFieldNonCaching(StringData name) const {
+ Position pos = findField(name, LookupPolicy::kCacheOnly);
+ if (pos.found()) {
+ return {getField(pos).val};
+ }
+
+ for (auto&& bsonElement : _bson) {
+ if (name == bsonElement.fieldNameStringData()) {
+ return {bsonElement};
+ }
+ }
+
+ // Field not found. Return EOO Value.
+ return {Value()};
+ }
+
/// Adds a new field with missing Value at the end of the document
Value& appendField(StringData name, ValueElement::Kind kind);
diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp
index 2eb26618e24..9c365062625 100644
--- a/src/mongo/db/exec/document_value/document_value_test.cpp
+++ b/src/mongo/db/exec/document_value/document_value_test.cpp
@@ -175,6 +175,185 @@ TEST(DocumentSerialization, CannotSerializeDocumentThatExceedsDepthLimit) {
throwaway.abandon();
}
+TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) {
+ BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true);
+ Document document = fromBson(bson);
+
+ // Should be able to get all top level fields without caching.
+ for (auto&& elt : bson) {
+ auto valueVariant = document.getNestedFieldNonCaching(elt.fieldNameStringData());
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant));
+ ASSERT_TRUE(stdx::get<BSONElement>(valueVariant).binaryEqual(elt));
+
+ // Get the field again to be sure it wasn't cached during the access above.
+ auto valueVariantAfterAccess = document.getNestedFieldNonCaching(elt.fieldNameStringData());
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariantAfterAccess));
+ }
+
+ // Try to get a top level field which doesn't exist.
+ auto nonExistentFieldVariant = document.getNestedFieldNonCaching("doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentFieldVariant));
+
+ assertRoundTrips(document);
+}
+
+TEST(DocumentGetFieldNonCaching, CachedTopLevelFields) {
+ BSONObj bson = BSON("scalar" << 1 << "array" << BSON_ARRAY(1 << 2 << 3) << "scalar2" << true);
+ Document document = fromBson(bson);
+
+ // Force 'scalar2' to be cached.
+ ASSERT_FALSE(document["scalar2"].missing());
+
+ // Attempt to access scalar2 with the non caching accessor. It should be cached already.
+ {
+ auto valueVariant = document.getNestedFieldNonCaching("scalar2");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant));
+ ASSERT_EQ(stdx::get<Value>(valueVariant).getBool(), true);
+ }
+
+ // Attempt to access 'array' with the non caching accessor. It should not be cached.
+ {
+ auto valueVariant = document.getNestedFieldNonCaching("array");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(valueVariant));
+ auto elt = stdx::get<BSONElement>(valueVariant);
+ ASSERT_TRUE(bson["array"].binaryEqual(elt));
+ }
+
+ // Force 'array' to be cached.
+ ASSERT_FALSE(document["array"].missing());
+
+ // Check that 'array' is cached and that we can read it with the non caching accessor.
+ {
+ auto valueVariant = document.getNestedFieldNonCaching("array");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(valueVariant));
+ ASSERT_EQ(stdx::get<Value>(valueVariant).getArrayLength(), 3);
+ }
+}
+
+TEST(DocumentGetFieldNonCaching, NonArrayDottedPaths) {
+ BSONObj bson = BSON("address" << BSON("zip" << 123 << "street"
+ << "foo"));
+ Document document = fromBson(bson);
+
+ // With no fields cached.
+ {
+ // Get a nested field without caching.
+ auto zipVariant = document.getNestedFieldNonCaching("address.zip");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariant));
+ ASSERT_EQ(stdx::get<BSONElement>(zipVariant).numberInt(), 123);
+
+ // Check that it was not cached.
+ auto zipVariantAfterAccess = document.getNestedFieldNonCaching("address.zip");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(zipVariantAfterAccess));
+
+ // Check that the top level field isn't cached after accessing one of its children.
+ auto addressVariant = document.getNestedFieldNonCaching("address");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant));
+
+ // Get a dotted field which does not exist.
+ auto nonExistentVariant = document.getNestedFieldNonCaching("address.doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(nonExistentVariant));
+
+ // Check that the top level field isn't cached after a failed attempt to access one of its
+ // children.
+ addressVariant = document.getNestedFieldNonCaching("address");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(addressVariant));
+
+ // Get a dotted field which extends past a scalar.
+ auto pathPastScalarVariant = document.getNestedFieldNonCaching("address.zip.doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(pathPastScalarVariant));
+ }
+
+ // Now force 'address.street' to be cached.
+ ASSERT_FALSE(document.getNestedField("address.street").missing());
+
+ // Check that we get the right value when accessing it with the non caching accessor.
+ {
+ // The top level field should be cached.
+ auto topLevelVariant = document.getNestedFieldNonCaching("address");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(topLevelVariant));
+
+ auto variant = document.getNestedFieldNonCaching("address.street");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(variant));
+ ASSERT_EQ(stdx::get<Value>(variant).getString(), "foo");
+ }
+
+ // Check that the other subfield, 'zip' is not cached.
+ {
+ auto variant = document.getNestedFieldNonCaching("address.zip");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant));
+ }
+
+ // Check that attempting to get a subfield of 'address.street' returns monostate.
+ {
+ auto variant = document.getNestedFieldNonCaching("address.street.doesnotexist");
+ ASSERT_TRUE(stdx::holds_alternative<stdx::monostate>(variant));
+ }
+}
+
+TEST(DocumentGetFieldNonCaching, NonArrayLongDottedPath) {
+ BSONObj bson = BSON("a" << BSON("b" << BSON("c" << BSON("d" << BSON("e" << 1)))));
+ Document document = fromBson(bson);
+
+ // Not cached.
+ {
+ auto variant = document.getNestedFieldNonCaching("a.b.c.d.e");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant));
+ ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1);
+ }
+
+ // Force part of the path to get cached.
+ ASSERT_FALSE(document.getNestedField("a.b.c").missing());
+
+ // The function should be able to traverse a path which is part Value and part BSONElement.
+ {
+ auto variant = document.getNestedFieldNonCaching("a.b.c.d.e");
+ ASSERT_TRUE(stdx::holds_alternative<BSONElement>(variant));
+ ASSERT_EQ(stdx::get<BSONElement>(variant).numberInt(), 1);
+ }
+
+ // Force the entire path to be cached.
+ ASSERT_FALSE(document.getNestedField("a.b.c.d.e").missing());
+
+ {
+ auto variant = document.getNestedFieldNonCaching("a.b.c.d.e");
+ ASSERT_TRUE(stdx::holds_alternative<Value>(variant));
+ ASSERT_EQ(stdx::get<Value>(variant).getInt(), 1);
+ }
+}
+
+TEST(DocumentGetFieldNonCaching, TraverseArray) {
+ BSONObj bson =
+ BSON("topLevelArray" << BSON_ARRAY(BSON("foo" << 1) << BSON("foo" << 2) << BSON("foo" << 3))
+ << "subObj" << BSON("subArray" << BSON_ARRAY(1 << 2)));
+ Document document = fromBson(bson);
+
+ auto checkArrayTagIsReturned = [&document]() {
+ auto topLevelArrayVariant = document.getNestedFieldNonCaching("topLevelArray.foo");
+ ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(topLevelArrayVariant));
+
+ // Attempting to traverse an uncached nested array results in TraversesArrayTag being
+ // returned.
+ auto subArrayVariant = document.getNestedFieldNonCaching("subObj.subArray.foobar");
+ ASSERT_TRUE(stdx::holds_alternative<Document::TraversesArrayTag>(subArrayVariant));
+ };
+
+ // Check with no fields cached.
+ checkArrayTagIsReturned();
+
+ // Force the top level array to be cached.
+ ASSERT_FALSE(document["topLevelArray"].missing());
+
+ // Check with one field cached.
+ checkArrayTagIsReturned();
+
+ // Force the array that's in a sub object to be cached.
+ ASSERT_FALSE(document.getNestedField("subObj.subArray").missing());
+
+ // Check it works with both fields (and the full sub object) cached.
+ checkArrayTagIsReturned();
+}
+
/** Add Document fields. */
class AddField {
public: