summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Korshunov <anton.korshunov@mongodb.com>2020-03-12 11:28:29 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-02 21:35:39 +0000
commit30088474306375095a67244fcf55391d4ccb02a5 (patch)
treee988fe789fdb1d56680d6487e497242fb83f9a15
parented3a75b6f792dc8f93540622305caa3ea35ad389 (diff)
downloadmongo-30088474306375095a67244fcf55391d4ccb02a5.tar.gz
SERVER-46946 Optimize BtreeKeyGenerator::getKeys for non-multikey indexes
(cherry picked from commit c91a2f5932ac08ca28b25ce11b8ac4949552e271)
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.cpp2
-rw-r--r--src/mongo/db/catalog/validate_adaptor.cpp2
-rw-r--r--src/mongo/db/exec/working_set_common.cpp2
-rw-r--r--src/mongo/db/index/btree_access_method.cpp4
-rw-r--r--src/mongo/db/index/btree_key_generator.cpp74
-rw-r--r--src/mongo/db/index/btree_key_generator.h15
-rw-r--r--src/mongo/db/index/btree_key_generator_test.cpp85
-rw-r--r--src/mongo/db/index/index_access_method.cpp8
-rw-r--r--src/mongo/db/index/index_access_method.h6
-rw-r--r--src/mongo/db/index/sort_key_generator.cpp3
-rw-r--r--src/mongo/dbtests/validate_tests.cpp12
11 files changed, 164 insertions, 49 deletions
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index ff200cad19b..8299fa836cd 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -1418,7 +1418,7 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx,
index->accessMethod()->getKeys(*bsonRecord.docPtr,
options.getKeysMode,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
&multikeyMetadataKeys,
&multikeyPaths,
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index a6d5ca72b6b..a9669cb0348 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -105,7 +105,7 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
MultikeyPaths multikeyPaths;
iam->getKeys(recordBson,
IndexAccessMethod::GetKeysMode::kEnforceConstraints,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&documentKeySet,
&multikeyMetadataKeys,
&multikeyPaths,
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index f80d3a275e6..9bc07f28be2 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -137,7 +137,7 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
auto* iam = workingSet->retrieveIndexAccessMethod(memberKey.indexId);
iam->getKeys(member->doc.value().toBson(),
IndexAccessMethod::GetKeysMode::kEnforceConstraints,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kValidatingKeys,
&keys,
multikeyMetadataKeys,
multikeyPaths,
diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp
index cf158940216..308d3684086 100644
--- a/src/mongo/db/index/btree_access_method.cpp
+++ b/src/mongo/db/index/btree_access_method.cpp
@@ -71,7 +71,9 @@ void BtreeAccessMethod::doGetKeys(const BSONObj& obj,
KeyStringSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths,
boost::optional<RecordId> id) const {
- _keyGenerator->getKeys(obj, keys, multikeyPaths, id);
+ const auto skipMultikey =
+ context == IndexAccessMethod::GetKeysContext::kValidatingKeys && !_descriptor->isMultikey();
+ _keyGenerator->getKeys(obj, skipMultikey, keys, multikeyPaths, id);
}
} // namespace mongo
diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp
index b8dcb6f1024..0b41e8a7451 100644
--- a/src/mongo/db/index/btree_key_generator.cpp
+++ b/src/mongo/db/index/btree_key_generator.cpp
@@ -53,6 +53,38 @@ const BSONElement nullElt = nullObj.firstElement();
const BSONObj undefinedObj = BSON("" << BSONUndefined);
const BSONElement undefinedElt = undefinedObj.firstElement();
+/**
+ * Returns the non-array element at the specified path. This function returns an empty BSON element
+ * if the path doesn't exist.
+ *
+ * The 'path' can be specified using a dotted notation in order to traverse through embedded
+ * objects.
+ *
+ * This function must only be used when there is no an array element along the 'path'. The caller is
+ * responsible to ensure this invariant holds.
+ */
+BSONElement extractNonArrayElementAtPath(const BSONObj& obj, StringData path) {
+ static const auto kEmptyElt = BSONElement{};
+
+ auto&& [elt, tail] = [&]() -> std::pair<BSONElement, StringData> {
+ if (auto dotOffset = path.find("."); dotOffset != std::string::npos) {
+ return {obj.getField(path.substr(0, dotOffset)), path.substr(dotOffset + 1)};
+ }
+ return {obj.getField(path), ""_sd};
+ }();
+ invariant(elt.type() != BSONType::Array);
+
+ if (elt.eoo()) {
+ return kEmptyElt;
+ } else if (tail.empty()) {
+ return elt;
+ } else if (elt.type() == BSONType::Object) {
+ return extractNonArrayElementAtPath(elt.embeddedObject(), tail);
+ }
+ // We found a scalar element, but there is more path to traverse, e.g. {a: 1} with a path of
+ // "a.b".
+ return kEmptyElt;
+}
} // namespace
BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames,
@@ -157,6 +189,7 @@ void BtreeKeyGenerator::_getKeysArrEltFixed(std::vector<const char*>* fieldNames
}
void BtreeKeyGenerator::getKeys(const BSONObj& obj,
+ bool skipMultikey,
KeyStringSet* keys,
MultikeyPaths* multikeyPaths,
boost::optional<RecordId> id) const {
@@ -190,6 +223,14 @@ void BtreeKeyGenerator::getKeys(const BSONObj& obj,
if (multikeyPaths) {
multikeyPaths->resize(1);
}
+ } else if (skipMultikey) {
+ // This index doesn't contain array values. We therefore always set 'multikeyPaths' as
+ // [[ ], [], ...].
+ if (multikeyPaths) {
+ invariant(multikeyPaths->empty());
+ multikeyPaths->resize(_fieldNames.size());
+ }
+ _getKeysWithoutArray(obj, id, keys);
} else {
if (multikeyPaths) {
invariant(multikeyPaths->empty());
@@ -200,11 +241,44 @@ void BtreeKeyGenerator::getKeys(const BSONObj& obj,
_getKeysWithArray(
_fieldNames, _fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths, id);
}
+
if (keys->empty() && !_isSparse) {
keys->insert(_nullKeyString);
}
}
+void BtreeKeyGenerator::_getKeysWithoutArray(const BSONObj& obj,
+ boost::optional<RecordId> id,
+ KeyStringSet* keys) const {
+
+ KeyString::HeapBuilder keyString{_keyStringVersion, _ordering};
+ size_t numNotFound{0};
+
+ for (auto&& fieldName : _fieldNames) {
+ auto elem = extractNonArrayElementAtPath(obj, fieldName);
+ if (elem.eoo()) {
+ ++numNotFound;
+ }
+
+ if (_collator) {
+ keyString.appendBSONElement(elem, [&](StringData stringData) {
+ return _collator->getComparisonString(stringData);
+ });
+ } else {
+ keyString.appendBSONElement(elem);
+ }
+ }
+
+ if (_isSparse && numNotFound == _fieldNames.size()) {
+ return;
+ }
+
+ if (id) {
+ keyString.appendRecordId(*id);
+ }
+ keys->insert(keyString.release());
+}
+
void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
const BSONObj& obj,
diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h
index 303927a1fe7..1b1b5d53584 100644
--- a/src/mongo/db/index/btree_key_generator.h
+++ b/src/mongo/db/index/btree_key_generator.h
@@ -68,8 +68,15 @@ public:
* 'multikeyPaths' to have the same number of elements as the index key pattern and fills each
* element with the prefixes of the indexed field that would cause this index to be multikey as
* a result of inserting 'keys'.
+ *
+ * If the caller is certain that the current index is not multikey, and the insertion of 'obj'
+ * will not turn the index into a multikey, then the 'skipMultikey' parameter can be set to
+ * 'true' to be able to use an optimized algorithm for the index key generation. Otherwise,
+ * this parameter must be set to 'false'. In this case a generic algorithm will be used, which
+ * can handle both multikey and non-multikey indexes.
*/
void getKeys(const BSONObj& obj,
+ bool skipMultikey,
KeyStringSet* keys,
MultikeyPaths* multikeyPaths,
boost::optional<RecordId> id = boost::none) const;
@@ -150,6 +157,14 @@ private:
boost::optional<RecordId> id) const;
/**
+ * An optimized version of the key generation algorithm to be used when it is known that 'obj'
+ * doesn't contain an array value in any of the fields in the key pattern.
+ */
+ void _getKeysWithoutArray(const BSONObj& obj,
+ boost::optional<RecordId> id,
+ KeyStringSet* keys) const;
+
+ /**
* A call to _getKeysWithArray() begins by calling this for each field in the key pattern. It
* traverses the path '*field' in 'obj' until either reaching the end of the path or an array
* element.
diff --git a/src/mongo/db/index/btree_key_generator_test.cpp b/src/mongo/db/index/btree_key_generator_test.cpp
index 3c7c0775255..b25f24a8b35 100644
--- a/src/mongo/db/index/btree_key_generator_test.cpp
+++ b/src/mongo/db/index/btree_key_generator_test.cpp
@@ -94,6 +94,16 @@ bool keysetsEqual(const KeyStringSet& expectedKeys, const KeyStringSet& actualKe
return true;
}
+bool containsArrayElement(const BSONObj& obj) {
+ for (auto&& elem : obj) {
+ if (elem.type() == BSONType::Array ||
+ (elem.type() == BSONType::Object && containsArrayElement(elem.Obj()))) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool testKeygen(const BSONObj& kp,
const BSONObj& obj,
const KeyStringSet& expectedKeys,
@@ -103,8 +113,7 @@ bool testKeygen(const BSONObj& kp,
invariant(expectedMultikeyPaths.size() == static_cast<size_t>(kp.nFields()));
//
- // Step 1: construct the btree key generator object, using the
- // index key pattern.
+ // Construct the btree key generator object, using the index key pattern.
//
vector<const char*> fieldNames;
vector<BSONElement> fixed;
@@ -123,39 +132,53 @@ bool testKeygen(const BSONObj& kp,
KeyString::Version::kLatestVersion,
Ordering::make(BSONObj()));
- //
- // Step 2: ask 'keyGen' to generate index keys for the object 'obj' and report any prefixes of
- // the indexed fields that would cause the index to be multikey as a result of inserting
- // 'actualKeys'.
- //
- KeyStringSet actualKeys;
- MultikeyPaths actualMultikeyPaths;
- keyGen->getKeys(obj, &actualKeys, &actualMultikeyPaths);
+ auto runTest = [&](bool skipMultikey) {
+ //
+ // Ask 'keyGen' to generate index keys for the object 'obj' and report any prefixes of the
+ // indexed fields that would cause the index to be multikey as a result of inserting
+ // 'actualKeys'.
+ //
+ KeyStringSet actualKeys;
+ MultikeyPaths actualMultikeyPaths;
+ keyGen->getKeys(obj, skipMultikey, &actualKeys, &actualMultikeyPaths);
+
+ //
+ // Check that the results match the expected result.
+ //
+ bool match = keysetsEqual(expectedKeys, actualKeys);
+ if (!match) {
+ LOGV2(20647,
+ "Expected: {dumpKeyset_expectedKeys}, Actual: {dumpKeyset_actualKeys}",
+ "dumpKeyset_expectedKeys"_attr = dumpKeyset(expectedKeys),
+ "dumpKeyset_actualKeys"_attr = dumpKeyset(actualKeys));
+ return false;
+ }
- //
- // Step 3: check that the results match the expected result.
- //
- bool match = keysetsEqual(expectedKeys, actualKeys);
- if (!match) {
- LOGV2(20647,
- "Expected: {dumpKeyset_expectedKeys}, Actual: {dumpKeyset_actualKeys}",
- "dumpKeyset_expectedKeys"_attr = dumpKeyset(expectedKeys),
- "dumpKeyset_actualKeys"_attr = dumpKeyset(actualKeys));
- return false;
- }
+ match = (expectedMultikeyPaths == actualMultikeyPaths);
+ if (!match) {
+ LOGV2(20648,
+ "Expected: {dumpMultikeyPaths_expectedMultikeyPaths}, Actual: "
+ "{dumpMultikeyPaths_actualMultikeyPaths}",
+ "dumpMultikeyPaths_expectedMultikeyPaths"_attr =
+ dumpMultikeyPaths(expectedMultikeyPaths),
+ "dumpMultikeyPaths_actualMultikeyPaths"_attr =
+ dumpMultikeyPaths(actualMultikeyPaths));
+ return false;
+ }
+
+ return true;
+ };
- match = (expectedMultikeyPaths == actualMultikeyPaths);
- if (!match) {
- LOGV2(20648,
- "Expected: {dumpMultikeyPaths_expectedMultikeyPaths}, Actual: "
- "{dumpMultikeyPaths_actualMultikeyPaths}",
- "dumpMultikeyPaths_expectedMultikeyPaths"_attr =
- dumpMultikeyPaths(expectedMultikeyPaths),
- "dumpMultikeyPaths_actualMultikeyPaths"_attr =
- dumpMultikeyPaths(actualMultikeyPaths));
+ // If it is correct to do so, then test that the fast key generation path for the non-multikey
+ // case works as expected.
+ if (!containsArrayElement(obj)) {
+ if (!runTest(true)) {
+ return false;
+ }
}
- return match;
+ // Test that fully general key generation path works as expected.
+ return runTest(false);
}
//
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 47c86aef8ab..67483da9ec7 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -137,7 +137,7 @@ Status AbstractIndexAccessMethod::insert(OperationContext* opCtx,
getKeys(obj,
options.getKeysMode,
- GetKeysContext::kReadOrAddKeys,
+ GetKeysContext::kAddingKeys,
&keys,
&multikeyMetadataKeys,
&multikeyPaths,
@@ -257,7 +257,7 @@ RecordId AbstractIndexAccessMethod::findSingle(OperationContext* opCtx,
MultikeyPaths* multikeyPaths = nullptr;
getKeys(requestedKey,
GetKeysMode::kEnforceConstraints,
- GetKeysContext::kReadOrAddKeys,
+ GetKeysContext::kAddingKeys,
&keys,
multikeyMetadataKeys,
multikeyPaths,
@@ -384,7 +384,7 @@ void AbstractIndexAccessMethod::prepareUpdate(OperationContext* opCtx,
if (!indexFilter || indexFilter->matchesBSON(to)) {
getKeys(to,
options.getKeysMode,
- GetKeysContext::kReadOrAddKeys,
+ GetKeysContext::kAddingKeys,
&ticket->newKeys,
&ticket->newMultikeyMetadataKeys,
&ticket->newMultikeyPaths,
@@ -523,7 +523,7 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt
_indexCatalogEntry->accessMethod()->getKeys(
obj,
options.getKeysMode,
- GetKeysContext::kReadOrAddKeys,
+ GetKeysContext::kAddingKeys,
&keys,
&_multikeyMetadataKeys,
&multikeyPaths,
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index 245e98778b8..a4d9e7ab047 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -276,10 +276,10 @@ public:
};
/**
- * Specifies whether getKeys is being used in the context of creating new keys or deleting
- * existing keys.
+ * Specifies whether getKeys is being used in the context of creating new keys, deleting
+ * or validating existing keys.
*/
- enum class GetKeysContext { kRemovingKeys, kReadOrAddKeys };
+ enum class GetKeysContext { kRemovingKeys, kAddingKeys, kValidatingKeys };
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp
index b5e80f0f552..8799c58b5f9 100644
--- a/src/mongo/db/index/sort_key_generator.cpp
+++ b/src/mongo/db/index/sort_key_generator.cpp
@@ -166,7 +166,8 @@ StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocumentWithoutMetadata(
// There's no need to compute the prefixes of the indexed fields that cause the index to be
// multikey when getting the index keys for sorting.
MultikeyPaths* multikeyPaths = nullptr;
- _indexKeyGen->getKeys(obj, &keys, multikeyPaths);
+ const auto skipMultikey = false;
+ _indexKeyGen->getKeys(obj, skipMultikey, &keys, multikeyPaths);
} catch (const AssertionException& e) {
// Probably a parallel array.
if (ErrorCodes::CannotIndexParallelArrays == e.code()) {
diff --git a/src/mongo/dbtests/validate_tests.cpp b/src/mongo/dbtests/validate_tests.cpp
index 36a7f3a5b92..bfa33ebdb2f 100644
--- a/src/mongo/dbtests/validate_tests.cpp
+++ b/src/mongo/dbtests/validate_tests.cpp
@@ -887,7 +887,7 @@ public:
KeyStringSet keys;
iam->getKeys(actualKey,
IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
nullptr,
nullptr,
@@ -1302,7 +1302,7 @@ public:
KeyStringSet keys;
iam->getKeys(actualKey,
IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
nullptr,
nullptr,
@@ -1535,7 +1535,7 @@ public:
KeyStringSet keys;
iam->getKeys(actualKey,
IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
nullptr,
nullptr,
@@ -1571,7 +1571,7 @@ public:
KeyStringSet keys;
iam->getKeys(actualKey,
IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
nullptr,
nullptr,
@@ -1686,7 +1686,7 @@ public:
KeyStringSet keys;
iam->getKeys(dupObj,
IndexAccessMethod::GetKeysMode::kRelaxConstraints,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
nullptr,
nullptr,
@@ -1718,7 +1718,7 @@ public:
InsertResult result;
iam->getKeys(dupObj,
IndexAccessMethod::GetKeysMode::kRelaxConstraints,
- IndexAccessMethod::GetKeysContext::kReadOrAddKeys,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
&keys,
nullptr,
nullptr,