diff options
author | Anton Korshunov <anton.korshunov@mongodb.com> | 2020-03-12 11:28:29 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-02 21:35:39 +0000 |
commit | 30088474306375095a67244fcf55391d4ccb02a5 (patch) | |
tree | e988fe789fdb1d56680d6487e497242fb83f9a15 | |
parent | ed3a75b6f792dc8f93540622305caa3ea35ad389 (diff) | |
download | mongo-30088474306375095a67244fcf55391d4ccb02a5.tar.gz |
SERVER-46946 Optimize BtreeKeyGenerator::getKeys for non-multikey indexes
(cherry picked from commit c91a2f5932ac08ca28b25ce11b8ac4949552e271)
-rw-r--r-- | src/mongo/db/catalog/index_catalog_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/validate_adaptor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set_common.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/index/btree_access_method.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.cpp | 74 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.h | 15 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator_test.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.h | 6 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.cpp | 3 | ||||
-rw-r--r-- | src/mongo/dbtests/validate_tests.cpp | 12 |
11 files changed, 164 insertions, 49 deletions
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index ff200cad19b..8299fa836cd 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -1418,7 +1418,7 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx, index->accessMethod()->getKeys(*bsonRecord.docPtr, options.getKeysMode, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, &multikeyMetadataKeys, &multikeyPaths, diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp index a6d5ca72b6b..a9669cb0348 100644 --- a/src/mongo/db/catalog/validate_adaptor.cpp +++ b/src/mongo/db/catalog/validate_adaptor.cpp @@ -105,7 +105,7 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, MultikeyPaths multikeyPaths; iam->getKeys(recordBson, IndexAccessMethod::GetKeysMode::kEnforceConstraints, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &documentKeySet, &multikeyMetadataKeys, &multikeyPaths, diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index f80d3a275e6..9bc07f28be2 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -137,7 +137,7 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx, auto* iam = workingSet->retrieveIndexAccessMethod(memberKey.indexId); iam->getKeys(member->doc.value().toBson(), IndexAccessMethod::GetKeysMode::kEnforceConstraints, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kValidatingKeys, &keys, multikeyMetadataKeys, multikeyPaths, diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index cf158940216..308d3684086 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -71,7 +71,9 @@ void BtreeAccessMethod::doGetKeys(const BSONObj& obj, KeyStringSet* multikeyMetadataKeys, MultikeyPaths* multikeyPaths, boost::optional<RecordId> id) const { - _keyGenerator->getKeys(obj, keys, multikeyPaths, id); + const auto skipMultikey = + context == IndexAccessMethod::GetKeysContext::kValidatingKeys && !_descriptor->isMultikey(); + _keyGenerator->getKeys(obj, skipMultikey, keys, multikeyPaths, id); } } // namespace mongo diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp index b8dcb6f1024..0b41e8a7451 100644 --- a/src/mongo/db/index/btree_key_generator.cpp +++ b/src/mongo/db/index/btree_key_generator.cpp @@ -53,6 +53,38 @@ const BSONElement nullElt = nullObj.firstElement(); const BSONObj undefinedObj = BSON("" << BSONUndefined); const BSONElement undefinedElt = undefinedObj.firstElement(); +/** + * Returns the non-array element at the specified path. This function returns an empty BSON element + * if the path doesn't exist. + * + * The 'path' can be specified using a dotted notation in order to traverse through embedded + * objects. + * + * This function must only be used when there is no an array element along the 'path'. The caller is + * responsible to ensure this invariant holds. + */ +BSONElement extractNonArrayElementAtPath(const BSONObj& obj, StringData path) { + static const auto kEmptyElt = BSONElement{}; + + auto&& [elt, tail] = [&]() -> std::pair<BSONElement, StringData> { + if (auto dotOffset = path.find("."); dotOffset != std::string::npos) { + return {obj.getField(path.substr(0, dotOffset)), path.substr(dotOffset + 1)}; + } + return {obj.getField(path), ""_sd}; + }(); + invariant(elt.type() != BSONType::Array); + + if (elt.eoo()) { + return kEmptyElt; + } else if (tail.empty()) { + return elt; + } else if (elt.type() == BSONType::Object) { + return extractNonArrayElementAtPath(elt.embeddedObject(), tail); + } + // We found a scalar element, but there is more path to traverse, e.g. {a: 1} with a path of + // "a.b". + return kEmptyElt; +} } // namespace BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames, @@ -157,6 +189,7 @@ void BtreeKeyGenerator::_getKeysArrEltFixed(std::vector<const char*>* fieldNames } void BtreeKeyGenerator::getKeys(const BSONObj& obj, + bool skipMultikey, KeyStringSet* keys, MultikeyPaths* multikeyPaths, boost::optional<RecordId> id) const { @@ -190,6 +223,14 @@ void BtreeKeyGenerator::getKeys(const BSONObj& obj, if (multikeyPaths) { multikeyPaths->resize(1); } + } else if (skipMultikey) { + // This index doesn't contain array values. We therefore always set 'multikeyPaths' as + // [[ ], [], ...]. + if (multikeyPaths) { + invariant(multikeyPaths->empty()); + multikeyPaths->resize(_fieldNames.size()); + } + _getKeysWithoutArray(obj, id, keys); } else { if (multikeyPaths) { invariant(multikeyPaths->empty()); @@ -200,11 +241,44 @@ void BtreeKeyGenerator::getKeys(const BSONObj& obj, _getKeysWithArray( _fieldNames, _fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths, id); } + if (keys->empty() && !_isSparse) { keys->insert(_nullKeyString); } } +void BtreeKeyGenerator::_getKeysWithoutArray(const BSONObj& obj, + boost::optional<RecordId> id, + KeyStringSet* keys) const { + + KeyString::HeapBuilder keyString{_keyStringVersion, _ordering}; + size_t numNotFound{0}; + + for (auto&& fieldName : _fieldNames) { + auto elem = extractNonArrayElementAtPath(obj, fieldName); + if (elem.eoo()) { + ++numNotFound; + } + + if (_collator) { + keyString.appendBSONElement(elem, [&](StringData stringData) { + return _collator->getComparisonString(stringData); + }); + } else { + keyString.appendBSONElement(elem); + } + } + + if (_isSparse && numNotFound == _fieldNames.size()) { + return; + } + + if (id) { + keyString.appendRecordId(*id); + } + keys->insert(keyString.release()); +} + void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h index 303927a1fe7..1b1b5d53584 100644 --- a/src/mongo/db/index/btree_key_generator.h +++ b/src/mongo/db/index/btree_key_generator.h @@ -68,8 +68,15 @@ public: * 'multikeyPaths' to have the same number of elements as the index key pattern and fills each * element with the prefixes of the indexed field that would cause this index to be multikey as * a result of inserting 'keys'. + * + * If the caller is certain that the current index is not multikey, and the insertion of 'obj' + * will not turn the index into a multikey, then the 'skipMultikey' parameter can be set to + * 'true' to be able to use an optimized algorithm for the index key generation. Otherwise, + * this parameter must be set to 'false'. In this case a generic algorithm will be used, which + * can handle both multikey and non-multikey indexes. */ void getKeys(const BSONObj& obj, + bool skipMultikey, KeyStringSet* keys, MultikeyPaths* multikeyPaths, boost::optional<RecordId> id = boost::none) const; @@ -150,6 +157,14 @@ private: boost::optional<RecordId> id) const; /** + * An optimized version of the key generation algorithm to be used when it is known that 'obj' + * doesn't contain an array value in any of the fields in the key pattern. + */ + void _getKeysWithoutArray(const BSONObj& obj, + boost::optional<RecordId> id, + KeyStringSet* keys) const; + + /** * A call to _getKeysWithArray() begins by calling this for each field in the key pattern. It * traverses the path '*field' in 'obj' until either reaching the end of the path or an array * element. diff --git a/src/mongo/db/index/btree_key_generator_test.cpp b/src/mongo/db/index/btree_key_generator_test.cpp index 3c7c0775255..b25f24a8b35 100644 --- a/src/mongo/db/index/btree_key_generator_test.cpp +++ b/src/mongo/db/index/btree_key_generator_test.cpp @@ -94,6 +94,16 @@ bool keysetsEqual(const KeyStringSet& expectedKeys, const KeyStringSet& actualKe return true; } +bool containsArrayElement(const BSONObj& obj) { + for (auto&& elem : obj) { + if (elem.type() == BSONType::Array || + (elem.type() == BSONType::Object && containsArrayElement(elem.Obj()))) { + return true; + } + } + return false; +} + bool testKeygen(const BSONObj& kp, const BSONObj& obj, const KeyStringSet& expectedKeys, @@ -103,8 +113,7 @@ bool testKeygen(const BSONObj& kp, invariant(expectedMultikeyPaths.size() == static_cast<size_t>(kp.nFields())); // - // Step 1: construct the btree key generator object, using the - // index key pattern. + // Construct the btree key generator object, using the index key pattern. // vector<const char*> fieldNames; vector<BSONElement> fixed; @@ -123,39 +132,53 @@ bool testKeygen(const BSONObj& kp, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())); - // - // Step 2: ask 'keyGen' to generate index keys for the object 'obj' and report any prefixes of - // the indexed fields that would cause the index to be multikey as a result of inserting - // 'actualKeys'. - // - KeyStringSet actualKeys; - MultikeyPaths actualMultikeyPaths; - keyGen->getKeys(obj, &actualKeys, &actualMultikeyPaths); + auto runTest = [&](bool skipMultikey) { + // + // Ask 'keyGen' to generate index keys for the object 'obj' and report any prefixes of the + // indexed fields that would cause the index to be multikey as a result of inserting + // 'actualKeys'. + // + KeyStringSet actualKeys; + MultikeyPaths actualMultikeyPaths; + keyGen->getKeys(obj, skipMultikey, &actualKeys, &actualMultikeyPaths); + + // + // Check that the results match the expected result. + // + bool match = keysetsEqual(expectedKeys, actualKeys); + if (!match) { + LOGV2(20647, + "Expected: {dumpKeyset_expectedKeys}, Actual: {dumpKeyset_actualKeys}", + "dumpKeyset_expectedKeys"_attr = dumpKeyset(expectedKeys), + "dumpKeyset_actualKeys"_attr = dumpKeyset(actualKeys)); + return false; + } - // - // Step 3: check that the results match the expected result. - // - bool match = keysetsEqual(expectedKeys, actualKeys); - if (!match) { - LOGV2(20647, - "Expected: {dumpKeyset_expectedKeys}, Actual: {dumpKeyset_actualKeys}", - "dumpKeyset_expectedKeys"_attr = dumpKeyset(expectedKeys), - "dumpKeyset_actualKeys"_attr = dumpKeyset(actualKeys)); - return false; - } + match = (expectedMultikeyPaths == actualMultikeyPaths); + if (!match) { + LOGV2(20648, + "Expected: {dumpMultikeyPaths_expectedMultikeyPaths}, Actual: " + "{dumpMultikeyPaths_actualMultikeyPaths}", + "dumpMultikeyPaths_expectedMultikeyPaths"_attr = + dumpMultikeyPaths(expectedMultikeyPaths), + "dumpMultikeyPaths_actualMultikeyPaths"_attr = + dumpMultikeyPaths(actualMultikeyPaths)); + return false; + } + + return true; + }; - match = (expectedMultikeyPaths == actualMultikeyPaths); - if (!match) { - LOGV2(20648, - "Expected: {dumpMultikeyPaths_expectedMultikeyPaths}, Actual: " - "{dumpMultikeyPaths_actualMultikeyPaths}", - "dumpMultikeyPaths_expectedMultikeyPaths"_attr = - dumpMultikeyPaths(expectedMultikeyPaths), - "dumpMultikeyPaths_actualMultikeyPaths"_attr = - dumpMultikeyPaths(actualMultikeyPaths)); + // If it is correct to do so, then test that the fast key generation path for the non-multikey + // case works as expected. + if (!containsArrayElement(obj)) { + if (!runTest(true)) { + return false; + } } - return match; + // Test that fully general key generation path works as expected. + return runTest(false); } // diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 47c86aef8ab..67483da9ec7 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -137,7 +137,7 @@ Status AbstractIndexAccessMethod::insert(OperationContext* opCtx, getKeys(obj, options.getKeysMode, - GetKeysContext::kReadOrAddKeys, + GetKeysContext::kAddingKeys, &keys, &multikeyMetadataKeys, &multikeyPaths, @@ -257,7 +257,7 @@ RecordId AbstractIndexAccessMethod::findSingle(OperationContext* opCtx, MultikeyPaths* multikeyPaths = nullptr; getKeys(requestedKey, GetKeysMode::kEnforceConstraints, - GetKeysContext::kReadOrAddKeys, + GetKeysContext::kAddingKeys, &keys, multikeyMetadataKeys, multikeyPaths, @@ -384,7 +384,7 @@ void AbstractIndexAccessMethod::prepareUpdate(OperationContext* opCtx, if (!indexFilter || indexFilter->matchesBSON(to)) { getKeys(to, options.getKeysMode, - GetKeysContext::kReadOrAddKeys, + GetKeysContext::kAddingKeys, &ticket->newKeys, &ticket->newMultikeyMetadataKeys, &ticket->newMultikeyPaths, @@ -523,7 +523,7 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt _indexCatalogEntry->accessMethod()->getKeys( obj, options.getKeysMode, - GetKeysContext::kReadOrAddKeys, + GetKeysContext::kAddingKeys, &keys, &_multikeyMetadataKeys, &multikeyPaths, diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 245e98778b8..a4d9e7ab047 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -276,10 +276,10 @@ public: }; /** - * Specifies whether getKeys is being used in the context of creating new keys or deleting - * existing keys. + * Specifies whether getKeys is being used in the context of creating new keys, deleting + * or validating existing keys. */ - enum class GetKeysContext { kRemovingKeys, kReadOrAddKeys }; + enum class GetKeysContext { kRemovingKeys, kAddingKeys, kValidatingKeys }; /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp index b5e80f0f552..8799c58b5f9 100644 --- a/src/mongo/db/index/sort_key_generator.cpp +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -166,7 +166,8 @@ StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocumentWithoutMetadata( // There's no need to compute the prefixes of the indexed fields that cause the index to be // multikey when getting the index keys for sorting. MultikeyPaths* multikeyPaths = nullptr; - _indexKeyGen->getKeys(obj, &keys, multikeyPaths); + const auto skipMultikey = false; + _indexKeyGen->getKeys(obj, skipMultikey, &keys, multikeyPaths); } catch (const AssertionException& e) { // Probably a parallel array. if (ErrorCodes::CannotIndexParallelArrays == e.code()) { diff --git a/src/mongo/dbtests/validate_tests.cpp b/src/mongo/dbtests/validate_tests.cpp index 36a7f3a5b92..bfa33ebdb2f 100644 --- a/src/mongo/dbtests/validate_tests.cpp +++ b/src/mongo/dbtests/validate_tests.cpp @@ -887,7 +887,7 @@ public: KeyStringSet keys; iam->getKeys(actualKey, IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, nullptr, nullptr, @@ -1302,7 +1302,7 @@ public: KeyStringSet keys; iam->getKeys(actualKey, IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, nullptr, nullptr, @@ -1535,7 +1535,7 @@ public: KeyStringSet keys; iam->getKeys(actualKey, IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, nullptr, nullptr, @@ -1571,7 +1571,7 @@ public: KeyStringSet keys; iam->getKeys(actualKey, IndexAccessMethod::GetKeysMode::kRelaxConstraintsUnfiltered, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, nullptr, nullptr, @@ -1686,7 +1686,7 @@ public: KeyStringSet keys; iam->getKeys(dupObj, IndexAccessMethod::GetKeysMode::kRelaxConstraints, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, nullptr, nullptr, @@ -1718,7 +1718,7 @@ public: InsertResult result; iam->getKeys(dupObj, IndexAccessMethod::GetKeysMode::kRelaxConstraints, - IndexAccessMethod::GetKeysContext::kReadOrAddKeys, + IndexAccessMethod::GetKeysContext::kAddingKeys, &keys, nullptr, nullptr, |