diff options
author | Anton Korshunov <anton.korshunov@mongodb.com> | 2022-05-27 13:41:55 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-27 14:30:07 +0000 |
commit | 7a87b42de9fd34123caf6c81ff75835e3b5c8e23 (patch) | |
tree | d33235346edfc2e1a9b018d02bcc68f78b82b73e /src/mongo/db/index | |
parent | ac297836aede311622e60ee502dda7ac6bb344fc (diff) | |
download | mongo-7a87b42de9fd34123caf6c81ff75835e3b5c8e23.tar.gz |
SERVER-66545 Collator should be passed as an argument to "generateSortKey" builtin in SBE
Diffstat (limited to 'src/mongo/db/index')
-rw-r--r-- | src/mongo/db/index/btree_access_method.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.h | 13 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator_test.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/index/key_gen_bm.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/index/sort_key_generator.cpp | 3 |
6 files changed, 35 insertions, 30 deletions
diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index 93297808aec..177f03ad34b 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -61,7 +61,6 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, std::make_unique<BtreeKeyGenerator>(fieldNames, fixed, _descriptor->isSparse(), - btreeState->getCollator(), getSortedDataInterface()->getKeyStringVersion(), getSortedDataInterface()->getOrdering()); } @@ -83,7 +82,13 @@ void BtreeAccessMethod::doGetKeys(OperationContext* opCtx, boost::optional<RecordId> id) const { const auto skipMultikey = context == GetKeysContext::kValidatingKeys && !_descriptor->getEntry()->isMultikey(opCtx, collection); - _keyGenerator->getKeys(pooledBufferBuilder, obj, skipMultikey, keys, multikeyPaths, id); + _keyGenerator->getKeys(pooledBufferBuilder, + obj, + skipMultikey, + keys, + multikeyPaths, + _indexCatalogEntry->getCollator(), + id); } } // namespace mongo diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp index 62ec65f8080..f386a3aba87 100644 --- a/src/mongo/db/index/btree_key_generator.cpp +++ b/src/mongo/db/index/btree_key_generator.cpp @@ -89,7 +89,6 @@ BSONElement extractNonArrayElementAtPath(const BSONObj& obj, StringData path) { BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, bool isSparse, - const CollatorInterface* collator, KeyString::Version keyStringVersion, Ordering ordering) : _keyStringVersion(keyStringVersion), @@ -99,8 +98,7 @@ BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames, _fieldNames(std::move(fieldNames)), _nullKeyString(_buildNullKeyString()), _fixed(std::move(fixed)), - _emptyPositionalInfo(_fieldNames.size()), - _collator(collator) { + _emptyPositionalInfo(_fieldNames.size()) { for (const char* fieldName : _fieldNames) { FieldRef fieldRef{fieldName}; @@ -174,6 +172,7 @@ void BtreeKeyGenerator::_getKeysArrEltFixed(const std::vector<const char*>& fiel bool mayExpandArrayUnembedded, const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths, + const CollatorInterface* collator, boost::optional<RecordId> id) const { // fieldNamesTemp and fixedTemp are passed in by the caller to be used as temporary data // structures as we need them to be mutable in the recursion. When they are stored outside we @@ -201,6 +200,7 @@ void BtreeKeyGenerator::_getKeysArrEltFixed(const std::vector<const char*>& fiel numNotFound, positionalInfo, multikeyPaths, + collator, id); } @@ -209,6 +209,7 @@ void BtreeKeyGenerator::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder bool skipMultikey, KeyStringSet* keys, MultikeyPaths* multikeyPaths, + const CollatorInterface* collator, boost::optional<RecordId> id) const { if (_isIdIndex) { // we special case for speed @@ -218,9 +219,9 @@ void BtreeKeyGenerator::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder } else { KeyString::PooledBuilder keyString(pooledBufferBuilder, _keyStringVersion, _ordering); - if (_collator) { + if (collator) { keyString.appendBSONElement(e, [&](StringData stringData) { - return _collator->getComparisonString(stringData); + return collator->getComparisonString(stringData); }); } else { keyString.appendBSONElement(e); @@ -245,7 +246,7 @@ void BtreeKeyGenerator::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder invariant(multikeyPaths->empty()); multikeyPaths->resize(_fieldNames.size()); } - _getKeysWithoutArray(pooledBufferBuilder, obj, id, keys); + _getKeysWithoutArray(pooledBufferBuilder, obj, collator, id, keys); } else { if (multikeyPaths) { invariant(multikeyPaths->empty()); @@ -265,6 +266,7 @@ void BtreeKeyGenerator::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder 0, _emptyPositionalInfo, multikeyPaths, + collator, id); // Put the sequence back into the set, it will sort and guarantee uniqueness, this is // O(NlogN) @@ -308,6 +310,7 @@ size_t BtreeKeyGenerator::PositionalPathInfo::getApproximateSize() const { void BtreeKeyGenerator::_getKeysWithoutArray(SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONObj& obj, + const CollatorInterface* collator, boost::optional<RecordId> id, KeyStringSet* keys) const { @@ -320,9 +323,9 @@ void BtreeKeyGenerator::_getKeysWithoutArray(SharedBufferFragmentBuilder& pooled ++numNotFound; } - if (_collator) { + if (collator) { keyString.appendBSONElement(elem, [&](StringData stringData) { - return _collator->getComparisonString(stringData); + return collator->getComparisonString(stringData); }); } else { keyString.appendBSONElement(elem); @@ -347,6 +350,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*>* fieldNames, unsigned numNotFound, const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths, + const CollatorInterface* collator, boost::optional<RecordId> id) const { BSONElement arrElt; @@ -418,9 +422,9 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*>* fieldNames, } KeyString::PooledBuilder keyString(pooledBufferBuilder, _keyStringVersion, _ordering); for (const auto& elem : *fixed) { - if (_collator) { + if (collator) { keyString.appendBSONElement(elem, [&](StringData stringData) { - return _collator->getComparisonString(stringData); + return collator->getComparisonString(stringData); }); } else { keyString.appendBSONElement(elem); @@ -463,6 +467,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*>* fieldNames, true, _emptyPositionalInfo, multikeyPaths, + collator, id); } else { BSONObj arrObj = arrElt.embeddedObject(); @@ -551,6 +556,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*>* fieldNames, mayExpandArrayUnembedded, subPositionalInfo, multikeyPaths, + collator, id); } } diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h index 2cf55d6a770..7287b68420d 100644 --- a/src/mongo/db/index/btree_key_generator.h +++ b/src/mongo/db/index/btree_key_generator.h @@ -56,7 +56,6 @@ public: BtreeKeyGenerator(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, bool isSparse, - const CollatorInterface* collator, KeyString::Version keyStringVersion, Ordering ordering); @@ -74,12 +73,17 @@ public: * 'true' to be able to use an optimized algorithm for the index key generation. Otherwise, * this parameter must be set to 'false'. In this case a generic algorithm will be used, which * can handle both multikey and non-multikey indexes. + * + * If the 'collator' argument is set to null, this key generator orders strings according to the + * simple binary compare. If non-null, represents the collator used to generate index keys for + * indexed strings. */ void getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONObj& obj, bool skipMultikey, KeyStringSet* keys, MultikeyPaths* multikeyPaths, + const CollatorInterface* collator = nullptr, boost::optional<RecordId> id = boost::none) const; size_t getApproximateSize() const; @@ -152,6 +156,7 @@ private: unsigned numNotFound, const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths, + const CollatorInterface* collator, boost::optional<RecordId> id) const; /** @@ -160,6 +165,7 @@ private: */ void _getKeysWithoutArray(SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONObj& obj, + const CollatorInterface* collator, boost::optional<RecordId> id, KeyStringSet* keys) const; @@ -221,6 +227,7 @@ private: bool mayExpandArrayUnembedded, const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths, + const CollatorInterface* collator, boost::optional<RecordId> id) const; KeyString::Value _buildNullKeyString() const; @@ -246,10 +253,6 @@ private: // A vector with size equal to the number of elements in the index key pattern. Each element in // the vector is the number of path components in the indexed field. std::vector<size_t> _pathLengths; - - // Null if this key generator orders strings according to the simple binary compare. If - // non-null, represents the collator used to generate index keys for indexed strings. - const CollatorInterface* _collator; }; } // namespace mongo diff --git a/src/mongo/db/index/btree_key_generator_test.cpp b/src/mongo/db/index/btree_key_generator_test.cpp index 50efff709a2..312fad5ee3c 100644 --- a/src/mongo/db/index/btree_key_generator_test.cpp +++ b/src/mongo/db/index/btree_key_generator_test.cpp @@ -118,12 +118,8 @@ bool testKeygen(const BSONObj& kp, fixed.push_back(BSONElement()); } - auto keyGen = std::make_unique<BtreeKeyGenerator>(fieldNames, - fixed, - sparse, - collator, - KeyString::Version::kLatestVersion, - Ordering::make(BSONObj())); + auto keyGen = std::make_unique<BtreeKeyGenerator>( + fieldNames, fixed, sparse, KeyString::Version::kLatestVersion, Ordering::make(BSONObj())); auto runTest = [&](bool skipMultikey) { // @@ -134,7 +130,7 @@ bool testKeygen(const BSONObj& kp, SharedBufferFragmentBuilder allocator(BufBuilder::kDefaultInitSizeBytes); KeyStringSet actualKeys; MultikeyPaths actualMultikeyPaths; - keyGen->getKeys(allocator, obj, skipMultikey, &actualKeys, &actualMultikeyPaths); + keyGen->getKeys(allocator, obj, skipMultikey, &actualKeys, &actualMultikeyPaths, collator); // // Check that the results match the expected result. diff --git a/src/mongo/db/index/key_gen_bm.cpp b/src/mongo/db/index/key_gen_bm.cpp index b6dc520dc70..84c76811497 100644 --- a/src/mongo/db/index/key_gen_bm.cpp +++ b/src/mongo/db/index/key_gen_bm.cpp @@ -54,7 +54,6 @@ void BM_KeyGenBasic(benchmark::State& state, bool skipMultikey) { BtreeKeyGenerator generator({kFieldName}, {BSONElement{}}, false, - nullptr, KeyString::Version::kLatestVersion, makeOrdering(kFieldName)); @@ -85,7 +84,6 @@ void BM_KeyGenArray(benchmark::State& state, int32_t elements) { BtreeKeyGenerator generator({kFieldName}, {BSONElement{}}, false, - nullptr, KeyString::Version::kLatestVersion, makeOrdering(kFieldName)); @@ -114,7 +112,6 @@ void BM_KeyGenArrayZero(benchmark::State& state, int32_t elements) { BtreeKeyGenerator generator({kFieldName}, {BSONElement{}}, false, - nullptr, KeyString::Version::kLatestVersion, makeOrdering(kFieldName)); @@ -148,7 +145,6 @@ void BM_KeyGenArrayOfArray(benchmark::State& state, int32_t elements) { BtreeKeyGenerator generator({kFieldName}, {BSONElement{}}, false, - nullptr, KeyString::Version::kLatestVersion, makeOrdering(kFieldName)); diff --git a/src/mongo/db/index/sort_key_generator.cpp b/src/mongo/db/index/sort_key_generator.cpp index 152f7dc7886..0019027a85a 100644 --- a/src/mongo/db/index/sort_key_generator.cpp +++ b/src/mongo/db/index/sort_key_generator.cpp @@ -72,7 +72,6 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf _indexKeyGen = std::make_unique<BtreeKeyGenerator>(fieldNames, fixed, isSparse, - _collator, KeyString::Version::kLatestVersion, Ordering::make(_sortSpecWithoutMeta)); } @@ -168,7 +167,7 @@ StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocumentWithoutMetadata( // multikey when getting the index keys for sorting. MultikeyPaths* multikeyPaths = nullptr; const auto skipMultikey = false; - _indexKeyGen->getKeys(allocator, obj, skipMultikey, &keys, multikeyPaths); + _indexKeyGen->getKeys(allocator, obj, skipMultikey, &keys, multikeyPaths, _collator); } catch (const AssertionException& e) { // Probably a parallel array. if (ErrorCodes::CannotIndexParallelArrays == e.code()) { |