diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2020-04-10 10:19:09 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-14 19:09:38 +0000 |
commit | 0aab1bdb1f18bdb60ca08ad80f687dc6e1351358 (patch) | |
tree | b323369f879f19972e3561e08e9a4b66183ac778 /src/mongo | |
parent | 1740d32001cf77ce0dab6a1b1ec14d4b5be8bfef (diff) | |
download | mongo-0aab1bdb1f18bdb60ca08ad80f687dc6e1351358.tar.gz |
SERVER-47416 Eliminate copies of KeyStringSet when possible
Also reusing memory for temporary data structure in KeyString generation for arrays.
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/catalog/index_catalog_impl.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_catalog_impl.h | 4 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.h | 13 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.cpp | 73 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.h | 31 | ||||
-rw-r--r-- | src/mongo/db/index/index_build_interceptor.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/index/index_build_interceptor.h | 2 | ||||
-rw-r--r-- | src/mongo/db/index/wildcard_access_method.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/index/wildcard_access_method.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.h | 2 | ||||
-rw-r--r-- | src/mongo/dbtests/index_access_method_test.cpp | 23 |
13 files changed, 133 insertions, 123 deletions
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp index ef4b33de2d6..a7af9985342 100644 --- a/src/mongo/db/catalog/index_catalog_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_impl.cpp @@ -1349,7 +1349,7 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx, Status IndexCatalogImpl::_indexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const BSONObj& obj, @@ -1431,7 +1431,7 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx, Status status = _indexKeys(opCtx, index, - {keys->begin(), keys->end()}, + *keys, *multikeyMetadataKeys, *multikeyPaths, *bsonRecord.docPtr, @@ -1515,7 +1515,7 @@ Status IndexCatalogImpl::_updateRecord(OperationContext* const opCtx, void IndexCatalogImpl::_unindexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const BSONObj& obj, RecordId loc, bool logIfError, @@ -1602,7 +1602,7 @@ void IndexCatalogImpl::_unindexRecord(OperationContext* opCtx, return; } } - _unindexKeys(opCtx, entry, {keys->begin(), keys->end()}, obj, loc, logIfError, keysDeletedOut); + _unindexKeys(opCtx, entry, *keys, obj, loc, logIfError, keysDeletedOut); } Status IndexCatalogImpl::indexRecords(OperationContext* opCtx, diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h index 36709dba180..fb3ec499a0d 100644 --- a/src/mongo/db/catalog/index_catalog_impl.h +++ b/src/mongo/db/catalog/index_catalog_impl.h @@ -308,7 +308,7 @@ private: Status _indexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const BSONObj& obj, @@ -336,7 +336,7 @@ private: void _unindexKeys(OperationContext* opCtx, IndexCatalogEntry* index, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const BSONObj& obj, RecordId loc, bool logIfError, diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp index 3c32c68fd57..e48097a08cb 100644 --- a/src/mongo/db/index/btree_key_generator.cpp +++ b/src/mongo/db/index/btree_key_generator.cpp @@ -159,8 +159,10 @@ BSONElement BtreeKeyGenerator::_extractNextElement(const BSONObj& obj, return BSONElement(); } -void BtreeKeyGenerator::_getKeysArrEltFixed(std::vector<const char*>* fieldNames, - std::vector<BSONElement>* fixed, +void BtreeKeyGenerator::_getKeysArrEltFixed(const std::vector<const char*>& fieldNames, + const std::vector<BSONElement>& fixed, + std::vector<const char*>* fieldNamesTemp, + std::vector<BSONElement>* fixedTemp, SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONElement& arrEntry, KeyStringSet::sequence_type* keys, @@ -171,16 +173,26 @@ void BtreeKeyGenerator::_getKeysArrEltFixed(std::vector<const char*>* fieldNames const std::vector<PositionalPathInfo>& positionalInfo, MultikeyPaths* multikeyPaths, boost::optional<RecordId> id) const { + // fieldNamesTemp and fixedTemp are passed in by the caller to be used as temporary data + // structures as we need them to be mutable in the recursion. When they are stored outside we + // can reuse their memory. + fieldNamesTemp->clear(); + fixedTemp->clear(); + fieldNamesTemp->reserve(fieldNames.size()); + fixedTemp->reserve(fixed.size()); + std::copy(fieldNames.begin(), fieldNames.end(), std::back_inserter(*fieldNamesTemp)); + std::copy(fixed.begin(), fixed.end(), std::back_inserter(*fixedTemp)); + // Set up any terminal array values. for (const auto idx : arrIdxs) { - if (*(*fieldNames)[idx] == '\0') { - (*fixed)[idx] = mayExpandArrayUnembedded ? arrEntry : arrObjElt; + if (*(*fieldNamesTemp)[idx] == '\0') { + (*fixedTemp)[idx] = mayExpandArrayUnembedded ? arrEntry : arrObjElt; } } // Recurse. - _getKeysWithArray(*fieldNames, - *fixed, + _getKeysWithArray(fieldNamesTemp, + fixedTemp, pooledBufferBuilder, arrEntry.type() == Object ? arrEntry.embeddedObject() : BSONObj(), keys, @@ -240,10 +252,11 @@ void BtreeKeyGenerator::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder // Extract the underlying sequence and insert elements unsorted to avoid O(N^2) when // inserting element by element if array auto seq = keys->extract_sequence(); - // '_fieldNames' and '_fixed' are passed by value so that their copies can be mutated as - // part of the _getKeysWithArray method. - _getKeysWithArray(_fieldNames, - _fixed, + // '_fieldNames' and '_fixed' are mutated by _getKeysWithArray so pass in copies + auto fieldNamesCopy = _fieldNames; + auto fixedCopy = _fixed; + _getKeysWithArray(&fieldNamesCopy, + &fixedCopy, pooledBufferBuilder, obj, &seq, @@ -294,8 +307,8 @@ void BtreeKeyGenerator::_getKeysWithoutArray(SharedBufferFragmentBuilder& pooled keys->insert(keyString.release()); } -void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, - std::vector<BSONElement> fixed, +void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*>* fieldNames, + std::vector<BSONElement>* fixed, SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONObj& obj, KeyStringSet::sequence_type* keys, @@ -329,24 +342,24 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the // first element of the array, so we'd have a // std::vector<boost::optional<size_t>>{{1U}, boost::none}. - std::vector<boost::optional<size_t>> arrComponents(fieldNames.size()); + std::vector<boost::optional<size_t>> arrComponents(fieldNames->size()); bool mayExpandArrayUnembedded = true; - for (size_t i = 0; i < fieldNames.size(); ++i) { - if (*fieldNames[i] == '\0') { + for (size_t i = 0; i < fieldNames->size(); ++i) { + if (*(*fieldNames)[i] == '\0') { continue; } bool arrayNestedArray; // Extract element matching fieldName[ i ] from object xor array. BSONElement e = - _extractNextElement(obj, positionalInfo[i], &fieldNames[i], &arrayNestedArray); + _extractNextElement(obj, positionalInfo[i], &(*fieldNames)[i], &arrayNestedArray); if (e.eoo()) { // if field not present, set to null - fixed[i] = nullElt; + (*fixed)[i] = nullElt; // done expanding this field name - fieldNames[i] = ""; + (*fieldNames)[i] = ""; numNotFound++; } else if (e.type() == Array) { arrIdxs.insert(i); @@ -362,17 +375,17 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, } } else { // not an array - no need for further expansion - fixed[i] = e; + (*fixed)[i] = e; } } if (arrElt.eoo()) { // No array, so generate a single key. - if (_isSparse && numNotFound == fieldNames.size()) { + if (_isSparse && numNotFound == fieldNames->size()) { return; } KeyString::PooledBuilder keyString(pooledBufferBuilder, _keyStringVersion, _ordering); - for (const auto& elem : fixed) { + for (const auto& elem : *fixed) { if (_collator) { keyString.appendBSONElement(elem, [&](StringData stringData) { return _collator->getComparisonString(stringData); @@ -396,15 +409,19 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, // multikey and may occur mid-path. For instance, the indexed path "a.b.c" has // multikey components {0, 1} given the document {a: [{b: []}, {b: 1}]}. size_t fullPathLength = _pathLengths[i]; - size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts(); + size_t suffixPathLength = FieldRef{(*fieldNames)[i]}.numParts(); invariant(suffixPathLength < fullPathLength); arrComponents[i] = fullPathLength - suffixPathLength - 1; } } // For an empty array, set matching fields to undefined. - _getKeysArrEltFixed(&fieldNames, - &fixed, + std::vector<const char*> fieldNamesTemp; + std::vector<BSONElement> fixedTemp; + _getKeysArrEltFixed(*fieldNames, + *fixed, + &fieldNamesTemp, + &fixedTemp, pooledBufferBuilder, undefinedElt, keys, @@ -422,11 +439,11 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, // and then traverse the remainder of the field path up front. This prevents us from // having to look up the indexed element again on each recursive call (i.e. once per // array element). - std::vector<PositionalPathInfo> subPositionalInfo(fixed.size()); - for (size_t i = 0; i < fieldNames.size(); ++i) { + std::vector<PositionalPathInfo> subPositionalInfo(fixed->size()); + for (size_t i = 0; i < fieldNames->size(); ++i) { const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end(); - if (*fieldNames[i] == '\0') { + if (*(*fieldNames)[i] == '\0') { // We've reached the end of the path. if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) { // The 'arrElt' array value isn't expanded into multiple elements when the last @@ -444,7 +461,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, // we must have traversed through 'arrElt'. invariant(fieldIsArray); - StringData part = fieldNames[i]; + StringData part = (*fieldNames)[i]; part = part.substr(0, part.find('.')); subPositionalInfo[i].positionallyIndexedElt = arrObj[part]; if (subPositionalInfo[i].positionallyIndexedElt.eoo()) { @@ -466,7 +483,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, // latter from the former yields the number of components in the prefix "a.b", // i.e. 2. size_t fullPathLength = _pathLengths[i]; - size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts(); + size_t suffixPathLength = FieldRef{(*fieldNames)[i]}.numParts(); invariant(suffixPathLength < fullPathLength); arrComponents[i] = fullPathLength - suffixPathLength - 1; } @@ -480,15 +497,19 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames, // 'arrElt' array value when generating keys. It therefore cannot cause the index to be // multikey. subPositionalInfo[i].arrayObj = arrObj; - subPositionalInfo[i].remainingPath = fieldNames[i]; + subPositionalInfo[i].remainingPath = (*fieldNames)[i]; subPositionalInfo[i].dottedElt = dps::extractElementAtPathOrArrayAlongPath( arrObj, subPositionalInfo[i].remainingPath); } // Generate a key for each element of the indexed array. + std::vector<const char*> fieldNamesTemp; + std::vector<BSONElement> fixedTemp; for (const auto arrObjElem : arrObj) { - _getKeysArrEltFixed(&fieldNames, - &fixed, + _getKeysArrEltFixed(*fieldNames, + *fixed, + &fieldNamesTemp, + &fixedTemp, pooledBufferBuilder, arrObjElem, keys, diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h index f12db6733f7..c9897e961b9 100644 --- a/src/mongo/db/index/btree_key_generator.h +++ b/src/mongo/db/index/btree_key_generator.h @@ -147,9 +147,10 @@ private: /** * This recursive method does the heavy-lifting for getKeys(). + * It will modify 'fieldNames' and 'fixed'. */ - void _getKeysWithArray(std::vector<const char*> fieldNames, - std::vector<BSONElement> fixed, + void _getKeysWithArray(std::vector<const char*>* fieldNames, + std::vector<BSONElement>* fixed, SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONObj& obj, KeyStringSet::sequence_type* keys, @@ -208,10 +209,14 @@ private: /** * Sets extracted elements in 'fixed' for field paths that we have traversed to the end. * + * fieldNamesTemp and fixedTemp are temporary vectors that will be modified by this method + * * Then calls _getKeysWithArray() recursively. */ - void _getKeysArrEltFixed(std::vector<const char*>* fieldNames, - std::vector<BSONElement>* fixed, + void _getKeysArrEltFixed(const std::vector<const char*>& fieldNames, + const std::vector<BSONElement>& fixed, + std::vector<const char*>* fieldNamesTemp, + std::vector<BSONElement>* fixedTemp, SharedBufferFragmentBuilder& pooledBufferBuilder, const BSONElement& arrEntry, KeyStringSet::sequence_type* keys, diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index a5a3fb4c908..49b69bc92d6 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -60,10 +60,8 @@ namespace mongo { -using std::endl; using std::pair; using std::set; -using std::vector; using IndexVersion = IndexDescriptor::IndexVersion; @@ -84,9 +82,6 @@ bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) { [](const MultikeyComponents& components) { return !components.empty(); }); } -std::vector<KeyString::Value> asVector(const KeyStringSet& keySet) { - return {keySet.begin(), keySet.end()}; -} } // namespace struct BtreeExternalSortComparison { @@ -148,18 +143,12 @@ Status AbstractIndexAccessMethod::insert(OperationContext* opCtx, loc, kNoopOnSuppressedErrorFn); - return insertKeys(opCtx, - {keys->begin(), keys->end()}, - {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()}, - *multikeyPaths, - loc, - options, - result); + return insertKeys(opCtx, *keys, *multikeyMetadataKeys, *multikeyPaths, loc, options, result); } Status AbstractIndexAccessMethod::insertKeys(OperationContext* opCtx, - const vector<KeyString::Value>& keys, - const vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& keys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const RecordId& loc, const InsertDeleteOptions& options, @@ -233,7 +222,7 @@ std::unique_ptr<SortedDataInterface::Cursor> AbstractIndexAccessMethod::newCurso } Status AbstractIndexAccessMethod::removeKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) { @@ -318,32 +307,21 @@ long long AbstractIndexAccessMethod::getSpaceUsedBytes(OperationContext* opCtx) return _newInterface->getSpaceUsedBytes(opCtx); } -pair<vector<KeyString::Value>, vector<KeyString::Value>> AbstractIndexAccessMethod::setDifference( - const KeyStringSet& left, const KeyStringSet& right, Ordering ordering) { +pair<KeyStringSet, KeyStringSet> AbstractIndexAccessMethod::setDifference( + const KeyStringSet& left, const KeyStringSet& right) { // Two iterators to traverse the two sets in sorted order. auto leftIt = left.begin(); auto rightIt = right.begin(); - vector<KeyString::Value> onlyLeft; - vector<KeyString::Value> onlyRight; + KeyStringSet::sequence_type onlyLeft; + KeyStringSet::sequence_type onlyRight; while (leftIt != left.end() && rightIt != right.end()) { - const int cmp = leftIt->compare(*rightIt); + // Use compareWithTypeBits instead of the regular compare as we want just a difference in + // typeinfo to also result in an index change. + const int cmp = leftIt->compareWithTypeBits(*rightIt); if (cmp == 0) { - /* - * 'leftIt' and 'rightIt' compare equal using compare(), but may not be identical, which - * should result in an index change. - */ - auto leftKey = KeyString::toBson( - leftIt->getBuffer(), leftIt->getSize(), ordering, leftIt->getTypeBits()); - auto rightKey = KeyString::toBson( - rightIt->getBuffer(), rightIt->getSize(), ordering, rightIt->getTypeBits()); - if (!leftKey.binaryEqual(rightKey)) { - onlyLeft.push_back(*leftIt); - onlyRight.push_back(*rightIt); - } ++leftIt; ++rightIt; - continue; } else if (cmp > 0) { onlyRight.push_back(*rightIt); ++rightIt; @@ -357,7 +335,15 @@ pair<vector<KeyString::Value>, vector<KeyString::Value>> AbstractIndexAccessMeth onlyLeft.insert(onlyLeft.end(), leftIt, left.end()); onlyRight.insert(onlyRight.end(), rightIt, right.end()); - return {std::move(onlyLeft), std::move(onlyRight)}; + KeyStringSet outLeft; + KeyStringSet outRight; + + // The above algorithm guarantees that the elements are sorted and unique, so we can let the + // container know so we get O(1) complexity adopting it. + outLeft.adopt_sequence(boost::container::ordered_unique_range_t(), std::move(onlyLeft)); + outRight.adopt_sequence(boost::container::ordered_unique_range_t(), std::move(onlyRight)); + + return {{std::move(outLeft)}, {std::move(outRight)}}; } void AbstractIndexAccessMethod::prepareUpdate(OperationContext* opCtx, @@ -405,8 +391,7 @@ void AbstractIndexAccessMethod::prepareUpdate(OperationContext* opCtx, ticket->loc = record; ticket->dupsAllowed = options.dupsAllowed; - std::tie(ticket->removed, ticket->added) = - setDifference(ticket->oldKeys, ticket->newKeys, getSortedDataInterface()->getOrdering()); + std::tie(ticket->removed, ticket->added) = setDifference(ticket->oldKeys, ticket->newKeys); ticket->_isValid = true; } @@ -435,8 +420,7 @@ Status AbstractIndexAccessMethod::update(OperationContext* opCtx, // Add all new data keys, and all new multikey metadata keys, into the index. When iterating // over the data keys, each of them should point to the doc's RecordId. When iterating over // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'. - const auto newMultikeyMetadataKeys = asVector(ticket.newMultikeyMetadataKeys); - for (const auto keySet : {&ticket.added, &newMultikeyMetadataKeys}) { + for (const auto keySet : {&ticket.added, &ticket.newMultikeyMetadataKeys}) { for (const auto& keyString : *keySet) { Status status = _newInterface->insert(opCtx, keyString, ticket.dupsAllowed); if (isFatalError(opCtx, status, keyString)) { @@ -446,9 +430,7 @@ Status AbstractIndexAccessMethod::update(OperationContext* opCtx, } if (shouldMarkIndexAsMultikey( - ticket.newKeys.size(), - {ticket.newMultikeyMetadataKeys.begin(), ticket.newMultikeyMetadataKeys.end()}, - ticket.newMultikeyPaths)) { + ticket.newKeys.size(), ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths)) { _indexCatalogEntry->setMultikey(opCtx, ticket.newMultikeyPaths); } @@ -566,7 +548,8 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt } else { invariant(_indexMultikeyPaths.size() == multikeyPaths->size()); for (size_t i = 0; i < multikeyPaths->size(); ++i) { - _indexMultikeyPaths[i].insert((*multikeyPaths)[i].begin(), + _indexMultikeyPaths[i].insert(boost::container::ordered_unique_range_t(), + (*multikeyPaths)[i].begin(), (*multikeyPaths)[i].end()); } } @@ -579,9 +562,7 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt _isMultiKey = _isMultiKey || _indexCatalogEntry->accessMethod()->shouldMarkIndexAsMultikey( - keys->size(), - {_multikeyMetadataKeys.begin(), _multikeyMetadataKeys.end()}, - *multikeyPaths); + keys->size(), _multikeyMetadataKeys, *multikeyPaths); return Status::OK(); } @@ -790,7 +771,7 @@ void AbstractIndexAccessMethod::getKeys(SharedBufferFragmentBuilder& pooledBuffe bool AbstractIndexAccessMethod::shouldMarkIndexAsMultikey( size_t numberOfKeys, - const vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const { return numberOfKeys > 1 || isMultikeyFromPaths(multikeyPaths); } diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 1e873b865ed..e2e125c0f7b 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -91,8 +91,8 @@ public: InsertResult* result) = 0; virtual Status insertKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& keys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const RecordId& loc, const InsertDeleteOptions& options, @@ -103,7 +103,7 @@ public: * 'numDeleted' will be set to the number of keys removed from the index for the provided keys. */ virtual Status removeKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) = 0; @@ -319,10 +319,9 @@ public: * Given the set of keys, multikeyMetadataKeys and multikeyPaths generated by a particular * document, return 'true' if the index should be marked as multikey and 'false' otherwise. */ - virtual bool shouldMarkIndexAsMultikey( - size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths) const = 0; + virtual bool shouldMarkIndexAsMultikey(size_t numberOfKeys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const = 0; /** * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the @@ -392,8 +391,8 @@ struct UpdateTicket { KeyStringSet newMultikeyMetadataKeys; - std::vector<KeyString::Value> removed; - std::vector<KeyString::Value> added; + KeyStringSet removed; + KeyStringSet added; RecordId loc; bool dupsAllowed; @@ -437,15 +436,15 @@ class AbstractIndexAccessMethod : public IndexAccessMethod { public: /** - * Splits the sets 'left' and 'right' into two vectors, the first containing the elements that + * Splits the sets 'left' and 'right' into two sets, the first containing the elements that * only appeared in 'left', and the second containing only elements that appeared in 'right'. * * Note this considers objects which are not identical as distinct objects. For example, * setDifference({BSON("a" << 0.0)}, {BSON("a" << 0LL)}) would result in the pair * ( {BSON("a" << 0.0)}, {BSON("a" << 0LL)} ). */ - static std::pair<std::vector<KeyString::Value>, std::vector<KeyString::Value>> setDifference( - const KeyStringSet& left, const KeyStringSet& right, Ordering ordering); + static std::pair<KeyStringSet, KeyStringSet> setDifference(const KeyStringSet& left, + const KeyStringSet& right); AbstractIndexAccessMethod(IndexCatalogEntry* btreeState, std::unique_ptr<SortedDataInterface> btree); @@ -457,15 +456,15 @@ public: InsertResult* result) final; Status insertKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& keys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, const RecordId& loc, const InsertDeleteOptions& options, InsertResult* result) final; Status removeKeys(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const RecordId& loc, const InsertDeleteOptions& options, int64_t* numDeleted) final; @@ -524,7 +523,7 @@ public: OnSuppressedErrorFn onSuppressedError) const final; bool shouldMarkIndexAsMultikey(size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const override; SortedDataInterface* getSortedDataInterface() const override final; diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp index 77d87b00a4b..a5350dc5701 100644 --- a/src/mongo/db/index/index_build_interceptor.cpp +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -383,7 +383,7 @@ boost::optional<MultikeyPaths> IndexBuildInterceptor::getMultikeyPaths() const { } Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, RecordId loc, diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h index 695cbfd3e39..4afb108d15b 100644 --- a/src/mongo/db/index/index_build_interceptor.h +++ b/src/mongo/db/index/index_build_interceptor.h @@ -94,7 +94,7 @@ public: * On success, `numKeysOut` if non-null will contain the number of keys added or removed. */ Status sideWrite(OperationContext* opCtx, - const std::vector<KeyString::Value>& keys, + const KeyStringSet& keys, const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths, RecordId loc, diff --git a/src/mongo/db/index/wildcard_access_method.cpp b/src/mongo/db/index/wildcard_access_method.cpp index 5948ef53a6d..cedc074c7cb 100644 --- a/src/mongo/db/index/wildcard_access_method.cpp +++ b/src/mongo/db/index/wildcard_access_method.cpp @@ -46,10 +46,9 @@ WildcardAccessMethod::WildcardAccessMethod(IndexCatalogEntry* wildcardState, getSortedDataInterface()->getKeyStringVersion(), getSortedDataInterface()->getOrdering()) {} -bool WildcardAccessMethod::shouldMarkIndexAsMultikey( - size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, - const MultikeyPaths& multikeyPaths) const { +bool WildcardAccessMethod::shouldMarkIndexAsMultikey(size_t numberOfKeys, + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths) const { return !multikeyMetadataKeys.empty(); } diff --git a/src/mongo/db/index/wildcard_access_method.h b/src/mongo/db/index/wildcard_access_method.h index b639ec9cb10..fe49c3fcc42 100644 --- a/src/mongo/db/index/wildcard_access_method.h +++ b/src/mongo/db/index/wildcard_access_method.h @@ -67,7 +67,7 @@ public: * vector is non-empty. */ bool shouldMarkIndexAsMultikey(size_t numberOfKeys, - const std::vector<KeyString::Value>& multikeyMetadataKeys, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const final; /** diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index a7517cb622d..eeebc0f9d92 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -2564,6 +2564,10 @@ int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t r return leftSize < rightSize ? -1 : 1; } +int Value::compareWithTypeBits(const Value& other) const { + return KeyString::compare(getBuffer(), other.getBuffer(), _buffer.size(), other._buffer.size()); +} + template class BuilderBase<Builder>; template class BuilderBase<HeapBuilder>; template class BuilderBase<PooledBuilder>; diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 90e512a1e1c..8582e9d2e30 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -328,6 +328,8 @@ public: template <class T> int compare(const T& other) const; + int compareWithTypeBits(const Value& other) const; + template <class T> int compareWithoutRecordId(const T& other) const; diff --git a/src/mongo/dbtests/index_access_method_test.cpp b/src/mongo/dbtests/index_access_method_test.cpp index 79690c28874..ad4bcca07bd 100644 --- a/src/mongo/dbtests/index_access_method_test.cpp +++ b/src/mongo/dbtests/index_access_method_test.cpp @@ -53,7 +53,7 @@ KeyStringSet makeKeyStringSet(std::initializer_list<BSONObj> objs) { TEST(IndexAccessMethodSetDifference, EmptyInputsShouldHaveNoDifference) { KeyStringSet left; KeyStringSet right; - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQ(0UL, diff.first.size()); ASSERT_EQ(0UL, diff.second.size()); } @@ -62,7 +62,7 @@ TEST(IndexAccessMethodSetDifference, EmptyLeftShouldHaveNoDifference) { KeyStringSet left; auto right = makeKeyStringSet({BSON("" << 0)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQ(0UL, diff.first.size()); ASSERT_EQ(1UL, diff.second.size()); } @@ -71,7 +71,7 @@ TEST(IndexAccessMethodSetDifference, EmptyRightShouldReturnAllOfLeft) { auto left = makeKeyStringSet({BSON("" << 0), BSON("" << 1)}); KeyStringSet right; - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQ(2UL, diff.first.size()); ASSERT_EQ(0UL, diff.second.size()); } @@ -86,7 +86,7 @@ TEST(IndexAccessMethodSetDifference, IdenticalSetsShouldHaveNoDifference) { << "string"), BSON("" << BSONNULL)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQ(0UL, diff.first.size()); ASSERT_EQ(0UL, diff.second.size()); } @@ -98,8 +98,7 @@ TEST(IndexAccessMethodSetDifference, IdenticalSetsShouldHaveNoDifference) { void assertDistinct(BSONObj left, BSONObj right) { auto leftSet = makeKeyStringSet({left}); auto rightSet = makeKeyStringSet({right}); - auto diff = - AbstractIndexAccessMethod::setDifference(leftSet, rightSet, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(leftSet, rightSet); ASSERT_EQ(1UL, diff.first.size()); ASSERT_EQ(1UL, diff.second.size()); } @@ -152,7 +151,7 @@ TEST(IndexAccessMethodSetDifference, ShouldDetectOneDifferenceAmongManySimilarit BSON("" << BSON("sub" << "document")), BSON("" << BSON_ARRAY(1 << "hi" << 42))}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQUALS(1UL, diff.first.size()); ASSERT_EQUALS(1UL, diff.second.size()); } @@ -160,7 +159,7 @@ TEST(IndexAccessMethodSetDifference, ShouldDetectOneDifferenceAmongManySimilarit TEST(IndexAccessMethodSetDifference, SingleObjInLeftShouldFindCorrespondingObjInRight) { auto left = makeKeyStringSet({BSON("" << 2)}); auto right = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQUALS(0UL, diff.first.size()); ASSERT_EQUALS(2UL, diff.second.size()); } @@ -168,7 +167,7 @@ TEST(IndexAccessMethodSetDifference, SingleObjInLeftShouldFindCorrespondingObjIn TEST(IndexAccessMethodSetDifference, SingleObjInRightShouldFindCorrespondingObjInLeft) { auto left = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)}); auto right = makeKeyStringSet({BSON("" << 2)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQUALS(2UL, diff.first.size()); ASSERT_EQUALS(0UL, diff.second.size()); } @@ -176,7 +175,7 @@ TEST(IndexAccessMethodSetDifference, SingleObjInRightShouldFindCorrespondingObjI TEST(IndexAccessMethodSetDifference, LeftSetAllSmallerThanRightShouldBeDisjoint) { auto left = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)}); auto right = makeKeyStringSet({BSON("" << 4), BSON("" << 5), BSON("" << 6)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQUALS(3UL, diff.first.size()); ASSERT_EQUALS(3UL, diff.second.size()); for (auto&& obj : diff.first) { @@ -190,7 +189,7 @@ TEST(IndexAccessMethodSetDifference, LeftSetAllSmallerThanRightShouldBeDisjoint) TEST(IndexAccessMethodSetDifference, LeftSetAllLargerThanRightShouldBeDisjoint) { auto left = makeKeyStringSet({BSON("" << 4), BSON("" << 5), BSON("" << 6)}); auto right = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQUALS(3UL, diff.first.size()); ASSERT_EQUALS(3UL, diff.second.size()); for (auto&& obj : diff.first) { @@ -205,7 +204,7 @@ TEST(IndexAccessMethodSetDifference, ShouldNotReportOverlapsFromNonDisjointSets) auto left = makeKeyStringSet({BSON("" << 0), BSON("" << 1), BSON("" << 4), BSON("" << 6)}); auto right = makeKeyStringSet( {BSON("" << -1), BSON("" << 1), BSON("" << 3), BSON("" << 4), BSON("" << 7)}); - auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj())); + auto diff = AbstractIndexAccessMethod::setDifference(left, right); ASSERT_EQUALS(2UL, diff.first.size()); // 0, 6. ASSERT_EQUALS(3UL, diff.second.size()); // -1, 3, 7. for (auto&& keyString : diff.first) { |