diff options
author | Alberto Massari <alberto.massari@mongodb.com> | 2022-09-29 12:23:22 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-29 13:28:17 +0000 |
commit | 19515cea589979e7243ccf101a375a3c63881a27 (patch) | |
tree | e512978d6e7782c3ed2938e7975588f46a09be27 | |
parent | 610dbc3b0bea69b9622429e4d5e550b95d2f8ffe (diff) | |
download | mongo-19515cea589979e7243ccf101a375a3c63881a27.tar.gz |
SERVER-66511 Improve performances of sort+limit stages in SBE
-rw-r--r-- | src/mongo/bson/bsonobj.cpp | 6 | ||||
-rw-r--r-- | src/mongo/bson/bsonobj.h | 6 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/document.h | 7 | ||||
-rw-r--r-- | src/mongo/db/exec/document_value/value.h | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/sort.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/exec/sort_executor.h | 5 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set.h | 1 | ||||
-rw-r--r-- | src/mongo/db/index/column_store_sorter.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/index/column_store_sorter.h | 6 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_bucket_auto.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_group.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter.cpp | 114 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter.h | 12 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter_test.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/storage/key_string.h | 1 |
17 files changed, 128 insertions, 86 deletions
diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp index 0149eeece7d..c462031bffd 100644 --- a/src/mongo/bson/bsonobj.cpp +++ b/src/mongo/bson/bsonobj.cpp @@ -129,6 +129,12 @@ BSONObj BSONObj::copy() const { return BSONObj(std::move(storage)); } +void BSONObj::makeOwned() { + if (!isOwned()) { + *this = copy(); + } +} + BSONObj BSONObj::getOwned() const { if (isOwned()) return *this; diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h index 1a51b370678..a0088c8b70d 100644 --- a/src/mongo/bson/bsonobj.h +++ b/src/mongo/bson/bsonobj.h @@ -265,6 +265,12 @@ public: BSONObj copy() const; /** + * If the data buffer is not under the control of this BSONObj, allocate + * a separate copy and make this object a fully owned one. + */ + void makeOwned(); + + /** * @return a new full (and owned) redacted copy of the object. */ BSONObj redact(bool onlyEncryptedFields = false) const; diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h index 641a89f099e..b3990b7e0ae 100644 --- a/src/mongo/db/exec/document_value/document.h +++ b/src/mongo/db/exec/document_value/document.h @@ -336,6 +336,13 @@ public: Document getOwned() &&; /** + * Needed to satisfy the Sorter interface. This method throws an assertion. + */ + void makeOwned() { + MONGO_UNREACHABLE; + } + + /** * Returns true if the underlying BSONObj is owned. */ bool isOwned() const { diff --git a/src/mongo/db/exec/document_value/value.h b/src/mongo/db/exec/document_value/value.h index bf54221326c..7da4b971c34 100644 --- a/src/mongo/db/exec/document_value/value.h +++ b/src/mongo/db/exec/document_value/value.h @@ -388,6 +388,7 @@ public: Value getOwned() const { return *this; } + void makeOwned() {} /// Members to support parsing/deserialization from IDL generated code. void serializeForIDL(StringData fieldName, BSONObjBuilder* builder) const; diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp index 6181229ff7e..1443a1d5e67 100644 --- a/src/mongo/db/exec/sbe/stages/sort.cpp +++ b/src/mongo/db/exec/sbe/stages/sort.cpp @@ -130,13 +130,11 @@ void SortStage::makeSorter() { _specificStats.limit != std::numeric_limits<size_t>::max() ? _specificStats.limit : 0; opts.moveSortedDataIntoIterator = true; - auto comp = [&](const SorterData& lhs, const SorterData& rhs) { - auto size = lhs.first.size(); - auto& left = lhs.first; - auto& right = rhs.first; + auto comp = [&](const value::MaterializedRow& lhs, const value::MaterializedRow& rhs) { + auto size = lhs.size(); for (size_t idx = 0; idx < size; ++idx) { - auto [lhsTag, lhsVal] = left.getViewOfValue(idx); - auto [rhsTag, rhsVal] = right.getViewOfValue(idx); + auto [lhsTag, lhsVal] = lhs.getViewOfValue(idx); + auto [rhsTag, rhsVal] = rhs.getViewOfValue(idx); auto [tag, val] = value::compareValue(lhsTag, lhsVal, rhsTag, rhsVal); auto result = value::bitcastTo<int32_t>(val); @@ -185,15 +183,13 @@ void SortStage::open(bool reOpen) { size_t idx = 0; for (auto accessor : _inKeyAccessors) { auto [tag, val] = accessor->getViewOfValue(); - auto [cTag, cVal] = copyValue(tag, val); - keys.reset(idx++, true, cTag, cVal); + keys.reset(idx++, false, tag, val); } idx = 0; for (auto accessor : _inValueAccessors) { auto [tag, val] = accessor->getViewOfValue(); - auto [cTag, cVal] = copyValue(tag, val); - vals.reset(idx++, true, cTag, cVal); + vals.reset(idx++, false, tag, val); } _sorter->emplace(std::move(keys), std::move(vals)); diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h index 8120f52caa9..ffa88b44479 100644 --- a/src/mongo/db/exec/sort_executor.h +++ b/src/mongo/db/exec/sort_executor.h @@ -55,9 +55,8 @@ public: class Comparator { public: Comparator(const SortPattern& sortPattern) : _sortKeyComparator(sortPattern) {} - int operator()(const typename DocumentSorter::Data& lhs, - const typename DocumentSorter::Data& rhs) const { - return _sortKeyComparator(lhs.first, rhs.first); + int operator()(const Value& lhs, const Value& rhs) const { + return _sortKeyComparator(lhs, rhs); } private: diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp index ccb5b010819..747e9bf03c8 100644 --- a/src/mongo/db/exec/working_set.cpp +++ b/src/mongo/db/exec/working_set.cpp @@ -311,6 +311,10 @@ SortableWorkingSetMember SortableWorkingSetMember::getOwned() const { return ret; } +void SortableWorkingSetMember::makeOwned() { + _holder->makeObjOwnedIfNeeded(); +} + WorkingSetRegisteredIndexId WorkingSet::registerIndexAccessMethod( const IndexAccessMethod* indexAccess) { for (WorkingSetRegisteredIndexId i = 0; i < _registeredIndexes.size(); ++i) { diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h index 67857785266..6f061e93454 100644 --- a/src/mongo/db/exec/working_set.h +++ b/src/mongo/db/exec/working_set.h @@ -289,6 +289,7 @@ public: } SortableWorkingSetMember getOwned() const; + void makeOwned(); private: std::shared_ptr<WorkingSetMember> _holder; diff --git a/src/mongo/db/index/column_store_sorter.cpp b/src/mongo/db/index/column_store_sorter.cpp index 7e0eb141227..cd7dc7993ec 100644 --- a/src/mongo/db/index/column_store_sorter.cpp +++ b/src/mongo/db/index/column_store_sorter.cpp @@ -35,13 +35,11 @@ namespace mongo { struct ComparisonForPathAndRid { - int operator()(const std::pair<ColumnStoreSorter::Key, ColumnStoreSorter::Value>& left, - const std::pair<ColumnStoreSorter::Key, ColumnStoreSorter::Value>& right) const { - auto stringComparison = left.first.path.compare(right.first.path); - return (stringComparison != 0) ? stringComparison - : ((left.first.rowId == right.first.rowId) - ? 0 - : (left.first.rowId > right.first.rowId ? 1 : -1)); + int operator()(const ColumnStoreSorter::Key& left, const ColumnStoreSorter::Key& right) const { + auto stringComparison = left.path.compare(right.path); + return (stringComparison != 0) + ? stringComparison + : ((left.rowId == right.rowId) ? 0 : (left.rowId > right.rowId ? 1 : -1)); } }; diff --git a/src/mongo/db/index/column_store_sorter.h b/src/mongo/db/index/column_store_sorter.h index 3bdf9f72b07..0a3e398e8bf 100644 --- a/src/mongo/db/index/column_store_sorter.h +++ b/src/mongo/db/index/column_store_sorter.h @@ -86,6 +86,9 @@ public: Key getOwned() const { MONGO_UNREACHABLE; } + void makeOwned() { + MONGO_UNREACHABLE; + } }; struct Value { @@ -103,6 +106,9 @@ public: Value getOwned() const { MONGO_UNREACHABLE; } + void makeOwned() { + MONGO_UNREACHABLE; + } }; using Iterator = SortIteratorInterface<Key, Value>; diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index ec2c609a86e..db5a1974aa1 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -158,9 +158,8 @@ MultikeyPaths createMultikeyPaths(const std::vector<MultikeyPath>& multikeyPaths } // namespace struct BtreeExternalSortComparison { - typedef std::pair<KeyString::Value, mongo::NullValue> Data; - int operator()(const Data& l, const Data& r) const { - return l.first.compare(r.first); + int operator()(const KeyString::Value& l, const KeyString::Value& r) const { + return l.compare(r); } }; diff --git a/src/mongo/db/pipeline/document_source_bucket_auto.cpp b/src/mongo/db/pipeline/document_source_bucket_auto.cpp index 9d438d4fca1..9b086ea0d87 100644 --- a/src/mongo/db/pipeline/document_source_bucket_auto.cpp +++ b/src/mongo/db/pipeline/document_source_bucket_auto.cpp @@ -160,9 +160,8 @@ DocumentSource::GetNextResult DocumentSourceBucketAuto::populateSorter() { opts.tempDir = pExpCtx->tempDir; } const auto& valueCmp = pExpCtx->getValueComparator(); - auto comparator = [valueCmp](const Sorter<Value, Document>::Data& lhs, - const Sorter<Value, Document>::Data& rhs) { - return valueCmp.compare(lhs.first, rhs.first); + auto comparator = [valueCmp](const Value& lhs, const Value& rhs) { + return valueCmp.compare(lhs, rhs); }; _sorter.reset(Sorter<Value, Document>::make(opts, comparator)); diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp index c7ccdaa4d73..5a0ff0328de 100644 --- a/src/mongo/db/pipeline/document_source_group.cpp +++ b/src/mongo/db/pipeline/document_source_group.cpp @@ -554,12 +554,10 @@ using GroupsMap = DocumentSourceGroup::GroupsMap; class SorterComparator { public: - typedef pair<Value, Value> Data; - SorterComparator(ValueComparator valueComparator) : _valueComparator(valueComparator) {} - int operator()(const Data& lhs, const Data& rhs) const { - return _valueComparator.compare(lhs.first, rhs.first); + int operator()(const Value& lhs, const Value& rhs) const { + return _valueComparator.compare(lhs, rhs); } private: diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp index c39876dd1da..cf325a66b67 100644 --- a/src/mongo/db/sorter/sorter.cpp +++ b/src/mongo/db/sorter/sorter.cpp @@ -121,8 +121,8 @@ inline std::string myErrnoWithDescription() { return sb.str(); } -template <typename Data, typename Comparator> -void dassertCompIsSane(const Comparator& comp, const Data& lhs, const Data& rhs) { +template <typename Key, typename Comparator> +void dassertCompIsSane(const Comparator& comp, const Key& lhs, const Key& rhs) { #if defined(MONGO_CONFIG_DEBUG_BUILD) && !defined(_MSC_VER) // MSVC++ already does similar verification in debug mode in addition to using // algorithms that do more comparisons. Doing our own verification in addition makes @@ -534,8 +534,8 @@ private: template <typename Ptr> bool operator()(const Ptr& lhs, const Ptr& rhs) const { // first compare data - dassertCompIsSane(_comp, lhs->current(), rhs->current()); - int ret = _comp(lhs->current(), rhs->current()); + dassertCompIsSane(_comp, lhs->current().first, rhs->current().first); + int ret = _comp(lhs->current().first, rhs->current().first); if (ret) return ret > 0; @@ -710,12 +710,16 @@ public: this->_stats.setSpilledRanges(this->_iters.size()); } - void add(const Key& key, const Value& val) { + template <typename Generator> + void addImpl(const Key& key, const Value& val, Generator keyValProducer) { invariant(!_done); - _data.emplace_back(key.getOwned(), val.getOwned()); - auto memUsage = key.memUsageForSorter() + val.memUsageForSorter(); + + // Invoking keyValProducer could invalidate key and val if it uses move semantics, + // don't reference them anymore from this point on. + _data.emplace_back(keyValProducer()); + _memUsed += memUsage; this->_totalDataSizeSorted += memUsage; @@ -723,17 +727,16 @@ public: spill(); } - void emplace(Key&& key, Value&& val) override { - invariant(!_done); - - auto memUsage = key.memUsageForSorter() + val.memUsageForSorter(); - _memUsed += memUsage; - this->_totalDataSizeSorted += memUsage; - - _data.emplace_back(std::move(key), std::move(val)); + void add(const Key& key, const Value& val) override { + addImpl(key, val, [&]() -> Data { return {key.getOwned(), val.getOwned()}; }); + } - if (_memUsed > this->_opts.maxMemoryUsageBytes) - spill(); + void emplace(Key&& key, Value&& val) override { + addImpl(key, val, [&]() -> Data { + key.makeOwned(); + val.makeOwned(); + return {std::move(key), std::move(val)}; + }); } Iterator* done() { @@ -758,8 +761,8 @@ private: public: explicit STLComparator(const Comparator& comp) : _comp(comp) {} bool operator()(const Data& lhs, const Data& rhs) const { - dassertCompIsSane(_comp, lhs, rhs); - return _comp(lhs, rhs) < 0; + dassertCompIsSane(_comp, lhs.first, rhs.first); + return _comp(lhs.first, rhs.first) < 0; } private: @@ -820,19 +823,32 @@ public: verify(opts.limit == 1); } - void add(const Key& key, const Value& val) { - Data contender(key, val); - + template <typename Generator> + void addImpl(const Key& key, const Value& val, Generator keyValProducer) { this->_numSorted += 1; if (_haveData) { - dassertCompIsSane(_comp, _best, contender); - if (_comp(_best, contender) <= 0) + dassertCompIsSane(_comp, _best.first, key); + if (_comp(_best.first, key) <= 0) return; // not good enough } else { _haveData = true; } - _best = {contender.first.getOwned(), contender.second.getOwned()}; + // Invoking keyValProducer could invalidate key and val if it uses move semantics, + // don't reference them anymore from this point on. + _best = keyValProducer(); + } + + void add(const Key& key, const Value& val) override { + addImpl(key, val, [&]() -> Data { return {key.getOwned(), val.getOwned()}; }); + } + + void emplace(Key&& key, Value&& val) override { + addImpl(key, val, [&]() -> Data { + key.makeOwned(); + val.makeOwned(); + return {std::move(key), std::move(val)}; + }); } Iterator* done() { @@ -883,21 +899,24 @@ public: } } - void add(const Key& key, const Value& val) { + template <typename Generator> + void addImpl(const Key& key, const Value& val, Generator keyValProducer) { invariant(!_done); this->_numSorted += 1; STLComparator less(this->_comp); - Data contender(key, val); if (_data.size() < this->_opts.limit) { - if (_haveCutoff && !less(contender, _cutoff)) + if (_haveCutoff && this->_comp(key, _cutoff.first) >= 0) return; - _data.emplace_back(contender.first.getOwned(), contender.second.getOwned()); - auto memUsage = key.memUsageForSorter() + val.memUsageForSorter(); + + // Invoking keyValProducer could invalidate key and val if it uses move semantics, + // don't reference them anymore from this point on. + _data.emplace_back(keyValProducer()); + _memUsed += memUsage; this->_totalDataSizeSorted += memUsage; @@ -912,7 +931,7 @@ public: invariant(_data.size() == this->_opts.limit); - if (!less(contender, _data.front())) + if (this->_comp(key, _data.front().first) >= 0) return; // not good enough // Remove the old worst pair and insert the contender, adjusting _memUsed @@ -925,13 +944,27 @@ public: _memUsed -= _data.front().second.memUsageForSorter(); std::pop_heap(_data.begin(), _data.end(), less); - _data.back() = {contender.first.getOwned(), contender.second.getOwned()}; + // Invoking keyValProducer could invalidate key and val if it uses move semantics, + // don't reference them anymore from this point on. + _data.back() = keyValProducer(); std::push_heap(_data.begin(), _data.end(), less); if (_memUsed > this->_opts.maxMemoryUsageBytes) spill(); } + void add(const Key& key, const Value& val) override { + addImpl(key, val, [&]() -> Data { return {key.getOwned(), val.getOwned()}; }); + } + + void emplace(Key&& key, Value&& val) override { + addImpl(key, val, [&]() -> Data { + key.makeOwned(); + val.makeOwned(); + return {std::move(key), std::move(val)}; + }); + } + Iterator* done() { if (this->_iters.empty()) { sort(); @@ -954,8 +987,8 @@ private: public: explicit STLComparator(const Comparator& comp) : _comp(comp) {} bool operator()(const Data& lhs, const Data& rhs) const { - dassertCompIsSane(_comp, lhs, rhs); - return _comp(lhs, rhs) < 0; + dassertCompIsSane(_comp, lhs.first, rhs.first); + return _comp(lhs.first, rhs.first) < 0; } private: @@ -1381,7 +1414,6 @@ BoundedSorter<Key, Value, Comparator, BoundMaker>::BoundedSorter(const SortOptio : BoundedSorterInterface<Key, Value>(opts), compare(comp), makeBound(makeBound), - _comparePairs{compare}, _checkInput(checkInput), _opts(opts), _heap(Greater{&compare}), @@ -1494,7 +1526,7 @@ std::pair<Key, Value> BoundedSorter<Key, Value, Comparator, BoundMaker>::next() }; if (!_heap.empty() && _spillIter) { - if (_comparePairs(_heap.top(), _spillIter->current()) <= 0) { + if (compare(_heap.top().first, _spillIter->current().first) <= 0) { pullFromHeap(); } else { pullFromSpilled(); @@ -1548,12 +1580,12 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() { } std::shared_ptr<SpillIterator> iteratorPtr(writer.done()); - if (auto* mergeIter = static_cast<typename sorter::MergeIterator<Key, Value, PairComparator>*>( + if (auto* mergeIter = static_cast<typename sorter::MergeIterator<Key, Value, Comparator>*>( _spillIter.get())) { mergeIter->addSource(std::move(iteratorPtr)); } else { std::vector<std::shared_ptr<SpillIterator>> iters{std::move(iteratorPtr)}; - _spillIter.reset(SpillIterator::merge(iters, _opts, _comparePairs)); + _spillIter.reset(SpillIterator::merge(iters, _opts, compare)); } dassert(_spillIter->more()); @@ -1561,12 +1593,6 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() { _memUsed = 0; } -template <typename Key, typename Value, typename Comparator, typename BoundMaker> -int BoundedSorter<Key, Value, Comparator, BoundMaker>::PairComparator::operator()( - const std::pair<Key, Value>& p1, const std::pair<Key, Value>& p2) const { - return compare(p1.first, p2.first); -} - // // Factory Functions // diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h index d0e31cce4a8..ae9200389d5 100644 --- a/src/mongo/db/sorter/sorter.h +++ b/src/mongo/db/sorter/sorter.h @@ -199,6 +199,7 @@ public: NullValue getOwned() const { return {}; } + void makeOwned() {} }; /** @@ -366,9 +367,8 @@ public: const Settings& settings = Settings()); virtual void add(const Key&, const Value&) = 0; - virtual void emplace(Key&& k, Value&& v) { - add(k, v); - } + virtual void emplace(Key&& k, Value&& v) = 0; + /** * Cannot add more data after calling done(). */ @@ -544,15 +544,9 @@ public: private: using SpillIterator = SortIteratorInterface<Key, Value>; - struct PairComparator { - int operator()(const std::pair<Key, Value>& p1, const std::pair<Key, Value>& p2) const; - const Comparator& compare; - }; void _spill(); - const PairComparator _comparePairs; - bool _checkInput; size_t _numSorted = 0; // Keeps track of the number of keys sorted. diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp index 0394e1f0fa0..12e433217ef 100644 --- a/src/mongo/db/sorter/sorter_test.cpp +++ b/src/mongo/db/sorter/sorter_test.cpp @@ -100,6 +100,7 @@ public: IntWrapper getOwned() const { return *this; } + void makeOwned() {} std::string toString() const { return std::to_string(_i); @@ -117,10 +118,10 @@ enum Direction { ASC = 1, DESC = -1 }; class IWComparator { public: IWComparator(Direction dir = ASC) : _dir(dir) {} - int operator()(const IWPair& lhs, const IWPair& rhs) const { - if (lhs.first == rhs.first) + int operator()(const IntWrapper& lhs, const IntWrapper& rhs) const { + if (lhs == rhs) return 0; - if (lhs.first < rhs.first) + if (lhs < rhs) return -1 * _dir; return 1 * _dir; } diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 6a7842ef203..4539b946f6d 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -494,6 +494,7 @@ public: Value getOwned() const { return *this; } + void makeOwned() {} Version getVersion() const { return _version; |