summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlberto Massari <alberto.massari@mongodb.com>2022-09-29 12:23:22 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-29 13:28:17 +0000
commit19515cea589979e7243ccf101a375a3c63881a27 (patch)
treee512978d6e7782c3ed2938e7975588f46a09be27
parent610dbc3b0bea69b9622429e4d5e550b95d2f8ffe (diff)
downloadmongo-19515cea589979e7243ccf101a375a3c63881a27.tar.gz
SERVER-66511 Improve performances of sort+limit stages in SBE
-rw-r--r--src/mongo/bson/bsonobj.cpp6
-rw-r--r--src/mongo/bson/bsonobj.h6
-rw-r--r--src/mongo/db/exec/document_value/document.h7
-rw-r--r--src/mongo/db/exec/document_value/value.h1
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.cpp16
-rw-r--r--src/mongo/db/exec/sort_executor.h5
-rw-r--r--src/mongo/db/exec/working_set.cpp4
-rw-r--r--src/mongo/db/exec/working_set.h1
-rw-r--r--src/mongo/db/index/column_store_sorter.cpp12
-rw-r--r--src/mongo/db/index/column_store_sorter.h6
-rw-r--r--src/mongo/db/index/index_access_method.cpp5
-rw-r--r--src/mongo/db/pipeline/document_source_bucket_auto.cpp5
-rw-r--r--src/mongo/db/pipeline/document_source_group.cpp6
-rw-r--r--src/mongo/db/sorter/sorter.cpp114
-rw-r--r--src/mongo/db/sorter/sorter.h12
-rw-r--r--src/mongo/db/sorter/sorter_test.cpp7
-rw-r--r--src/mongo/db/storage/key_string.h1
17 files changed, 128 insertions, 86 deletions
diff --git a/src/mongo/bson/bsonobj.cpp b/src/mongo/bson/bsonobj.cpp
index 0149eeece7d..c462031bffd 100644
--- a/src/mongo/bson/bsonobj.cpp
+++ b/src/mongo/bson/bsonobj.cpp
@@ -129,6 +129,12 @@ BSONObj BSONObj::copy() const {
return BSONObj(std::move(storage));
}
+void BSONObj::makeOwned() {
+ if (!isOwned()) {
+ *this = copy();
+ }
+}
+
BSONObj BSONObj::getOwned() const {
if (isOwned())
return *this;
diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h
index 1a51b370678..a0088c8b70d 100644
--- a/src/mongo/bson/bsonobj.h
+++ b/src/mongo/bson/bsonobj.h
@@ -265,6 +265,12 @@ public:
BSONObj copy() const;
/**
+ * If the data buffer is not under the control of this BSONObj, allocate
+ * a separate copy and make this object a fully owned one.
+ */
+ void makeOwned();
+
+ /**
* @return a new full (and owned) redacted copy of the object.
*/
BSONObj redact(bool onlyEncryptedFields = false) const;
diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h
index 641a89f099e..b3990b7e0ae 100644
--- a/src/mongo/db/exec/document_value/document.h
+++ b/src/mongo/db/exec/document_value/document.h
@@ -336,6 +336,13 @@ public:
Document getOwned() &&;
/**
+ * Needed to satisfy the Sorter interface. This method throws an assertion.
+ */
+ void makeOwned() {
+ MONGO_UNREACHABLE;
+ }
+
+ /**
* Returns true if the underlying BSONObj is owned.
*/
bool isOwned() const {
diff --git a/src/mongo/db/exec/document_value/value.h b/src/mongo/db/exec/document_value/value.h
index bf54221326c..7da4b971c34 100644
--- a/src/mongo/db/exec/document_value/value.h
+++ b/src/mongo/db/exec/document_value/value.h
@@ -388,6 +388,7 @@ public:
Value getOwned() const {
return *this;
}
+ void makeOwned() {}
/// Members to support parsing/deserialization from IDL generated code.
void serializeForIDL(StringData fieldName, BSONObjBuilder* builder) const;
diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp
index 6181229ff7e..1443a1d5e67 100644
--- a/src/mongo/db/exec/sbe/stages/sort.cpp
+++ b/src/mongo/db/exec/sbe/stages/sort.cpp
@@ -130,13 +130,11 @@ void SortStage::makeSorter() {
_specificStats.limit != std::numeric_limits<size_t>::max() ? _specificStats.limit : 0;
opts.moveSortedDataIntoIterator = true;
- auto comp = [&](const SorterData& lhs, const SorterData& rhs) {
- auto size = lhs.first.size();
- auto& left = lhs.first;
- auto& right = rhs.first;
+ auto comp = [&](const value::MaterializedRow& lhs, const value::MaterializedRow& rhs) {
+ auto size = lhs.size();
for (size_t idx = 0; idx < size; ++idx) {
- auto [lhsTag, lhsVal] = left.getViewOfValue(idx);
- auto [rhsTag, rhsVal] = right.getViewOfValue(idx);
+ auto [lhsTag, lhsVal] = lhs.getViewOfValue(idx);
+ auto [rhsTag, rhsVal] = rhs.getViewOfValue(idx);
auto [tag, val] = value::compareValue(lhsTag, lhsVal, rhsTag, rhsVal);
auto result = value::bitcastTo<int32_t>(val);
@@ -185,15 +183,13 @@ void SortStage::open(bool reOpen) {
size_t idx = 0;
for (auto accessor : _inKeyAccessors) {
auto [tag, val] = accessor->getViewOfValue();
- auto [cTag, cVal] = copyValue(tag, val);
- keys.reset(idx++, true, cTag, cVal);
+ keys.reset(idx++, false, tag, val);
}
idx = 0;
for (auto accessor : _inValueAccessors) {
auto [tag, val] = accessor->getViewOfValue();
- auto [cTag, cVal] = copyValue(tag, val);
- vals.reset(idx++, true, cTag, cVal);
+ vals.reset(idx++, false, tag, val);
}
_sorter->emplace(std::move(keys), std::move(vals));
diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h
index 8120f52caa9..ffa88b44479 100644
--- a/src/mongo/db/exec/sort_executor.h
+++ b/src/mongo/db/exec/sort_executor.h
@@ -55,9 +55,8 @@ public:
class Comparator {
public:
Comparator(const SortPattern& sortPattern) : _sortKeyComparator(sortPattern) {}
- int operator()(const typename DocumentSorter::Data& lhs,
- const typename DocumentSorter::Data& rhs) const {
- return _sortKeyComparator(lhs.first, rhs.first);
+ int operator()(const Value& lhs, const Value& rhs) const {
+ return _sortKeyComparator(lhs, rhs);
}
private:
diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp
index ccb5b010819..747e9bf03c8 100644
--- a/src/mongo/db/exec/working_set.cpp
+++ b/src/mongo/db/exec/working_set.cpp
@@ -311,6 +311,10 @@ SortableWorkingSetMember SortableWorkingSetMember::getOwned() const {
return ret;
}
+void SortableWorkingSetMember::makeOwned() {
+ _holder->makeObjOwnedIfNeeded();
+}
+
WorkingSetRegisteredIndexId WorkingSet::registerIndexAccessMethod(
const IndexAccessMethod* indexAccess) {
for (WorkingSetRegisteredIndexId i = 0; i < _registeredIndexes.size(); ++i) {
diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h
index 67857785266..6f061e93454 100644
--- a/src/mongo/db/exec/working_set.h
+++ b/src/mongo/db/exec/working_set.h
@@ -289,6 +289,7 @@ public:
}
SortableWorkingSetMember getOwned() const;
+ void makeOwned();
private:
std::shared_ptr<WorkingSetMember> _holder;
diff --git a/src/mongo/db/index/column_store_sorter.cpp b/src/mongo/db/index/column_store_sorter.cpp
index 7e0eb141227..cd7dc7993ec 100644
--- a/src/mongo/db/index/column_store_sorter.cpp
+++ b/src/mongo/db/index/column_store_sorter.cpp
@@ -35,13 +35,11 @@
namespace mongo {
struct ComparisonForPathAndRid {
- int operator()(const std::pair<ColumnStoreSorter::Key, ColumnStoreSorter::Value>& left,
- const std::pair<ColumnStoreSorter::Key, ColumnStoreSorter::Value>& right) const {
- auto stringComparison = left.first.path.compare(right.first.path);
- return (stringComparison != 0) ? stringComparison
- : ((left.first.rowId == right.first.rowId)
- ? 0
- : (left.first.rowId > right.first.rowId ? 1 : -1));
+ int operator()(const ColumnStoreSorter::Key& left, const ColumnStoreSorter::Key& right) const {
+ auto stringComparison = left.path.compare(right.path);
+ return (stringComparison != 0)
+ ? stringComparison
+ : ((left.rowId == right.rowId) ? 0 : (left.rowId > right.rowId ? 1 : -1));
}
};
diff --git a/src/mongo/db/index/column_store_sorter.h b/src/mongo/db/index/column_store_sorter.h
index 3bdf9f72b07..0a3e398e8bf 100644
--- a/src/mongo/db/index/column_store_sorter.h
+++ b/src/mongo/db/index/column_store_sorter.h
@@ -86,6 +86,9 @@ public:
Key getOwned() const {
MONGO_UNREACHABLE;
}
+ void makeOwned() {
+ MONGO_UNREACHABLE;
+ }
};
struct Value {
@@ -103,6 +106,9 @@ public:
Value getOwned() const {
MONGO_UNREACHABLE;
}
+ void makeOwned() {
+ MONGO_UNREACHABLE;
+ }
};
using Iterator = SortIteratorInterface<Key, Value>;
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index ec2c609a86e..db5a1974aa1 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -158,9 +158,8 @@ MultikeyPaths createMultikeyPaths(const std::vector<MultikeyPath>& multikeyPaths
} // namespace
struct BtreeExternalSortComparison {
- typedef std::pair<KeyString::Value, mongo::NullValue> Data;
- int operator()(const Data& l, const Data& r) const {
- return l.first.compare(r.first);
+ int operator()(const KeyString::Value& l, const KeyString::Value& r) const {
+ return l.compare(r);
}
};
diff --git a/src/mongo/db/pipeline/document_source_bucket_auto.cpp b/src/mongo/db/pipeline/document_source_bucket_auto.cpp
index 9d438d4fca1..9b086ea0d87 100644
--- a/src/mongo/db/pipeline/document_source_bucket_auto.cpp
+++ b/src/mongo/db/pipeline/document_source_bucket_auto.cpp
@@ -160,9 +160,8 @@ DocumentSource::GetNextResult DocumentSourceBucketAuto::populateSorter() {
opts.tempDir = pExpCtx->tempDir;
}
const auto& valueCmp = pExpCtx->getValueComparator();
- auto comparator = [valueCmp](const Sorter<Value, Document>::Data& lhs,
- const Sorter<Value, Document>::Data& rhs) {
- return valueCmp.compare(lhs.first, rhs.first);
+ auto comparator = [valueCmp](const Value& lhs, const Value& rhs) {
+ return valueCmp.compare(lhs, rhs);
};
_sorter.reset(Sorter<Value, Document>::make(opts, comparator));
diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp
index c7ccdaa4d73..5a0ff0328de 100644
--- a/src/mongo/db/pipeline/document_source_group.cpp
+++ b/src/mongo/db/pipeline/document_source_group.cpp
@@ -554,12 +554,10 @@ using GroupsMap = DocumentSourceGroup::GroupsMap;
class SorterComparator {
public:
- typedef pair<Value, Value> Data;
-
SorterComparator(ValueComparator valueComparator) : _valueComparator(valueComparator) {}
- int operator()(const Data& lhs, const Data& rhs) const {
- return _valueComparator.compare(lhs.first, rhs.first);
+ int operator()(const Value& lhs, const Value& rhs) const {
+ return _valueComparator.compare(lhs, rhs);
}
private:
diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp
index c39876dd1da..cf325a66b67 100644
--- a/src/mongo/db/sorter/sorter.cpp
+++ b/src/mongo/db/sorter/sorter.cpp
@@ -121,8 +121,8 @@ inline std::string myErrnoWithDescription() {
return sb.str();
}
-template <typename Data, typename Comparator>
-void dassertCompIsSane(const Comparator& comp, const Data& lhs, const Data& rhs) {
+template <typename Key, typename Comparator>
+void dassertCompIsSane(const Comparator& comp, const Key& lhs, const Key& rhs) {
#if defined(MONGO_CONFIG_DEBUG_BUILD) && !defined(_MSC_VER)
// MSVC++ already does similar verification in debug mode in addition to using
// algorithms that do more comparisons. Doing our own verification in addition makes
@@ -534,8 +534,8 @@ private:
template <typename Ptr>
bool operator()(const Ptr& lhs, const Ptr& rhs) const {
// first compare data
- dassertCompIsSane(_comp, lhs->current(), rhs->current());
- int ret = _comp(lhs->current(), rhs->current());
+ dassertCompIsSane(_comp, lhs->current().first, rhs->current().first);
+ int ret = _comp(lhs->current().first, rhs->current().first);
if (ret)
return ret > 0;
@@ -710,12 +710,16 @@ public:
this->_stats.setSpilledRanges(this->_iters.size());
}
- void add(const Key& key, const Value& val) {
+ template <typename Generator>
+ void addImpl(const Key& key, const Value& val, Generator keyValProducer) {
invariant(!_done);
- _data.emplace_back(key.getOwned(), val.getOwned());
-
auto memUsage = key.memUsageForSorter() + val.memUsageForSorter();
+
+ // Invoking keyValProducer could invalidate key and val if it uses move semantics,
+ // don't reference them anymore from this point on.
+ _data.emplace_back(keyValProducer());
+
_memUsed += memUsage;
this->_totalDataSizeSorted += memUsage;
@@ -723,17 +727,16 @@ public:
spill();
}
- void emplace(Key&& key, Value&& val) override {
- invariant(!_done);
-
- auto memUsage = key.memUsageForSorter() + val.memUsageForSorter();
- _memUsed += memUsage;
- this->_totalDataSizeSorted += memUsage;
-
- _data.emplace_back(std::move(key), std::move(val));
+ void add(const Key& key, const Value& val) override {
+ addImpl(key, val, [&]() -> Data { return {key.getOwned(), val.getOwned()}; });
+ }
- if (_memUsed > this->_opts.maxMemoryUsageBytes)
- spill();
+ void emplace(Key&& key, Value&& val) override {
+ addImpl(key, val, [&]() -> Data {
+ key.makeOwned();
+ val.makeOwned();
+ return {std::move(key), std::move(val)};
+ });
}
Iterator* done() {
@@ -758,8 +761,8 @@ private:
public:
explicit STLComparator(const Comparator& comp) : _comp(comp) {}
bool operator()(const Data& lhs, const Data& rhs) const {
- dassertCompIsSane(_comp, lhs, rhs);
- return _comp(lhs, rhs) < 0;
+ dassertCompIsSane(_comp, lhs.first, rhs.first);
+ return _comp(lhs.first, rhs.first) < 0;
}
private:
@@ -820,19 +823,32 @@ public:
verify(opts.limit == 1);
}
- void add(const Key& key, const Value& val) {
- Data contender(key, val);
-
+ template <typename Generator>
+ void addImpl(const Key& key, const Value& val, Generator keyValProducer) {
this->_numSorted += 1;
if (_haveData) {
- dassertCompIsSane(_comp, _best, contender);
- if (_comp(_best, contender) <= 0)
+ dassertCompIsSane(_comp, _best.first, key);
+ if (_comp(_best.first, key) <= 0)
return; // not good enough
} else {
_haveData = true;
}
- _best = {contender.first.getOwned(), contender.second.getOwned()};
+ // Invoking keyValProducer could invalidate key and val if it uses move semantics,
+ // don't reference them anymore from this point on.
+ _best = keyValProducer();
+ }
+
+ void add(const Key& key, const Value& val) override {
+ addImpl(key, val, [&]() -> Data { return {key.getOwned(), val.getOwned()}; });
+ }
+
+ void emplace(Key&& key, Value&& val) override {
+ addImpl(key, val, [&]() -> Data {
+ key.makeOwned();
+ val.makeOwned();
+ return {std::move(key), std::move(val)};
+ });
}
Iterator* done() {
@@ -883,21 +899,24 @@ public:
}
}
- void add(const Key& key, const Value& val) {
+ template <typename Generator>
+ void addImpl(const Key& key, const Value& val, Generator keyValProducer) {
invariant(!_done);
this->_numSorted += 1;
STLComparator less(this->_comp);
- Data contender(key, val);
if (_data.size() < this->_opts.limit) {
- if (_haveCutoff && !less(contender, _cutoff))
+ if (_haveCutoff && this->_comp(key, _cutoff.first) >= 0)
return;
- _data.emplace_back(contender.first.getOwned(), contender.second.getOwned());
-
auto memUsage = key.memUsageForSorter() + val.memUsageForSorter();
+
+ // Invoking keyValProducer could invalidate key and val if it uses move semantics,
+ // don't reference them anymore from this point on.
+ _data.emplace_back(keyValProducer());
+
_memUsed += memUsage;
this->_totalDataSizeSorted += memUsage;
@@ -912,7 +931,7 @@ public:
invariant(_data.size() == this->_opts.limit);
- if (!less(contender, _data.front()))
+ if (this->_comp(key, _data.front().first) >= 0)
return; // not good enough
// Remove the old worst pair and insert the contender, adjusting _memUsed
@@ -925,13 +944,27 @@ public:
_memUsed -= _data.front().second.memUsageForSorter();
std::pop_heap(_data.begin(), _data.end(), less);
- _data.back() = {contender.first.getOwned(), contender.second.getOwned()};
+ // Invoking keyValProducer could invalidate key and val if it uses move semantics,
+ // don't reference them anymore from this point on.
+ _data.back() = keyValProducer();
std::push_heap(_data.begin(), _data.end(), less);
if (_memUsed > this->_opts.maxMemoryUsageBytes)
spill();
}
+ void add(const Key& key, const Value& val) override {
+ addImpl(key, val, [&]() -> Data { return {key.getOwned(), val.getOwned()}; });
+ }
+
+ void emplace(Key&& key, Value&& val) override {
+ addImpl(key, val, [&]() -> Data {
+ key.makeOwned();
+ val.makeOwned();
+ return {std::move(key), std::move(val)};
+ });
+ }
+
Iterator* done() {
if (this->_iters.empty()) {
sort();
@@ -954,8 +987,8 @@ private:
public:
explicit STLComparator(const Comparator& comp) : _comp(comp) {}
bool operator()(const Data& lhs, const Data& rhs) const {
- dassertCompIsSane(_comp, lhs, rhs);
- return _comp(lhs, rhs) < 0;
+ dassertCompIsSane(_comp, lhs.first, rhs.first);
+ return _comp(lhs.first, rhs.first) < 0;
}
private:
@@ -1381,7 +1414,6 @@ BoundedSorter<Key, Value, Comparator, BoundMaker>::BoundedSorter(const SortOptio
: BoundedSorterInterface<Key, Value>(opts),
compare(comp),
makeBound(makeBound),
- _comparePairs{compare},
_checkInput(checkInput),
_opts(opts),
_heap(Greater{&compare}),
@@ -1494,7 +1526,7 @@ std::pair<Key, Value> BoundedSorter<Key, Value, Comparator, BoundMaker>::next()
};
if (!_heap.empty() && _spillIter) {
- if (_comparePairs(_heap.top(), _spillIter->current()) <= 0) {
+ if (compare(_heap.top().first, _spillIter->current().first) <= 0) {
pullFromHeap();
} else {
pullFromSpilled();
@@ -1548,12 +1580,12 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() {
}
std::shared_ptr<SpillIterator> iteratorPtr(writer.done());
- if (auto* mergeIter = static_cast<typename sorter::MergeIterator<Key, Value, PairComparator>*>(
+ if (auto* mergeIter = static_cast<typename sorter::MergeIterator<Key, Value, Comparator>*>(
_spillIter.get())) {
mergeIter->addSource(std::move(iteratorPtr));
} else {
std::vector<std::shared_ptr<SpillIterator>> iters{std::move(iteratorPtr)};
- _spillIter.reset(SpillIterator::merge(iters, _opts, _comparePairs));
+ _spillIter.reset(SpillIterator::merge(iters, _opts, compare));
}
dassert(_spillIter->more());
@@ -1561,12 +1593,6 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() {
_memUsed = 0;
}
-template <typename Key, typename Value, typename Comparator, typename BoundMaker>
-int BoundedSorter<Key, Value, Comparator, BoundMaker>::PairComparator::operator()(
- const std::pair<Key, Value>& p1, const std::pair<Key, Value>& p2) const {
- return compare(p1.first, p2.first);
-}
-
//
// Factory Functions
//
diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h
index d0e31cce4a8..ae9200389d5 100644
--- a/src/mongo/db/sorter/sorter.h
+++ b/src/mongo/db/sorter/sorter.h
@@ -199,6 +199,7 @@ public:
NullValue getOwned() const {
return {};
}
+ void makeOwned() {}
};
/**
@@ -366,9 +367,8 @@ public:
const Settings& settings = Settings());
virtual void add(const Key&, const Value&) = 0;
- virtual void emplace(Key&& k, Value&& v) {
- add(k, v);
- }
+ virtual void emplace(Key&& k, Value&& v) = 0;
+
/**
* Cannot add more data after calling done().
*/
@@ -544,15 +544,9 @@ public:
private:
using SpillIterator = SortIteratorInterface<Key, Value>;
- struct PairComparator {
- int operator()(const std::pair<Key, Value>& p1, const std::pair<Key, Value>& p2) const;
- const Comparator& compare;
- };
void _spill();
- const PairComparator _comparePairs;
-
bool _checkInput;
size_t _numSorted = 0; // Keeps track of the number of keys sorted.
diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp
index 0394e1f0fa0..12e433217ef 100644
--- a/src/mongo/db/sorter/sorter_test.cpp
+++ b/src/mongo/db/sorter/sorter_test.cpp
@@ -100,6 +100,7 @@ public:
IntWrapper getOwned() const {
return *this;
}
+ void makeOwned() {}
std::string toString() const {
return std::to_string(_i);
@@ -117,10 +118,10 @@ enum Direction { ASC = 1, DESC = -1 };
class IWComparator {
public:
IWComparator(Direction dir = ASC) : _dir(dir) {}
- int operator()(const IWPair& lhs, const IWPair& rhs) const {
- if (lhs.first == rhs.first)
+ int operator()(const IntWrapper& lhs, const IntWrapper& rhs) const {
+ if (lhs == rhs)
return 0;
- if (lhs.first < rhs.first)
+ if (lhs < rhs)
return -1 * _dir;
return 1 * _dir;
}
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 6a7842ef203..4539b946f6d 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -494,6 +494,7 @@ public:
Value getOwned() const {
return *this;
}
+ void makeOwned() {}
Version getVersion() const {
return _version;