16 files changed, 188 insertions, 359 deletions
diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h
index cff5085e47a..65cafb5d3bc 100644
--- a/src/mongo/db/exec/plan_stats.h
+++ b/src/mongo/db/exec/plan_stats.h
@@ -615,17 +615,24 @@ struct SortStats : public SpecificStats {
         return sortPattern.objsize() + sizeof(*this);
     }
 
-    // What's our current memory usage?
-    size_t memUsage = 0u;
-
-    // What's our memory limit?
-    size_t memLimit = 0u;
+    // The pattern according to which we are sorting.
+    BSONObj sortPattern;
 
     // The number of results to return from the sort.
-    size_t limit = 0u;
+    uint64_t limit = 0u;
 
-    // The pattern according to which we are sorting.
-    BSONObj sortPattern;
+    // The maximum number of bytes of memory we're willing to use during execution of the sort. If
+    // this limit is exceeded and 'allowDiskUse' is false, the query will fail at execution time. If
+    // 'allowDiskUse' is true, the data will be spilled to disk.
+    uint64_t maxMemoryUsageBytes = 0u;
+
+    // The amount of data we've sorted in bytes. At various times this data may be buffered in
+    // memory or disk-resident, depending on the configuration of 'maxMemoryUsageBytes' and whether
+    // disk use is allowed.
+    uint64_t totalDataSizeBytes = 0u;
+
+    // Whether we spilled data to disk during the execution of this query.
+    bool wasDiskUsed = false;
 };
 
 struct MergeSortStats : public SpecificStats {
diff --git a/src/mongo/db/exec/sort.cpp b/src/mongo/db/exec/sort.cpp
index c7c3cbe3ea7..eb5486b61ee 100644
--- a/src/mongo/db/exec/sort.cpp
+++ b/src/mongo/db/exec/sort.cpp
@@ -27,266 +27,93 @@
  *    it in the license file.
  */
 
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+#include "mongo/platform/basic.h"
 
 #include "mongo/db/exec/sort.h"
-
-#include <algorithm>
-#include <memory>
-
-#include "mongo/db/catalog/collection.h"
-#include "mongo/db/exec/scoped_timer.h"
 #include "mongo/db/exec/working_set_common.h"
-#include "mongo/db/index/btree_key_generator.h"
-#include "mongo/db/index_names.h"
-#include "mongo/db/query/find_common.h"
-#include "mongo/db/query/query_knobs_gen.h"
-#include "mongo/db/query/query_planner.h"
-#include "mongo/util/log.h"
 
 namespace mongo {
 
-using std::endl;
-using std::unique_ptr;
-using std::vector;
-
-// static
-const char* SortStage::kStageType = "SORT";
-
-bool SortStage::WorkingSetComparator::operator()(const SortableDataItem& lhs,
-                                                 const SortableDataItem& rhs) const {
-    int cmp = sortKeyComparator(lhs.sortKey, rhs.sortKey);
-    if (cmp != 0) {
-        return cmp < 0;
-    } else {
-        // Indexes use recordId as a final tiebreaker, and we do the same here, so that SortStage
-        // outputs documents in the same order that an index would have.
-        return lhs.recordId < rhs.recordId;
-    }
-}
-
 SortStage::SortStage(boost::intrusive_ptr<ExpressionContext> expCtx,
                      WorkingSet* ws,
-                     BSONObj sortPattern,
+                     SortPattern sortPattern,
                      uint64_t limit,
                      uint64_t maxMemoryUsageBytes,
                      std::unique_ptr<PlanStage> child)
-    : PlanStage(kStageType, expCtx->opCtx),
+    : PlanStage(kStageType.rawData(), expCtx->opCtx),
       _ws(ws),
-      _sortExecutor(SortPattern{sortPattern, expCtx},
+      _sortExecutor(std::move(sortPattern),
                     limit,
                     maxMemoryUsageBytes,
                     expCtx->tempDir,
-                    expCtx->allowDiskUse),
-      _pattern(std::move(sortPattern)),
-      _limit(limit),
-      _sorted(false),
-      _resultIterator(_data.end()),
-      _memUsage(0) {
+                    expCtx->allowDiskUse) {
     _children.emplace_back(std::move(child));
-
-    BSONObj sortComparator = FindCommon::transformSortSpec(_pattern);
-    _workingSetComparator = std::make_unique<WorkingSetComparator>(sortComparator);
-
-    // If limit > 1, we need to initialize _dataSet here to maintain ordered set of data items while
-    // fetching from the child stage.
-    if (_limit > 1) {
-        const WorkingSetComparator& cmp = *_workingSetComparator;
-        _dataSet.reset(new SortableDataItemSet(cmp));
-    }
-}
-
-bool SortStage::isEOF() {
-    // We're done when our child has no more results, we've sorted the child's results, and
-    // we've returned all sorted results.
-    return child()->isEOF() && _sorted && (_data.end() == _resultIterator);
 }
 
 PlanStage::StageState SortStage::doWork(WorkingSetID* out) {
-    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes.load());
-    if (_memUsage > maxBytes) {
-        str::stream ss;
-        ss << "Sort operation used more than the maximum " << maxBytes
-           << " bytes of RAM. Add an index, or specify a smaller limit.";
-        Status status(ErrorCodes::OperationFailed, ss);
-        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
-        return PlanStage::FAILURE;
-    }
-
     if (isEOF()) {
         return PlanStage::IS_EOF;
     }
 
-    // Still reading in results to sort.
-    if (!_sorted) {
+    if (!_populated) {
         WorkingSetID id = WorkingSet::INVALID_ID;
-        StageState code = child()->work(&id);
-
-        if (PlanStage::ADVANCED == code) {
-            WorkingSetMember* member = _ws->get(id);
-
-            SortableDataItem item;
-            item.wsid = id;
-
-            // We extract the sort key from the WSM's metadata. This must have been generated by a
-            // SortKeyGeneratorStage descendent in the execution tree.
-            item.sortKey = member->metadata().getSortKey();
-
-            if (member->hasRecordId()) {
-                // The RecordId breaks ties when sorting two WSMs with the same sort key.
-                item.recordId = member->recordId;
+        const StageState code = child()->work(&id);
+
+        if (code == PlanStage::ADVANCED) {
+            // The plan must be structured such that a previous stage has attached the sort key
+            // metadata.
+            auto member = _ws->get(id);
+            invariant(member->metadata().hasSortKey());
+
+            auto&& extractedMember = _ws->extract(id);
+
+            try {
+                auto sortKey = extractedMember.metadata().getSortKey();
+                _sortExecutor.add(std::move(sortKey), std::move(extractedMember));
+            } catch (const AssertionException&) {
+                // Propagate runtime errors using the FAILED status code.
+                *out = WorkingSetCommon::allocateStatusMember(_ws, exceptionToStatus());
+                return PlanStage::FAILURE;
             }
 
-            addToBuffer(item);
-
             return PlanStage::NEED_TIME;
-        } else if (PlanStage::IS_EOF == code) {
-            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
-            // unlocked.  Also, this is performing a lot of work for one call to work(...)
-            sortBuffer();
-            _resultIterator = _data.begin();
-            _sorted = true;
+        } else if (code == PlanStage::IS_EOF) {
+            // The child has returned all of its results. Record this fact so that subsequent calls
+            // to 'doWork()' will perform sorting and unspool the sorted results.
+            _populated = true;
+
+            try {
+                _sortExecutor.loadingDone();
+            } catch (const AssertionException&) {
+                // Propagate runtime errors using the FAILED status code.
+                *out = WorkingSetCommon::allocateStatusMember(_ws, exceptionToStatus());
+                return PlanStage::FAILURE;
+            }
+
             return PlanStage::NEED_TIME;
-        } else if (PlanStage::FAILURE == code) {
-            // The stage which produces a failure is responsible for allocating a working set member
-            // with error details.
-            invariant(WorkingSet::INVALID_ID != id);
-            *out = id;
-            return code;
-        } else if (PlanStage::NEED_YIELD == code) {
+        } else {
             *out = id;
         }
 
         return code;
     }
 
-    // Returning results.
-    verify(_resultIterator != _data.end());
-    verify(_sorted);
-    *out = _resultIterator->wsid;
-    _resultIterator++;
+    auto nextWsm = _sortExecutor.getNextWsm();
+    if (!nextWsm) {
+        return PlanStage::IS_EOF;
+    }
 
+    *out = _ws->emplace(std::move(*nextWsm));
     return PlanStage::ADVANCED;
 }
 
-unique_ptr<PlanStageStats> SortStage::getStats() {
+std::unique_ptr<PlanStageStats> SortStage::getStats() {
     _commonStats.isEOF = isEOF();
-    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes.load());
-    _specificStats.memLimit = maxBytes;
-    _specificStats.memUsage = _memUsage;
-    _specificStats.limit = _limit;
-    _specificStats.sortPattern = _pattern.getOwned();
-
-    unique_ptr<PlanStageStats> ret = std::make_unique<PlanStageStats>(_commonStats, STAGE_SORT);
-    ret->specific = std::make_unique<SortStats>(_specificStats);
+    std::unique_ptr<PlanStageStats> ret =
+        std::make_unique<PlanStageStats>(_commonStats, STAGE_SORT);
+    ret->specific = _sortExecutor.stats();
     ret->children.emplace_back(child()->getStats());
     return ret;
 }
 
-const SpecificStats* SortStage::getSpecificStats() const {
-    return &_specificStats;
-}
-
-/**
- * addToBuffer() and sortBuffer() work differently based on the
- * configured limit. addToBuffer() is also responsible for
- * performing some accounting on the overall memory usage to
- * make sure we're not using too much memory.
- *
- * limit == 0:
- *     addToBuffer() - Adds item to vector.
- *     sortBuffer() - Sorts vector.
- * limit == 1:
- *     addToBuffer() - Replaces first item in vector with max of
- *                     current and new item.
- *                     Updates memory usage if item was replaced.
- *     sortBuffer() - Does nothing.
- * limit > 1:
- *     addToBuffer() - Does not update vector. Adds item to set.
- *                     If size of set exceeds limit, remove item from set
- *                     with lowest key. Updates memory usage accordingly.
- *     sortBuffer() - Copies items from set to vectors.
- */
-void SortStage::addToBuffer(const SortableDataItem& item) {
-    // Holds ID of working set member to be freed at end of this function.
-    WorkingSetID wsidToFree = WorkingSet::INVALID_ID;
-
-    WorkingSetMember* member = _ws->get(item.wsid);
-    if (_limit == 0) {
-        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
-        member->makeObjOwnedIfNeeded();
-        _data.push_back(item);
-        _memUsage += member->getMemUsage();
-    } else if (_limit == 1) {
-        if (_data.empty()) {
-            member->makeObjOwnedIfNeeded();
-            _data.push_back(item);
-            _memUsage = member->getMemUsage();
-            return;
-        }
-        wsidToFree = item.wsid;
-        const WorkingSetComparator& cmp = *_workingSetComparator;
-        // Compare new item with existing item in vector.
-        if (cmp(item, _data[0])) {
-            wsidToFree = _data[0].wsid;
-            member->makeObjOwnedIfNeeded();
-            _data[0] = item;
-            _memUsage = member->getMemUsage();
-        }
-    } else {
-        // Update data item set instead of vector
-        // Limit not reached - insert and return
-        vector<SortableDataItem>::size_type limit(_limit);
-        if (_dataSet->size() < limit) {
-            member->makeObjOwnedIfNeeded();
-            _dataSet->insert(item);
-            _memUsage += member->getMemUsage();
-            return;
-        }
-        // Limit will be exceeded - compare with item with lowest key
-        // If new item does not have a lower key value than last item,
-        // do nothing.
-        wsidToFree = item.wsid;
-        SortableDataItemSet::const_iterator lastItemIt = --(_dataSet->end());
-        const SortableDataItem& lastItem = *lastItemIt;
-        const WorkingSetComparator& cmp = *_workingSetComparator;
-        if (cmp(item, lastItem)) {
-            _memUsage -= _ws->get(lastItem.wsid)->getMemUsage();
-            _memUsage += member->getMemUsage();
-            wsidToFree = lastItem.wsid;
-            // According to std::set iterator validity rules,
-            // it does not matter which of erase()/insert() happens first.
-            // Here, we choose to erase first to release potential resources
-            // used by the last item and to keep the scope of the iterator to a minimum.
-            _dataSet->erase(lastItemIt);
-            member->makeObjOwnedIfNeeded();
-            _dataSet->insert(item);
-        }
-    }
-
-    // There was a buffered result which we can throw out because we are executing a sort with a
-    // limit, and the result is now known not to be in the top k set. Free the working set member
-    // associated with 'wsidToFree'.
-    if (wsidToFree != WorkingSet::INVALID_ID) {
-        _ws->free(wsidToFree);
-    }
-}
-
-void SortStage::sortBuffer() {
-    if (_limit == 0) {
-        const WorkingSetComparator& cmp = *_workingSetComparator;
-        std::sort(_data.begin(), _data.end(), cmp);
-    } else if (_limit == 1) {
-        // Buffer contains either 0 or 1 item so it is already in a sorted state.
-        return;
-    } else {
-        // Set already contains items in sorted order, so we simply copy the items
-        // from the set to the vector.
-        // Release the memory for the set after the copy.
-        vector<SortableDataItem> newData(_dataSet->begin(), _dataSet->end());
-        _data.swap(newData);
-        _dataSet.reset();
-    }
-}
-
 }  // namespace mongo
diff --git a/src/mongo/db/exec/sort.h b/src/mongo/db/exec/sort.h
index 53957917581..f437ce0b719 100644
--- a/src/mongo/db/exec/sort.h
+++ b/src/mongo/db/exec/sort.h
@@ -51,14 +51,19 @@ namespace mongo {
  */
 class SortStage final : public PlanStage {
 public:
+    static constexpr StringData kStageType = "SORT"_sd;
+
     SortStage(boost::intrusive_ptr<ExpressionContext> expCtx,
               WorkingSet* ws,
-              BSONObj sortPattern,
+              SortPattern sortPattern,
               uint64_t limit,
               uint64_t maxMemoryUsageBytes,
               std::unique_ptr<PlanStage> child);
 
-    bool isEOF() final;
+    bool isEOF() final {
+        return _sortExecutor.isEOF();
+    }
+
     StageState doWork(WorkingSetID* out) final;
 
     StageType stageType() const final {
@@ -67,91 +72,24 @@ public:
 
     std::unique_ptr<PlanStageStats> getStats();
 
-    const SpecificStats* getSpecificStats() const final;
-
-    static const char* kStageType;
+    /**
+     * Returns nullptr. Stats related to sort execution must be extracted with 'getStats()', since
+     * they are retrieved on demand from the underlying sort execution machinery.
+     */
+    const SpecificStats* getSpecificStats() const final {
+        return nullptr;
+    }
 
 private:
     // Not owned by us.
     WorkingSet* _ws;
 
-    // TODO SERVER-42182: Use SortExecutor to implement 'doWork()'.
     SortExecutor _sortExecutor;
 
-    // The raw sort _pattern as expressed by the user
-    BSONObj _pattern;
-
-    // Equal to 0 for no limit.
-    size_t _limit;
-
-    //
-    // Data storage
-    //
-
-    // Have we sorted our data? If so, we can access _resultIterator. If not,
-    // we're still populating _data.
-    bool _sorted;
-
-    // Collection of working set members to sort with their respective sort key.
-    struct SortableDataItem {
-        WorkingSetID wsid;
-        Value sortKey;
-        // Since we must replicate the behavior of a covered sort as much as possible we use the
-        // RecordId to break sortKey ties.
-        // See sorta.js.
-        RecordId recordId;
-    };
-
-    // Comparison object for data buffers (vector and set). Items are compared on (sortKey, loc).
-    // This is also how the items are ordered in the indices. Keys are compared using
-    // BSONObj::woCompare() with RecordId as a tie-breaker.
-    //
-    // We are comparing keys generated by the SortKeyGenerator, which are already ordered with
-    // respect the collation. Therefore, we explicitly avoid comparing using a collator here.
-    struct WorkingSetComparator {
-        explicit WorkingSetComparator(const BSONObj& pattern) : sortKeyComparator(pattern) {}
-
-        bool operator()(const SortableDataItem& lhs, const SortableDataItem& rhs) const;
-
-        SortKeyComparator sortKeyComparator;
-    };
-
-    /**
-     * Inserts one item into data buffer (vector or set).
-     * If limit is exceeded, remove item with lowest key.
-     */
-    void addToBuffer(const SortableDataItem& item);
-
-    /**
-     * Sorts data buffer.
-     * Assumes no more items will be added to buffer.
-     * If data is stored in set, copy set
-     * contents to vector and clear set.
-     */
-    void sortBuffer();
-
-    // Comparator for data buffer
-    // Initialization follows sort key generator
-    std::unique_ptr<WorkingSetComparator> _workingSetComparator;
-
-    // The data we buffer and sort.
-    // _data will contain sorted data when all data is gathered
-    // and sorted.
-    // When _limit is greater than 1 and not all data has been gathered from child stage,
-    // _dataSet is used instead to maintain an ordered set of the incomplete data set.
-    // When the data set is complete, we copy the items from _dataSet to _data which will
-    // be used to provide the results of this stage through _resultIterator.
-    std::vector<SortableDataItem> _data;
-    typedef std::set<SortableDataItem, WorkingSetComparator> SortableDataItemSet;
-    std::unique_ptr<SortableDataItemSet> _dataSet;
-
-    // Iterates through _data post-sort returning it.
-    std::vector<SortableDataItem>::iterator _resultIterator;
-
     SortStats _specificStats;
 
-    // The usage in bytes of all buffered data that we're sorting.
-    size_t _memUsage;
+    // Whether or not we have finished loading data into '_sortExecutor'.
+    bool _populated = false;
 };
 
 }  // namespace mongo
diff --git a/src/mongo/db/exec/sort_executor.cpp b/src/mongo/db/exec/sort_executor.cpp
index f90c8576bbf..5cc11e004b0 100644
--- a/src/mongo/db/exec/sort_executor.cpp
+++ b/src/mongo/db/exec/sort_executor.cpp
@@ -109,6 +109,8 @@ void SortExecutor::add(Value sortKey, WorkingSetMember data) {
     // Metadata should be attached directly to the WSM rather than inside the Document.
     invariant(!data.doc.value().metadata());
 
+    _totalDataSizeBytes += data.getMemUsage();
+
     if (!_sorter) {
         _sorter.reset(DocumentSorter::make(makeSortOptions(), Comparator(_sortPattern)));
     }
@@ -139,6 +141,17 @@ SortOptions SortExecutor::makeSortOptions() const {
 
     return opts;
 }
+
+std::unique_ptr<SortStats> SortExecutor::stats() const {
+    auto stats = std::make_unique<SortStats>();
+    stats->sortPattern =
+        _sortPattern.serialize(SortPattern::SortKeySerialization::kForExplain).toBson();
+    stats->limit = _limit;
+    stats->maxMemoryUsageBytes = _maxMemoryUsageBytes;
+    stats->totalDataSizeBytes = _totalDataSizeBytes;
+    stats->wasDiskUsed = _wasDiskUsed;
+    return stats;
+}
 }  // namespace mongo
 
 #include "mongo/db/sorter/sorter.cpp"
diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h
index 3ec21b65fd0..c0945fe1fd4 100644
--- a/src/mongo/db/exec/sort_executor.h
+++ b/src/mongo/db/exec/sort_executor.h
@@ -30,6 +30,7 @@
 #pragma once
 
 #include "mongo/db/exec/document_value/document.h"
+#include "mongo/db/exec/plan_stats.h"
 #include "mongo/db/exec/sort_key_comparator.h"
 #include "mongo/db/exec/working_set.h"
 #include "mongo/db/pipeline/expression.h"
@@ -98,6 +99,16 @@ public:
      */
     void add(Value, WorkingSetMember);
 
+    /**
+     * Returns true if the loading phase has been explicitly completed, and then the stream of
+     * documents has subsequently been exhausted by "get next" calls.
+     */
+    bool isEOF() const {
+        return _isEOF;
+    }
+
+    std::unique_ptr<SortStats> stats() const;
+
 private:
     using DocumentSorter = Sorter<Value, WorkingSetMember>;
     class Comparator {
@@ -125,5 +136,6 @@ private:
 
     bool _isEOF = false;
     bool _wasDiskUsed = false;
+    uint64_t _totalDataSizeBytes = 0u;
 };
 }  // namespace mongo
diff --git a/src/mongo/db/exec/sort_test.cpp b/src/mongo/db/exec/sort_test.cpp
index 349d3a7a1af..9e4494f27a6 100644
--- a/src/mongo/db/exec/sort_test.cpp
+++ b/src/mongo/db/exec/sort_test.cpp
@@ -109,8 +109,12 @@ public:
         auto sortKeyGen = std::make_unique<SortKeyGeneratorStage>(
             expCtx, std::move(queuedDataStage), &ws, sortPattern);
 
-        SortStage sort(
-            expCtx, &ws, sortPattern, limit, kMaxMemoryUsageBytes, std::move(sortKeyGen));
+        SortStage sort(expCtx,
+                       &ws,
+                       SortPattern{sortPattern, expCtx},
+                       limit,
+                       kMaxMemoryUsageBytes,
+                       std::move(sortKeyGen));
 
         WorkingSetID id = WorkingSet::INVALID_ID;
         PlanStage::StageState state = PlanStage::NEED_TIME;
@@ -170,7 +174,12 @@ TEST_F(SortStageTest, SortEmptyWorkingSet) {
     auto sortKeyGen =
         std::make_unique<SortKeyGeneratorStage>(expCtx, std::move(queuedDataStage), &ws, BSONObj());
     auto sortPattern = BSON("a" << 1);
-    SortStage sort(expCtx, &ws, sortPattern, 0u, kMaxMemoryUsageBytes, std::move(sortKeyGen));
+    SortStage sort(expCtx,
+                   &ws,
+                   SortPattern{sortPattern, expCtx},
+                   0u,
+                   kMaxMemoryUsageBytes,
+                   std::move(sortKeyGen));
 
     // Check initial EOF state.
     ASSERT_FALSE(sort.isEOF());
diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h
index 846111d059d..7021ed5eb62 100644
--- a/src/mongo/db/exec/working_set.h
+++ b/src/mongo/db/exec/working_set.h
@@ -230,6 +230,12 @@ public:
         return getMemUsage();
     }
 
+    WorkingSetMember getOwned() const {
+        auto ret = *this;
+        ret.makeObjOwnedIfNeeded();
+        return ret;
+    }
+
 private:
     friend class WorkingSet;
 
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index df15b11e94d..41bf643a7d4 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -541,15 +541,16 @@ void Explain::statsToBSON(const PlanStageStats& stats,
     } else if (STAGE_SORT == stats.stageType) {
         SortStats* spec = static_cast<SortStats*>(stats.specific.get());
         bob->append("sortPattern", spec->sortPattern);
-
-        if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
-            bob->appendNumber("memUsage", spec->memUsage);
-            bob->appendNumber("memLimit", spec->memLimit);
-        }
+        bob->appendNumber("memLimit", spec->maxMemoryUsageBytes);
 
         if (spec->limit > 0) {
             bob->appendNumber("limitAmount", spec->limit);
         }
+
+        if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
+            bob->appendNumber("totalDataSizeSorted", spec->totalDataSizeBytes);
+            bob->appendBool("usedDisk", spec->wasDiskUsed);
+        }
     } else if (STAGE_SORT_MERGE == stats.stageType) {
         MergeSortStats* spec = static_cast<MergeSortStats*>(stats.specific.get());
         bob->append("sortPattern", spec->sortPattern);
diff --git a/src/mongo/db/query/find_common.cpp b/src/mongo/db/query/find_common.cpp
index 33de26862c9..e236faccfb7 100644
--- a/src/mongo/db/query/find_common.cpp
+++ b/src/mongo/db/query/find_common.cpp
@@ -76,25 +76,6 @@ bool FindCommon::haveSpaceForNext(const BSONObj& nextDoc, long long numDocs, int
     return (bytesBuffered + nextDoc.objsize()) <= kMaxBytesToReturnToClientAtOnce;
 }
 
-BSONObj FindCommon::transformSortSpec(const BSONObj& sortSpec) {
-    BSONObjBuilder comparatorBob;
-
-    for (BSONElement elt : sortSpec) {
-        if (elt.isNumber()) {
-            comparatorBob.append(elt);
-        } else if (QueryRequest::isTextScoreMeta(elt)) {
-            // Sort text score decreasing by default. Field name doesn't matter but we choose
-            // something that a user shouldn't ever have.
-            comparatorBob.append("$metaTextScore", -1);
-        } else {
-            // Sort spec should have been validated before here.
-            fassertFailed(28784);
-        }
-    }
-
-    return comparatorBob.obj();
-}
-
 void FindCommon::waitInFindBeforeMakingBatch(OperationContext* opCtx, const CanonicalQuery& cq) {
     auto whileWaitingFunc = [&, hasLogged = false]() mutable {
         if (!std::exchange(hasLogged, true)) {
diff --git a/src/mongo/db/query/find_common.h b/src/mongo/db/query/find_common.h
index 90216d9138c..31c6b010692 100644
--- a/src/mongo/db/query/find_common.h
+++ b/src/mongo/db/query/find_common.h
@@ -117,17 +117,6 @@ public:
     static bool haveSpaceForNext(const BSONObj& nextDoc, long long numDocs, int bytesBuffered);
 
     /**
-     * Transforms the raw sort spec into one suitable for use as the ordering specification in
-     * BSONObj::woCompare().
-     *
-     * In particular, eliminates text score meta-sort from 'sortSpec'.
-     *
-     * The input must be validated (each BSON element must be either a number or text score
-     * meta-sort specification).
-     */
-    static BSONObj transformSortSpec(const BSONObj& sortSpec);
-
-    /**
      * This function wraps waitWhileFailPointEnabled() on waitInFindBeforeMakingBatch.
      *
      * Since query processing happens in three different places, this function makes it easier to
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 215beb2c9bf..0beb57d2024 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -124,7 +124,7 @@ std::unique_ptr<PlanStage> buildStages(OperationContext* opCtx,
             auto childStage = buildStages(opCtx, collection, cq, qsol, sn->children[0], ws);
             return std::make_unique<SortStage>(cq.getExpCtx(),
                                                ws,
-                                               sn->pattern,
+                                               SortPattern{sn->pattern, cq.getExpCtx()},
                                                sn->limit,
                                                internalQueryExecMaxBlockingSortBytes.load(),
                                                std::move(childStage));
diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp
index 2d3703ea73e..3d85948fe27 100644
--- a/src/mongo/db/sorter/sorter.cpp
+++ b/src/mongo/db/sorter/sorter.cpp
@@ -542,7 +542,7 @@ public:
     void add(const Key& key, const Value& val) {
         invariant(!_done);
 
-        _data.push_back(std::make_pair(key, val));
+        _data.emplace_back(key.getOwned(), val.getOwned());
 
         _memUsed += key.memUsageForSorter();
         _memUsed += val.memUsageForSorter();
@@ -656,7 +656,7 @@ public:
             _haveData = true;
         }
 
-        _best = contender;
+        _best = {contender.first.getOwned(), contender.second.getOwned()};
     }
 
     Iterator* done() {
@@ -726,7 +726,7 @@ public:
             if (_haveCutoff && !less(contender, _cutoff))
                 return;
 
-            _data.push_back(contender);
+            _data.emplace_back(contender.first.getOwned(), contender.second.getOwned());
 
             _memUsed += key.memUsageForSorter();
             _memUsed += val.memUsageForSorter();
@@ -754,7 +754,7 @@ public:
         _memUsed -= _data.front().second.memUsageForSorter();
 
         std::pop_heap(_data.begin(), _data.end(), less);
-        _data.back() = contender;
+        _data.back() = {contender.first.getOwned(), contender.second.getOwned()};
         std::push_heap(_data.begin(), _data.end(), less);
 
         if (_memUsed > _opts.maxMemoryUsageBytes)
diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h
index 4eddb0320bf..8a43c8f91b7 100644
--- a/src/mongo/db/sorter/sorter.h
+++ b/src/mongo/db/sorter/sorter.h
@@ -61,8 +61,9 @@
  * // How much memory is used by your type? Include sizeof(*this) and any memory you reference.
  * int memUsageForSorter() const;
  *
- * // For types with owned and unowned states, such as BSON, return an owned version.
- * // Return *this if your type doesn't have an unowned state
+ * // For types with owned and unowned states, such as BSON, return an owned version. The Sorter
+ * // is responsible for converting any unowned data to an owned state if it needs to be buffered.
+ * // Return *this if your type doesn't have an unowned state.
  * Type getOwned() const;
  *
  * Comparators are functors that that compare std::pair<Key, Value> and return an
@@ -228,13 +229,14 @@ public:
 
     virtual ~Sorter() {}
 
-    bool usedDisk() {
+    bool usedDisk() const {
         return _usedDisk;
     }
 
 protected:
+    Sorter() {}  // can only be constructed as a base
+
     bool _usedDisk{false};  // Keeps track of whether the sorter used disk or not
-    Sorter() {}             // can only be constructed as a base
 };
 
 /**
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index d4e0aebb24b..dc739804204 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -390,6 +390,10 @@ public:
         // Use buffer capacity as a more accurate measure of memory usage.
         return sizeof(Value) + _buffer.capacity();
     }
+
+    Value getOwned() const {
+        return *this;
+    }
     /// Members for Sorter
 
 private:
diff --git a/src/mongo/dbtests/query_stage_sort.cpp b/src/mongo/dbtests/query_stage_sort.cpp
index db699ced854..6332810b5cc 100644
--- a/src/mongo/dbtests/query_stage_sort.cpp
+++ b/src/mongo/dbtests/query_stage_sort.cpp
@@ -120,8 +120,12 @@ public:
         auto keyGenStage = std::make_unique<SortKeyGeneratorStage>(
             _expCtx, std::move(queuedDataStage), ws.get(), sortPattern);
 
-        auto ss = std::make_unique<SortStage>(
-            _expCtx, ws.get(), sortPattern, limit(), maxMemoryUsageBytes(), std::move(keyGenStage));
+        auto ss = std::make_unique<SortStage>(_expCtx,
+                                              ws.get(),
+                                              SortPattern{sortPattern, _expCtx},
+                                              limit(),
+                                              maxMemoryUsageBytes(),
+                                              std::move(keyGenStage));
 
         // The PlanExecutor will be automatically registered on construction due to the auto
         // yield policy, so it can receive invalidations when we remove documents later.
@@ -156,8 +160,12 @@ public:
         auto keyGenStage = std::make_unique<SortKeyGeneratorStage>(
             _expCtx, std::move(queuedDataStage), ws.get(), sortPattern);
 
-        auto sortStage = std::make_unique<SortStage>(
-            _expCtx, ws.get(), sortPattern, limit(), maxMemoryUsageBytes(), std::move(keyGenStage));
+        auto sortStage = std::make_unique<SortStage>(_expCtx,
+                                                     ws.get(),
+                                                     SortPattern{sortPattern, _expCtx},
+                                                     limit(),
+                                                     maxMemoryUsageBytes(),
+                                                     std::move(keyGenStage));
 
         auto fetchStage =
             std::make_unique<FetchStage>(&_opCtx, ws.get(), std::move(sortStage), nullptr, coll);
@@ -560,8 +568,12 @@ public:
         auto keyGenStage = std::make_unique<SortKeyGeneratorStage>(
             _expCtx, std::move(queuedDataStage), ws.get(), sortPattern);
 
-        auto sortStage = std::make_unique<SortStage>(
-            _expCtx, ws.get(), sortPattern, 0u, maxMemoryUsageBytes(), std::move(keyGenStage));
+        auto sortStage = std::make_unique<SortStage>(_expCtx,
+                                                     ws.get(),
+                                                     SortPattern{sortPattern, _expCtx},
+                                                     0u,
+                                                     maxMemoryUsageBytes(),
+                                                     std::move(keyGenStage));
 
         auto fetchStage =
             std::make_unique<FetchStage>(&_opCtx, ws.get(), std::move(sortStage), nullptr, coll);
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 69fe465d4cb..c466bc35329 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -85,6 +85,34 @@ static const int kPerDocumentOverheadBytesUpperBound = 10;
 const char kFindCmdName[] = "find";
 
 /**
+ * Transforms the raw sort spec into one suitable for use as the ordering specification in
+ * BSONObj::woCompare().
+ *
+ * In particular, eliminates text score meta-sort from 'sortSpec'.
+ *
+ * The input must be validated (each BSON element must be either a number or text score meta-sort
+ * specification).
+ */
+BSONObj transformSortSpec(const BSONObj& sortSpec) {
+    BSONObjBuilder comparatorBob;
+
+    for (BSONElement elt : sortSpec) {
+        if (elt.isNumber()) {
+            comparatorBob.append(elt);
+        } else if (QueryRequest::isTextScoreMeta(elt)) {
+            // Sort text score decreasing by default. Field name doesn't matter but we choose
+            // something that a user shouldn't ever have.
+            comparatorBob.append("$metaTextScore", -1);
+        } else {
+            // Sort spec should have been validated before here.
+            fassertFailed(28784);
+        }
+    }
+
+    return comparatorBob.obj();
+}
+
+/**
  * Given the QueryRequest 'qr' being executed by mongos, returns a copy of the query which is
  * suitable for forwarding to the targeted hosts.
  */
@@ -246,7 +274,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx,
     // sort on mongos. Including a $natural anywhere in the sort spec results in the whole sort
     // being considered a hint to use a collection scan.
     if (!query.getQueryRequest().getSort().hasField("$natural")) {
-        params.sort = FindCommon::transformSortSpec(query.getQueryRequest().getSort());
+        params.sort = transformSortSpec(query.getQueryRequest().getSort());
     }
 
     bool appendGeoNearDistanceProjection = false;