SERVER-17010 each Sorter instance spills to a single file rather than a new file per spill to disk

(cherry picked from commit 2be7f2677a40a863f336d2964f456c9d87ddc838)
author: Dianna Hohensee <dianna.hohensee@10gen.com> 2018-09-13 15:09:08 -0400
committer: Eric Milkie <milkie@10gen.com> 2019-02-25 12:48:03 -0500
commit: 9491e6fb0ec2b38dd758d8acfe0e0aa11aa7bad9 (patch)
tree: 2b7a3854e0009d3a1e9b8a47f1be6ffa546ca55a
parent: c1760b39b29dc412ca1c20361c500e18e90c71cd (diff)
download: mongo-9491e6fb0ec2b38dd758d8acfe0e0aa11aa7bad9.tar.gz
11 files changed, 480 insertions, 180 deletions
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 7b8682b171e..35df1dd865b 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -614,6 +614,20 @@ bool IndexAccessMethod::BulkBuilder::isMultikey() const {
     return _everGeneratedMultipleKeys || isMultikeyFromPaths(_indexMultikeyPaths);
 }
 
+/**
+ * Generates a new file name on each call using a static, atomic and monotonically increasing
+ * number.
+ *
+ * Each user of the Sorter must implement this function to ensure that all temporary files that the
+ * Sorter instances produce are uniquely identified using a unique file name extension with separate
+ * atomic variable. This is necessary because the sorter.cpp code is separately included in multiple
+ * places, rather than compiled in one place and linked, and so cannot provide a globally unique ID.
+ */
+std::string nextFileName() {
+    static AtomicUInt32 indexAccessMethodFileCounter;
+    return "extsort-index." + std::to_string(indexAccessMethodFileCounter.fetchAndAdd(1));
+}
+
 }  // namespace mongo
 
 #include "mongo/db/sorter/sorter.cpp"
diff --git a/src/mongo/db/pipeline/aggregation_context_fixture.h b/src/mongo/db/pipeline/aggregation_context_fixture.h
index 009c10b42b0..aeacb8ec71e 100644
--- a/src/mongo/db/pipeline/aggregation_context_fixture.h
+++ b/src/mongo/db/pipeline/aggregation_context_fixture.h
@@ -36,6 +36,7 @@
 #include "mongo/db/pipeline/expression_context_for_test.h"
 #include "mongo/db/service_context_test_fixture.h"
 #include "mongo/stdx/memory.h"
+#include "mongo/unittest/temp_dir.h"
 #include "mongo/unittest/unittest.h"
 
 namespace mongo {
@@ -54,6 +55,8 @@ public:
         // context.
         _expCtx = new ExpressionContext(_opCtx.get(), nullptr);
         _expCtx->ns = std::move(nss);
+        unittest::TempDir tempDir("AggregationContextFixture");
+        _expCtx->tempDir = tempDir.path();
     }
 
     boost::intrusive_ptr<ExpressionContext> getExpCtx() {
diff --git a/src/mongo/db/pipeline/document_source_bucket_auto.cpp b/src/mongo/db/pipeline/document_source_bucket_auto.cpp
index c811fc9e262..9308328e7c6 100644
--- a/src/mongo/db/pipeline/document_source_bucket_auto.cpp
+++ b/src/mongo/db/pipeline/document_source_bucket_auto.cpp
@@ -46,6 +46,44 @@ REGISTER_DOCUMENT_SOURCE(bucketAuto,
                          LiteParsedDocumentSourceDefault::parse,
                          DocumentSourceBucketAuto::createFromBson);
 
+namespace {
+
+boost::intrusive_ptr<Expression> parseGroupByExpression(
+    const boost::intrusive_ptr<ExpressionContext>& expCtx,
+    const BSONElement& groupByField,
+    const VariablesParseState& vps) {
+    if (groupByField.type() == BSONType::Object &&
+        groupByField.embeddedObject().firstElementFieldName()[0] == '$') {
+        return Expression::parseObject(expCtx, groupByField.embeddedObject(), vps);
+    } else if (groupByField.type() == BSONType::String &&
+               groupByField.valueStringData()[0] == '$') {
+        return ExpressionFieldPath::parse(expCtx, groupByField.str(), vps);
+    } else {
+        uasserted(
+            40239,
+            str::stream() << "The $bucketAuto 'groupBy' field must be defined as a $-prefixed "
+                             "path or an expression object, but found: "
+                          << groupByField.toString(false, false));
+    }
+}
+
+/**
+ * Generates a new file name on each call using a static, atomic and monotonically increasing
+ * number.
+ *
+ * Each user of the Sorter must implement this function to ensure that all temporary files that the
+ * Sorter instances produce are uniquely identified using a unique file name extension with separate
+ * atomic variable. This is necessary because the sorter.cpp code is separately included in multiple
+ * places, rather than compiled in one place and linked, and so cannot provide a globally unique ID.
+ */
+std::string nextFileName() {
+    static AtomicUInt32 documentSourceBucketAutoFileCounter;
+    return "extsort-doc-bucket." +
+        std::to_string(documentSourceBucketAutoFileCounter.fetchAndAdd(1));
+}
+
+}  // namespace
+
 const char* DocumentSourceBucketAuto::getSourceName() const {
     return "$bucketAuto";
 }
@@ -400,28 +438,6 @@ DocumentSourceBucketAuto::DocumentSourceBucketAuto(
     }
 }
 
-namespace {
-
-boost::intrusive_ptr<Expression> parseGroupByExpression(
-    const boost::intrusive_ptr<ExpressionContext>& expCtx,
-    const BSONElement& groupByField,
-    const VariablesParseState& vps) {
-    if (groupByField.type() == BSONType::Object &&
-        groupByField.embeddedObject().firstElementFieldName()[0] == '$') {
-        return Expression::parseObject(expCtx, groupByField.embeddedObject(), vps);
-    } else if (groupByField.type() == BSONType::String &&
-               groupByField.valueStringData()[0] == '$') {
-        return ExpressionFieldPath::parse(expCtx, groupByField.str(), vps);
-    } else {
-        uasserted(
-            40239,
-            str::stream() << "The $bucketAuto 'groupBy' field must be defined as a $-prefixed "
-                             "path or an expression object, but found: "
-                          << groupByField.toString(false, false));
-    }
-}
-}  // namespace
-
 intrusive_ptr<DocumentSource> DocumentSourceBucketAuto::createFromBson(
     BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
     uassert(40240,
@@ -486,6 +502,7 @@ intrusive_ptr<DocumentSource> DocumentSourceBucketAuto::createFromBson(
     return DocumentSourceBucketAuto::create(
         pExpCtx, groupByExpression, numBuckets.get(), accumulationStatements, granularityRounder);
 }
+
 }  // namespace mongo
 
 #include "mongo/db/sorter/sorter.cpp"
diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp
index ca94e560140..097aeb725bc 100644
--- a/src/mongo/db/pipeline/document_source_group.cpp
+++ b/src/mongo/db/pipeline/document_source_group.cpp
@@ -30,6 +30,8 @@
 
 #include "mongo/platform/basic.h"
 
+#include <boost/filesystem/operations.hpp>
+
 #include "mongo/db/jsobj.h"
 #include "mongo/db/pipeline/accumulation_statement.h"
 #include "mongo/db/pipeline/accumulator.h"
@@ -41,9 +43,28 @@
 #include "mongo/db/pipeline/value.h"
 #include "mongo/db/pipeline/value_comparator.h"
 #include "mongo/stdx/memory.h"
+#include "mongo/util/destructor_guard.h"
 
 namespace mongo {
 
+namespace {
+
+/**
+ * Generates a new file name on each call using a static, atomic and monotonically increasing
+ * number.
+ *
+ * Each user of the Sorter must implement this function to ensure that all temporary files that the
+ * Sorter instances produce are uniquely identified using a unique file name extension with separate
+ * atomic variable. This is necessary because the sorter.cpp code is separately included in multiple
+ * places, rather than compiled in one place and linked, and so cannot provide a globally unique ID.
+ */
+std::string nextFileName() {
+    static AtomicUInt32 documentSourceGroupFileCounter;
+    return "extsort-doc-group." + std::to_string(documentSourceGroupFileCounter.fetchAndAdd(1));
+}
+
+}  // namespace
+
 using boost::intrusive_ptr;
 using std::shared_ptr;
 using std::pair;
@@ -268,7 +289,18 @@ DocumentSourceGroup::DocumentSourceGroup(const intrusive_ptr<ExpressionContext>&
       _initialized(false),
       _groups(pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>()),
       _spilled(false),
-      _allowDiskUse(pExpCtx->allowDiskUse && !pExpCtx->inMongos) {}
+      _allowDiskUse(pExpCtx->allowDiskUse && !pExpCtx->inMongos) {
+    if (!pExpCtx->inMongos && (pExpCtx->allowDiskUse || kDebugBuild)) {
+        // We spill to disk in debug mode, regardless of allowDiskUse, to stress the system.
+        _fileName = pExpCtx->tempDir + "/" + nextFileName();
+    }
+}
+
+DocumentSourceGroup::~DocumentSourceGroup() {
+    if (_ownsFileDeletion) {
+        DESTRUCTOR_GUARD(boost::filesystem::remove(_fileName));
+    }
+}
 
 void DocumentSourceGroup::addAccumulator(AccumulationStatement accumulationStatement) {
     _accumulatedFields.push_back(accumulationStatement);
@@ -560,7 +592,11 @@ DocumentSource::GetNextResult DocumentSourceGroup::initialize() {
                 _groups = pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>();
 
                 _sorterIterator.reset(Sorter<Value, Value>::Iterator::merge(
-                    _sortedFiles, SortOptions(), SorterComparator(pExpCtx->getValueComparator())));
+                    _sortedFiles,
+                    _fileName,
+                    SortOptions(),
+                    SorterComparator(pExpCtx->getValueComparator())));
+                _ownsFileDeletion = false;
 
                 // prepare current to accumulate data
                 _currentAccumulators.reserve(numAccumulators);
@@ -593,7 +629,8 @@ shared_ptr<Sorter<Value, Value>::Iterator> DocumentSourceGroup::spill() {
 
     stable_sort(ptrs.begin(), ptrs.end(), SpillSTLComparator(pExpCtx->getValueComparator()));
 
-    SortedFileWriter<Value, Value> writer(SortOptions().TempDir(pExpCtx->tempDir));
+    SortedFileWriter<Value, Value> writer(
+        SortOptions().TempDir(pExpCtx->tempDir), _fileName, _nextSortedFileWriterOffset);
     switch (_accumulatedFields.size()) {  // same as ptrs[i]->second.size() for all i.
         case 0:                           // no values, essentially a distinct
             for (size_t i = 0; i < ptrs.size(); i++) {
@@ -621,7 +658,9 @@ shared_ptr<Sorter<Value, Value>::Iterator> DocumentSourceGroup::spill() {
 
     _groups->clear();
 
-    return shared_ptr<Sorter<Value, Value>::Iterator>(writer.done());
+    Sorter<Value, Value>::Iterator* iteratorPtr = writer.done();
+    _nextSortedFileWriterOffset = writer.getFileEndOffset();
+    return shared_ptr<Sorter<Value, Value>::Iterator>(iteratorPtr);
 }
 
 boost::optional<BSONObj> DocumentSourceGroup::findRelevantInputSort() const {
@@ -857,5 +896,7 @@ std::list<intrusive_ptr<DocumentSource>> DocumentSourceGroup::getMergeSources()
 }
 }
 
+}  // namespace mongo
+
 #include "mongo/db/sorter/sorter.cpp"
 // Explicit instantiation unneeded since we aren't exposing Sorter outside of this file.
diff --git a/src/mongo/db/pipeline/document_source_group.h b/src/mongo/db/pipeline/document_source_group.h
index 951fefcce0b..bbafa42e1d9 100644
--- a/src/mongo/db/pipeline/document_source_group.h
+++ b/src/mongo/db/pipeline/document_source_group.h
@@ -112,6 +112,8 @@ private:
     explicit DocumentSourceGroup(const boost::intrusive_ptr<ExpressionContext>& pExpCtx,
                                  size_t maxMemoryUsageBytes = kDefaultMaxMemoryUsageBytes);
 
+    ~DocumentSourceGroup();
+
     /**
      * getNext() dispatches to one of these three depending on what type of $group it is. All three
      * of these methods expect '_currentAccumulators' to have been reset before being called, and
@@ -164,6 +166,10 @@ private:
     bool _doingMerge;
     size_t _memoryUsageBytes = 0;
     size_t _maxMemoryUsageBytes;
+    std::string _fileName;
+    unsigned int _nextSortedFileWriterOffset = 0;
+    bool _ownsFileDeletion = true;  // unless a MergeIterator is made that takes over.
+
     std::vector<std::string> _idFieldNames;  // used when id is a document
     std::vector<boost::intrusive_ptr<Expression>> _idExpressions;
 
diff --git a/src/mongo/db/pipeline/document_source_group_test.cpp b/src/mongo/db/pipeline/document_source_group_test.cpp
index b1dda8c0a13..bd916dac0b6 100644
--- a/src/mongo/db/pipeline/document_source_group_test.cpp
+++ b/src/mongo/db/pipeline/document_source_group_test.cpp
@@ -220,7 +220,7 @@ protected:
         expressionContext->tempDir = _tempDir.path();
 
         _group = DocumentSourceGroup::createFromBson(specElement, expressionContext);
-        assertRoundTrips(_group);
+        assertRoundTrips(_group, expressionContext);
     }
     DocumentSourceGroup* group() {
         return static_cast<DocumentSourceGroup*>(_group.get());
@@ -239,13 +239,14 @@ protected:
 
 private:
     /** Check that the group's spec round trips. */
-    void assertRoundTrips(const intrusive_ptr<DocumentSource>& group) {
+    void assertRoundTrips(const intrusive_ptr<DocumentSource>& group,
+                          const boost::intrusive_ptr<ExpressionContext>& expCtx) {
         // We don't check against the spec that generated 'group' originally, because
         // $const operators may be introduced in the first serialization.
         BSONObj spec = toBson(group);
         BSONElement specElement = spec.firstElement();
         intrusive_ptr<DocumentSource> generated =
-            DocumentSourceGroup::createFromBson(specElement, ctx());
+            DocumentSourceGroup::createFromBson(specElement, expCtx);
         ASSERT_BSONOBJ_EQ(spec, toBson(generated));
     }
     std::unique_ptr<QueryTestServiceContext> _queryServiceContext;
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index 2a915a97be1..904373ef04c 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -52,6 +52,7 @@ using std::string;
 using std::vector;
 
 namespace {
+
 Value missingToNull(Value maybeMissing) {
     return maybeMissing.missing() ? Value(BSONNULL) : maybeMissing;
 }
@@ -96,7 +97,23 @@ Value deserializeSortKey(size_t sortPatternSize, BSONObj bsonSortKey) {
     return Value{std::move(keys)};
 }
 
+/**
+ * Generates a new file name on each call using a static, atomic and monotonically increasing
+ * number.
+ *
+ * Each user of the Sorter must implement this function to ensure that all temporary files that the
+ * Sorter instances produce are uniquely identified using a unique file name extension with separate
+ * atomic variable. This is necessary because the sorter.cpp code is separately included in multiple
+ * places, rather than compiled in one place and linked, and so cannot provide a globally unique ID.
+ */
+std::string nextFileName() {
+    static AtomicUInt32 documentSourceSortFileCounter;
+    return "extsort-doc-source-sort." +
+        std::to_string(documentSourceSortFileCounter.fetchAndAdd(1));
+}
+
 }  // namespace
+
 constexpr StringData DocumentSourceSort::kStageName;
 
 DocumentSourceSort::DocumentSourceSort(const intrusive_ptr<ExpressionContext>& pExpCtx)
@@ -537,7 +554,8 @@ std::list<intrusive_ptr<DocumentSource>> DocumentSourceSort::getMergeSources() {
     other->_rawSort = _rawSort;
     return {other};
 }
-}
+
+}  // namespace mongo
 
 #include "mongo/db/sorter/sorter.cpp"
 // Explicit instantiation unneeded since we aren't exposing Sorter outside of this file.
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index 5dfd2825050..f44713cd2bb 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -56,6 +56,7 @@
 #include "mongo/db/repl/replication_coordinator_mock.h"
 #include "mongo/dbtests/dbtests.h"
 #include "mongo/unittest/death_test.h"
+#include "mongo/unittest/temp_dir.h"
 
 namespace mongo {
 namespace {
@@ -98,6 +99,8 @@ void assertPipelineOptimizesAndSerializesTo(std::string inputPipeJson,
     AggregationRequest request(kTestNss, rawPipeline);
     intrusive_ptr<ExpressionContextForTest> ctx =
         new ExpressionContextForTest(opCtx.get(), request);
+    TempDir tempDir("PipelineTest");
+    ctx->tempDir = tempDir.path();
 
     // For $graphLookup and $lookup, we have to populate the resolvedNamespaces so that the
     // operations will be able to have a resolved view definition.
@@ -1765,6 +1768,8 @@ public:
         AggregationRequest request(kTestNss, rawPipeline);
         intrusive_ptr<ExpressionContextForTest> ctx =
             new ExpressionContextForTest(&_opCtx, request);
+        TempDir tempDir("PipelineTest");
+        ctx->tempDir = tempDir.path();
 
         // For $graphLookup and $lookup, we have to populate the resolvedNamespaces so that the
         // operations will be able to have a resolved view definition.
diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp
index feac372b599..c17205e7718 100644
--- a/src/mongo/db/sorter/sorter.cpp
+++ b/src/mongo/db/sorter/sorter.cpp
@@ -68,6 +68,7 @@
 #include "mongo/util/unowned_ptr.h"
 
 namespace mongo {
+
 namespace sorter {
 
 using std::shared_ptr;
@@ -104,19 +105,9 @@ void dassertCompIsSane(const Comparator& comp, const Data& lhs, const Data& rhs)
 #endif
 }
 
-/** Ensures a named file is deleted when this object goes out of scope */
-class FileDeleter {
-public:
-    FileDeleter(const std::string& fileName) : _fileName(fileName) {}
-    ~FileDeleter() {
-        DESTRUCTOR_GUARD(boost::filesystem::remove(_fileName);)
-    }
-
-private:
-    const std::string _fileName;
-};
-
-/** Returns results from sorted in-memory storage */
+/**
+ * Returns results from sorted in-memory storage.
+ */
 template <typename Key, typename Value>
 class InMemIterator : public SortIteratorInterface<Key, Value> {
 public:
@@ -132,6 +123,9 @@ public:
     template <typename Container>
     InMemIterator(const Container& input) : _data(input.begin(), input.end()) {}
 
+    void openSource() {}
+    void closeSource() {}
+
     bool more() {
         return !_data.empty();
     }
@@ -145,7 +139,15 @@ private:
     std::deque<Data> _data;
 };
 
-/** Returns results in order from a single file */
+/**
+ * Returns results from a sorted range within a file. Each instance is given a file name and start
+ * and end offsets.
+ *
+ * This class is NOT responsible for file clean up / deletion. There are openSource() and
+ * closeSource() functions to ensure the FileIterator is not holding the file open when the file is
+ * deleted. Since it is one among many FileIterators, it cannot close a file that may still be in
+ * use elsewhere.
+ */
 template <typename Key, typename Value>
 class FileIterator : public SortIteratorInterface<Key, Value> {
 public:
@@ -155,48 +157,78 @@ public:
     typedef std::pair<Key, Value> Data;
 
     FileIterator(const std::string& fileName,
-                 const Settings& settings,
-                 std::shared_ptr<FileDeleter> fileDeleter)
+                 unsigned int fileStartOffset,
+                 unsigned int fileEndOffset,
+                 const Settings& settings)
         : _settings(settings),
           _done(false),
           _fileName(fileName),
-          _fileDeleter(fileDeleter),
-          _file(_fileName.c_str(), std::ios::in | std::ios::binary) {
-        massert(16814,
+          _fileStartOffset(fileStartOffset),
+          _fileEndOffset(fileEndOffset) {
+        uassert(16815,
+                str::stream() << "unexpected empty file: " << _fileName,
+                boost::filesystem::file_size(_fileName) != 0);
+    }
+
+    void openSource() {
+        _file.open(_fileName.c_str(), std::ios::in | std::ios::binary);
+        uassert(16814,
                 str::stream() << "error opening file \"" << _fileName << "\": "
                               << myErrnoWithDescription(),
                 _file.good());
+        _file.seekg(_fileStartOffset);
+        uassert(50979,
+                str::stream() << "error seeking starting offset of '" << _fileStartOffset
+                              << "' in file \""
+                              << _fileName
+                              << "\": "
+                              << myErrnoWithDescription(),
+                _file.good());
+    }
 
-        massert(16815,
-                str::stream() << "unexpected empty file: " << _fileName,
-                boost::filesystem::file_size(_fileName) != 0);
+    void closeSource() {
+        _file.close();
+        uassert(50969,
+                str::stream() << "error closing file \"" << _fileName << "\": "
+                              << myErrnoWithDescription(),
+                !_file.fail());
     }
 
     bool more() {
         if (!_done)
-            fillIfNeeded();  // may change _done
+            fillBufferIfNeeded();  // may change _done
         return !_done;
     }
 
     Data next() {
         verify(!_done);
-        fillIfNeeded();
-
-        // Note: key must be read before value so can't pass directly to Data constructor
-        auto first = Key::deserializeForSorter(*_reader, _settings.first);
-        auto second = Value::deserializeForSorter(*_reader, _settings.second);
+        fillBufferIfNeeded();
+
+        // Note: calling read() on the _bufferReader buffer in the deserialize function advances the
+        // buffer. Since Key comes before Value in the _bufferReader, and C++ makes no function
+        // parameter evaluation order guarantees, we cannot deserialize Key and Value straight into
+        // the Data constructor
+        auto first = Key::deserializeForSorter(*_bufferReader, _settings.first);
+        auto second = Value::deserializeForSorter(*_bufferReader, _settings.second);
         return Data(std::move(first), std::move(second));
     }
 
 private:
-    void fillIfNeeded() {
+    /**
+     * Attempts to refill the _bufferReader if it is empty. Expects _done to be false.
+     */
+    void fillBufferIfNeeded() {
         verify(!_done);
 
-        if (!_reader || _reader->atEof())
-            fill();
+        if (!_bufferReader || _bufferReader->atEof())
+            fillBufferFromDisk();
     }
 
-    void fill() {
+    /**
+     * Tries to read from disk and places any results in _bufferReader. If there is no more data to
+     * read, then _done is set to true and the function returns immediately.
+     */
+    void fillBufferFromDisk() {
         int32_t rawSize;
         read(&rawSize, sizeof(rawSize));
         if (_done)
@@ -208,7 +240,7 @@ private:
 
         _buffer.reset(new char[blockSize]);
         read(_buffer.get(), blockSize);
-        massert(16816, "file too short?", !_done);
+        uassert(16816, "file too short?", !_done);
 
         auto encryptionHooks = EncryptionHooks::get(getGlobalServiceContext());
         if (encryptionHooks->enabled()) {
@@ -220,7 +252,7 @@ private:
                                                   reinterpret_cast<uint8_t*>(out.get()),
                                                   blockSize,
                                                   &outLen);
-            massert(28841,
+            uassert(28841,
                     str::stream() << "Failed to unprotect data: " << status.toString(),
                     status.isOK());
             blockSize = outLen;
@@ -228,70 +260,85 @@ private:
         }
 
         if (!compressed) {
-            _reader.reset(new BufReader(_buffer.get(), blockSize));
+            _bufferReader.reset(new BufReader(_buffer.get(), blockSize));
             return;
         }
 
         dassert(snappy::IsValidCompressedBuffer(_buffer.get(), blockSize));
 
         size_t uncompressedSize;
-        massert(17061,
+        uassert(17061,
                 "couldn't get uncompressed length",
                 snappy::GetUncompressedLength(_buffer.get(), blockSize, &uncompressedSize));
 
         std::unique_ptr<char[]> decompressionBuffer(new char[uncompressedSize]);
-        massert(17062,
+        uassert(17062,
                 "decompression failed",
                 snappy::RawUncompress(_buffer.get(), blockSize, decompressionBuffer.get()));
 
         // hold on to decompressed data and throw out compressed data at block exit
         _buffer.swap(decompressionBuffer);
-        _reader.reset(new BufReader(_buffer.get(), uncompressedSize));
+        _bufferReader.reset(new BufReader(_buffer.get(), uncompressedSize));
     }
 
-    // sets _done to true on EOF - asserts on any other error
+    /**
+     * Attempts to read data from disk. Sets _done to true when file offset reaches _fileEndOffset.
+     *
+     * Masserts on any file errors
+     */
     void read(void* out, size_t size) {
-        _file.read(reinterpret_cast<char*>(out), size);
-        if (!_file.good()) {
-            if (_file.eof()) {
-                _done = true;
-                return;
-            }
+        invariant(_file.is_open());
 
-            msgasserted(16817,
-                        str::stream() << "error reading file \"" << _fileName << "\": "
-                                      << myErrnoWithDescription());
+        if (static_cast<unsigned int>(_file.tellg()) >= _fileEndOffset) {
+            invariant(static_cast<unsigned int>(_file.tellg()) == _fileEndOffset);
+            _done = true;
+            return;
         }
+
+        _file.read(reinterpret_cast<char*>(out), size);
+        uassert(16817,
+                str::stream() << "error reading file \"" << _fileName << "\": "
+                              << myErrnoWithDescription(),
+                _file.good());
         verify(_file.gcount() == static_cast<std::streamsize>(size));
     }
 
     const Settings _settings;
     bool _done;
     std::unique_ptr<char[]> _buffer;
-    std::unique_ptr<BufReader> _reader;
-    std::string _fileName;
-    std::shared_ptr<FileDeleter> _fileDeleter;  // Must outlive _file
+    std::unique_ptr<BufReader> _bufferReader;
+    std::string _fileName;          // File containing the sorted data range.
+    unsigned int _fileStartOffset;  // File offset at which the sorted data range starts.
+    unsigned int _fileEndOffset;    // File offset at which the sorted data range ends.
     std::ifstream _file;
 };
 
-/** Merge-sorts results from 0 or more FileIterators */
+/**
+ * Merge-sorts results from 0 or more FileIterators, all of which should be iterating over sorted
+ * ranges within the same file. This class is given the data source file name upon construction and
+ * is responsible for deleting the data source file upon destruction.
+ */
 template <typename Key, typename Value, typename Comparator>
 class MergeIterator : public SortIteratorInterface<Key, Value> {
 public:
     typedef SortIteratorInterface<Key, Value> Input;
     typedef std::pair<Key, Value> Data;
 
-
     MergeIterator(const std::vector<std::shared_ptr<Input>>& iters,
+                  const std::string& itersSourceFileName,
                   const SortOptions& opts,
                   const Comparator& comp)
         : _opts(opts),
           _remaining(opts.limit ? opts.limit : std::numeric_limits<unsigned long long>::max()),
           _first(true),
-          _greater(comp) {
+          _greater(comp),
+          _itersSourceFileName(itersSourceFileName) {
         for (size_t i = 0; i < iters.size(); i++) {
+            iters[i]->openSource();
             if (iters[i]->more()) {
                 _heap.push_back(std::make_shared<Stream>(i, iters[i]->next(), iters[i]));
+            } else {
+                iters[i]->closeSource();
             }
         }
 
@@ -306,16 +353,22 @@ public:
         _heap.pop_back();
     }
 
+    ~MergeIterator() {
+        // Clear the remaining Stream objects first, to close the file handles before deleting the
+        // file. Some systems will error closing the file if any file handles are still open.
+        _current.reset();
+        _heap.clear();
+        DESTRUCTOR_GUARD(boost::filesystem::remove(_itersSourceFileName));
+    }
+
+    void openSource() {}
+    void closeSource() {}
+
     bool more() {
         if (_remaining > 0 && (_first || !_heap.empty() || _current->more()))
             return true;
 
-        // We are done so clean up resources.
-        // Can't do this in next() due to lifetime guarantees of unowned Data.
-        _heap.clear();
-        _current.reset();
         _remaining = 0;
-
         return false;
     }
 
@@ -331,7 +384,6 @@ public:
 
         if (!_current->advance()) {
             verify(!_heap.empty());
-
             std::pop_heap(_heap.begin(), _heap.end(), _greater);
             _current = _heap.back();
             _heap.pop_back();
@@ -346,11 +398,22 @@ public:
 
 
 private:
-    class Stream {  // Data + Iterator
+    /**
+     * Data iterator over an Input stream.
+     *
+     * This class is responsible for closing the Input source upon destruction, unfortunately,
+     * because that is the path of least resistence to a design change requiring MergeIterator to
+     * handle eventual deletion of said Input source.
+     */
+    class Stream {
     public:
         Stream(size_t fileNum, const Data& first, std::shared_ptr<Input> rest)
             : fileNum(fileNum), _current(first), _rest(rest) {}
 
+        ~Stream() {
+            _rest->closeSource();
+        }
+
         const Data& current() const {
             return _current;
         }
@@ -396,6 +459,7 @@ private:
     std::shared_ptr<Stream> _current;
     std::vector<std::shared_ptr<Stream>> _heap;  // MinHeap
     STLComparator _greater;                      // named so calls make sense
+    std::string _itersSourceFileName;
 };
 
 template <typename Key, typename Value, typename Comparator>
@@ -412,9 +476,22 @@ public:
                   const Settings& settings = Settings())
         : _comp(comp), _settings(settings), _opts(opts), _memUsed(0) {
         verify(_opts.limit == 0);
+        if (_opts.extSortAllowed) {
+            _fileName = _opts.tempDir + "/" + nextFileName();
+        }
+    }
+
+    ~NoLimitSorter() {
+        if (!_done) {
+            // If done() was never called to return a MergeIterator, then this Sorter still owns
+            // file deletion.
+            DESTRUCTOR_GUARD(boost::filesystem::remove(_fileName));
+        }
     }
 
     void add(const Key& key, const Value& val) {
+        invariant(!_done);
+
         _data.push_back(std::make_pair(key, val));
 
         _memUsed += key.memUsageForSorter();
@@ -425,21 +502,17 @@ public:
     }
 
     Iterator* done() {
+        invariant(!_done);
+
         if (_iters.empty()) {
             sort();
             return new InMemIterator<Key, Value>(_data);
         }
 
         spill();
-        return Iterator::merge(_iters, _opts, _comp);
-    }
-
-    // TEMP these are here for compatibility. Will be replaced with a general stats API
-    int numFiles() const {
-        return _iters.size();
-    }
-    size_t memUsed() const {
-        return _memUsed;
+        Iterator* mergeIt = Iterator::merge(_iters, _fileName, _opts, _comp);
+        _done = true;
+        return mergeIt;
     }
 
 private:
@@ -465,6 +538,9 @@ private:
     }
 
     void spill() {
+        invariant(!_done);
+
+        this->_usedDisk = true;
         if (_data.empty())
             return;
 
@@ -483,12 +559,15 @@ private:
 
         sort();
 
-        SortedFileWriter<Key, Value> writer(_opts, _settings);
+        SortedFileWriter<Key, Value> writer(
+            _opts, _fileName, _nextSortedFileWriterOffset, _settings);
         for (; !_data.empty(); _data.pop_front()) {
             writer.addAlreadySorted(_data.front().first, _data.front().second);
         }
+        Iterator* iteratorPtr = writer.done();
+        _nextSortedFileWriterOffset = writer.getFileEndOffset();
 
-        _iters.push_back(std::shared_ptr<Iterator>(writer.done()));
+        _iters.push_back(std::shared_ptr<Iterator>(iteratorPtr));
 
         _memUsed = 0;
     }
@@ -496,6 +575,9 @@ private:
     const Comparator _comp;
     const Settings _settings;
     SortOptions _opts;
+    std::string _fileName;
+    unsigned int _nextSortedFileWriterOffset = 0;
+    bool _done = false;
     size_t _memUsed;
     std::deque<Data> _data;                         // the "current" data
     std::vector<std::shared_ptr<Iterator>> _iters;  // data that has already been spilled
@@ -536,14 +618,6 @@ public:
         }
     }
 
-    // TEMP these are here for compatibility. Will be replaced with a general stats API
-    int numFiles() const {
-        return 0;
-    }
-    size_t memUsed() const {
-        return _best.first.memUsageForSorter() + _best.second.memUsageForSorter();
-    }
-
 private:
     const Comparator _comp;
     Data _best;
@@ -572,6 +646,10 @@ public:
         // This also *works* with limit==1 but LimitOneSorter should be used instead
         verify(_opts.limit > 1);
 
+        if (_opts.extSortAllowed) {
+            _fileName = _opts.tempDir + "/" + nextFileName();
+        }
+
         // Preallocate a fixed sized vector of the required size if we
         // don't expect it to have a major impact on our memory budget.
         // This is the common case with small limits.
@@ -580,7 +658,17 @@ public:
         }
     }
 
+    ~TopKSorter() {
+        if (!_done) {
+            // If done() was never called to return a MergeIterator, then this Sorter still owns
+            // file deletion.
+            DESTRUCTOR_GUARD(boost::filesystem::remove(_fileName));
+        }
+    }
+
     void add(const Key& key, const Value& val) {
+        invariant(!_done);
+
         STLComparator less(_comp);
         Data contender(key, val);
 
@@ -630,15 +718,9 @@ public:
         }
 
         spill();
-        return Iterator::merge(_iters, _opts, _comp);
-    }
-
-    // TEMP these are here for compatibility. Will be replaced with a general stats API
-    int numFiles() const {
-        return _iters.size();
-    }
-    size_t memUsed() const {
-        return _memUsed;
+        Iterator* iterator = Iterator::merge(_iters, _fileName, _opts, _comp);
+        _done = true;
+        return iterator;
     }
 
 private:
@@ -743,6 +825,9 @@ private:
     }
 
     void spill() {
+        invariant(!_done);
+
+        this->_usedDisk = true;
         if (_data.empty())
             return;
 
@@ -765,7 +850,8 @@ private:
         sort();
         updateCutoff();
 
-        SortedFileWriter<Key, Value> writer(_opts, _settings);
+        SortedFileWriter<Key, Value> writer(
+            _opts, _fileName, _nextSortedFileWriterOffset, _settings);
         for (size_t i = 0; i < _data.size(); i++) {
             writer.addAlreadySorted(_data[i].first, _data[i].second);
         }
@@ -773,7 +859,9 @@ private:
         // clear _data and release backing array's memory
         std::vector<Data>().swap(_data);
 
-        _iters.push_back(std::shared_ptr<Iterator>(writer.done()));
+        Iterator* iteratorPtr = writer.done();
+        _nextSortedFileWriterOffset = writer.getFileEndOffset();
+        _iters.push_back(std::shared_ptr<Iterator>(iteratorPtr));
 
         _memUsed = 0;
     }
@@ -781,6 +869,9 @@ private:
     const Comparator _comp;
     const Settings _settings;
     SortOptions _opts;
+    std::string _fileName;
+    unsigned int _nextSortedFileWriterOffset = 0;
+    bool _done = false;
     size_t _memUsed;
     std::vector<Data> _data;  // the "current" data. Organized as max-heap if size == limit.
     std::vector<std::shared_ptr<Iterator>> _iters;  // data that has already been spilled
@@ -794,46 +885,44 @@ private:
     size_t _medianCount;  // Number of docs better or equal to _lastMedian kept so far.
 };
 
-inline unsigned nextFileNumber() {
-    // This is unified across all Sorter types and instances.
-    static AtomicUInt32 fileCounter;
-    return fileCounter.fetchAndAdd(1);
-}
 }  // namespace sorter
 
 //
 // SortedFileWriter
 //
 
-
 template <typename Key, typename Value>
-SortedFileWriter<Key, Value>::SortedFileWriter(const SortOptions& opts, const Settings& settings)
+SortedFileWriter<Key, Value>::SortedFileWriter(const SortOptions& opts,
+                                               const std::string& fileName,
+                                               const unsigned int fileStartOffset,
+                                               const Settings& settings)
     : _settings(settings) {
     namespace str = mongoutils::str;
 
     // This should be checked by consumers, but if we get here don't allow writes.
-    massert(
+    uassert(
         16946, "Attempting to use external sort from mongos. This is not allowed.", !isMongos());
 
-    massert(17148,
+    uassert(17148,
             "Attempting to use external sort without setting SortOptions::tempDir",
             !opts.tempDir.empty());
 
-    {
-        StringBuilder sb;
-        sb << opts.tempDir << "/extsort." << sorter::nextFileNumber();
-        _fileName = sb.str();
-    }
-
     boost::filesystem::create_directories(opts.tempDir);
 
-    _file.open(_fileName.c_str(), std::ios::binary | std::ios::out);
-    massert(16818,
+    _fileName = fileName;
+
+    // We open the provided file in append mode so that SortedFileWriter instances can share the
+    // same file, used serially. We want to share files in order to stay below system open file
+    // limits.
+    _file.open(_fileName.c_str(), std::ios::binary | std::ios::app | std::ios::out);
+    uassert(16818,
             str::stream() << "error opening file \"" << _fileName << "\": "
                           << sorter::myErrnoWithDescription(),
             _file.good());
-
-    _fileDeleter = std::make_shared<sorter::FileDeleter>(_fileName);
+    // The file descriptor is positioned at the end of a file when opened in append mode, but
+    // _file.tellp() is not initialized on all systems to reflect this. Therefore, we must also pass
+    // in the expected offset to this constructor.
+    _fileStartOffset = fileStartOffset;
 
     // throw on failure
     _file.exceptions(std::ios::failbit | std::ios::badbit | std::ios::eofbit);
@@ -879,7 +968,7 @@ void SortedFileWriter<Key, Value>::spill() {
                                                         reinterpret_cast<uint8_t*>(out.get()),
                                                         protectedSizeMax,
                                                         &resultLen);
-        massert(28842,
+        uassert(28842,
                 str::stream() << "Failed to compress data: " << status.toString(),
                 status.isOK());
         outBuffer = out.get();
@@ -891,7 +980,6 @@ void SortedFileWriter<Key, Value>::spill() {
     try {
         _file.write(reinterpret_cast<const char*>(&size), sizeof(size));
         _file.write(outBuffer, std::abs(size));
-
     } catch (const std::exception&) {
         msgasserted(16821,
                     str::stream() << "error writing to file \"" << _fileName << "\": "
@@ -904,8 +992,22 @@ void SortedFileWriter<Key, Value>::spill() {
 template <typename Key, typename Value>
 SortIteratorInterface<Key, Value>* SortedFileWriter<Key, Value>::done() {
     spill();
+    long currentFileOffset = _file.tellp();
+    uassert(50980,
+            str::stream() << "error fetching current file descriptor offset in file \"" << _fileName
+                          << "\": "
+                          << sorter::myErrnoWithDescription(),
+            currentFileOffset >= 0);
+
+    // In case nothing was written to disk, use _fileStartOffset because tellp() may not be
+    // initialized on all systems upon opening the file.
+    _fileEndOffset = static_cast<unsigned int>(currentFileOffset) < _fileStartOffset
+        ? _fileStartOffset
+        : static_cast<unsigned int>(currentFileOffset);
     _file.close();
-    return new sorter::FileIterator<Key, Value>(_fileName, _settings, _fileDeleter);
+
+    return new sorter::FileIterator<Key, Value>(
+        _fileName, _fileStartOffset, _fileEndOffset, _settings);
 }
 
 //
@@ -916,9 +1018,10 @@ template <typename Key, typename Value>
 template <typename Comparator>
 SortIteratorInterface<Key, Value>* SortIteratorInterface<Key, Value>::merge(
     const std::vector<std::shared_ptr<SortIteratorInterface>>& iters,
+    const std::string& fileName,
     const SortOptions& opts,
     const Comparator& comp) {
-    return new sorter::MergeIterator<Key, Value, Comparator>(iters, opts, comp);
+    return new sorter::MergeIterator<Key, Value, Comparator>(iters, fileName, opts, comp);
 }
 
 template <typename Key, typename Value>
@@ -927,11 +1030,11 @@ Sorter<Key, Value>* Sorter<Key, Value>::make(const SortOptions& opts,
                                              const Comparator& comp,
                                              const Settings& settings) {
     // This should be checked by consumers, but if it isn't try to fail early.
-    massert(16947,
+    uassert(16947,
             "Attempting to use external sort from mongos. This is not allowed.",
             !(isMongos() && opts.extSortAllowed));
 
-    massert(17149,
+    uassert(17149,
             "Attempting to use external sort without setting SortOptions::tempDir",
             !(opts.extSortAllowed && opts.tempDir.empty()));
 
diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h
index afbd459e057..0a2f3580f6c 100644
--- a/src/mongo/db/sorter/sorter.h
+++ b/src/mongo/db/sorter/sorter.h
@@ -86,24 +86,28 @@
  */
 
 namespace mongo {
-namespace sorter {
-// Everything in this namespace is internal to the sorter
-class FileDeleter;
-}
 
 /**
  * Runtime options that control the Sorter's behavior
  */
 struct SortOptions {
-    unsigned long long limit;    /// number of KV pairs to be returned. 0 for no limit.
-    size_t maxMemoryUsageBytes;  /// Approximate.
-    bool extSortAllowed;         /// If false, uassert if more mem needed than allowed.
-    std::string tempDir;         /// Directory to directly place files in.
-                                 /// Must be explicitly set if extSortAllowed is true.
+    // The number of KV pairs to be returned. 0 indicates no limit.
+    unsigned long long limit;
+
+    // When in-memory memory usage exceeds this value, we try to spill to disk. This is approximate.
+    size_t maxMemoryUsageBytes;
+
+    // Whether we are allowed to spill to disk. If this is false and in-memory exceeds
+    // maxMemoryUsageBytes, we will uassert.
+    bool extSortAllowed;
+
+    // Directory into which we place a file when spilling to disk. Must be explicitly set if
+    // extSortAllowed is true.
+    std::string tempDir;
 
     SortOptions() : limit(0), maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false) {}
 
-    /// Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true)
+    // Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true)
 
     SortOptions& Limit(unsigned long long newLimit) {
         limit = newLimit;
@@ -126,7 +130,9 @@ struct SortOptions {
     }
 };
 
-/// This is the output from the sorting framework
+/**
+ * This is the sorted output iterator from the sorting framework.
+ */
 template <typename Key, typename Value>
 class SortIteratorInterface {
     MONGO_DISALLOW_COPYING(SortIteratorInterface);
@@ -141,18 +147,37 @@ public:
 
     virtual ~SortIteratorInterface() {}
 
-    /// Returns an iterator that merges the passed in iterators
+    // Returns an iterator that merges the passed in iterators
     template <typename Comparator>
     static SortIteratorInterface* merge(
         const std::vector<std::shared_ptr<SortIteratorInterface>>& iters,
+        const std::string& fileName,
         const SortOptions& opts,
         const Comparator& comp);
 
+    // Opens and closes the source of data over which this class iterates, if applicable.
+    virtual void openSource() = 0;
+    virtual void closeSource() = 0;
+
 protected:
     SortIteratorInterface() {}  // can only be constructed as a base
 };
 
-/// This is the main way to input data to the sorting framework
+/**
+ * This is the way to input data to the sorting framework.
+ *
+ * Each instance of this class will generate a file name and spill sorted data ranges to that file
+ * if allowed in its given Settings. If the instance destructs before done() is called, it will
+ * handle deleting the data file used for spills. Otherwise, if done() is called, responsibility for
+ * file deletion moves to the returned Iterator object, which must then delete the file upon its own
+ * destruction.
+ *
+ * All users of Sorter implementations must define their own nextFileName() function to generate
+ * unique file names for spills to disk. This is necessary because the sorter.cpp file is separately
+ * directly included in multiple places, rather than compiled in one place and linked, and so cannot
+ * itself provide a globally unique ID for file names. See existing function implementations of
+ * nextFileName() for example.
+ */
 template <typename Key, typename Value>
 class Sorter {
     MONGO_DISALLOW_COPYING(Sorter);
@@ -170,19 +195,28 @@ public:
                         const Settings& settings = Settings());
 
     virtual void add(const Key&, const Value&) = 0;
-    virtual Iterator* done() = 0;  /// Can't add more data after calling done()
+
+    /**
+     * Cannot add more data after calling done().
+     *
+     * The returned Iterator must not outlive the Sorter instance, which manages file clean up.
+     */
+    virtual Iterator* done() = 0;
 
     virtual ~Sorter() {}
 
-    // TEMP these are here for compatibility. Will be replaced with a general stats API
-    virtual int numFiles() const = 0;
-    virtual size_t memUsed() const = 0;
+    bool usedDisk() {
+        return _usedDisk;
+    }
 
 protected:
     Sorter() {}  // can only be constructed as a base
 };
 
-/// Writes pre-sorted data to a sorted file and hands-back an Iterator over that file.
+/**
+ * Appends a pre-sorted range of data to a given file and hands back an Iterator over that file
+ * range.
+ */
 template <typename Key, typename Value>
 class SortedFileWriter {
     MONGO_DISALLOW_COPYING(SortedFileWriter);
@@ -193,19 +227,42 @@ public:
                       typename Value::SorterDeserializeSettings>
         Settings;
 
-    explicit SortedFileWriter(const SortOptions& opts, const Settings& settings = Settings());
+    explicit SortedFileWriter(const SortOptions& opts,
+                              const std::string& fileName,
+                              const unsigned int fileStartOffset,
+                              const Settings& settings = Settings());
 
     void addAlreadySorted(const Key&, const Value&);
-    Iterator* done();  /// Can't add more data after calling done()
+
+    /**
+     * Spills any data remaining in the buffer to disk and then closes the file to which data was
+     * written.
+     *
+     * No more data can be added via addAlreadySorted() after calling done().
+     */
+    Iterator* done();
+
+    /**
+     * Only call this after done() has been called to set the end offset.
+     */
+    unsigned int getFileEndOffset() {
+        invariant(!_file.is_open());
+        return _fileEndOffset;
+    }
 
 private:
     void spill();
 
     const Settings _settings;
     std::string _fileName;
-    std::shared_ptr<sorter::FileDeleter> _fileDeleter;  // Must outlive _file
     std::ofstream _file;
     BufBuilder _buffer;
+
+    // Tracks where in the file we started and finished writing the sorted data range so that the
+    // information can be given to the Iterator in done(), and to the user via getFileEndOffset()
+    // for the next SortedFileWriter instance using the same file.
+    unsigned int _fileStartOffset;
+    unsigned int _fileEndOffset;
 };
 }
 
@@ -229,6 +286,7 @@ private:
     template ::mongo::SortIteratorInterface<Key, Value>* ::mongo::                       \
         SortIteratorInterface<Key, Value>::merge<Comparator>(                            \
             const std::vector<std::shared_ptr<SortIteratorInterface>>& iters,            \
+            const std::string& fileName,                                                 \
             const SortOptions& opts,                                                     \
             const Comparator& comp);                                                     \
     template ::mongo::Sorter<Key, Value>* ::mongo::Sorter<Key, Value>::make<Comparator>( \
diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp
index a37311c58e1..fe34e993edd 100644
--- a/src/mongo/db/sorter/sorter_test.cpp
+++ b/src/mongo/db/sorter/sorter_test.cpp
@@ -44,12 +44,31 @@
 #include "mongo/unittest/unittest.h"
 #include "mongo/util/mongoutils/str.h"
 
+#include <memory>
+
+namespace mongo {
+
+/**
+ * Generates a new file name on each call using a static, atomic and monotonically increasing
+ * number.
+ *
+ * Each user of the Sorter must implement this function to ensure that all temporary files that the
+ * Sorter instances produce are uniquely identified using a unique file name extension with separate
+ * atomic variable. This is necessary because the sorter.cpp code is separately included in multiple
+ * places, rather than compiled in one place and linked, and so cannot provide a globally unique ID.
+ */
+std::string nextFileName() {
+    static AtomicUInt32 sorterTestFileCounter;
+    return "extsort-sorter-test." + std::to_string(sorterTestFileCounter.fetchAndAdd(1));
+}
+
+}  // namespace mongo
+
 // Need access to internal classes
 #include "mongo/db/sorter/sorter.cpp"
 
-#include <memory>
-
 namespace mongo {
+
 using namespace mongo::sorter;
 using std::make_shared;
 using std::pair;
@@ -108,6 +127,8 @@ class IntIterator : public IWIterator {
 public:
     IntIterator(int start = 0, int stop = INT_MAX, int increment = 1)
         : _current(start), _increment(increment), _stop(stop) {}
+    void openSource() {}
+    void closeSource() {}
     bool more() {
         if (_increment == 0)
             return true;
@@ -129,6 +150,8 @@ private:
 
 class EmptyIterator : public IWIterator {
 public:
+    void openSource() {}
+    void closeSource() {}
     bool more() {
         return false;
     }
@@ -144,6 +167,9 @@ public:
         verify(limit > 0);
     }
 
+    void openSource() {}
+    void closeSource() {}
+
     bool more() {
         return _remaining && _source->more();
     }
@@ -162,6 +188,8 @@ template <typename It1, typename It2>
 void _assertIteratorsEquivalent(It1 it1, It2 it2, int line) {
     int iteration;
     try {
+        it1->openSource();
+        it2->openSource();
         for (iteration = 0; true; iteration++) {
             ASSERT_EQUALS(it1->more(), it2->more());
             ASSERT_EQUALS(it1->more(), it2->more());  // make sure more() is safe to call twice
@@ -173,10 +201,13 @@ void _assertIteratorsEquivalent(It1 it1, It2 it2, int line) {
             ASSERT_EQUALS(pair1.first, pair2.first);
             ASSERT_EQUALS(pair1.second, pair2.second);
         }
-
+        it1->closeSource();
+        it2->closeSource();
     } catch (...) {
         mongo::unittest::log() << "Failure from line " << line << " on iteration " << iteration
                                << std::endl;
+        it1->closeSource();
+        it2->closeSource();
         throw;
     }
 }
@@ -194,10 +225,11 @@ template <typename IteratorPtr, int N>
 std::shared_ptr<IWIterator> mergeIterators(IteratorPtr (&array)[N],
                                            Direction Dir = ASC,
                                            const SortOptions& opts = SortOptions()) {
+    invariant(!opts.extSortAllowed);
     std::vector<std::shared_ptr<IWIterator>> vec;
     for (int i = 0; i < N; i++)
         vec.push_back(std::shared_ptr<IWIterator>(array[i]));
-    return std::shared_ptr<IWIterator>(IWIterator::merge(vec, opts, IWComparator(Dir)));
+    return std::shared_ptr<IWIterator>(IWIterator::merge(vec, "", opts, IWComparator(Dir)));
 }
 
 //
@@ -224,6 +256,8 @@ public:
             class UnsortedIter : public IWIterator {
             public:
                 UnsortedIter() : _pos(0) {}
+                void openSource() {}
+                void closeSource() {}
                 bool more() {
                     return _pos < sizeof(unsorted) / sizeof(unsorted[0]);
                 }
@@ -247,7 +281,8 @@ public:
         unittest::TempDir tempDir("sortedFileWriterTests");
         const SortOptions opts = SortOptions().TempDir(tempDir.path());
         {  // small
-            SortedFileWriter<IntWrapper, IntWrapper> sorter(opts);
+            std::string fileName = opts.tempDir + "/" + nextFileName();
+            SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, fileName, 0);
             sorter.addAlreadySorted(0, 0);
             sorter.addAlreadySorted(1, -1);
             sorter.addAlreadySorted(2, -2);
@@ -255,14 +290,19 @@ public:
             sorter.addAlreadySorted(4, -4);
             ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()),
                                         make_shared<IntIterator>(0, 5));
+
+            ASSERT_TRUE(boost::filesystem::remove(fileName));
         }
         {  // big
-            SortedFileWriter<IntWrapper, IntWrapper> sorter(opts);
+            std::string fileName = opts.tempDir + "/" + nextFileName();
+            SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, fileName, 0);
             for (int i = 0; i < 10 * 1000 * 1000; i++)
                 sorter.addAlreadySorted(i, -i);
 
             ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()),
                                         make_shared<IntIterator>(0, 10 * 1000 * 1000));
+
+            ASSERT_TRUE(boost::filesystem::remove(fileName));
         }
 
         ASSERT(boost::filesystem::is_empty(tempDir.path()));
@@ -276,7 +316,7 @@ public:
         {  // test empty (no inputs)
             std::vector<std::shared_ptr<IWIterator>> vec;
             std::shared_ptr<IWIterator> mergeIter(
-                IWIterator::merge(vec, SortOptions(), IWComparator()));
+                IWIterator::merge(vec, "", SortOptions(), IWComparator()));
             ASSERT_ITERATORS_EQUIVALENT(mergeIter, make_shared<EmptyIterator>());
         }
         {  // test empty (only empty inputs)
@@ -488,12 +528,6 @@ public:
     void addData(unowned_ptr<IWSorter> sorter) {
         for (int i = 0; i < NUM_ITEMS; i++)
             sorter->add(_array[i], -_array[i]);
-
-        if (typeid(*this) == typeid(LotsOfDataLittleMemory)) {
-            // don't do this check in subclasses since they may set a limit
-            ASSERT_GREATER_THAN_OR_EQUALS(static_cast<size_t>(sorter->numFiles()),
-                                          (NUM_ITEMS * sizeof(IWPair)) / MEM_LIMIT);
-        }
     }
 
     virtual std::shared_ptr<IWIterator> correct() {
author	Dianna Hohensee <dianna.hohensee@10gen.com>	2018-09-13 15:09:08 -0400
committer	Eric Milkie <milkie@10gen.com>	2019-02-25 12:48:03 -0500
commit	9491e6fb0ec2b38dd758d8acfe0e0aa11aa7bad9 (patch)
tree	2b7a3854e0009d3a1e9b8a47f1be6ffa546ca55a
parent	c1760b39b29dc412ca1c20361c500e18e90c71cd (diff)
download	mongo-9491e6fb0ec2b38dd758d8acfe0e0aa11aa7bad9.tar.gz