summaryrefslogtreecommitdiff
path: root/src/mongo/db/sorter/sorter.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/sorter/sorter.h')
-rw-r--r--src/mongo/db/sorter/sorter.h100
1 files changed, 76 insertions, 24 deletions
diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h
index 8a43c8f91b7..aed12ffdfe2 100644
--- a/src/mongo/db/sorter/sorter.h
+++ b/src/mongo/db/sorter/sorter.h
@@ -31,6 +31,7 @@
#include <third_party/murmurhash3/MurmurHash3.h>
+#include <boost/filesystem/path.hpp>
#include <deque>
#include <fstream>
#include <memory>
@@ -174,7 +175,6 @@ public:
template <typename Comparator>
static SortIteratorInterface* merge(
const std::vector<std::shared_ptr<SortIteratorInterface>>& iters,
- const std::string& fileName,
const SortOptions& opts,
const Comparator& comp);
@@ -213,30 +213,95 @@ public:
typename Value::SorterDeserializeSettings>
Settings;
+ /**
+ * Represents the file that a Sorter uses to spill to disk. Supports reading after writing (or
+ * reading without any writing), but does not support writing after any reading has been done.
+ */
+ class File {
+ public:
+ File(std::string path) : _path(std::move(path)) {
+ invariant(!_path.empty());
+ }
+
+ ~File();
+
+ const boost::filesystem::path& path() const {
+ return _path;
+ }
+
+ /**
+ * Signals that the on-disk file should not be cleaned up.
+ */
+ void keep() {
+ _keep = true;
+ };
+
+ /**
+ * Reads the requested data from the file. Cannot write more to the file once this has been
+ * called.
+ */
+ void read(std::streamoff offset, std::streamsize size, void* out);
+
+ /**
+ * Writes the given data to the end of the file. Cannot be called after reading.
+ */
+ void write(const char* data, std::streamsize size);
+
+ /**
+ * Returns the current offset of the end of the file. Cannot be called after reading.
+ */
+ std::streamoff currentOffset();
+
+ private:
+ void _open();
+
+ void _ensureOpenForWriting();
+
+ boost::filesystem::path _path;
+ std::fstream _file;
+
+ // The current offset of the end of the file, or -1 if the file either has not yet been
+ // opened or is already being read.
+ std::streamoff _offset = -1;
+
+ // Whether to keep the on-disk file even after this in-memory object has been destructed.
+ bool _keep = false;
+ };
+
+ explicit Sorter(const SortOptions& opts);
+
+ /**
+ * ExtSort-only constructor. fileName is the base name of a file in the temp directory.
+ */
+ Sorter(const SortOptions& opts, const std::string& fileName);
+
template <typename Comparator>
static Sorter* make(const SortOptions& opts,
const Comparator& comp,
const Settings& settings = Settings());
virtual void add(const Key&, const Value&) = 0;
-
/**
* Cannot add more data after calling done().
- *
- * The returned Iterator must not outlive the Sorter instance, which manages file clean up.
*/
virtual Iterator* done() = 0;
virtual ~Sorter() {}
bool usedDisk() const {
- return _usedDisk;
+ return !_iters.empty();
}
protected:
Sorter() {} // can only be constructed as a base
- bool _usedDisk{false}; // Keeps track of whether the sorter used disk or not
+ virtual void spill() = 0;
+
+ SortOptions _opts;
+
+ std::shared_ptr<File> _file;
+
+ std::vector<std::shared_ptr<Iterator>> _iters; // Data that has already been spilled.
};
/**
@@ -255,8 +320,7 @@ public:
Settings;
explicit SortedFileWriter(const SortOptions& opts,
- const std::string& fileName,
- const std::streampos fileStartOffset,
+ std::shared_ptr<typename Sorter<Key, Value>::File> file,
const Settings& settings = Settings());
void addAlreadySorted(const Key&, const Value&);
@@ -269,31 +333,20 @@ public:
*/
Iterator* done();
- /**
- * Only call this after done() has been called to set the end offset.
- */
- std::streampos getFileEndOffset() {
- invariant(!_file.is_open());
- return _fileEndOffset;
- }
-
private:
void spill();
const Settings _settings;
- std::string _fileName;
- std::ofstream _file;
+ std::shared_ptr<typename Sorter<Key, Value>::File> _file;
BufBuilder _buffer;
// Keeps track of the hash of all data objects spilled to disk. Passed to the FileIterator
// to ensure data has not been corrupted after reading from disk.
uint32_t _checksum = 0;
- // Tracks where in the file we started and finished writing the sorted data range so that the
- // information can be given to the Iterator in done(), and to the user via getFileEndOffset()
- // for the next SortedFileWriter instance using the same file.
- std::streampos _fileStartOffset;
- std::streampos _fileEndOffset;
+ // Tracks where in the file we started writing the sorted data range so that the information can
+ // be given to the Iterator in done().
+ std::streamoff _fileStartOffset;
};
} // namespace mongo
@@ -317,7 +370,6 @@ private:
template ::mongo::SortIteratorInterface<Key, Value>* ::mongo:: \
SortIteratorInterface<Key, Value>::merge<Comparator>( \
const std::vector<std::shared_ptr<SortIteratorInterface>>& iters, \
- const std::string& fileName, \
const SortOptions& opts, \
const Comparator& comp); \
template ::mongo::Sorter<Key, Value>* ::mongo::Sorter<Key, Value>::make<Comparator>( \