diff options
Diffstat (limited to 'src/mongo/db')
21 files changed, 414 insertions, 122 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index b2368e49830..992201118d8 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -84,6 +84,7 @@ sortExecutorEnv.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', ], ) diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index 859cf09da2f..9805906a59c 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -61,6 +61,7 @@ sbeEnv.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/bson/dotted_path_support', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', ], ) @@ -107,6 +108,7 @@ sbeEnv.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/bson/dotted_path_support', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', 'query_sbe', 'query_sbe_storage', ], diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp index 0968b0bea68..6181229ff7e 100644 --- a/src/mongo/db/exec/sbe/stages/sort.cpp +++ b/src/mongo/db/exec/sbe/stages/sort.cpp @@ -215,11 +215,11 @@ void SortStage::open(bool reOpen) { _specificStats.totalDataSizeBytes += _sorter->totalDataSizeSorted(); _mergeIt.reset(_sorter->done()); - _specificStats.spills += _sorter->numSpills(); + _specificStats.spills += _sorter->stats().spilledRanges(); _specificStats.keysSorted += _sorter->numSorted(); auto& metricsCollector = ResourceConsumption::MetricsCollector::get(_opCtx); metricsCollector.incrementKeysSorted(_sorter->numSorted()); - metricsCollector.incrementSorterSpills(_sorter->numSpills()); + metricsCollector.incrementSorterSpills(_sorter->stats().spilledRanges()); _children[0]->close(); } diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h index 6509c767e9a..8120f52caa9 100644 --- a/src/mongo/db/exec/sort_executor.h +++ b/src/mongo/db/exec/sort_executor.h @@ -139,7 +139,7 @@ public: } _output.reset(_sorter->done()); _stats.keysSorted += _sorter->numSorted(); - _stats.spills += _sorter->numSpills(); + _stats.spills += _sorter->stats().spilledRanges(); _stats.totalDataSizeBytes += _sorter->totalDataSizeSorted(); _sorter.reset(); } diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index f71d78da857..8ddebaefe07 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -105,6 +105,7 @@ iamEnv.Library( '$BUILD_DIR/mongo/db/resumable_index_builds_idl', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/db/storage/encryption_hooks', '$BUILD_DIR/mongo/db/storage/execution_context', '$BUILD_DIR/mongo/db/storage/index_entry_comparison', @@ -128,6 +129,7 @@ iamEnv.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/base', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/db/storage/encryption_hooks', '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/s/is_mongos', @@ -188,6 +190,7 @@ indexTestEnv.CppUnitTest( '$BUILD_DIR/mongo/db/query/query_test_service_context', '$BUILD_DIR/mongo/db/query/sort_pattern', '$BUILD_DIR/mongo/db/record_id_helpers', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', 'column_store_index', 'expression_params', 'key_generator', diff --git a/src/mongo/db/index/column_store_sorter.cpp b/src/mongo/db/index/column_store_sorter.cpp index ef82ad9bf13..af2c3c1411f 100644 --- a/src/mongo/db/index/column_store_sorter.cpp +++ b/src/mongo/db/index/column_store_sorter.cpp @@ -76,11 +76,13 @@ ColumnStoreSorter::Value ColumnStoreSorter::Value::deserializeForSorter( ColumnStoreSorter::ColumnStoreSorter(size_t maxMemoryUsageBytes, StringData dbName, - SorterFileStats* stats) - : _dbName(dbName.toString()), - _stats(stats), + SorterFileStats* stats, + SorterTracker* tracker) + : SorterBase(tracker), + _dbName(dbName.toString()), + _fileStats(stats), _maxMemoryUsageBytes(maxMemoryUsageBytes), - _spillFile(std::make_shared<Sorter<Key, Value>::File>(pathForNewSpillFile(), _stats)) {} + _spillFile(std::make_shared<Sorter<Key, Value>::File>(pathForNewSpillFile(), _fileStats)) {} void ColumnStoreSorter::add(PathView path, const RecordId& recordId, CellView cellContents) { auto& cellListAtPath = _dataByPath[path]; @@ -123,9 +125,9 @@ void ColumnStoreSorter::spill() { if (_dataByPath.empty()) { return; } - ++_numSpills; + this->_stats.incrementSpilledRanges(); - SortedFileWriter<Key, Value> writer(makeSortOptions(_dbName, _stats), _spillFile, {}); + SortedFileWriter<Key, Value> writer(makeSortOptions(_dbName, _fileStats), _spillFile, {}); // Cells loaded into memory are sorted by record id but not yet sorted by path. We perform that // sort now, so that we can output cells sorted by (path, rid) for later consumption by our @@ -212,7 +214,7 @@ ColumnStoreSorter::Iterator* ColumnStoreSorter::done() { spill(); return SortIteratorInterface<Key, Value>::merge( - _spilledFileIterators, makeSortOptions(_dbName, _stats), ComparisonForPathAndRid()); + _spilledFileIterators, makeSortOptions(_dbName, _fileStats), ComparisonForPathAndRid()); } /** diff --git a/src/mongo/db/index/column_store_sorter.h b/src/mongo/db/index/column_store_sorter.h index 56f203c5f64..9eed5e33538 100644 --- a/src/mongo/db/index/column_store_sorter.h +++ b/src/mongo/db/index/column_store_sorter.h @@ -50,16 +50,15 @@ namespace mongo { * preferable to defer the cost of sorting to the end in order to avoid the cost of a binary tree * traversal for each inserted cell. */ -class ColumnStoreSorter { +class ColumnStoreSorter : public SorterBase { public: - ColumnStoreSorter(size_t maxMemoryUsageBytes, StringData dbName, SorterFileStats* stats); + ColumnStoreSorter(size_t maxMemoryUsageBytes, + StringData dbName, + SorterFileStats* stats, + SorterTracker* tracker = nullptr); void add(PathView path, const RecordId& recordId, CellView cellContents); - size_t numSpills() const { - return _numSpills; - } - struct Key { PathView path; RecordId recordId; @@ -114,11 +113,10 @@ private: Iterator* inMemoryIterator() const; const std::string _dbName; - SorterFileStats* _stats; // Unowned + SorterFileStats* _fileStats; // Unowned const size_t _maxMemoryUsageBytes; size_t _memUsed = 0; - size_t _numSpills = 0; /** * Mapping from path name to the sorted list of (RecordId, Cell) pairs. diff --git a/src/mongo/db/index/column_store_sorter_test.cpp b/src/mongo/db/index/column_store_sorter_test.cpp index 85c1005efad..57534ed4b78 100644 --- a/src/mongo/db/index/column_store_sorter_test.cpp +++ b/src/mongo/db/index/column_store_sorter_test.cpp @@ -69,11 +69,11 @@ TEST(ColumnStoreSorter, SortTest) { // We test two sorters: one that can perform the sort in memory and one that is constrained so // that it must spill to disk. - SorterFileStats statsForInMemorySorter; + SorterFileStats statsForInMemorySorter(nullptr); auto inMemorySorter = std::make_unique<ColumnStoreSorter>( 1000000 /* maxMemoryUsageBytes */, "dbName", &statsForInMemorySorter); - SorterFileStats statsForExternalSorter; + SorterFileStats statsForExternalSorter(nullptr); auto externalSorter = std::make_unique<ColumnStoreSorter>( 500 /* maxMemoryUsageBytes */, "dbName", &statsForExternalSorter); @@ -122,9 +122,9 @@ TEST(ColumnStoreSorter, SortTest) { // Ensure that statistics for spills and file accesses are as expected. // Note: The number of spills in the external sorter depends on the size of C++ data structures, // which can be different between architectures. The test allows a range of reasonable values. - ASSERT_EQ(0, inMemorySorter->numSpills()); - ASSERT_LTE(3, externalSorter->numSpills()); - ASSERT_GTE(5, externalSorter->numSpills()); + ASSERT_EQ(0, inMemorySorter->stats().spilledRanges()); + ASSERT_LTE(3, externalSorter->stats().spilledRanges()); + ASSERT_GTE(5, externalSorter->stats().spilledRanges()); ASSERT_EQ(0, statsForInMemorySorter.opened.load()); ASSERT_EQ(0, statsForInMemorySorter.closed.load()); diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp index fca25a83669..b29ec20967a 100644 --- a/src/mongo/db/index/columns_access_method.cpp +++ b/src/mongo/db/index/columns_access_method.cpp @@ -108,7 +108,8 @@ private: ColumnStoreAccessMethod::BulkBuilder::BulkBuilder(ColumnStoreAccessMethod* index, size_t maxMemoryUsageBytes, StringData dbName) - : _columnsAccess(index), _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats()) { + : _columnsAccess(index), + _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats(), bulkBuilderTracker()) { countNewBuildInStats(); } @@ -116,7 +117,8 @@ ColumnStoreAccessMethod::BulkBuilder::BulkBuilder(ColumnStoreAccessMethod* index size_t maxMemoryUsageBytes, const IndexStateInfo& stateInfo, StringData dbName) - : _columnsAccess(index), _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats()) { + : _columnsAccess(index), + _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats(), bulkBuilderTracker()) { countResumedBuildInStats(); // TODO SERVER-66925: Add this support. tasserted(6548103, "No support for resuming interrupted columnstore index builds."); diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index f9cd8f7c7c2..bd12a9add26 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -99,6 +99,8 @@ public: builder.append("resumed", resumed.loadRelaxed()); builder.append("filesOpenedForExternalSort", sorterFileStats.opened.loadRelaxed()); builder.append("filesClosedForExternalSort", sorterFileStats.closed.loadRelaxed()); + builder.append("spilledRanges", sorterTracker.spilledRanges.loadRelaxed()); + builder.append("bytesSpilled", sorterTracker.bytesSpilled.loadRelaxed()); return builder.obj(); } @@ -109,11 +111,15 @@ public: // This value should not exceed 'count'. AtomicWord<long long> resumed; + // Sorter statistics that are aggregate of all sorters. + SorterTracker sorterTracker; + // Number of times the external sorter opened/closed a file handle to spill data to disk. // This pair of counters in aggregate indicate the number of open file handles used by // the external sorter and may be useful in diagnosing situations where the process is // close to exhausting this finite resource. - SorterFileStats sorterFileStats; + SorterFileStats sorterFileStats = {&sorterTracker}; + } indexBulkBuilderSSS; /** @@ -133,6 +139,7 @@ SortOptions makeSortOptions(size_t maxMemoryUsageBytes, StringData dbName, Sorte .ExtSortAllowed() .MaxMemoryUsageBytes(maxMemoryUsageBytes) .FileStats(stats) + .Tracker(&indexBulkBuilderSSS.sorterTracker) .DBName(dbName.toString()); } @@ -622,6 +629,10 @@ SorterFileStats* IndexAccessMethod::BulkBuilder::bulkBuilderFileStats() { return &indexBulkBuilderSSS.sorterFileStats; } +SorterTracker* IndexAccessMethod::BulkBuilder::bulkBuilderTracker() { + return &indexBulkBuilderSSS.sorterTracker; +} + void IndexAccessMethod::BulkBuilder::yield(OperationContext* opCtx, const Yieldable* yieldable, const NamespaceString& ns) { diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 5136faed942..7190e31131a 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -225,6 +225,7 @@ public: static void countNewBuildInStats(); static void countResumedBuildInStats(); static SorterFileStats* bulkBuilderFileStats(); + static SorterTracker* bulkBuilderTracker(); /** * Abandon the current snapshot and release then reacquire locks. Tests that target the diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index ee68344f22c..169a1e816bd 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -392,6 +392,7 @@ pipelineEnv.Library( '$BUILD_DIR/mongo/db/query/projection_ast', '$BUILD_DIR/mongo/db/repl/image_collection_entry', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util', '$BUILD_DIR/mongo/db/timeseries/timeseries_options', '$BUILD_DIR/mongo/rpc/command_status', diff --git a/src/mongo/db/pipeline/document_source_bucket_auto.cpp b/src/mongo/db/pipeline/document_source_bucket_auto.cpp index a2d00ad2472..bb16662791f 100644 --- a/src/mongo/db/pipeline/document_source_bucket_auto.cpp +++ b/src/mongo/db/pipeline/document_source_bucket_auto.cpp @@ -221,7 +221,7 @@ void DocumentSourceBucketAuto::initializeBucketIteration() { auto& metricsCollector = ResourceConsumption::MetricsCollector::get(pExpCtx->opCtx); metricsCollector.incrementKeysSorted(_sorter->numSorted()); - metricsCollector.incrementSorterSpills(_sorter->numSpills()); + metricsCollector.incrementSorterSpills(_sorter->stats().spilledRanges()); _sorter.reset(); diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 62436bffa84..ce416165e66 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -300,8 +300,8 @@ void DocumentSourceSort::serializeToArray( if (explain >= ExplainOptions::Verbosity::kExecStats) { mutDoc["totalDataSizeSortedBytesEstimate"] = Value(static_cast<long long>(_timeSorter->totalDataSizeBytes())); - mutDoc["usedDisk"] = Value(_timeSorter->numSpills() > 0); - mutDoc["spills"] = Value(static_cast<long long>(_timeSorter->numSpills())); + mutDoc["usedDisk"] = Value(_timeSorter->stats().spilledRanges() > 0); + mutDoc["spills"] = Value(static_cast<long long>(_timeSorter->stats().spilledRanges())); } array.push_back(Value{mutDoc.freeze()}); diff --git a/src/mongo/db/sorter/SConscript b/src/mongo/db/sorter/SConscript index fc796b9319f..7e9be557984 100644 --- a/src/mongo/db/sorter/SConscript +++ b/src/mongo/db/sorter/SConscript @@ -9,6 +9,7 @@ sorterEnv.CppUnitTest( target='db_sorter_test', source=[ 'sorter_test.cpp', + 'sorter_stats_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/db/exec/document_value/document_value', @@ -18,9 +19,16 @@ sorterEnv.CppUnitTest( '$BUILD_DIR/mongo/s/is_mongos', '$BUILD_DIR/third_party/shim_snappy', 'sorter_idl', + 'sorter_stats', ], ) +sorterEnv.Library(target='sorter_stats', source=[ + 'sorter_stats.cpp', +], LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/commands/server_status_core', +]) + env.Library( target='sorter_idl', source=[ @@ -28,6 +36,7 @@ env.Library( ], LIBDEPS=[ "$BUILD_DIR/mongo/base", + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/idl/idl_parser', ], ) diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp index ecb9e10118e..ec6a0a0e4cf 100644 --- a/src/mongo/db/sorter/sorter.cpp +++ b/src/mongo/db/sorter/sorter.cpp @@ -597,7 +597,6 @@ protected: * following: * * {1, 2, 3, 4, 5} - * {12, 3, 4, 5} * {12, 34, 5} * {1234, 5} */ @@ -647,7 +646,7 @@ protected: auto iteratorPtr = std::shared_ptr<Iterator>(writer.done()); mergeIterator->closeSource(); mergedIterators.push_back(std::move(iteratorPtr)); - this->_numSpills++; + this->_stats.incrementSpilledRanges(); } LOGV2_DEBUG(6033101, @@ -708,7 +707,7 @@ public: this->_opts.dbName, range.getChecksum()); }); - this->_numSpills = this->_iters.size(); + this->_stats.setSpilledRanges(this->_iters.size()); } void add(const Key& key, const Value& val) { @@ -800,7 +799,7 @@ private: _memUsed = 0; - this->_numSpills++; + this->_stats.incrementSpilledRanges(); } bool _done = false; @@ -817,7 +816,7 @@ public: typedef SortIteratorInterface<Key, Value> Iterator; LimitOneSorter(const SortOptions& opts, const Comparator& comp) - : _comp(comp), _haveData(false) { + : Sorter<Key, Value>(opts), _comp(comp), _haveData(false) { verify(opts.limit == 1); } @@ -1084,7 +1083,7 @@ private: _memUsed = 0; - this->_numSpills++; + this->_stats.incrementSpilledRanges(); } bool _done = false; @@ -1106,14 +1105,16 @@ private: template <typename Key, typename Value> Sorter<Key, Value>::Sorter(const SortOptions& opts) - : _opts(opts), + : SorterBase(opts.sorterTracker), + _opts(opts), _file(opts.extSortAllowed ? std::make_shared<Sorter<Key, Value>::File>( opts.tempDir + "/" + nextFileName(), opts.sorterFileStats) : nullptr) {} template <typename Key, typename Value> Sorter<Key, Value>::Sorter(const SortOptions& opts, const std::string& fileName) - : _opts(opts), + : SorterBase(opts.sorterTracker), + _opts(opts), _file(std::make_shared<Sorter<Key, Value>::File>(opts.tempDir + "/" + fileName, opts.sorterFileStats)) { invariant(opts.extSortAllowed); @@ -1136,6 +1137,15 @@ typename Sorter<Key, Value>::PersistedState Sorter<Key, Value>::persistDataForSh } template <typename Key, typename Value> +Sorter<Key, Value>::File::File(std::string path, SorterFileStats* stats) + : _path(std::move(path)), _stats(stats) { + invariant(!_path.empty()); + if (_stats && boost::filesystem::exists(_path) && boost::filesystem::is_regular_file(_path)) { + _stats->addSpilledDataSize(boost::filesystem::file_size(_path)); + } +} + +template <typename Key, typename Value> Sorter<Key, Value>::File::~File() { if (_stats && _file.is_open()) { _stats->closed.addAndFetch(1); @@ -1196,6 +1206,9 @@ void Sorter<Key, Value>::File::write(const char* data, std::streamsize size) { try { _file.write(data, size); _offset += size; + if (_stats) { + this->_stats->addSpilledDataSize(size); + }; } catch (const std::system_error& ex) { if (ex.code() == std::errc::no_space_on_device) { uasserted(ErrorCodes::OutOfDiskSpace, @@ -1351,7 +1364,8 @@ BoundedSorter<Key, Value, Comparator, BoundMaker>::BoundedSorter(const SortOptio Comparator comp, BoundMaker makeBound, bool checkInput) - : compare(comp), + : BoundedSorterInterface<Key, Value>(opts), + compare(comp), makeBound(makeBound), _comparePairs{compare}, _checkInput(checkInput), @@ -1509,7 +1523,7 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() { << " bytes, but did not opt in to external sorting.", _opts.extSortAllowed); - ++_numSpills; + this->_stats.incrementSpilledRanges(); // Write out all the values from the heap in sorted order. SortedFileWriter<Key, Value> writer(_opts, _file, {}); diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h index 85c0f279ac2..159ac89ce4a 100644 --- a/src/mongo/db/sorter/sorter.h +++ b/src/mongo/db/sorter/sorter.h @@ -43,6 +43,7 @@ #include "mongo/bson/util/builder.h" #include "mongo/db/exec/document_value/document.h" #include "mongo/db/sorter/sorter_gen.h" +#include "mongo/db/sorter/sorter_stats.h" #include "mongo/platform/atomic_word.h" #include "mongo/util/assert_util.h" #include "mongo/util/bufreader.h" @@ -96,14 +97,6 @@ namespace mongo { /** - * For collecting file usage metrics. - */ -struct SorterFileStats { - AtomicWord<long long> opened; - AtomicWord<long long> closed; -}; - -/** * Runtime options that control the Sorter's behavior */ struct SortOptions { @@ -130,6 +123,9 @@ struct SortOptions { // If set, allows us to observe Sorter file handle usage. SorterFileStats* sorterFileStats; + // If set, allows us to observe aggregate Sorter behaviors. + SorterTracker* sorterTracker; + // If set to true and sorted data fits into memory, sorted data will be moved into iterator // instead of copying. bool moveSortedDataIntoIterator; @@ -139,6 +135,7 @@ struct SortOptions { maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false), sorterFileStats(nullptr), + sorterTracker(nullptr), moveSortedDataIntoIterator(false) {} // Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true) @@ -173,6 +170,11 @@ struct SortOptions { return *this; } + SortOptions& Tracker(SorterTracker* newSorterTracker) { + sorterTracker = newSorterTracker; + return *this; + } + SortOptions& MoveSortedDataIntoIterator(bool newMoveSortedDataIntoIterator = true) { moveSortedDataIntoIterator = newMoveSortedDataIntoIterator; return *this; @@ -241,6 +243,18 @@ protected: SortIteratorInterface() {} // can only be constructed as a base }; +class SorterBase { +public: + SorterBase(SorterTracker* sorterTracker = nullptr) : _stats(sorterTracker) {} + + const SorterStats& stats() const { + return _stats; + } + +protected: + SorterStats _stats; +}; + /** * This is the way to input data to the sorting framework. * @@ -257,7 +271,7 @@ protected: * nextFileName() for example. */ template <typename Key, typename Value> -class Sorter { +class Sorter : public SorterBase { Sorter(const Sorter&) = delete; Sorter& operator=(const Sorter&) = delete; @@ -279,10 +293,7 @@ public: */ class File { public: - File(std::string path, SorterFileStats* stats = nullptr) - : _path(std::move(path)), _stats(stats) { - invariant(!_path.empty()); - } + File(std::string path, SorterFileStats* stats = nullptr); ~File(); @@ -365,10 +376,6 @@ public: virtual ~Sorter() {} - size_t numSpills() const { - return _numSpills; - } - size_t numSorted() const { return _numSorted; } @@ -380,8 +387,6 @@ public: PersistedState persistDataForShutdown(); protected: - Sorter() {} // can only be constructed as a base - virtual void spill() = 0; size_t _numSorted = 0; // Keeps track of the number of keys sorted. @@ -391,14 +396,16 @@ protected: std::shared_ptr<File> _file; - std::size_t _numSpills = 0; // Keeps track of the number of spills that have happened. std::vector<std::shared_ptr<Iterator>> _iters; // Data that has already been spilled. }; template <typename Key, typename Value> -class BoundedSorterInterface { +class BoundedSorterInterface : public SorterBase { + public: + BoundedSorterInterface(const SortOptions& opts) : SorterBase(opts.sorterTracker) {} + virtual ~BoundedSorterInterface() {} // Feed one item of input to the sorter. @@ -439,7 +446,6 @@ public: virtual Document serializeBound() const = 0; virtual size_t totalDataSizeBytes() const = 0; - virtual size_t numSpills() const = 0; virtual size_t limit() const = 0; // By default, uassert that the input meets our assumptions of being almost-sorted. @@ -525,10 +531,6 @@ public: return _totalDataSizeSorted; } - size_t numSpills() const { - return _numSpills; - } - size_t limit() const { return _opts.limit; } @@ -563,7 +565,6 @@ private: std::shared_ptr<typename Sorter<Key, Value>::File> _file; std::shared_ptr<SpillIterator> _spillIter; - std::size_t _numSpills = 0; // Keeps track of the number of spills that have happened. boost::optional<Key> _min; bool _done = false; diff --git a/src/mongo/db/sorter/sorter_stats.cpp b/src/mongo/db/sorter/sorter_stats.cpp new file mode 100644 index 00000000000..ec68b02efcd --- /dev/null +++ b/src/mongo/db/sorter/sorter_stats.cpp @@ -0,0 +1,59 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/sorter/sorter_stats.h" + +#include "mongo/util/assert_util_core.h" + +namespace mongo { +SorterStats::SorterStats(SorterTracker* sorterTracker) : _sorterTracker(sorterTracker){}; + +void SorterStats::incrementSpilledRanges() { + _spilledRanges++; + if (_sorterTracker) { + _sorterTracker->spilledRanges.fetchAndAdd(1); + } +} + +void SorterStats::setSpilledRanges(long long spills) { + invariant(_spilledRanges == 0); + _spilledRanges = spills; + if (_sorterTracker) { + _sorterTracker->spilledRanges.fetchAndAdd(spills); + } +} + +SorterFileStats::SorterFileStats(SorterTracker* sorterTracker) : _sorterTracker(sorterTracker){}; + +void SorterFileStats::addSpilledDataSize(long long data) { + if (_sorterTracker) { + _sorterTracker->bytesSpilled.fetchAndAdd(data); + } +} +} // namespace mongo diff --git a/src/mongo/db/sorter/sorter_stats.h b/src/mongo/db/sorter/sorter_stats.h new file mode 100644 index 00000000000..05bfa8998d4 --- /dev/null +++ b/src/mongo/db/sorter/sorter_stats.h @@ -0,0 +1,78 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/atomic_word.h" + +namespace mongo { + +struct SorterTracker { + AtomicWord<long long> spilledRanges; + AtomicWord<long long> bytesSpilled; +}; + +/** + * For collecting file usage metrics. + */ +class SorterFileStats { +public: + SorterFileStats(SorterTracker* sorterTracker); + + void addSpilledDataSize(long long data); + + AtomicWord<long long> opened; + AtomicWord<long long> closed; + +private: + SorterTracker* _sorterTracker; +}; + +class SorterStats { +public: + SorterStats(SorterTracker* sorterTracker); + + void incrementSpilledRanges(); + + /** + * Sets the number of spilled ranges to the specified amount. Cannot be called after + * incrementSpilledRanges. + */ + void setSpilledRanges(long long spills); + + long long spilledRanges() const { + return _spilledRanges; + } + +private: + long long _spilledRanges = 0; + + // All SorterStats update the SorterTracker to report sorter statistics for the + // server. + SorterTracker* _sorterTracker; +}; +} // namespace mongo diff --git a/src/mongo/db/sorter/sorter_stats_test.cpp b/src/mongo/db/sorter/sorter_stats_test.cpp new file mode 100644 index 00000000000..e94d9d4efb8 --- /dev/null +++ b/src/mongo/db/sorter/sorter_stats_test.cpp @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/sorter/sorter_stats.h" +#include "mongo/unittest/death_test.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace { +TEST(SorterStatsTest, Basic) { + SorterTracker sorterTracker; + SorterStats sorterStats(&sorterTracker); + + sorterStats.incrementSpilledRanges(); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 1); +} + +TEST(SorterStatsTest, MultipleSorters) { + SorterTracker sorterTracker; + SorterStats sorterStats1(&sorterTracker); + SorterStats sorterStats2(&sorterTracker); + SorterStats sorterStats3(&sorterTracker); + + sorterStats1.incrementSpilledRanges(); + sorterStats2.incrementSpilledRanges(); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 2); + + sorterStats3.setSpilledRanges(10); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 12); +} + +DEATH_TEST(SorterStatsTest, SetNonZeroNumSpilledRanges, "invariant") { + SorterTracker sorterTracker; + SorterStats sorterStats(&sorterTracker); + + sorterStats.incrementSpilledRanges(); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 1); + + sorterStats.setSpilledRanges(10); +} +} // namespace +} // namespace mongo diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp index aa5a8dc4078..0394e1f0fa0 100644 --- a/src/mongo/db/sorter/sorter_test.cpp +++ b/src/mongo/db/sorter/sorter_test.cpp @@ -331,41 +331,55 @@ class SortedFileWriterAndFileIteratorTests { public: void run() { unittest::TempDir tempDir("sortedFileWriterTests"); - SorterFileStats sorterFileStats; + SorterTracker sorterTracker; + SorterFileStats sorterFileStats(&sorterTracker); const SortOptions opts = SortOptions().TempDir(tempDir.path()).FileStats(&sorterFileStats); - auto makeFile = [&] { - return std::make_shared<Sorter<IntWrapper, IntWrapper>::File>( - opts.tempDir + "/" + nextFileName(), opts.sorterFileStats); - }; - { // small - SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, makeFile()); - sorter.addAlreadySorted(0, 0); - sorter.addAlreadySorted(1, -1); - sorter.addAlreadySorted(2, -2); - sorter.addAlreadySorted(3, -3); - sorter.addAlreadySorted(4, -4); - ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()), - std::make_shared<IntIterator>(0, 5)); - } + int currentFileSize = 0; + + // small + currentFileSize = _appendToFile(&opts, currentFileSize, 5); ASSERT_EQ(sorterFileStats.opened.load(), 1); ASSERT_EQ(sorterFileStats.closed.load(), 1); + ASSERT_LTE(sorterTracker.bytesSpilled.load(), currentFileSize); - { // big - SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, makeFile()); - for (int i = 0; i < 10 * 1000 * 1000; i++) - sorter.addAlreadySorted(i, -i); - - ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()), - std::make_shared<IntIterator>(0, 10 * 1000 * 1000)); - } + // big + currentFileSize = _appendToFile(&opts, currentFileSize, 10 * 1000 * 1000); ASSERT_EQ(sorterFileStats.opened.load(), 2); ASSERT_EQ(sorterFileStats.closed.load(), 2); + ASSERT_LTE(sorterTracker.bytesSpilled.load(), currentFileSize); ASSERT(boost::filesystem::is_empty(tempDir.path())); } + +private: + int _appendToFile(const SortOptions* opts, int currentFileSize, int range) { + auto makeFile = [&] { + return std::make_shared<Sorter<IntWrapper, IntWrapper>::File>( + opts->tempDir + "/" + nextFileName(), opts->sorterFileStats); + }; + + int currentBufSize = 0; + SortedFileWriter<IntWrapper, IntWrapper> sorter(*opts, makeFile()); + for (int i = 0; i < range; ++i) { + sorter.addAlreadySorted(i, -i); + currentBufSize += sizeof(i) + sizeof(-i); + + if (currentBufSize > static_cast<int>(kSortedFileBufferSize)) { + // File size only increases if buffer size exceeds limit and spills. Each spill + // includes the buffer and the size of the spill. + currentFileSize += currentBufSize + sizeof(uint32_t); + currentBufSize = 0; + } + } + ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()), + std::make_shared<IntIterator>(0, range)); + // Anything left in-memory is spilled to disk when sorter.done(). + currentFileSize += currentBufSize + sizeof(uint32_t); + return currentFileSize; + } }; @@ -448,7 +462,9 @@ public: void run() { unittest::TempDir tempDir("sorterTests"); - const SortOptions opts = SortOptions().TempDir(tempDir.path()).ExtSortAllowed(); + SorterTracker sorterTracker; + const SortOptions opts = + SortOptions().TempDir(tempDir.path()).ExtSortAllowed().Tracker(&sorterTracker); { // test empty (no limit) ASSERT_ITERATORS_EQUIVALENT(done(makeSorter(opts).get()), @@ -582,7 +598,7 @@ public: return 0; } - virtual size_t correctNumSpills() const { + virtual size_t correctSpilledRanges() const { return 0; } @@ -606,13 +622,13 @@ private: if (numRanges == 0) return; - auto numSpillsOccurred = correctNumSpills(); + auto numSpilledRangesOccurred = correctSpilledRanges(); auto state = sorter->persistDataForShutdown(); if (opts.extSortAllowed) { ASSERT_NE(state.fileName, ""); } ASSERT_EQ(state.ranges.size(), numRanges); - ASSERT_EQ(sorter->numSpills(), numSpillsOccurred); + ASSERT_EQ(sorter->stats().spilledRanges(), numSpilledRangesOccurred); } }; @@ -712,7 +728,7 @@ public: static_cast<std::size_t>(2)); } - size_t correctNumSpills() const override { + size_t correctSpilledRanges() const override { // We add 1 to the calculation since the call to persistDataForShutdown() spills the // remaining in-memory Sorter data to disk, adding one extra range. std::size_t spillsToMerge = NUM_ITEMS * sizeof(IWPair) / MEM_LIMIT + 1; @@ -857,11 +873,12 @@ DEATH_TEST_F(SorterMakeFromExistingRangesTest, EmptyFileName, "!fileName.empty() TEST_F(SorterMakeFromExistingRangesTest, SkipFileCheckingOnEmptyRanges) { auto fileName = "unused_sorter_file"; - auto opts = SortOptions().ExtSortAllowed().TempDir("unused_temp_dir"); + SorterTracker sorterTracker; + auto opts = SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").Tracker(&sorterTracker); auto sorter = std::unique_ptr<IWSorter>( IWSorter::makeFromExistingRanges(fileName, {}, opts, IWComparator(ASC))); - ASSERT_EQ(0, sorter->numSpills()); + ASSERT_EQ(0, sorter->stats().spilledRanges()); auto iter = std::unique_ptr<IWIterator>(sorter->done()); ASSERT_EQ(0, sorter->numSorted()); @@ -907,12 +924,13 @@ TEST_F(SorterMakeFromExistingRangesTest, CorruptedFile) { ofs << "invalid sorter data"; } auto fileName = tempFilePath.filename().string(); - auto opts = SortOptions().ExtSortAllowed().TempDir(tempDir.path()); + SorterTracker sorterTracker; + auto opts = SortOptions().ExtSortAllowed().TempDir(tempDir.path()).Tracker(&sorterTracker); auto sorter = std::unique_ptr<IWSorter>( IWSorter::makeFromExistingRanges(fileName, makeSampleRanges(), opts, IWComparator(ASC))); // The number of spills is set when NoLimitSorter is constructed from existing ranges. - ASSERT_EQ(makeSampleRanges().size(), sorter->numSpills()); + ASSERT_EQ(makeSampleRanges().size(), sorter->stats().spilledRanges()); ASSERT_EQ(0, sorter->numSorted()); // 16817 - error reading file. @@ -921,11 +939,13 @@ TEST_F(SorterMakeFromExistingRangesTest, CorruptedFile) { TEST_F(SorterMakeFromExistingRangesTest, RoundTrip) { unittest::TempDir tempDir(_agent.getSuiteName() + "_" + _agent.getTestName()); + SorterTracker sorterTracker; auto opts = SortOptions() .ExtSortAllowed() .TempDir(tempDir.path()) - .MaxMemoryUsageBytes(sizeof(IWSorter::Data)); + .MaxMemoryUsageBytes(sizeof(IWSorter::Data)) + .Tracker(&sorterTracker); IWPair pairInsertedBeforeShutdown(1, 100); @@ -949,7 +969,7 @@ TEST_F(SorterMakeFromExistingRangesTest, RoundTrip) { IWSorter::makeFromExistingRanges(state.fileName, state.ranges, opts, IWComparator(ASC))); // The number of spills is set when NoLimitSorter is constructed from existing ranges. - ASSERT_EQ(state.ranges.size(), sorter->numSpills()); + ASSERT_EQ(state.ranges.size(), sorter->stats().spilledRanges()); // Ensure that the restored sorter can accept additional data. IWPair pairInsertedAfterStartup(2, 200); @@ -1188,8 +1208,12 @@ TEST_F(BoundedSorterTest, MemoryLimitsNoExtSortAllowed) { } TEST_F(BoundedSorterTest, SpillSorted) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(16); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(16) + .Tracker(&sorterTracker); sorter = makeAsc(options); auto output = sort({ @@ -1205,7 +1229,7 @@ TEST_F(BoundedSorterTest, SpillSorted) { }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 3); + ASSERT_EQ(sorter->stats().spilledRanges(), 3); } TEST_F(BoundedSorterTest, SpillSortedExceptOne) { @@ -1227,12 +1251,16 @@ TEST_F(BoundedSorterTest, SpillSortedExceptOne) { }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 3); + ASSERT_EQ(sorter->stats().spilledRanges(), 3); } TEST_F(BoundedSorterTest, SpillAlmostSorted) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(16); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(16) + .Tracker(&sorterTracker); sorter = makeAsc(options); auto output = sort({ @@ -1250,7 +1278,7 @@ TEST_F(BoundedSorterTest, SpillAlmostSorted) { }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 2); + ASSERT_EQ(sorter->stats().spilledRanges(), 2); } TEST_F(BoundedSorterTest, SpillWrongInput) { @@ -1284,7 +1312,7 @@ TEST_F(BoundedSorterTest, SpillWrongInput) { ASSERT_EQ(output[5].time, 15); ASSERT_EQ(output[6].time, 16); - ASSERT_EQ(sorter->numSpills(), 2); + ASSERT_EQ(sorter->stats().spilledRanges(), 2); // Test that by default, bad input like this would be detected. sorter = makeAsc(options); @@ -1293,8 +1321,13 @@ TEST_F(BoundedSorterTest, SpillWrongInput) { } TEST_F(BoundedSorterTest, LimitNoSpill) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40).Limit(2); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(40) + .Tracker(&sorterTracker) + .Limit(2); sorter = makeAsc(options); auto output = sort( @@ -1317,12 +1350,17 @@ TEST_F(BoundedSorterTest, LimitNoSpill) { ASSERT_EQ(output[0].time, 0); ASSERT_EQ(output[1].time, 3); - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); } TEST_F(BoundedSorterTest, LimitSpill) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40).Limit(3); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(40) + .Tracker(&sorterTracker) + .Limit(3); sorter = makeAsc(options); auto output = sort( @@ -1346,7 +1384,7 @@ TEST_F(BoundedSorterTest, LimitSpill) { ASSERT_EQ(output[1].time, 3); ASSERT_EQ(output[2].time, 10); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); } TEST_F(BoundedSorterTest, DescSorted) { @@ -1591,22 +1629,26 @@ TEST_F(BoundedSorterTest, CompoundLimit) { } TEST_F(BoundedSorterTest, CompoundSpill) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .Tracker(&sorterTracker) + .MaxMemoryUsageBytes(40); sorter = makeAsc(options); // When each partition is small enough, we don't spill. - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); auto output = sort({ {1001}, {1007}, }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); // If any individual partition is large enough, we do spill. sorter->restart(); - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); output = sort({ {1}, {5}, @@ -1621,17 +1663,17 @@ TEST_F(BoundedSorterTest, CompoundSpill) { {7}, }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); // If later partitions are small again, they don't spill. sorter->restart(); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); output = sort({ {11}, {17}, }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); } } // namespace |