diff options
-rw-r--r-- | jstests/noPassthrough/serverstatus_indexbulkbuilder.js | 47 | ||||
-rw-r--r-- | src/mongo/db/exec/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/SConscript | 6 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/sort.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/exec/sort_executor.h | 2 | ||||
-rw-r--r-- | src/mongo/db/index/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.h | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_bucket_auto.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_sort.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/sorter/SConscript | 9 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter.h | 55 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter_stats.cpp | 59 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter_stats.h | 78 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter_stats_test.cpp | 68 | ||||
-rw-r--r-- | src/mongo/db/sorter/sorter_test.cpp | 154 |
18 files changed, 453 insertions, 106 deletions
diff --git a/jstests/noPassthrough/serverstatus_indexbulkbuilder.js b/jstests/noPassthrough/serverstatus_indexbulkbuilder.js index ca72e721b1f..7d10f5fcc49 100644 --- a/jstests/noPassthrough/serverstatus_indexbulkbuilder.js +++ b/jstests/noPassthrough/serverstatus_indexbulkbuilder.js @@ -12,9 +12,15 @@ load('jstests/noPassthrough/libs/index_build.js'); +const maxMemUsageMegabytes = 50; +const numDocs = 10; +const fieldSize = 10 * 1024 * 1024; +const approxMemoryUsage = numDocs * fieldSize; +let expectedSpilledRanges = approxMemoryUsage / (maxMemUsageMegabytes * 1024 * 1024); + const replSet = new ReplSetTest({ nodes: 1, - nodeOptions: {setParameter: {maxIndexBuildMemoryUsageMegabytes: 50}}, + nodeOptions: {setParameter: {maxIndexBuildMemoryUsageMegabytes: maxMemUsageMegabytes}}, }); replSet.startSet(); replSet.initiate(); @@ -23,10 +29,10 @@ let primary = replSet.getPrimary(); let testDB = primary.getDB('test'); let coll = testDB.getCollection('t'); -for (let i = 0; i < 10; i++) { +for (let i = 0; i < numDocs; i++) { assert.commandWorked(coll.insert({ _id: i, - a: 'a'.repeat(10 * 1024 * 1024), + a: 'a'.repeat(fieldSize), })); } @@ -41,6 +47,13 @@ assert.eq(indexBulkBuilderSection.count, 1, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.resumed, 0, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection)); +assert.eq( + indexBulkBuilderSection.spilledRanges, expectedSpilledRanges, tojson(indexBulkBuilderSection)); +assert.between(0, + indexBulkBuilderSection.bytesSpilled, + approxMemoryUsage, + tojson(indexBulkBuilderSection), + true); // Shut down server during an index to verify 'resumable' value on restart. IndexBuildTest.pauseIndexBuilds(primary); @@ -68,9 +81,16 @@ assert.eq(indexBulkBuilderSection.resumed, 1, tojson(indexBulkBuilderSection)); // and read it back on startup. assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection)); +assert.eq(indexBulkBuilderSection.spilledRanges, 1, tojson(indexBulkBuilderSection)); +assert.between(0, + indexBulkBuilderSection.bytesSpilled, + approxMemoryUsage, + tojson(indexBulkBuilderSection), + true); // Confirm that metrics are updated during initial sync. -const newNode = replSet.add({setParameter: {maxIndexBuildMemoryUsageMegabytes: 50}}); +const newNode = + replSet.add({setParameter: {maxIndexBuildMemoryUsageMegabytes: maxMemUsageMegabytes}}); replSet.reInitiate(); replSet.waitForState(newNode, ReplSetTest.State.SECONDARY); replSet.awaitReplication(); @@ -85,6 +105,17 @@ indexBulkBuilderSection = newNodeTestDB.serverStatus().indexBulkBuilder; assert.gte(indexBulkBuilderSection.count, 3, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection)); +// We try building two indexes for the test collection so the memory usage limit for each index +// build during initial sync is the maxIndexBuildMemoryUsageMegabytes divided by the number of index +// builds. We end up with half of the in-memory memory so we double the amount of spills expected. +expectedSpilledRanges *= 2; +assert.eq( + indexBulkBuilderSection.spilledRanges, expectedSpilledRanges, tojson(indexBulkBuilderSection)); +assert.between(0, + indexBulkBuilderSection.bytesSpilled, + approxMemoryUsage, + tojson(indexBulkBuilderSection), + true); // Building multiple indexes in a single createIndex command increases count by the number of // indexes requested. @@ -99,6 +130,14 @@ assert.eq(indexBulkBuilderSection.count, 4, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.resumed, 1, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 2, tojson(indexBulkBuilderSection)); assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 2, tojson(indexBulkBuilderSection)); +expectedSpilledRanges += 2; +assert.eq( + indexBulkBuilderSection.spilledRanges, expectedSpilledRanges, tojson(indexBulkBuilderSection)); +assert.between(0, + indexBulkBuilderSection.bytesSpilled, + approxMemoryUsage, + tojson(indexBulkBuilderSection), + true); replSet.stopSet(); })(); diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index c2caf69f0f2..e8d1cb729a7 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -84,6 +84,7 @@ sortExecutorEnv.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', ], ) diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript index b99625b0833..7113466affb 100644 --- a/src/mongo/db/exec/sbe/SConscript +++ b/src/mongo/db/exec/sbe/SConscript @@ -59,8 +59,9 @@ sbeEnv.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/bson/dotted_path_support', '$BUILD_DIR/mongo/db/sorter/sorter_idl', - ] - ) + '$BUILD_DIR/mongo/db/sorter/sorter_stats', + ], +) sbeEnv.Library( target='query_sbe_stages', @@ -106,6 +107,7 @@ sbeEnv.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/bson/dotted_path_support', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', 'query_sbe', 'query_sbe_storage', ] diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp index 5acf73afe8d..eebdc0c2443 100644 --- a/src/mongo/db/exec/sbe/stages/sort.cpp +++ b/src/mongo/db/exec/sbe/stages/sort.cpp @@ -213,11 +213,11 @@ void SortStage::open(bool reOpen) { _specificStats.totalDataSizeBytes += _sorter->totalDataSizeSorted(); _mergeIt.reset(_sorter->done()); - _specificStats.spills += _sorter->numSpills(); + _specificStats.spills += _sorter->stats().spilledRanges(); _specificStats.keysSorted += _sorter->numSorted(); auto& metricsCollector = ResourceConsumption::MetricsCollector::get(_opCtx); metricsCollector.incrementKeysSorted(_sorter->numSorted()); - metricsCollector.incrementSorterSpills(_sorter->numSpills()); + metricsCollector.incrementSorterSpills(_sorter->stats().spilledRanges()); _children[0]->close(); } diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h index 6509c767e9a..8120f52caa9 100644 --- a/src/mongo/db/exec/sort_executor.h +++ b/src/mongo/db/exec/sort_executor.h @@ -139,7 +139,7 @@ public: } _output.reset(_sorter->done()); _stats.keysSorted += _sorter->numSorted(); - _stats.spills += _sorter->numSpills(); + _stats.spills += _sorter->stats().spilledRanges(); _stats.totalDataSizeBytes += _sorter->totalDataSizeSorted(); _sorter.reset(); } diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index 48ab6f723cc..34dab260123 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -108,6 +108,7 @@ serveronlyEnv.Library( '$BUILD_DIR/mongo/db/resumable_index_builds_idl', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/db/storage/encryption_hooks', '$BUILD_DIR/mongo/db/storage/execution_context', '$BUILD_DIR/mongo/db/storage/index_entry_comparison', @@ -167,6 +168,7 @@ env.CppUnitTest( '$BUILD_DIR/mongo/db/query/query_test_service_context', '$BUILD_DIR/mongo/db/query/sort_pattern', '$BUILD_DIR/mongo/db/record_id_helpers', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', 'expression_params', 'key_generator', ], diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 48621af7a33..36d968efc07 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -99,6 +99,8 @@ public: builder.append("resumed", resumed.loadRelaxed()); builder.append("filesOpenedForExternalSort", sorterFileStats.opened.loadRelaxed()); builder.append("filesClosedForExternalSort", sorterFileStats.closed.loadRelaxed()); + builder.append("spilledRanges", sorterTracker.spilledRanges.loadRelaxed()); + builder.append("bytesSpilled", sorterTracker.bytesSpilled.loadRelaxed()); return builder.obj(); } @@ -109,11 +111,15 @@ public: // This value should not exceed 'count'. AtomicWord<long long> resumed; + // Sorter statistics that are aggregate of all sorters. + SorterTracker sorterTracker; + // Number of times the external sorter opened/closed a file handle to spill data to disk. // This pair of counters in aggregate indicate the number of open file handles used by // the external sorter and may be useful in diagnosing situations where the process is // close to exhausting this finite resource. - SorterFileStats sorterFileStats; + SorterFileStats sorterFileStats = {&sorterTracker}; + } indexBulkBuilderSSS; /** @@ -133,6 +139,7 @@ SortOptions makeSortOptions(size_t maxMemoryUsageBytes, StringData dbName) { .ExtSortAllowed() .MaxMemoryUsageBytes(maxMemoryUsageBytes) .FileStats(&indexBulkBuilderSSS.sorterFileStats) + .Tracker(&indexBulkBuilderSSS.sorterTracker) .DBName(dbName.toString()); } @@ -609,6 +616,23 @@ Ident* SortedDataIndexAccessMethod::getIdentPtr() const { return this->_newInterface.get(); } +void IndexAccessMethod::BulkBuilder::countNewBuildInStats() { + indexBulkBuilderSSS.count.addAndFetch(1); +} + +void IndexAccessMethod::BulkBuilder::countResumedBuildInStats() { + indexBulkBuilderSSS.count.addAndFetch(1); + indexBulkBuilderSSS.resumed.addAndFetch(1); +} + +SorterFileStats* IndexAccessMethod::BulkBuilder::bulkBuilderFileStats() { + return &indexBulkBuilderSSS.sorterFileStats; +} + +SorterTracker* IndexAccessMethod::BulkBuilder::bulkBuilderTracker() { + return &indexBulkBuilderSSS.sorterTracker; +} + class SortedDataIndexAccessMethod::BulkBuilderImpl final : public IndexAccessMethod::BulkBuilder { public: using Sorter = mongo::Sorter<KeyString::Value, mongo::NullValue>; diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index ad61cc5a25e..190da10d908 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -40,6 +40,7 @@ #include "mongo/db/jsobj.h" #include "mongo/db/operation_context.h" #include "mongo/db/record_id.h" +#include "mongo/db/sorter/sorter.h" #include "mongo/db/storage/sorted_data_interface.h" #include "mongo/db/yieldable.h" @@ -218,6 +219,12 @@ public: * Persists on disk the keys that have been inserted using this BulkBuilder. */ virtual IndexStateInfo persistDataForShutdown() = 0; + + protected: + static void countNewBuildInStats(); + static void countResumedBuildInStats(); + static SorterFileStats* bulkBuilderFileStats(); + static SorterTracker* bulkBuilderTracker(); }; /** diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 749d3adc6e0..ac80e4dc7b3 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -395,6 +395,7 @@ pipelineEnv.Library( '$BUILD_DIR/mongo/db/query/projection_ast', '$BUILD_DIR/mongo/db/repl/image_collection_entry', '$BUILD_DIR/mongo/db/sorter/sorter_idl', + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util', '$BUILD_DIR/mongo/db/timeseries/timeseries_options', '$BUILD_DIR/mongo/rpc/command_status', diff --git a/src/mongo/db/pipeline/document_source_bucket_auto.cpp b/src/mongo/db/pipeline/document_source_bucket_auto.cpp index 10300e9c334..b0349125a98 100644 --- a/src/mongo/db/pipeline/document_source_bucket_auto.cpp +++ b/src/mongo/db/pipeline/document_source_bucket_auto.cpp @@ -221,7 +221,7 @@ void DocumentSourceBucketAuto::initializeBucketIteration() { auto& metricsCollector = ResourceConsumption::MetricsCollector::get(pExpCtx->opCtx); metricsCollector.incrementKeysSorted(_sorter->numSorted()); - metricsCollector.incrementSorterSpills(_sorter->numSpills()); + metricsCollector.incrementSorterSpills(_sorter->stats().spilledRanges()); _sorter.reset(); diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index 3ba411e0499..711641c8cd6 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -304,8 +304,8 @@ void DocumentSourceSort::serializeToArray( if (explain >= ExplainOptions::Verbosity::kExecStats) { mutDoc["totalDataSizeSortedBytesEstimate"] = Value(static_cast<long long>(_timeSorter->totalDataSizeBytes())); - mutDoc["usedDisk"] = Value(_timeSorter->numSpills() > 0); - mutDoc["spills"] = Value(static_cast<long long>(_timeSorter->numSpills())); + mutDoc["usedDisk"] = Value(_timeSorter->stats().spilledRanges() > 0); + mutDoc["spills"] = Value(static_cast<long long>(_timeSorter->stats().spilledRanges())); } array.push_back(Value{mutDoc.freeze()}); diff --git a/src/mongo/db/sorter/SConscript b/src/mongo/db/sorter/SConscript index ae8184f222d..a560eac0642 100644 --- a/src/mongo/db/sorter/SConscript +++ b/src/mongo/db/sorter/SConscript @@ -9,6 +9,7 @@ sorterEnv.CppUnitTest( target='db_sorter_test', source=[ 'sorter_test.cpp', + 'sorter_stats_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/db/exec/document_value/document_value', @@ -18,9 +19,16 @@ sorterEnv.CppUnitTest( '$BUILD_DIR/mongo/s/is_mongos', '$BUILD_DIR/third_party/shim_snappy', 'sorter_idl', + 'sorter_stats', ], ) +sorterEnv.Library(target='sorter_stats', source=[ + 'sorter_stats.cpp', +], LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/commands/server_status_core', +]) + env.Library( target='sorter_idl', source=[ @@ -28,6 +36,7 @@ env.Library( ], LIBDEPS=[ "$BUILD_DIR/mongo/base", + '$BUILD_DIR/mongo/db/sorter/sorter_stats', '$BUILD_DIR/mongo/idl/idl_parser', ] ) diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp index e11a020f3ab..54a60723ec1 100644 --- a/src/mongo/db/sorter/sorter.cpp +++ b/src/mongo/db/sorter/sorter.cpp @@ -604,7 +604,6 @@ protected: * following: * * {1, 2, 3, 4, 5} - * {12, 3, 4, 5} * {12, 34, 5} * {1234, 5} */ @@ -654,7 +653,7 @@ protected: auto iteratorPtr = std::shared_ptr<Iterator>(writer.done()); mergeIterator->closeSource(); mergedIterators.push_back(std::move(iteratorPtr)); - this->_numSpills++; + this->_stats.incrementSpilledRanges(); } LOGV2_DEBUG(6033101, @@ -715,7 +714,7 @@ public: this->_opts.dbName, range.getChecksum()); }); - this->_numSpills = this->_iters.size(); + this->_stats.setSpilledRanges(this->_iters.size()); } void add(const Key& key, const Value& val) { @@ -807,7 +806,7 @@ private: _memUsed = 0; - this->_numSpills++; + this->_stats.incrementSpilledRanges(); } bool _done = false; @@ -824,7 +823,7 @@ public: typedef SortIteratorInterface<Key, Value> Iterator; LimitOneSorter(const SortOptions& opts, const Comparator& comp) - : _comp(comp), _haveData(false) { + : Sorter<Key, Value>(opts), _comp(comp), _haveData(false) { verify(opts.limit == 1); } @@ -1094,7 +1093,7 @@ private: _memUsed = 0; - this->_numSpills++; + this->_stats.incrementSpilledRanges(); } bool _done = false; @@ -1116,14 +1115,16 @@ private: template <typename Key, typename Value> Sorter<Key, Value>::Sorter(const SortOptions& opts) - : _opts(opts), + : SorterBase(opts.sorterTracker), + _opts(opts), _file(opts.extSortAllowed ? std::make_shared<Sorter<Key, Value>::File>( opts.tempDir + "/" + nextFileName(), opts.sorterFileStats) : nullptr) {} template <typename Key, typename Value> Sorter<Key, Value>::Sorter(const SortOptions& opts, const std::string& fileName) - : _opts(opts), + : SorterBase(opts.sorterTracker), + _opts(opts), _file(std::make_shared<Sorter<Key, Value>::File>(opts.tempDir + "/" + fileName, opts.sorterFileStats)) { invariant(opts.extSortAllowed); @@ -1146,6 +1147,15 @@ typename Sorter<Key, Value>::PersistedState Sorter<Key, Value>::persistDataForSh } template <typename Key, typename Value> +Sorter<Key, Value>::File::File(std::string path, SorterFileStats* stats) + : _path(std::move(path)), _stats(stats) { + invariant(!_path.empty()); + if (_stats && boost::filesystem::exists(_path) && boost::filesystem::is_regular_file(_path)) { + _stats->addSpilledDataSize(boost::filesystem::file_size(_path)); + } +} + +template <typename Key, typename Value> Sorter<Key, Value>::File::~File() { if (_stats && _file.is_open()) { _stats->closed.addAndFetch(1); @@ -1206,6 +1216,9 @@ void Sorter<Key, Value>::File::write(const char* data, std::streamsize size) { try { _file.write(data, size); _offset += size; + if (_stats) { + this->_stats->addSpilledDataSize(size); + }; } catch (const std::system_error& ex) { if (ex.code() == std::errc::no_space_on_device) { uasserted(ErrorCodes::OutOfDiskSpace, @@ -1361,7 +1374,8 @@ BoundedSorter<Key, Value, Comparator, BoundMaker>::BoundedSorter(const SortOptio Comparator comp, BoundMaker makeBound, bool checkInput) - : compare(comp), + : BoundedSorterInterface<Key, Value>(opts), + compare(comp), makeBound(makeBound), _comparePairs{compare}, _checkInput(checkInput), @@ -1520,7 +1534,7 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() { << " bytes, but did not opt in to external sorting.", _opts.extSortAllowed); - ++_numSpills; + this->_stats.incrementSpilledRanges(); // Write out all the values from the heap in sorted order. SortedFileWriter<Key, Value> writer(_opts, _file, {}); diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h index 12bccee1178..5398e4c685b 100644 --- a/src/mongo/db/sorter/sorter.h +++ b/src/mongo/db/sorter/sorter.h @@ -43,6 +43,7 @@ #include "mongo/bson/util/builder.h" #include "mongo/db/exec/document_value/document.h" #include "mongo/db/sorter/sorter_gen.h" +#include "mongo/db/sorter/sorter_stats.h" #include "mongo/platform/atomic_word.h" #include "mongo/util/assert_util.h" #include "mongo/util/bufreader.h" @@ -96,14 +97,6 @@ namespace mongo { /** - * For collecting file usage metrics. - */ -struct SorterFileStats { - AtomicWord<long long> opened; - AtomicWord<long long> closed; -}; - -/** * Runtime options that control the Sorter's behavior */ struct SortOptions { @@ -130,6 +123,9 @@ struct SortOptions { // If set, allows us to observe Sorter file handle usage. SorterFileStats* sorterFileStats; + // If set, allows us to observe aggregate Sorter behaviors. + SorterTracker* sorterTracker; + // If set to true and sorted data fits into memory, sorted data will be moved into iterator // instead of copying. bool moveSortedDataIntoIterator; @@ -139,6 +135,7 @@ struct SortOptions { maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false), sorterFileStats(nullptr), + sorterTracker(nullptr), moveSortedDataIntoIterator(false) {} // Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true) @@ -173,6 +170,11 @@ struct SortOptions { return *this; } + SortOptions& Tracker(SorterTracker* newSorterTracker) { + sorterTracker = newSorterTracker; + return *this; + } + SortOptions& MoveSortedDataIntoIterator(bool newMoveSortedDataIntoIterator = true) { moveSortedDataIntoIterator = newMoveSortedDataIntoIterator; return *this; @@ -241,6 +243,18 @@ protected: SortIteratorInterface() {} // can only be constructed as a base }; +class SorterBase { +public: + SorterBase(SorterTracker* sorterTracker = nullptr) : _stats(sorterTracker) {} + + const SorterStats& stats() const { + return _stats; + } + +protected: + SorterStats _stats; +}; + /** * This is the way to input data to the sorting framework. * @@ -257,7 +271,7 @@ protected: * nextFileName() for example. */ template <typename Key, typename Value> -class Sorter { +class Sorter : public SorterBase { Sorter(const Sorter&) = delete; Sorter& operator=(const Sorter&) = delete; @@ -279,10 +293,7 @@ public: */ class File { public: - File(std::string path, SorterFileStats* stats = nullptr) - : _path(std::move(path)), _stats(stats) { - invariant(!_path.empty()); - } + File(std::string path, SorterFileStats* stats = nullptr); ~File(); @@ -365,10 +376,6 @@ public: virtual ~Sorter() {} - size_t numSpills() const { - return _numSpills; - } - size_t numSorted() const { return _numSorted; } @@ -380,8 +387,6 @@ public: PersistedState persistDataForShutdown(); protected: - Sorter() {} // can only be constructed as a base - virtual void spill() = 0; size_t _numSorted = 0; // Keeps track of the number of keys sorted. @@ -391,14 +396,16 @@ protected: std::shared_ptr<File> _file; - std::size_t _numSpills = 0; // Keeps track of the number of spills that have happened. std::vector<std::shared_ptr<Iterator>> _iters; // Data that has already been spilled. }; template <typename Key, typename Value> -class BoundedSorterInterface { +class BoundedSorterInterface : public SorterBase { + public: + BoundedSorterInterface(const SortOptions& opts) : SorterBase(opts.sorterTracker) {} + virtual ~BoundedSorterInterface() {} // Feed one item of input to the sorter. @@ -439,7 +446,6 @@ public: virtual Document serializeBound() const = 0; virtual size_t totalDataSizeBytes() const = 0; - virtual size_t numSpills() const = 0; virtual size_t limit() const = 0; // By default, uassert that the input meets our assumptions of being almost-sorted. @@ -525,10 +531,6 @@ public: return _totalDataSizeSorted; } - size_t numSpills() const { - return _numSpills; - } - size_t limit() const { return _opts.limit; } @@ -563,7 +565,6 @@ private: std::shared_ptr<typename Sorter<Key, Value>::File> _file; std::shared_ptr<SpillIterator> _spillIter; - std::size_t _numSpills = 0; // Keeps track of the number of spills that have happened. boost::optional<Key> _min; bool _done = false; diff --git a/src/mongo/db/sorter/sorter_stats.cpp b/src/mongo/db/sorter/sorter_stats.cpp new file mode 100644 index 00000000000..ec68b02efcd --- /dev/null +++ b/src/mongo/db/sorter/sorter_stats.cpp @@ -0,0 +1,59 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/sorter/sorter_stats.h" + +#include "mongo/util/assert_util_core.h" + +namespace mongo { +SorterStats::SorterStats(SorterTracker* sorterTracker) : _sorterTracker(sorterTracker){}; + +void SorterStats::incrementSpilledRanges() { + _spilledRanges++; + if (_sorterTracker) { + _sorterTracker->spilledRanges.fetchAndAdd(1); + } +} + +void SorterStats::setSpilledRanges(long long spills) { + invariant(_spilledRanges == 0); + _spilledRanges = spills; + if (_sorterTracker) { + _sorterTracker->spilledRanges.fetchAndAdd(spills); + } +} + +SorterFileStats::SorterFileStats(SorterTracker* sorterTracker) : _sorterTracker(sorterTracker){}; + +void SorterFileStats::addSpilledDataSize(long long data) { + if (_sorterTracker) { + _sorterTracker->bytesSpilled.fetchAndAdd(data); + } +} +} // namespace mongo diff --git a/src/mongo/db/sorter/sorter_stats.h b/src/mongo/db/sorter/sorter_stats.h new file mode 100644 index 00000000000..05bfa8998d4 --- /dev/null +++ b/src/mongo/db/sorter/sorter_stats.h @@ -0,0 +1,78 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/atomic_word.h" + +namespace mongo { + +struct SorterTracker { + AtomicWord<long long> spilledRanges; + AtomicWord<long long> bytesSpilled; +}; + +/** + * For collecting file usage metrics. + */ +class SorterFileStats { +public: + SorterFileStats(SorterTracker* sorterTracker); + + void addSpilledDataSize(long long data); + + AtomicWord<long long> opened; + AtomicWord<long long> closed; + +private: + SorterTracker* _sorterTracker; +}; + +class SorterStats { +public: + SorterStats(SorterTracker* sorterTracker); + + void incrementSpilledRanges(); + + /** + * Sets the number of spilled ranges to the specified amount. Cannot be called after + * incrementSpilledRanges. + */ + void setSpilledRanges(long long spills); + + long long spilledRanges() const { + return _spilledRanges; + } + +private: + long long _spilledRanges = 0; + + // All SorterStats update the SorterTracker to report sorter statistics for the + // server. + SorterTracker* _sorterTracker; +}; +} // namespace mongo diff --git a/src/mongo/db/sorter/sorter_stats_test.cpp b/src/mongo/db/sorter/sorter_stats_test.cpp new file mode 100644 index 00000000000..e94d9d4efb8 --- /dev/null +++ b/src/mongo/db/sorter/sorter_stats_test.cpp @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/sorter/sorter_stats.h" +#include "mongo/unittest/death_test.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace { +TEST(SorterStatsTest, Basic) { + SorterTracker sorterTracker; + SorterStats sorterStats(&sorterTracker); + + sorterStats.incrementSpilledRanges(); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 1); +} + +TEST(SorterStatsTest, MultipleSorters) { + SorterTracker sorterTracker; + SorterStats sorterStats1(&sorterTracker); + SorterStats sorterStats2(&sorterTracker); + SorterStats sorterStats3(&sorterTracker); + + sorterStats1.incrementSpilledRanges(); + sorterStats2.incrementSpilledRanges(); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 2); + + sorterStats3.setSpilledRanges(10); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 12); +} + +DEATH_TEST(SorterStatsTest, SetNonZeroNumSpilledRanges, "invariant") { + SorterTracker sorterTracker; + SorterStats sorterStats(&sorterTracker); + + sorterStats.incrementSpilledRanges(); + ASSERT_EQ(sorterTracker.spilledRanges.load(), 1); + + sorterStats.setSpilledRanges(10); +} +} // namespace +} // namespace mongo diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp index 3388d35b8f5..531a217a41e 100644 --- a/src/mongo/db/sorter/sorter_test.cpp +++ b/src/mongo/db/sorter/sorter_test.cpp @@ -329,41 +329,55 @@ class SortedFileWriterAndFileIteratorTests { public: void run() { unittest::TempDir tempDir("sortedFileWriterTests"); - SorterFileStats sorterFileStats; + SorterTracker sorterTracker; + SorterFileStats sorterFileStats(&sorterTracker); const SortOptions opts = SortOptions().TempDir(tempDir.path()).FileStats(&sorterFileStats); - auto makeFile = [&] { - return std::make_shared<Sorter<IntWrapper, IntWrapper>::File>( - opts.tempDir + "/" + nextFileName(), opts.sorterFileStats); - }; - { // small - SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, makeFile()); - sorter.addAlreadySorted(0, 0); - sorter.addAlreadySorted(1, -1); - sorter.addAlreadySorted(2, -2); - sorter.addAlreadySorted(3, -3); - sorter.addAlreadySorted(4, -4); - ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()), - std::make_shared<IntIterator>(0, 5)); - } + int currentFileSize = 0; + + // small + currentFileSize = _appendToFile(&opts, currentFileSize, 5); ASSERT_EQ(sorterFileStats.opened.load(), 1); ASSERT_EQ(sorterFileStats.closed.load(), 1); + ASSERT_LTE(sorterTracker.bytesSpilled.load(), currentFileSize); - { // big - SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, makeFile()); - for (int i = 0; i < 10 * 1000 * 1000; i++) - sorter.addAlreadySorted(i, -i); - - ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()), - std::make_shared<IntIterator>(0, 10 * 1000 * 1000)); - } + // big + currentFileSize = _appendToFile(&opts, currentFileSize, 10 * 1000 * 1000); ASSERT_EQ(sorterFileStats.opened.load(), 2); ASSERT_EQ(sorterFileStats.closed.load(), 2); + ASSERT_LTE(sorterTracker.bytesSpilled.load(), currentFileSize); ASSERT(boost::filesystem::is_empty(tempDir.path())); } + +private: + int _appendToFile(const SortOptions* opts, int currentFileSize, int range) { + auto makeFile = [&] { + return std::make_shared<Sorter<IntWrapper, IntWrapper>::File>( + opts->tempDir + "/" + nextFileName(), opts->sorterFileStats); + }; + + int currentBufSize = 0; + SortedFileWriter<IntWrapper, IntWrapper> sorter(*opts, makeFile()); + for (int i = 0; i < range; ++i) { + sorter.addAlreadySorted(i, -i); + currentBufSize += sizeof(i) + sizeof(-i); + + if (currentBufSize > static_cast<int>(kSortedFileBufferSize)) { + // File size only increases if buffer size exceeds limit and spills. Each spill + // includes the buffer and the size of the spill. + currentFileSize += currentBufSize + sizeof(uint32_t); + currentBufSize = 0; + } + } + ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()), + std::make_shared<IntIterator>(0, range)); + // Anything left in-memory is spilled to disk when sorter.done(). + currentFileSize += currentBufSize + sizeof(uint32_t); + return currentFileSize; + } }; @@ -446,7 +460,9 @@ public: void run() { unittest::TempDir tempDir("sorterTests"); - const SortOptions opts = SortOptions().TempDir(tempDir.path()).ExtSortAllowed(); + SorterTracker sorterTracker; + const SortOptions opts = + SortOptions().TempDir(tempDir.path()).ExtSortAllowed().Tracker(&sorterTracker); { // test empty (no limit) ASSERT_ITERATORS_EQUIVALENT(done(makeSorter(opts).get()), @@ -580,7 +596,7 @@ public: return 0; } - virtual size_t correctNumSpills() const { + virtual size_t correctSpilledRanges() const { return 0; } @@ -604,13 +620,13 @@ private: if (numRanges == 0) return; - auto numSpillsOccurred = correctNumSpills(); + auto numSpilledRangesOccurred = correctSpilledRanges(); auto state = sorter->persistDataForShutdown(); if (opts.extSortAllowed) { ASSERT_NE(state.fileName, ""); } ASSERT_EQ(state.ranges.size(), numRanges); - ASSERT_EQ(sorter->numSpills(), numSpillsOccurred); + ASSERT_EQ(sorter->stats().spilledRanges(), numSpilledRangesOccurred); } }; @@ -710,7 +726,7 @@ public: static_cast<std::size_t>(2)); } - size_t correctNumSpills() const override { + size_t correctSpilledRanges() const override { // We add 1 to the calculation since the call to persistDataForShutdown() spills the // remaining in-memory Sorter data to disk, adding one extra range. std::size_t spillsToMerge = NUM_ITEMS * sizeof(IWPair) / MEM_LIMIT + 1; @@ -855,11 +871,12 @@ DEATH_TEST_F(SorterMakeFromExistingRangesTest, EmptyFileName, "!fileName.empty() TEST_F(SorterMakeFromExistingRangesTest, SkipFileCheckingOnEmptyRanges) { auto fileName = "unused_sorter_file"; - auto opts = SortOptions().ExtSortAllowed().TempDir("unused_temp_dir"); + SorterTracker sorterTracker; + auto opts = SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").Tracker(&sorterTracker); auto sorter = std::unique_ptr<IWSorter>( IWSorter::makeFromExistingRanges(fileName, {}, opts, IWComparator(ASC))); - ASSERT_EQ(0, sorter->numSpills()); + ASSERT_EQ(0, sorter->stats().spilledRanges()); auto iter = std::unique_ptr<IWIterator>(sorter->done()); ASSERT_EQ(0, sorter->numSorted()); @@ -905,12 +922,13 @@ TEST_F(SorterMakeFromExistingRangesTest, CorruptedFile) { ofs << "invalid sorter data"; } auto fileName = tempFilePath.filename().string(); - auto opts = SortOptions().ExtSortAllowed().TempDir(tempDir.path()); + SorterTracker sorterTracker; + auto opts = SortOptions().ExtSortAllowed().TempDir(tempDir.path()).Tracker(&sorterTracker); auto sorter = std::unique_ptr<IWSorter>( IWSorter::makeFromExistingRanges(fileName, makeSampleRanges(), opts, IWComparator(ASC))); // The number of spills is set when NoLimitSorter is constructed from existing ranges. - ASSERT_EQ(makeSampleRanges().size(), sorter->numSpills()); + ASSERT_EQ(makeSampleRanges().size(), sorter->stats().spilledRanges()); ASSERT_EQ(0, sorter->numSorted()); // 16817 - error reading file. @@ -919,11 +937,13 @@ TEST_F(SorterMakeFromExistingRangesTest, CorruptedFile) { TEST_F(SorterMakeFromExistingRangesTest, RoundTrip) { unittest::TempDir tempDir(_agent.getSuiteName() + "_" + _agent.getTestName()); + SorterTracker sorterTracker; auto opts = SortOptions() .ExtSortAllowed() .TempDir(tempDir.path()) - .MaxMemoryUsageBytes(sizeof(IWSorter::Data)); + .MaxMemoryUsageBytes(sizeof(IWSorter::Data)) + .Tracker(&sorterTracker); IWPair pairInsertedBeforeShutdown(1, 100); @@ -947,7 +967,7 @@ TEST_F(SorterMakeFromExistingRangesTest, RoundTrip) { IWSorter::makeFromExistingRanges(state.fileName, state.ranges, opts, IWComparator(ASC))); // The number of spills is set when NoLimitSorter is constructed from existing ranges. - ASSERT_EQ(state.ranges.size(), sorter->numSpills()); + ASSERT_EQ(state.ranges.size(), sorter->stats().spilledRanges()); // Ensure that the restored sorter can accept additional data. IWPair pairInsertedAfterStartup(2, 200); @@ -1186,8 +1206,12 @@ TEST_F(BoundedSorterTest, MemoryLimitsNoExtSortAllowed) { } TEST_F(BoundedSorterTest, SpillSorted) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(16); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(16) + .Tracker(&sorterTracker); sorter = makeAsc(options); auto output = sort({ @@ -1203,7 +1227,7 @@ TEST_F(BoundedSorterTest, SpillSorted) { }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 3); + ASSERT_EQ(sorter->stats().spilledRanges(), 3); } TEST_F(BoundedSorterTest, SpillSortedExceptOne) { @@ -1225,12 +1249,16 @@ TEST_F(BoundedSorterTest, SpillSortedExceptOne) { }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 3); + ASSERT_EQ(sorter->stats().spilledRanges(), 3); } TEST_F(BoundedSorterTest, SpillAlmostSorted) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(16); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(16) + .Tracker(&sorterTracker); sorter = makeAsc(options); auto output = sort({ @@ -1248,7 +1276,7 @@ TEST_F(BoundedSorterTest, SpillAlmostSorted) { }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 2); + ASSERT_EQ(sorter->stats().spilledRanges(), 2); } TEST_F(BoundedSorterTest, SpillWrongInput) { @@ -1282,7 +1310,7 @@ TEST_F(BoundedSorterTest, SpillWrongInput) { ASSERT_EQ(output[5].time, 15); ASSERT_EQ(output[6].time, 16); - ASSERT_EQ(sorter->numSpills(), 2); + ASSERT_EQ(sorter->stats().spilledRanges(), 2); // Test that by default, bad input like this would be detected. sorter = makeAsc(options); @@ -1291,8 +1319,13 @@ TEST_F(BoundedSorterTest, SpillWrongInput) { } TEST_F(BoundedSorterTest, LimitNoSpill) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40).Limit(2); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(40) + .Tracker(&sorterTracker) + .Limit(2); sorter = makeAsc(options); auto output = sort( @@ -1315,12 +1348,17 @@ TEST_F(BoundedSorterTest, LimitNoSpill) { ASSERT_EQ(output[0].time, 0); ASSERT_EQ(output[1].time, 3); - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); } TEST_F(BoundedSorterTest, LimitSpill) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40).Limit(3); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .MaxMemoryUsageBytes(40) + .Tracker(&sorterTracker) + .Limit(3); sorter = makeAsc(options); auto output = sort( @@ -1344,7 +1382,7 @@ TEST_F(BoundedSorterTest, LimitSpill) { ASSERT_EQ(output[1].time, 3); ASSERT_EQ(output[2].time, 10); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); } TEST_F(BoundedSorterTest, DescSorted) { @@ -1589,22 +1627,26 @@ TEST_F(BoundedSorterTest, CompoundLimit) { } TEST_F(BoundedSorterTest, CompoundSpill) { - auto options = - SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40); + SorterTracker sorterTracker; + auto options = SortOptions() + .ExtSortAllowed() + .TempDir("unused_temp_dir") + .Tracker(&sorterTracker) + .MaxMemoryUsageBytes(40); sorter = makeAsc(options); // When each partition is small enough, we don't spill. - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); auto output = sort({ {1001}, {1007}, }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); // If any individual partition is large enough, we do spill. sorter->restart(); - ASSERT_EQ(sorter->numSpills(), 0); + ASSERT_EQ(sorter->stats().spilledRanges(), 0); output = sort({ {1}, {5}, @@ -1619,17 +1661,17 @@ TEST_F(BoundedSorterTest, CompoundSpill) { {7}, }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); // If later partitions are small again, they don't spill. sorter->restart(); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); output = sort({ {11}, {17}, }); assertSorted(output); - ASSERT_EQ(sorter->numSpills(), 1); + ASSERT_EQ(sorter->stats().spilledRanges(), 1); } } // namespace |