summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShin Yee Tan <shinyee.tan@mongodb.com>2022-07-15 19:02:18 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-15 20:23:32 +0000
commitdc6803c67067003e6575fdd57e7c387fbcb8b23b (patch)
tree1fd3302e8cf77043f8a641f255bf6587b023b7dc
parenta2c6e80b303962a370a6a05b2fdde3a6d31a0f61 (diff)
downloadmongo-dc6803c67067003e6575fdd57e7c387fbcb8b23b.tar.gz
SERVER-60455 serverStatus metrics for external sorter
-rw-r--r--jstests/noPassthrough/column_store_index_load.js37
-rw-r--r--jstests/noPassthrough/serverstatus_indexbulkbuilder.js47
-rw-r--r--src/mongo/db/exec/SConscript1
-rw-r--r--src/mongo/db/exec/sbe/SConscript2
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.cpp4
-rw-r--r--src/mongo/db/exec/sort_executor.h2
-rw-r--r--src/mongo/db/index/SConscript3
-rw-r--r--src/mongo/db/index/column_store_sorter.cpp16
-rw-r--r--src/mongo/db/index/column_store_sorter.h14
-rw-r--r--src/mongo/db/index/column_store_sorter_test.cpp10
-rw-r--r--src/mongo/db/index/columns_access_method.cpp6
-rw-r--r--src/mongo/db/index/index_access_method.cpp13
-rw-r--r--src/mongo/db/index/index_access_method.h1
-rw-r--r--src/mongo/db/pipeline/SConscript1
-rw-r--r--src/mongo/db/pipeline/document_source_bucket_auto.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_sort.cpp4
-rw-r--r--src/mongo/db/sorter/SConscript9
-rw-r--r--src/mongo/db/sorter/sorter.cpp34
-rw-r--r--src/mongo/db/sorter/sorter.h55
-rw-r--r--src/mongo/db/sorter/sorter_stats.cpp59
-rw-r--r--src/mongo/db/sorter/sorter_stats.h78
-rw-r--r--src/mongo/db/sorter/sorter_stats_test.cpp68
-rw-r--r--src/mongo/db/sorter/sorter_test.cpp154
23 files changed, 485 insertions, 135 deletions
diff --git a/jstests/noPassthrough/column_store_index_load.js b/jstests/noPassthrough/column_store_index_load.js
index 757a2eeb73d..92baccbe06e 100644
--- a/jstests/noPassthrough/column_store_index_load.js
+++ b/jstests/noPassthrough/column_store_index_load.js
@@ -79,6 +79,12 @@ const testProjection = {
"bar.baz": 1,
};
+const maxMemUsageBytes = 20000;
+const numDocs = testDocs.length;
+const approxDocSize = 800;
+const approxMemoryUsage = numDocs * approxDocSize;
+const expectedSpilledRanges = Math.ceil(approxMemoryUsage / maxMemUsageBytes);
+
// The test query would normally not qualify for a column store index plan, because it projects a
// large number of fields. We raise the limit on the number of fields to allow column store plans
// for the purposes of this test.
@@ -125,7 +131,9 @@ assert.docEq({
count: NumberLong(1),
resumed: NumberLong(0),
filesOpenedForExternalSort: NumberLong(0),
- filesClosedForExternalSort: NumberLong(0)
+ filesClosedForExternalSort: NumberLong(0),
+ spilledRanges: NumberLong(0),
+ bytesSpilled: NumberLong(0),
},
statsAfterInMemoryBuild.indexBulkBuilder);
@@ -135,19 +143,30 @@ bulkLoadExternalColl.drop();
assert.commandWorked(db.adminCommand({
configureFailPoint: "constrainMemoryForBulkBuild",
mode: "alwaysOn",
- data: {maxBytes: 20000}
+ data: {maxBytes: maxMemUsageBytes},
}));
loadDocs(bulkLoadExternalColl, testDocs);
assert.commandWorked(bulkLoadExternalColl.createIndex({"$**": "columnstore"}));
const statsAfterExternalLoad = assert.commandWorked(db.runCommand({serverStatus: 1}));
-assert.docEq({
- count: NumberLong(2),
- resumed: NumberLong(0),
- filesOpenedForExternalSort: NumberLong(1),
- filesClosedForExternalSort: NumberLong(1)
-},
- statsAfterExternalLoad.indexBulkBuilder);
+let indexBulkBuilderSection = statsAfterExternalLoad.indexBulkBuilder;
+assert.eq(indexBulkBuilderSection.count, 2, tojson(indexBulkBuilderSection));
+assert.eq(indexBulkBuilderSection.resumed, 0, tojson(indexBulkBuilderSection));
+assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection));
+assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection));
+// Note: The number of spills in the external sorter depends on the size of C++ data structures,
+// which can be different between architectures. The test allows a range of reasonable values.
+assert.between(expectedSpilledRanges - 1,
+ indexBulkBuilderSection.spilledRanges,
+ expectedSpilledRanges + 1,
+ tojson(indexBulkBuilderSection),
+ true);
+// We can only approximate the memory usage and bytes that will be spilled.
+assert.between(0,
+ indexBulkBuilderSection.bytesSpilled,
+ approxMemoryUsage,
+ tojson(indexBulkBuilderSection),
+ true);
// Perfom the online load.
onlineLoadColl.drop();
diff --git a/jstests/noPassthrough/serverstatus_indexbulkbuilder.js b/jstests/noPassthrough/serverstatus_indexbulkbuilder.js
index 74a3baa0c27..0a3f2d77ff0 100644
--- a/jstests/noPassthrough/serverstatus_indexbulkbuilder.js
+++ b/jstests/noPassthrough/serverstatus_indexbulkbuilder.js
@@ -12,9 +12,15 @@
load('jstests/noPassthrough/libs/index_build.js');
+const maxMemUsageMegabytes = 50;
+const numDocs = 10;
+const fieldSize = 10 * 1024 * 1024;
+const approxMemoryUsage = numDocs * fieldSize;
+let expectedSpilledRanges = approxMemoryUsage / (maxMemUsageMegabytes * 1024 * 1024);
+
const replSet = new ReplSetTest({
nodes: 1,
- nodeOptions: {setParameter: {maxIndexBuildMemoryUsageMegabytes: 50}},
+ nodeOptions: {setParameter: {maxIndexBuildMemoryUsageMegabytes: maxMemUsageMegabytes}},
});
replSet.startSet();
replSet.initiate();
@@ -23,10 +29,10 @@ let primary = replSet.getPrimary();
let testDB = primary.getDB('test');
let coll = testDB.getCollection('t');
-for (let i = 0; i < 10; i++) {
+for (let i = 0; i < numDocs; i++) {
assert.commandWorked(coll.insert({
_id: i,
- a: 'a'.repeat(10 * 1024 * 1024),
+ a: 'a'.repeat(fieldSize),
}));
}
@@ -41,6 +47,13 @@ assert.eq(indexBulkBuilderSection.count, 1, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.resumed, 0, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection));
+assert.eq(
+ indexBulkBuilderSection.spilledRanges, expectedSpilledRanges, tojson(indexBulkBuilderSection));
+assert.between(0,
+ indexBulkBuilderSection.bytesSpilled,
+ approxMemoryUsage,
+ tojson(indexBulkBuilderSection),
+ true);
// Shut down server during an index to verify 'resumable' value on restart.
IndexBuildTest.pauseIndexBuilds(primary);
@@ -68,9 +81,16 @@ assert.eq(indexBulkBuilderSection.resumed, 1, tojson(indexBulkBuilderSection));
// and read it back on startup.
assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection));
+assert.eq(indexBulkBuilderSection.spilledRanges, 1, tojson(indexBulkBuilderSection));
+assert.between(0,
+ indexBulkBuilderSection.bytesSpilled,
+ approxMemoryUsage,
+ tojson(indexBulkBuilderSection),
+ true);
// Confirm that metrics are updated during initial sync.
-const newNode = replSet.add({setParameter: {maxIndexBuildMemoryUsageMegabytes: 50}});
+const newNode =
+ replSet.add({setParameter: {maxIndexBuildMemoryUsageMegabytes: maxMemUsageMegabytes}});
replSet.reInitiate();
replSet.waitForState(newNode, ReplSetTest.State.SECONDARY);
replSet.awaitReplication();
@@ -85,6 +105,17 @@ indexBulkBuilderSection = newNodeTestDB.serverStatus().indexBulkBuilder;
assert.gte(indexBulkBuilderSection.count, 3, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 1, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 1, tojson(indexBulkBuilderSection));
+// We try building two indexes for the test collection so the memory usage limit for each index
+// build during initial sync is the maxIndexBuildMemoryUsageMegabytes divided by the number of index
+// builds. We end up with half of the in-memory memory so we double the amount of spills expected.
+expectedSpilledRanges *= 2;
+assert.eq(
+ indexBulkBuilderSection.spilledRanges, expectedSpilledRanges, tojson(indexBulkBuilderSection));
+assert.between(0,
+ indexBulkBuilderSection.bytesSpilled,
+ approxMemoryUsage,
+ tojson(indexBulkBuilderSection),
+ true);
// Building multiple indexes in a single createIndex command increases count by the number of
// indexes requested.
@@ -99,6 +130,14 @@ assert.eq(indexBulkBuilderSection.count, 4, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.resumed, 1, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesOpenedForExternalSort, 2, tojson(indexBulkBuilderSection));
assert.eq(indexBulkBuilderSection.filesClosedForExternalSort, 2, tojson(indexBulkBuilderSection));
+expectedSpilledRanges += 2;
+assert.eq(
+ indexBulkBuilderSection.spilledRanges, expectedSpilledRanges, tojson(indexBulkBuilderSection));
+assert.between(0,
+ indexBulkBuilderSection.bytesSpilled,
+ approxMemoryUsage,
+ tojson(indexBulkBuilderSection),
+ true);
replSet.stopSet();
})();
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index b2368e49830..992201118d8 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -84,6 +84,7 @@ sortExecutorEnv.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
],
)
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 859cf09da2f..9805906a59c 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -61,6 +61,7 @@ sbeEnv.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/bson/dotted_path_support',
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
],
)
@@ -107,6 +108,7 @@ sbeEnv.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/bson/dotted_path_support',
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
'query_sbe',
'query_sbe_storage',
],
diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp
index 0968b0bea68..6181229ff7e 100644
--- a/src/mongo/db/exec/sbe/stages/sort.cpp
+++ b/src/mongo/db/exec/sbe/stages/sort.cpp
@@ -215,11 +215,11 @@ void SortStage::open(bool reOpen) {
_specificStats.totalDataSizeBytes += _sorter->totalDataSizeSorted();
_mergeIt.reset(_sorter->done());
- _specificStats.spills += _sorter->numSpills();
+ _specificStats.spills += _sorter->stats().spilledRanges();
_specificStats.keysSorted += _sorter->numSorted();
auto& metricsCollector = ResourceConsumption::MetricsCollector::get(_opCtx);
metricsCollector.incrementKeysSorted(_sorter->numSorted());
- metricsCollector.incrementSorterSpills(_sorter->numSpills());
+ metricsCollector.incrementSorterSpills(_sorter->stats().spilledRanges());
_children[0]->close();
}
diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h
index 6509c767e9a..8120f52caa9 100644
--- a/src/mongo/db/exec/sort_executor.h
+++ b/src/mongo/db/exec/sort_executor.h
@@ -139,7 +139,7 @@ public:
}
_output.reset(_sorter->done());
_stats.keysSorted += _sorter->numSorted();
- _stats.spills += _sorter->numSpills();
+ _stats.spills += _sorter->stats().spilledRanges();
_stats.totalDataSizeBytes += _sorter->totalDataSizeSorted();
_sorter.reset();
}
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript
index f71d78da857..8ddebaefe07 100644
--- a/src/mongo/db/index/SConscript
+++ b/src/mongo/db/index/SConscript
@@ -105,6 +105,7 @@ iamEnv.Library(
'$BUILD_DIR/mongo/db/resumable_index_builds_idl',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
'$BUILD_DIR/mongo/db/storage/encryption_hooks',
'$BUILD_DIR/mongo/db/storage/execution_context',
'$BUILD_DIR/mongo/db/storage/index_entry_comparison',
@@ -128,6 +129,7 @@ iamEnv.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
'$BUILD_DIR/mongo/db/storage/encryption_hooks',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/s/is_mongos',
@@ -188,6 +190,7 @@ indexTestEnv.CppUnitTest(
'$BUILD_DIR/mongo/db/query/query_test_service_context',
'$BUILD_DIR/mongo/db/query/sort_pattern',
'$BUILD_DIR/mongo/db/record_id_helpers',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
'column_store_index',
'expression_params',
'key_generator',
diff --git a/src/mongo/db/index/column_store_sorter.cpp b/src/mongo/db/index/column_store_sorter.cpp
index ef82ad9bf13..af2c3c1411f 100644
--- a/src/mongo/db/index/column_store_sorter.cpp
+++ b/src/mongo/db/index/column_store_sorter.cpp
@@ -76,11 +76,13 @@ ColumnStoreSorter::Value ColumnStoreSorter::Value::deserializeForSorter(
ColumnStoreSorter::ColumnStoreSorter(size_t maxMemoryUsageBytes,
StringData dbName,
- SorterFileStats* stats)
- : _dbName(dbName.toString()),
- _stats(stats),
+ SorterFileStats* stats,
+ SorterTracker* tracker)
+ : SorterBase(tracker),
+ _dbName(dbName.toString()),
+ _fileStats(stats),
_maxMemoryUsageBytes(maxMemoryUsageBytes),
- _spillFile(std::make_shared<Sorter<Key, Value>::File>(pathForNewSpillFile(), _stats)) {}
+ _spillFile(std::make_shared<Sorter<Key, Value>::File>(pathForNewSpillFile(), _fileStats)) {}
void ColumnStoreSorter::add(PathView path, const RecordId& recordId, CellView cellContents) {
auto& cellListAtPath = _dataByPath[path];
@@ -123,9 +125,9 @@ void ColumnStoreSorter::spill() {
if (_dataByPath.empty()) {
return;
}
- ++_numSpills;
+ this->_stats.incrementSpilledRanges();
- SortedFileWriter<Key, Value> writer(makeSortOptions(_dbName, _stats), _spillFile, {});
+ SortedFileWriter<Key, Value> writer(makeSortOptions(_dbName, _fileStats), _spillFile, {});
// Cells loaded into memory are sorted by record id but not yet sorted by path. We perform that
// sort now, so that we can output cells sorted by (path, rid) for later consumption by our
@@ -212,7 +214,7 @@ ColumnStoreSorter::Iterator* ColumnStoreSorter::done() {
spill();
return SortIteratorInterface<Key, Value>::merge(
- _spilledFileIterators, makeSortOptions(_dbName, _stats), ComparisonForPathAndRid());
+ _spilledFileIterators, makeSortOptions(_dbName, _fileStats), ComparisonForPathAndRid());
}
/**
diff --git a/src/mongo/db/index/column_store_sorter.h b/src/mongo/db/index/column_store_sorter.h
index 56f203c5f64..9eed5e33538 100644
--- a/src/mongo/db/index/column_store_sorter.h
+++ b/src/mongo/db/index/column_store_sorter.h
@@ -50,16 +50,15 @@ namespace mongo {
* preferable to defer the cost of sorting to the end in order to avoid the cost of a binary tree
* traversal for each inserted cell.
*/
-class ColumnStoreSorter {
+class ColumnStoreSorter : public SorterBase {
public:
- ColumnStoreSorter(size_t maxMemoryUsageBytes, StringData dbName, SorterFileStats* stats);
+ ColumnStoreSorter(size_t maxMemoryUsageBytes,
+ StringData dbName,
+ SorterFileStats* stats,
+ SorterTracker* tracker = nullptr);
void add(PathView path, const RecordId& recordId, CellView cellContents);
- size_t numSpills() const {
- return _numSpills;
- }
-
struct Key {
PathView path;
RecordId recordId;
@@ -114,11 +113,10 @@ private:
Iterator* inMemoryIterator() const;
const std::string _dbName;
- SorterFileStats* _stats; // Unowned
+ SorterFileStats* _fileStats; // Unowned
const size_t _maxMemoryUsageBytes;
size_t _memUsed = 0;
- size_t _numSpills = 0;
/**
* Mapping from path name to the sorted list of (RecordId, Cell) pairs.
diff --git a/src/mongo/db/index/column_store_sorter_test.cpp b/src/mongo/db/index/column_store_sorter_test.cpp
index 85c1005efad..57534ed4b78 100644
--- a/src/mongo/db/index/column_store_sorter_test.cpp
+++ b/src/mongo/db/index/column_store_sorter_test.cpp
@@ -69,11 +69,11 @@ TEST(ColumnStoreSorter, SortTest) {
// We test two sorters: one that can perform the sort in memory and one that is constrained so
// that it must spill to disk.
- SorterFileStats statsForInMemorySorter;
+ SorterFileStats statsForInMemorySorter(nullptr);
auto inMemorySorter = std::make_unique<ColumnStoreSorter>(
1000000 /* maxMemoryUsageBytes */, "dbName", &statsForInMemorySorter);
- SorterFileStats statsForExternalSorter;
+ SorterFileStats statsForExternalSorter(nullptr);
auto externalSorter = std::make_unique<ColumnStoreSorter>(
500 /* maxMemoryUsageBytes */, "dbName", &statsForExternalSorter);
@@ -122,9 +122,9 @@ TEST(ColumnStoreSorter, SortTest) {
// Ensure that statistics for spills and file accesses are as expected.
// Note: The number of spills in the external sorter depends on the size of C++ data structures,
// which can be different between architectures. The test allows a range of reasonable values.
- ASSERT_EQ(0, inMemorySorter->numSpills());
- ASSERT_LTE(3, externalSorter->numSpills());
- ASSERT_GTE(5, externalSorter->numSpills());
+ ASSERT_EQ(0, inMemorySorter->stats().spilledRanges());
+ ASSERT_LTE(3, externalSorter->stats().spilledRanges());
+ ASSERT_GTE(5, externalSorter->stats().spilledRanges());
ASSERT_EQ(0, statsForInMemorySorter.opened.load());
ASSERT_EQ(0, statsForInMemorySorter.closed.load());
diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp
index fca25a83669..b29ec20967a 100644
--- a/src/mongo/db/index/columns_access_method.cpp
+++ b/src/mongo/db/index/columns_access_method.cpp
@@ -108,7 +108,8 @@ private:
ColumnStoreAccessMethod::BulkBuilder::BulkBuilder(ColumnStoreAccessMethod* index,
size_t maxMemoryUsageBytes,
StringData dbName)
- : _columnsAccess(index), _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats()) {
+ : _columnsAccess(index),
+ _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats(), bulkBuilderTracker()) {
countNewBuildInStats();
}
@@ -116,7 +117,8 @@ ColumnStoreAccessMethod::BulkBuilder::BulkBuilder(ColumnStoreAccessMethod* index
size_t maxMemoryUsageBytes,
const IndexStateInfo& stateInfo,
StringData dbName)
- : _columnsAccess(index), _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats()) {
+ : _columnsAccess(index),
+ _sorter(maxMemoryUsageBytes, dbName, bulkBuilderFileStats(), bulkBuilderTracker()) {
countResumedBuildInStats();
// TODO SERVER-66925: Add this support.
tasserted(6548103, "No support for resuming interrupted columnstore index builds.");
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index f9cd8f7c7c2..bd12a9add26 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -99,6 +99,8 @@ public:
builder.append("resumed", resumed.loadRelaxed());
builder.append("filesOpenedForExternalSort", sorterFileStats.opened.loadRelaxed());
builder.append("filesClosedForExternalSort", sorterFileStats.closed.loadRelaxed());
+ builder.append("spilledRanges", sorterTracker.spilledRanges.loadRelaxed());
+ builder.append("bytesSpilled", sorterTracker.bytesSpilled.loadRelaxed());
return builder.obj();
}
@@ -109,11 +111,15 @@ public:
// This value should not exceed 'count'.
AtomicWord<long long> resumed;
+ // Sorter statistics that are aggregate of all sorters.
+ SorterTracker sorterTracker;
+
// Number of times the external sorter opened/closed a file handle to spill data to disk.
// This pair of counters in aggregate indicate the number of open file handles used by
// the external sorter and may be useful in diagnosing situations where the process is
// close to exhausting this finite resource.
- SorterFileStats sorterFileStats;
+ SorterFileStats sorterFileStats = {&sorterTracker};
+
} indexBulkBuilderSSS;
/**
@@ -133,6 +139,7 @@ SortOptions makeSortOptions(size_t maxMemoryUsageBytes, StringData dbName, Sorte
.ExtSortAllowed()
.MaxMemoryUsageBytes(maxMemoryUsageBytes)
.FileStats(stats)
+ .Tracker(&indexBulkBuilderSSS.sorterTracker)
.DBName(dbName.toString());
}
@@ -622,6 +629,10 @@ SorterFileStats* IndexAccessMethod::BulkBuilder::bulkBuilderFileStats() {
return &indexBulkBuilderSSS.sorterFileStats;
}
+SorterTracker* IndexAccessMethod::BulkBuilder::bulkBuilderTracker() {
+ return &indexBulkBuilderSSS.sorterTracker;
+}
+
void IndexAccessMethod::BulkBuilder::yield(OperationContext* opCtx,
const Yieldable* yieldable,
const NamespaceString& ns) {
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index 5136faed942..7190e31131a 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -225,6 +225,7 @@ public:
static void countNewBuildInStats();
static void countResumedBuildInStats();
static SorterFileStats* bulkBuilderFileStats();
+ static SorterTracker* bulkBuilderTracker();
/**
* Abandon the current snapshot and release then reacquire locks. Tests that target the
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index ee68344f22c..169a1e816bd 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -392,6 +392,7 @@ pipelineEnv.Library(
'$BUILD_DIR/mongo/db/query/projection_ast',
'$BUILD_DIR/mongo/db/repl/image_collection_entry',
'$BUILD_DIR/mongo/db/sorter/sorter_idl',
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
'$BUILD_DIR/mongo/db/timeseries/timeseries_conversion_util',
'$BUILD_DIR/mongo/db/timeseries/timeseries_options',
'$BUILD_DIR/mongo/rpc/command_status',
diff --git a/src/mongo/db/pipeline/document_source_bucket_auto.cpp b/src/mongo/db/pipeline/document_source_bucket_auto.cpp
index a2d00ad2472..bb16662791f 100644
--- a/src/mongo/db/pipeline/document_source_bucket_auto.cpp
+++ b/src/mongo/db/pipeline/document_source_bucket_auto.cpp
@@ -221,7 +221,7 @@ void DocumentSourceBucketAuto::initializeBucketIteration() {
auto& metricsCollector = ResourceConsumption::MetricsCollector::get(pExpCtx->opCtx);
metricsCollector.incrementKeysSorted(_sorter->numSorted());
- metricsCollector.incrementSorterSpills(_sorter->numSpills());
+ metricsCollector.incrementSorterSpills(_sorter->stats().spilledRanges());
_sorter.reset();
diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp
index 62436bffa84..ce416165e66 100644
--- a/src/mongo/db/pipeline/document_source_sort.cpp
+++ b/src/mongo/db/pipeline/document_source_sort.cpp
@@ -300,8 +300,8 @@ void DocumentSourceSort::serializeToArray(
if (explain >= ExplainOptions::Verbosity::kExecStats) {
mutDoc["totalDataSizeSortedBytesEstimate"] =
Value(static_cast<long long>(_timeSorter->totalDataSizeBytes()));
- mutDoc["usedDisk"] = Value(_timeSorter->numSpills() > 0);
- mutDoc["spills"] = Value(static_cast<long long>(_timeSorter->numSpills()));
+ mutDoc["usedDisk"] = Value(_timeSorter->stats().spilledRanges() > 0);
+ mutDoc["spills"] = Value(static_cast<long long>(_timeSorter->stats().spilledRanges()));
}
array.push_back(Value{mutDoc.freeze()});
diff --git a/src/mongo/db/sorter/SConscript b/src/mongo/db/sorter/SConscript
index fc796b9319f..7e9be557984 100644
--- a/src/mongo/db/sorter/SConscript
+++ b/src/mongo/db/sorter/SConscript
@@ -9,6 +9,7 @@ sorterEnv.CppUnitTest(
target='db_sorter_test',
source=[
'sorter_test.cpp',
+ 'sorter_stats_test.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/exec/document_value/document_value',
@@ -18,9 +19,16 @@ sorterEnv.CppUnitTest(
'$BUILD_DIR/mongo/s/is_mongos',
'$BUILD_DIR/third_party/shim_snappy',
'sorter_idl',
+ 'sorter_stats',
],
)
+sorterEnv.Library(target='sorter_stats', source=[
+ 'sorter_stats.cpp',
+], LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/commands/server_status_core',
+])
+
env.Library(
target='sorter_idl',
source=[
@@ -28,6 +36,7 @@ env.Library(
],
LIBDEPS=[
"$BUILD_DIR/mongo/base",
+ '$BUILD_DIR/mongo/db/sorter/sorter_stats',
'$BUILD_DIR/mongo/idl/idl_parser',
],
)
diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp
index ecb9e10118e..ec6a0a0e4cf 100644
--- a/src/mongo/db/sorter/sorter.cpp
+++ b/src/mongo/db/sorter/sorter.cpp
@@ -597,7 +597,6 @@ protected:
* following:
*
* {1, 2, 3, 4, 5}
- * {12, 3, 4, 5}
* {12, 34, 5}
* {1234, 5}
*/
@@ -647,7 +646,7 @@ protected:
auto iteratorPtr = std::shared_ptr<Iterator>(writer.done());
mergeIterator->closeSource();
mergedIterators.push_back(std::move(iteratorPtr));
- this->_numSpills++;
+ this->_stats.incrementSpilledRanges();
}
LOGV2_DEBUG(6033101,
@@ -708,7 +707,7 @@ public:
this->_opts.dbName,
range.getChecksum());
});
- this->_numSpills = this->_iters.size();
+ this->_stats.setSpilledRanges(this->_iters.size());
}
void add(const Key& key, const Value& val) {
@@ -800,7 +799,7 @@ private:
_memUsed = 0;
- this->_numSpills++;
+ this->_stats.incrementSpilledRanges();
}
bool _done = false;
@@ -817,7 +816,7 @@ public:
typedef SortIteratorInterface<Key, Value> Iterator;
LimitOneSorter(const SortOptions& opts, const Comparator& comp)
- : _comp(comp), _haveData(false) {
+ : Sorter<Key, Value>(opts), _comp(comp), _haveData(false) {
verify(opts.limit == 1);
}
@@ -1084,7 +1083,7 @@ private:
_memUsed = 0;
- this->_numSpills++;
+ this->_stats.incrementSpilledRanges();
}
bool _done = false;
@@ -1106,14 +1105,16 @@ private:
template <typename Key, typename Value>
Sorter<Key, Value>::Sorter(const SortOptions& opts)
- : _opts(opts),
+ : SorterBase(opts.sorterTracker),
+ _opts(opts),
_file(opts.extSortAllowed ? std::make_shared<Sorter<Key, Value>::File>(
opts.tempDir + "/" + nextFileName(), opts.sorterFileStats)
: nullptr) {}
template <typename Key, typename Value>
Sorter<Key, Value>::Sorter(const SortOptions& opts, const std::string& fileName)
- : _opts(opts),
+ : SorterBase(opts.sorterTracker),
+ _opts(opts),
_file(std::make_shared<Sorter<Key, Value>::File>(opts.tempDir + "/" + fileName,
opts.sorterFileStats)) {
invariant(opts.extSortAllowed);
@@ -1136,6 +1137,15 @@ typename Sorter<Key, Value>::PersistedState Sorter<Key, Value>::persistDataForSh
}
template <typename Key, typename Value>
+Sorter<Key, Value>::File::File(std::string path, SorterFileStats* stats)
+ : _path(std::move(path)), _stats(stats) {
+ invariant(!_path.empty());
+ if (_stats && boost::filesystem::exists(_path) && boost::filesystem::is_regular_file(_path)) {
+ _stats->addSpilledDataSize(boost::filesystem::file_size(_path));
+ }
+}
+
+template <typename Key, typename Value>
Sorter<Key, Value>::File::~File() {
if (_stats && _file.is_open()) {
_stats->closed.addAndFetch(1);
@@ -1196,6 +1206,9 @@ void Sorter<Key, Value>::File::write(const char* data, std::streamsize size) {
try {
_file.write(data, size);
_offset += size;
+ if (_stats) {
+ this->_stats->addSpilledDataSize(size);
+ };
} catch (const std::system_error& ex) {
if (ex.code() == std::errc::no_space_on_device) {
uasserted(ErrorCodes::OutOfDiskSpace,
@@ -1351,7 +1364,8 @@ BoundedSorter<Key, Value, Comparator, BoundMaker>::BoundedSorter(const SortOptio
Comparator comp,
BoundMaker makeBound,
bool checkInput)
- : compare(comp),
+ : BoundedSorterInterface<Key, Value>(opts),
+ compare(comp),
makeBound(makeBound),
_comparePairs{compare},
_checkInput(checkInput),
@@ -1509,7 +1523,7 @@ void BoundedSorter<Key, Value, Comparator, BoundMaker>::_spill() {
<< " bytes, but did not opt in to external sorting.",
_opts.extSortAllowed);
- ++_numSpills;
+ this->_stats.incrementSpilledRanges();
// Write out all the values from the heap in sorted order.
SortedFileWriter<Key, Value> writer(_opts, _file, {});
diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h
index 85c0f279ac2..159ac89ce4a 100644
--- a/src/mongo/db/sorter/sorter.h
+++ b/src/mongo/db/sorter/sorter.h
@@ -43,6 +43,7 @@
#include "mongo/bson/util/builder.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/sorter/sorter_gen.h"
+#include "mongo/db/sorter/sorter_stats.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/bufreader.h"
@@ -96,14 +97,6 @@
namespace mongo {
/**
- * For collecting file usage metrics.
- */
-struct SorterFileStats {
- AtomicWord<long long> opened;
- AtomicWord<long long> closed;
-};
-
-/**
* Runtime options that control the Sorter's behavior
*/
struct SortOptions {
@@ -130,6 +123,9 @@ struct SortOptions {
// If set, allows us to observe Sorter file handle usage.
SorterFileStats* sorterFileStats;
+ // If set, allows us to observe aggregate Sorter behaviors.
+ SorterTracker* sorterTracker;
+
// If set to true and sorted data fits into memory, sorted data will be moved into iterator
// instead of copying.
bool moveSortedDataIntoIterator;
@@ -139,6 +135,7 @@ struct SortOptions {
maxMemoryUsageBytes(64 * 1024 * 1024),
extSortAllowed(false),
sorterFileStats(nullptr),
+ sorterTracker(nullptr),
moveSortedDataIntoIterator(false) {}
// Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true)
@@ -173,6 +170,11 @@ struct SortOptions {
return *this;
}
+ SortOptions& Tracker(SorterTracker* newSorterTracker) {
+ sorterTracker = newSorterTracker;
+ return *this;
+ }
+
SortOptions& MoveSortedDataIntoIterator(bool newMoveSortedDataIntoIterator = true) {
moveSortedDataIntoIterator = newMoveSortedDataIntoIterator;
return *this;
@@ -241,6 +243,18 @@ protected:
SortIteratorInterface() {} // can only be constructed as a base
};
+class SorterBase {
+public:
+ SorterBase(SorterTracker* sorterTracker = nullptr) : _stats(sorterTracker) {}
+
+ const SorterStats& stats() const {
+ return _stats;
+ }
+
+protected:
+ SorterStats _stats;
+};
+
/**
* This is the way to input data to the sorting framework.
*
@@ -257,7 +271,7 @@ protected:
* nextFileName() for example.
*/
template <typename Key, typename Value>
-class Sorter {
+class Sorter : public SorterBase {
Sorter(const Sorter&) = delete;
Sorter& operator=(const Sorter&) = delete;
@@ -279,10 +293,7 @@ public:
*/
class File {
public:
- File(std::string path, SorterFileStats* stats = nullptr)
- : _path(std::move(path)), _stats(stats) {
- invariant(!_path.empty());
- }
+ File(std::string path, SorterFileStats* stats = nullptr);
~File();
@@ -365,10 +376,6 @@ public:
virtual ~Sorter() {}
- size_t numSpills() const {
- return _numSpills;
- }
-
size_t numSorted() const {
return _numSorted;
}
@@ -380,8 +387,6 @@ public:
PersistedState persistDataForShutdown();
protected:
- Sorter() {} // can only be constructed as a base
-
virtual void spill() = 0;
size_t _numSorted = 0; // Keeps track of the number of keys sorted.
@@ -391,14 +396,16 @@ protected:
std::shared_ptr<File> _file;
- std::size_t _numSpills = 0; // Keeps track of the number of spills that have happened.
std::vector<std::shared_ptr<Iterator>> _iters; // Data that has already been spilled.
};
template <typename Key, typename Value>
-class BoundedSorterInterface {
+class BoundedSorterInterface : public SorterBase {
+
public:
+ BoundedSorterInterface(const SortOptions& opts) : SorterBase(opts.sorterTracker) {}
+
virtual ~BoundedSorterInterface() {}
// Feed one item of input to the sorter.
@@ -439,7 +446,6 @@ public:
virtual Document serializeBound() const = 0;
virtual size_t totalDataSizeBytes() const = 0;
- virtual size_t numSpills() const = 0;
virtual size_t limit() const = 0;
// By default, uassert that the input meets our assumptions of being almost-sorted.
@@ -525,10 +531,6 @@ public:
return _totalDataSizeSorted;
}
- size_t numSpills() const {
- return _numSpills;
- }
-
size_t limit() const {
return _opts.limit;
}
@@ -563,7 +565,6 @@ private:
std::shared_ptr<typename Sorter<Key, Value>::File> _file;
std::shared_ptr<SpillIterator> _spillIter;
- std::size_t _numSpills = 0; // Keeps track of the number of spills that have happened.
boost::optional<Key> _min;
bool _done = false;
diff --git a/src/mongo/db/sorter/sorter_stats.cpp b/src/mongo/db/sorter/sorter_stats.cpp
new file mode 100644
index 00000000000..ec68b02efcd
--- /dev/null
+++ b/src/mongo/db/sorter/sorter_stats.cpp
@@ -0,0 +1,59 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/sorter/sorter_stats.h"
+
+#include "mongo/util/assert_util_core.h"
+
+namespace mongo {
+SorterStats::SorterStats(SorterTracker* sorterTracker) : _sorterTracker(sorterTracker){};
+
+void SorterStats::incrementSpilledRanges() {
+ _spilledRanges++;
+ if (_sorterTracker) {
+ _sorterTracker->spilledRanges.fetchAndAdd(1);
+ }
+}
+
+void SorterStats::setSpilledRanges(long long spills) {
+ invariant(_spilledRanges == 0);
+ _spilledRanges = spills;
+ if (_sorterTracker) {
+ _sorterTracker->spilledRanges.fetchAndAdd(spills);
+ }
+}
+
+SorterFileStats::SorterFileStats(SorterTracker* sorterTracker) : _sorterTracker(sorterTracker){};
+
+void SorterFileStats::addSpilledDataSize(long long data) {
+ if (_sorterTracker) {
+ _sorterTracker->bytesSpilled.fetchAndAdd(data);
+ }
+}
+} // namespace mongo
diff --git a/src/mongo/db/sorter/sorter_stats.h b/src/mongo/db/sorter/sorter_stats.h
new file mode 100644
index 00000000000..05bfa8998d4
--- /dev/null
+++ b/src/mongo/db/sorter/sorter_stats.h
@@ -0,0 +1,78 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/atomic_word.h"
+
+namespace mongo {
+
+struct SorterTracker {
+ AtomicWord<long long> spilledRanges;
+ AtomicWord<long long> bytesSpilled;
+};
+
+/**
+ * For collecting file usage metrics.
+ */
+class SorterFileStats {
+public:
+ SorterFileStats(SorterTracker* sorterTracker);
+
+ void addSpilledDataSize(long long data);
+
+ AtomicWord<long long> opened;
+ AtomicWord<long long> closed;
+
+private:
+ SorterTracker* _sorterTracker;
+};
+
+class SorterStats {
+public:
+ SorterStats(SorterTracker* sorterTracker);
+
+ void incrementSpilledRanges();
+
+ /**
+ * Sets the number of spilled ranges to the specified amount. Cannot be called after
+ * incrementSpilledRanges.
+ */
+ void setSpilledRanges(long long spills);
+
+ long long spilledRanges() const {
+ return _spilledRanges;
+ }
+
+private:
+ long long _spilledRanges = 0;
+
+ // All SorterStats update the SorterTracker to report sorter statistics for the
+ // server.
+ SorterTracker* _sorterTracker;
+};
+} // namespace mongo
diff --git a/src/mongo/db/sorter/sorter_stats_test.cpp b/src/mongo/db/sorter/sorter_stats_test.cpp
new file mode 100644
index 00000000000..e94d9d4efb8
--- /dev/null
+++ b/src/mongo/db/sorter/sorter_stats_test.cpp
@@ -0,0 +1,68 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/sorter/sorter_stats.h"
+#include "mongo/unittest/death_test.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+TEST(SorterStatsTest, Basic) {
+ SorterTracker sorterTracker;
+ SorterStats sorterStats(&sorterTracker);
+
+ sorterStats.incrementSpilledRanges();
+ ASSERT_EQ(sorterTracker.spilledRanges.load(), 1);
+}
+
+TEST(SorterStatsTest, MultipleSorters) {
+ SorterTracker sorterTracker;
+ SorterStats sorterStats1(&sorterTracker);
+ SorterStats sorterStats2(&sorterTracker);
+ SorterStats sorterStats3(&sorterTracker);
+
+ sorterStats1.incrementSpilledRanges();
+ sorterStats2.incrementSpilledRanges();
+ ASSERT_EQ(sorterTracker.spilledRanges.load(), 2);
+
+ sorterStats3.setSpilledRanges(10);
+ ASSERT_EQ(sorterTracker.spilledRanges.load(), 12);
+}
+
+DEATH_TEST(SorterStatsTest, SetNonZeroNumSpilledRanges, "invariant") {
+ SorterTracker sorterTracker;
+ SorterStats sorterStats(&sorterTracker);
+
+ sorterStats.incrementSpilledRanges();
+ ASSERT_EQ(sorterTracker.spilledRanges.load(), 1);
+
+ sorterStats.setSpilledRanges(10);
+}
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/sorter/sorter_test.cpp b/src/mongo/db/sorter/sorter_test.cpp
index aa5a8dc4078..0394e1f0fa0 100644
--- a/src/mongo/db/sorter/sorter_test.cpp
+++ b/src/mongo/db/sorter/sorter_test.cpp
@@ -331,41 +331,55 @@ class SortedFileWriterAndFileIteratorTests {
public:
void run() {
unittest::TempDir tempDir("sortedFileWriterTests");
- SorterFileStats sorterFileStats;
+ SorterTracker sorterTracker;
+ SorterFileStats sorterFileStats(&sorterTracker);
const SortOptions opts = SortOptions().TempDir(tempDir.path()).FileStats(&sorterFileStats);
- auto makeFile = [&] {
- return std::make_shared<Sorter<IntWrapper, IntWrapper>::File>(
- opts.tempDir + "/" + nextFileName(), opts.sorterFileStats);
- };
- { // small
- SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, makeFile());
- sorter.addAlreadySorted(0, 0);
- sorter.addAlreadySorted(1, -1);
- sorter.addAlreadySorted(2, -2);
- sorter.addAlreadySorted(3, -3);
- sorter.addAlreadySorted(4, -4);
- ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()),
- std::make_shared<IntIterator>(0, 5));
- }
+ int currentFileSize = 0;
+
+ // small
+ currentFileSize = _appendToFile(&opts, currentFileSize, 5);
ASSERT_EQ(sorterFileStats.opened.load(), 1);
ASSERT_EQ(sorterFileStats.closed.load(), 1);
+ ASSERT_LTE(sorterTracker.bytesSpilled.load(), currentFileSize);
- { // big
- SortedFileWriter<IntWrapper, IntWrapper> sorter(opts, makeFile());
- for (int i = 0; i < 10 * 1000 * 1000; i++)
- sorter.addAlreadySorted(i, -i);
-
- ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()),
- std::make_shared<IntIterator>(0, 10 * 1000 * 1000));
- }
+ // big
+ currentFileSize = _appendToFile(&opts, currentFileSize, 10 * 1000 * 1000);
ASSERT_EQ(sorterFileStats.opened.load(), 2);
ASSERT_EQ(sorterFileStats.closed.load(), 2);
+ ASSERT_LTE(sorterTracker.bytesSpilled.load(), currentFileSize);
ASSERT(boost::filesystem::is_empty(tempDir.path()));
}
+
+private:
+ int _appendToFile(const SortOptions* opts, int currentFileSize, int range) {
+ auto makeFile = [&] {
+ return std::make_shared<Sorter<IntWrapper, IntWrapper>::File>(
+ opts->tempDir + "/" + nextFileName(), opts->sorterFileStats);
+ };
+
+ int currentBufSize = 0;
+ SortedFileWriter<IntWrapper, IntWrapper> sorter(*opts, makeFile());
+ for (int i = 0; i < range; ++i) {
+ sorter.addAlreadySorted(i, -i);
+ currentBufSize += sizeof(i) + sizeof(-i);
+
+ if (currentBufSize > static_cast<int>(kSortedFileBufferSize)) {
+ // File size only increases if buffer size exceeds limit and spills. Each spill
+ // includes the buffer and the size of the spill.
+ currentFileSize += currentBufSize + sizeof(uint32_t);
+ currentBufSize = 0;
+ }
+ }
+ ASSERT_ITERATORS_EQUIVALENT(std::shared_ptr<IWIterator>(sorter.done()),
+ std::make_shared<IntIterator>(0, range));
+ // Anything left in-memory is spilled to disk when sorter.done().
+ currentFileSize += currentBufSize + sizeof(uint32_t);
+ return currentFileSize;
+ }
};
@@ -448,7 +462,9 @@ public:
void run() {
unittest::TempDir tempDir("sorterTests");
- const SortOptions opts = SortOptions().TempDir(tempDir.path()).ExtSortAllowed();
+ SorterTracker sorterTracker;
+ const SortOptions opts =
+ SortOptions().TempDir(tempDir.path()).ExtSortAllowed().Tracker(&sorterTracker);
{ // test empty (no limit)
ASSERT_ITERATORS_EQUIVALENT(done(makeSorter(opts).get()),
@@ -582,7 +598,7 @@ public:
return 0;
}
- virtual size_t correctNumSpills() const {
+ virtual size_t correctSpilledRanges() const {
return 0;
}
@@ -606,13 +622,13 @@ private:
if (numRanges == 0)
return;
- auto numSpillsOccurred = correctNumSpills();
+ auto numSpilledRangesOccurred = correctSpilledRanges();
auto state = sorter->persistDataForShutdown();
if (opts.extSortAllowed) {
ASSERT_NE(state.fileName, "");
}
ASSERT_EQ(state.ranges.size(), numRanges);
- ASSERT_EQ(sorter->numSpills(), numSpillsOccurred);
+ ASSERT_EQ(sorter->stats().spilledRanges(), numSpilledRangesOccurred);
}
};
@@ -712,7 +728,7 @@ public:
static_cast<std::size_t>(2));
}
- size_t correctNumSpills() const override {
+ size_t correctSpilledRanges() const override {
// We add 1 to the calculation since the call to persistDataForShutdown() spills the
// remaining in-memory Sorter data to disk, adding one extra range.
std::size_t spillsToMerge = NUM_ITEMS * sizeof(IWPair) / MEM_LIMIT + 1;
@@ -857,11 +873,12 @@ DEATH_TEST_F(SorterMakeFromExistingRangesTest, EmptyFileName, "!fileName.empty()
TEST_F(SorterMakeFromExistingRangesTest, SkipFileCheckingOnEmptyRanges) {
auto fileName = "unused_sorter_file";
- auto opts = SortOptions().ExtSortAllowed().TempDir("unused_temp_dir");
+ SorterTracker sorterTracker;
+ auto opts = SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").Tracker(&sorterTracker);
auto sorter = std::unique_ptr<IWSorter>(
IWSorter::makeFromExistingRanges(fileName, {}, opts, IWComparator(ASC)));
- ASSERT_EQ(0, sorter->numSpills());
+ ASSERT_EQ(0, sorter->stats().spilledRanges());
auto iter = std::unique_ptr<IWIterator>(sorter->done());
ASSERT_EQ(0, sorter->numSorted());
@@ -907,12 +924,13 @@ TEST_F(SorterMakeFromExistingRangesTest, CorruptedFile) {
ofs << "invalid sorter data";
}
auto fileName = tempFilePath.filename().string();
- auto opts = SortOptions().ExtSortAllowed().TempDir(tempDir.path());
+ SorterTracker sorterTracker;
+ auto opts = SortOptions().ExtSortAllowed().TempDir(tempDir.path()).Tracker(&sorterTracker);
auto sorter = std::unique_ptr<IWSorter>(
IWSorter::makeFromExistingRanges(fileName, makeSampleRanges(), opts, IWComparator(ASC)));
// The number of spills is set when NoLimitSorter is constructed from existing ranges.
- ASSERT_EQ(makeSampleRanges().size(), sorter->numSpills());
+ ASSERT_EQ(makeSampleRanges().size(), sorter->stats().spilledRanges());
ASSERT_EQ(0, sorter->numSorted());
// 16817 - error reading file.
@@ -921,11 +939,13 @@ TEST_F(SorterMakeFromExistingRangesTest, CorruptedFile) {
TEST_F(SorterMakeFromExistingRangesTest, RoundTrip) {
unittest::TempDir tempDir(_agent.getSuiteName() + "_" + _agent.getTestName());
+ SorterTracker sorterTracker;
auto opts = SortOptions()
.ExtSortAllowed()
.TempDir(tempDir.path())
- .MaxMemoryUsageBytes(sizeof(IWSorter::Data));
+ .MaxMemoryUsageBytes(sizeof(IWSorter::Data))
+ .Tracker(&sorterTracker);
IWPair pairInsertedBeforeShutdown(1, 100);
@@ -949,7 +969,7 @@ TEST_F(SorterMakeFromExistingRangesTest, RoundTrip) {
IWSorter::makeFromExistingRanges(state.fileName, state.ranges, opts, IWComparator(ASC)));
// The number of spills is set when NoLimitSorter is constructed from existing ranges.
- ASSERT_EQ(state.ranges.size(), sorter->numSpills());
+ ASSERT_EQ(state.ranges.size(), sorter->stats().spilledRanges());
// Ensure that the restored sorter can accept additional data.
IWPair pairInsertedAfterStartup(2, 200);
@@ -1188,8 +1208,12 @@ TEST_F(BoundedSorterTest, MemoryLimitsNoExtSortAllowed) {
}
TEST_F(BoundedSorterTest, SpillSorted) {
- auto options =
- SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(16);
+ SorterTracker sorterTracker;
+ auto options = SortOptions()
+ .ExtSortAllowed()
+ .TempDir("unused_temp_dir")
+ .MaxMemoryUsageBytes(16)
+ .Tracker(&sorterTracker);
sorter = makeAsc(options);
auto output = sort({
@@ -1205,7 +1229,7 @@ TEST_F(BoundedSorterTest, SpillSorted) {
});
assertSorted(output);
- ASSERT_EQ(sorter->numSpills(), 3);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 3);
}
TEST_F(BoundedSorterTest, SpillSortedExceptOne) {
@@ -1227,12 +1251,16 @@ TEST_F(BoundedSorterTest, SpillSortedExceptOne) {
});
assertSorted(output);
- ASSERT_EQ(sorter->numSpills(), 3);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 3);
}
TEST_F(BoundedSorterTest, SpillAlmostSorted) {
- auto options =
- SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(16);
+ SorterTracker sorterTracker;
+ auto options = SortOptions()
+ .ExtSortAllowed()
+ .TempDir("unused_temp_dir")
+ .MaxMemoryUsageBytes(16)
+ .Tracker(&sorterTracker);
sorter = makeAsc(options);
auto output = sort({
@@ -1250,7 +1278,7 @@ TEST_F(BoundedSorterTest, SpillAlmostSorted) {
});
assertSorted(output);
- ASSERT_EQ(sorter->numSpills(), 2);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 2);
}
TEST_F(BoundedSorterTest, SpillWrongInput) {
@@ -1284,7 +1312,7 @@ TEST_F(BoundedSorterTest, SpillWrongInput) {
ASSERT_EQ(output[5].time, 15);
ASSERT_EQ(output[6].time, 16);
- ASSERT_EQ(sorter->numSpills(), 2);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 2);
// Test that by default, bad input like this would be detected.
sorter = makeAsc(options);
@@ -1293,8 +1321,13 @@ TEST_F(BoundedSorterTest, SpillWrongInput) {
}
TEST_F(BoundedSorterTest, LimitNoSpill) {
- auto options =
- SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40).Limit(2);
+ SorterTracker sorterTracker;
+ auto options = SortOptions()
+ .ExtSortAllowed()
+ .TempDir("unused_temp_dir")
+ .MaxMemoryUsageBytes(40)
+ .Tracker(&sorterTracker)
+ .Limit(2);
sorter = makeAsc(options);
auto output = sort(
@@ -1317,12 +1350,17 @@ TEST_F(BoundedSorterTest, LimitNoSpill) {
ASSERT_EQ(output[0].time, 0);
ASSERT_EQ(output[1].time, 3);
- ASSERT_EQ(sorter->numSpills(), 0);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 0);
}
TEST_F(BoundedSorterTest, LimitSpill) {
- auto options =
- SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40).Limit(3);
+ SorterTracker sorterTracker;
+ auto options = SortOptions()
+ .ExtSortAllowed()
+ .TempDir("unused_temp_dir")
+ .MaxMemoryUsageBytes(40)
+ .Tracker(&sorterTracker)
+ .Limit(3);
sorter = makeAsc(options);
auto output = sort(
@@ -1346,7 +1384,7 @@ TEST_F(BoundedSorterTest, LimitSpill) {
ASSERT_EQ(output[1].time, 3);
ASSERT_EQ(output[2].time, 10);
- ASSERT_EQ(sorter->numSpills(), 1);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 1);
}
TEST_F(BoundedSorterTest, DescSorted) {
@@ -1591,22 +1629,26 @@ TEST_F(BoundedSorterTest, CompoundLimit) {
}
TEST_F(BoundedSorterTest, CompoundSpill) {
- auto options =
- SortOptions().ExtSortAllowed().TempDir("unused_temp_dir").MaxMemoryUsageBytes(40);
+ SorterTracker sorterTracker;
+ auto options = SortOptions()
+ .ExtSortAllowed()
+ .TempDir("unused_temp_dir")
+ .Tracker(&sorterTracker)
+ .MaxMemoryUsageBytes(40);
sorter = makeAsc(options);
// When each partition is small enough, we don't spill.
- ASSERT_EQ(sorter->numSpills(), 0);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 0);
auto output = sort({
{1001},
{1007},
});
assertSorted(output);
- ASSERT_EQ(sorter->numSpills(), 0);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 0);
// If any individual partition is large enough, we do spill.
sorter->restart();
- ASSERT_EQ(sorter->numSpills(), 0);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 0);
output = sort({
{1},
{5},
@@ -1621,17 +1663,17 @@ TEST_F(BoundedSorterTest, CompoundSpill) {
{7},
});
assertSorted(output);
- ASSERT_EQ(sorter->numSpills(), 1);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 1);
// If later partitions are small again, they don't spill.
sorter->restart();
- ASSERT_EQ(sorter->numSpills(), 1);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 1);
output = sort({
{11},
{17},
});
assertSorted(output);
- ASSERT_EQ(sorter->numSpills(), 1);
+ ASSERT_EQ(sorter->stats().spilledRanges(), 1);
}
} // namespace